Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

media: rkvdec: Add HEVC support for the VDPU381 variant

The VDPU381 supports HEVC decoding up to 7680x4320@30fps.
It could double that when using both decoder cores.

It supports YUV420 (8 and 10 bits) as well as AFBC (not implemented
here).

The fluster score is 146/147 for JCT-VC-HEVC_V1, tested on ROCK 5B.
None of the other test suites works.

Tested-by: Diederik de Haas <didi.debian@cknow.org> # Rock 5B
Reviewed-by: Nicolas Dufresne <nicolas.dufresne@collabora.com>
Signed-off-by: Detlev Casanova <detlev.casanova@collabora.com>
Signed-off-by: Nicolas Dufresne <nicolas.dufresne@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil+cisco@kernel.org>

authored by

Detlev Casanova and committed by
Hans Verkuil
c9a59dc2 fde24907

+1120
+1
drivers/media/platform/rockchip/rkvdec/Makefile
··· 9 9 rkvdec-hevc-common.o \ 10 10 rkvdec-rcb.o \ 11 11 rkvdec-vdpu381-h264.o \ 12 + rkvdec-vdpu381-hevc.o \ 12 13 rkvdec-vdpu383-h264.o \ 13 14 rkvdec-vp9.o
+335
drivers/media/platform/rockchip/rkvdec/rkvdec-hevc-common.c
··· 21 21 #include "rkvdec.h" 22 22 #include "rkvdec-hevc-common.h" 23 23 24 + /* Store the Short term ref pic set calculated values */ 25 + struct calculated_rps_st_set { 26 + u8 num_delta_pocs; 27 + u8 num_negative_pics; 28 + u8 num_positive_pics; 29 + u8 used_by_curr_pic_s0[16]; 30 + u8 used_by_curr_pic_s1[16]; 31 + s32 delta_poc_s0[16]; 32 + s32 delta_poc_s1[16]; 33 + }; 34 + 35 + void compute_tiles_uniform(struct rkvdec_hevc_run *run, u16 log2_min_cb_size, 36 + u16 width, u16 height, s32 pic_in_cts_width, 37 + s32 pic_in_cts_height, u16 *column_width, u16 *row_height) 38 + { 39 + const struct v4l2_ctrl_hevc_pps *pps = run->pps; 40 + int i; 41 + 42 + for (i = 0; i < pps->num_tile_columns_minus1 + 1; i++) 43 + column_width[i] = ((i + 1) * pic_in_cts_width) / 44 + (pps->num_tile_columns_minus1 + 1) - 45 + (i * pic_in_cts_width) / 46 + (pps->num_tile_columns_minus1 + 1); 47 + 48 + for (i = 0; i < pps->num_tile_rows_minus1 + 1; i++) 49 + row_height[i] = ((i + 1) * pic_in_cts_height) / 50 + (pps->num_tile_rows_minus1 + 1) - 51 + (i * pic_in_cts_height) / 52 + (pps->num_tile_rows_minus1 + 1); 53 + } 54 + 55 + void compute_tiles_non_uniform(struct rkvdec_hevc_run *run, u16 log2_min_cb_size, 56 + u16 width, u16 height, s32 pic_in_cts_width, 57 + s32 pic_in_cts_height, u16 *column_width, u16 *row_height) 58 + { 59 + const struct v4l2_ctrl_hevc_pps *pps = run->pps; 60 + s32 sum = 0; 61 + int i; 62 + 63 + for (i = 0; i < pps->num_tile_columns_minus1; i++) { 64 + column_width[i] = pps->column_width_minus1[i] + 1; 65 + sum += column_width[i]; 66 + } 67 + column_width[i] = pic_in_cts_width - sum; 68 + 69 + sum = 0; 70 + for (i = 0; i < pps->num_tile_rows_minus1; i++) { 71 + row_height[i] = pps->row_height_minus1[i] + 1; 72 + sum += row_height[i]; 73 + } 74 + row_height[i] = pic_in_cts_height - sum; 75 + } 76 + 77 + static void set_ref_poc(struct rkvdec_rps_short_term_ref_set *set, int poc, int value, int flag) 78 + { 79 + switch (poc) { 80 + case 0: 81 + set->delta_poc0 = 
value; 82 + set->used_flag0 = flag; 83 + break; 84 + case 1: 85 + set->delta_poc1 = value; 86 + set->used_flag1 = flag; 87 + break; 88 + case 2: 89 + set->delta_poc2 = value; 90 + set->used_flag2 = flag; 91 + break; 92 + case 3: 93 + set->delta_poc3 = value; 94 + set->used_flag3 = flag; 95 + break; 96 + case 4: 97 + set->delta_poc4 = value; 98 + set->used_flag4 = flag; 99 + break; 100 + case 5: 101 + set->delta_poc5 = value; 102 + set->used_flag5 = flag; 103 + break; 104 + case 6: 105 + set->delta_poc6 = value; 106 + set->used_flag6 = flag; 107 + break; 108 + case 7: 109 + set->delta_poc7 = value; 110 + set->used_flag7 = flag; 111 + break; 112 + case 8: 113 + set->delta_poc8 = value; 114 + set->used_flag8 = flag; 115 + break; 116 + case 9: 117 + set->delta_poc9 = value; 118 + set->used_flag9 = flag; 119 + break; 120 + case 10: 121 + set->delta_poc10 = value; 122 + set->used_flag10 = flag; 123 + break; 124 + case 11: 125 + set->delta_poc11 = value; 126 + set->used_flag11 = flag; 127 + break; 128 + case 12: 129 + set->delta_poc12 = value; 130 + set->used_flag12 = flag; 131 + break; 132 + case 13: 133 + set->delta_poc13 = value; 134 + set->used_flag13 = flag; 135 + break; 136 + case 14: 137 + set->delta_poc14 = value; 138 + set->used_flag14 = flag; 139 + break; 140 + } 141 + } 142 + 24 143 /* 25 144 * Flip one or more matrices along their main diagonal and flatten them 26 145 * before writing it to the memory. 
··· 239 120 sizeof(struct v4l2_ctrl_hevc_scaling_matrix)); 240 121 } 241 122 123 + static void rkvdec_hevc_assemble_hw_lt_rps(struct rkvdec_hevc_run *run, struct rkvdec_rps *rps) 124 + { 125 + const struct v4l2_ctrl_hevc_sps *sps = run->sps; 126 + 127 + if (!run->ext_sps_lt_rps) 128 + return; 129 + 130 + for (int i = 0; i < sps->num_long_term_ref_pics_sps; i++) { 131 + rps->refs[i].lt_ref_pic_poc_lsb = 132 + run->ext_sps_lt_rps[i].lt_ref_pic_poc_lsb_sps; 133 + rps->refs[i].used_by_curr_pic_lt_flag = 134 + !!(run->ext_sps_lt_rps[i].flags & V4L2_HEVC_EXT_SPS_LT_RPS_FLAG_USED_LT); 135 + } 136 + } 137 + 138 + static void rkvdec_hevc_assemble_hw_st_rps(struct rkvdec_hevc_run *run, struct rkvdec_rps *rps, 139 + struct calculated_rps_st_set *calculated_rps_st_sets) 140 + { 141 + const struct v4l2_ctrl_hevc_sps *sps = run->sps; 142 + 143 + for (int i = 0; i < sps->num_short_term_ref_pic_sets; i++) { 144 + int poc = 0; 145 + int j = 0; 146 + const struct calculated_rps_st_set *set = &calculated_rps_st_sets[i]; 147 + 148 + rps->short_term_ref_sets[i].num_negative = set->num_negative_pics; 149 + rps->short_term_ref_sets[i].num_positive = set->num_positive_pics; 150 + 151 + for (; j < set->num_negative_pics; j++) { 152 + set_ref_poc(&rps->short_term_ref_sets[i], j, 153 + set->delta_poc_s0[j], set->used_by_curr_pic_s0[j]); 154 + } 155 + poc = j; 156 + 157 + for (j = 0; j < set->num_positive_pics; j++) { 158 + set_ref_poc(&rps->short_term_ref_sets[i], poc + j, 159 + set->delta_poc_s1[j], set->used_by_curr_pic_s1[j]); 160 + } 161 + } 162 + } 163 + 164 + /* 165 + * Compute the short term ref pic set parameters based on its reference short term ref pic 166 + */ 167 + static void st_ref_pic_set_prediction(struct rkvdec_hevc_run *run, int idx, 168 + struct calculated_rps_st_set *calculated_rps_st_sets) 169 + { 170 + const struct v4l2_ctrl_hevc_ext_sps_st_rps *rps_data = &run->ext_sps_st_rps[idx]; 171 + struct calculated_rps_st_set *st_rps = &calculated_rps_st_sets[idx]; 172 + struct 
calculated_rps_st_set *ref_rps; 173 + u8 st_rps_idx = idx; 174 + u8 ref_rps_idx = 0; 175 + s16 delta_rps = 0; 176 + u8 use_delta_flag[16] = { 0 }; 177 + u8 used_by_curr_pic_flag[16] = { 0 }; 178 + int i, j; 179 + int dPoc; 180 + 181 + ref_rps_idx = st_rps_idx - (rps_data->delta_idx_minus1 + 1); /* 7-59 */ 182 + delta_rps = (1 - 2 * rps_data->delta_rps_sign) * 183 + (rps_data->abs_delta_rps_minus1 + 1); /* 7-60 */ 184 + 185 + ref_rps = &calculated_rps_st_sets[ref_rps_idx]; 186 + 187 + for (j = 0; j <= ref_rps->num_delta_pocs; j++) { 188 + used_by_curr_pic_flag[j] = !!(rps_data->used_by_curr_pic & (1 << j)); 189 + use_delta_flag[j] = !!(rps_data->use_delta_flag & (1 << j)); 190 + } 191 + 192 + /* 7-61: calculate num_negative_pics, delta_poc_s0 and used_by_curr_pic_s0 */ 193 + i = 0; 194 + for (j = (ref_rps->num_positive_pics - 1); j >= 0; j--) { 195 + dPoc = ref_rps->delta_poc_s1[j] + delta_rps; 196 + if (dPoc < 0 && use_delta_flag[ref_rps->num_negative_pics + j]) { 197 + st_rps->delta_poc_s0[i] = dPoc; 198 + st_rps->used_by_curr_pic_s0[i++] = 199 + used_by_curr_pic_flag[ref_rps->num_negative_pics + j]; 200 + } 201 + } 202 + if (delta_rps < 0 && use_delta_flag[ref_rps->num_delta_pocs]) { 203 + st_rps->delta_poc_s0[i] = delta_rps; 204 + st_rps->used_by_curr_pic_s0[i++] = used_by_curr_pic_flag[ref_rps->num_delta_pocs]; 205 + } 206 + for (j = 0; j < ref_rps->num_negative_pics; j++) { 207 + dPoc = ref_rps->delta_poc_s0[j] + delta_rps; 208 + if (dPoc < 0 && use_delta_flag[j]) { 209 + st_rps->delta_poc_s0[i] = dPoc; 210 + st_rps->used_by_curr_pic_s0[i++] = used_by_curr_pic_flag[j]; 211 + } 212 + } 213 + st_rps->num_negative_pics = i; 214 + 215 + /* 7-62: calculate num_positive_pics, delta_poc_s1 and used_by_curr_pic_s1 */ 216 + i = 0; 217 + for (j = (ref_rps->num_negative_pics - 1); j >= 0; j--) { 218 + dPoc = ref_rps->delta_poc_s0[j] + delta_rps; 219 + if (dPoc > 0 && use_delta_flag[j]) { 220 + st_rps->delta_poc_s1[i] = dPoc; 221 + st_rps->used_by_curr_pic_s1[i++] = 
used_by_curr_pic_flag[j]; 222 + } 223 + } 224 + if (delta_rps > 0 && use_delta_flag[ref_rps->num_delta_pocs]) { 225 + st_rps->delta_poc_s1[i] = delta_rps; 226 + st_rps->used_by_curr_pic_s1[i++] = used_by_curr_pic_flag[ref_rps->num_delta_pocs]; 227 + } 228 + for (j = 0; j < ref_rps->num_positive_pics; j++) { 229 + dPoc = ref_rps->delta_poc_s1[j] + delta_rps; 230 + if (dPoc > 0 && use_delta_flag[ref_rps->num_negative_pics + j]) { 231 + st_rps->delta_poc_s1[i] = dPoc; 232 + st_rps->used_by_curr_pic_s1[i++] = 233 + used_by_curr_pic_flag[ref_rps->num_negative_pics + j]; 234 + } 235 + } 236 + st_rps->num_positive_pics = i; 237 + 238 + st_rps->num_delta_pocs = st_rps->num_positive_pics + st_rps->num_negative_pics; 239 + } 240 + 241 + /* 242 + * Compute the short term ref pic set parameters based on the control's data. 243 + */ 244 + static void st_ref_pic_set_calculate(struct rkvdec_hevc_run *run, int idx, 245 + struct calculated_rps_st_set *calculated_rps_st_sets) 246 + { 247 + const struct v4l2_ctrl_hevc_ext_sps_st_rps *rps_data = &run->ext_sps_st_rps[idx]; 248 + struct calculated_rps_st_set *st_rps = &calculated_rps_st_sets[idx]; 249 + int j, i = 0; 250 + 251 + /* 7-63 */ 252 + st_rps->num_negative_pics = rps_data->num_negative_pics; 253 + /* 7-64 */ 254 + st_rps->num_positive_pics = rps_data->num_positive_pics; 255 + 256 + for (i = 0; i < st_rps->num_negative_pics; i++) { 257 + /* 7-65 */ 258 + st_rps->used_by_curr_pic_s0[i] = !!(rps_data->used_by_curr_pic & (1 << i)); 259 + 260 + if (i == 0) { 261 + /* 7-67 */ 262 + st_rps->delta_poc_s0[i] = -(rps_data->delta_poc_s0_minus1[i] + 1); 263 + } else { 264 + /* 7-69 */ 265 + st_rps->delta_poc_s0[i] = 266 + st_rps->delta_poc_s0[i - 1] - 267 + (rps_data->delta_poc_s0_minus1[i] + 1); 268 + } 269 + } 270 + 271 + for (j = 0; j < st_rps->num_positive_pics; j++) { 272 + /* 7-66 */ 273 + st_rps->used_by_curr_pic_s1[j] = !!(rps_data->used_by_curr_pic & (1 << (i + j))); 274 + 275 + if (j == 0) { 276 + /* 7-68 */ 277 + 
st_rps->delta_poc_s1[j] = rps_data->delta_poc_s1_minus1[j] + 1; 278 + } else { 279 + /* 7-70 */ 280 + st_rps->delta_poc_s1[j] = 281 + st_rps->delta_poc_s1[j - 1] + 282 + (rps_data->delta_poc_s1_minus1[j] + 1); 283 + } 284 + } 285 + 286 + /* 7-71 */ 287 + st_rps->num_delta_pocs = st_rps->num_positive_pics + st_rps->num_negative_pics; 288 + } 289 + 290 + static void rkvdec_hevc_prepare_hw_st_rps(struct rkvdec_hevc_run *run, struct rkvdec_rps *rps, 291 + struct v4l2_ctrl_hevc_ext_sps_st_rps *cache) 292 + { 293 + int idx; 294 + 295 + if (!run->ext_sps_st_rps) 296 + return; 297 + 298 + if (!memcmp(cache, run->ext_sps_st_rps, sizeof(struct v4l2_ctrl_hevc_ext_sps_st_rps))) 299 + return; 300 + 301 + struct calculated_rps_st_set *calculated_rps_st_sets = 302 + kzalloc(sizeof(struct calculated_rps_st_set) * 303 + run->sps->num_short_term_ref_pic_sets, GFP_KERNEL); 304 + 305 + for (idx = 0; idx < run->sps->num_short_term_ref_pic_sets; idx++) { 306 + const struct v4l2_ctrl_hevc_ext_sps_st_rps *rps_data = &run->ext_sps_st_rps[idx]; 307 + 308 + if (rps_data->flags & V4L2_HEVC_EXT_SPS_ST_RPS_FLAG_INTER_REF_PIC_SET_PRED) 309 + st_ref_pic_set_prediction(run, idx, calculated_rps_st_sets); 310 + else 311 + st_ref_pic_set_calculate(run, idx, calculated_rps_st_sets); 312 + } 313 + 314 + rkvdec_hevc_assemble_hw_st_rps(run, rps, calculated_rps_st_sets); 315 + 316 + kfree(calculated_rps_st_sets); 317 + 318 + memcpy(cache, run->ext_sps_st_rps, sizeof(struct v4l2_ctrl_hevc_ext_sps_st_rps)); 319 + } 320 + 321 + void rkvdec_hevc_assemble_hw_rps(struct rkvdec_hevc_run *run, struct rkvdec_rps *rps, 322 + struct v4l2_ctrl_hevc_ext_sps_st_rps *st_cache) 323 + { 324 + rkvdec_hevc_prepare_hw_st_rps(run, rps, st_cache); 325 + rkvdec_hevc_assemble_hw_lt_rps(run, rps); 326 + } 327 + 242 328 struct vb2_buffer * 243 329 get_ref_buf(struct rkvdec_ctx *ctx, struct rkvdec_hevc_run *run, 244 330 unsigned int dpb_idx) ··· 524 200 ctrl = v4l2_ctrl_find(&ctx->ctrl_hdl, 525 201 
V4L2_CID_STATELESS_HEVC_SCALING_MATRIX); 526 202 run->scaling_matrix = ctrl ? ctrl->p_cur.p : NULL; 203 + 204 + if (ctx->has_sps_st_rps) { 205 + ctrl = v4l2_ctrl_find(&ctx->ctrl_hdl, 206 + V4L2_CID_STATELESS_HEVC_EXT_SPS_ST_RPS); 207 + run->ext_sps_st_rps = ctrl ? ctrl->p_cur.p : NULL; 208 + } 209 + if (ctx->has_sps_lt_rps) { 210 + ctrl = v4l2_ctrl_find(&ctx->ctrl_hdl, 211 + V4L2_CID_STATELESS_HEVC_EXT_SPS_LT_RPS); 212 + run->ext_sps_lt_rps = ctrl ? ctrl->p_cur.p : NULL; 213 + } 527 214 528 215 rkvdec_run_preamble(ctx, &run->base); 529 216 }
+59
drivers/media/platform/rockchip/rkvdec/rkvdec-hevc-common.h
··· 16 16 */ 17 17 18 18 #include <media/v4l2-mem2mem.h> 19 + #include <linux/types.h> 19 20 20 21 #include "rkvdec.h" 22 + 23 + struct rkvdec_rps_refs { 24 + u16 lt_ref_pic_poc_lsb; 25 + u16 used_by_curr_pic_lt_flag : 1; 26 + u16 reserved : 15; 27 + } __packed; 28 + 29 + struct rkvdec_rps_short_term_ref_set { 30 + u32 num_negative : 4; 31 + u32 num_positive : 4; 32 + u32 delta_poc0 : 16; 33 + u32 used_flag0 : 1; 34 + u32 delta_poc1 : 16; 35 + u32 used_flag1 : 1; 36 + u32 delta_poc2 : 16; 37 + u32 used_flag2 : 1; 38 + u32 delta_poc3 : 16; 39 + u32 used_flag3 : 1; 40 + u32 delta_poc4 : 16; 41 + u32 used_flag4 : 1; 42 + u32 delta_poc5 : 16; 43 + u32 used_flag5 : 1; 44 + u32 delta_poc6 : 16; 45 + u32 used_flag6 : 1; 46 + u32 delta_poc7 : 16; 47 + u32 used_flag7 : 1; 48 + u32 delta_poc8 : 16; 49 + u32 used_flag8 : 1; 50 + u32 delta_poc9 : 16; 51 + u32 used_flag9 : 1; 52 + u32 delta_poc10 : 16; 53 + u32 used_flag10 : 1; 54 + u32 delta_poc11 : 16; 55 + u32 used_flag11 : 1; 56 + u32 delta_poc12 : 16; 57 + u32 used_flag12 : 1; 58 + u32 delta_poc13 : 16; 59 + u32 used_flag13 : 1; 60 + u32 delta_poc14 : 16; 61 + u32 used_flag14 : 1; 62 + u32 reserved_bits : 25; 63 + u32 reserved[3]; 64 + } __packed; 65 + 66 + struct rkvdec_rps { 67 + struct rkvdec_rps_refs refs[32]; 68 + struct rkvdec_rps_short_term_ref_set short_term_ref_sets[64]; 69 + } __packed; 21 70 22 71 struct rkvdec_hevc_run { 23 72 struct rkvdec_run base; ··· 75 26 const struct v4l2_ctrl_hevc_sps *sps; 76 27 const struct v4l2_ctrl_hevc_pps *pps; 77 28 const struct v4l2_ctrl_hevc_scaling_matrix *scaling_matrix; 29 + const struct v4l2_ctrl_hevc_ext_sps_st_rps *ext_sps_st_rps; 30 + const struct v4l2_ctrl_hevc_ext_sps_lt_rps *ext_sps_lt_rps; 78 31 int num_slices; 79 32 }; 80 33 ··· 87 36 u8 reserved[4]; /*16Bytes align*/ 88 37 }; 89 38 39 + void compute_tiles_uniform(struct rkvdec_hevc_run *run, u16 log2_min_cb_size, 40 + u16 width, u16 height, s32 pic_in_cts_width, 41 + s32 pic_in_cts_height, u16 *column_width, u16 
*row_height); 42 + void compute_tiles_non_uniform(struct rkvdec_hevc_run *run, u16 log2_min_cb_size, 43 + u16 width, u16 height, s32 pic_in_cts_width, 44 + s32 pic_in_cts_height, u16 *column_width, u16 *row_height); 45 + void rkvdec_hevc_assemble_hw_rps(struct rkvdec_hevc_run *run, struct rkvdec_rps *rps, 46 + struct v4l2_ctrl_hevc_ext_sps_st_rps *st_cache); 90 47 void rkvdec_hevc_assemble_hw_scaling_list(struct rkvdec_hevc_run *run, 91 48 struct scaling_factor *scaling_factor, 92 49 struct v4l2_ctrl_hevc_scaling_matrix *cache);
+640
drivers/media/platform/rockchip/rkvdec/rkvdec-vdpu381-hevc.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Rockchip VDPU381 HEVC backend 4 + * 5 + * Copyright (C) 2025 Collabora, Ltd. 6 + * Detlev Casanova <detlev.casanova@collabora.com> 7 + */ 8 + 9 + #include <media/v4l2-mem2mem.h> 10 + 11 + #include "rkvdec.h" 12 + #include "rkvdec-cabac.h" 13 + #include "rkvdec-rcb.h" 14 + #include "rkvdec-hevc-common.h" 15 + #include "rkvdec-vdpu381-regs.h" 16 + 17 + // SPS 18 + struct rkvdec_hevc_sps { 19 + u16 video_parameters_set_id : 4; 20 + u16 seq_parameters_set_id_sps : 4; 21 + u16 chroma_format_idc : 2; 22 + u16 width : 16; 23 + u16 height : 16; 24 + u16 bit_depth_luma : 4; 25 + u16 bit_depth_chroma : 4; 26 + u16 max_pic_order_count_lsb : 5; 27 + u16 diff_max_min_luma_coding_block_size : 2; 28 + u16 min_luma_coding_block_size : 3; 29 + u16 min_transform_block_size : 3; 30 + u16 diff_max_min_transform_block_size : 2; 31 + u16 max_transform_hierarchy_depth_inter : 3; 32 + u16 max_transform_hierarchy_depth_intra : 3; 33 + u16 scaling_list_enabled_flag : 1; 34 + u16 amp_enabled_flag : 1; 35 + u16 sample_adaptive_offset_enabled_flag : 1; 36 + u16 pcm_enabled_flag : 1; 37 + u16 pcm_sample_bit_depth_luma : 4; 38 + u16 pcm_sample_bit_depth_chroma : 4; 39 + u16 pcm_loop_filter_disabled_flag : 1; 40 + u16 diff_max_min_pcm_luma_coding_block_size : 3; 41 + u16 min_pcm_luma_coding_block_size : 3; 42 + u16 num_short_term_ref_pic_sets : 7; 43 + u16 long_term_ref_pics_present_flag : 1; 44 + u16 num_long_term_ref_pics_sps : 6; 45 + u16 sps_temporal_mvp_enabled_flag : 1; 46 + u16 strong_intra_smoothing_enabled_flag : 1; 47 + u16 reserved_0 : 7; 48 + u16 sps_max_dec_pic_buffering_minus1 : 4; 49 + u16 reserved_0_2 : 3; 50 + u16 reserved_f : 8; 51 + } __packed; 52 + 53 + //PPS 54 + struct rkvdec_hevc_pps { 55 + u16 picture_parameters_set_id : 6; 56 + u16 seq_parameters_set_id_pps : 4; 57 + u16 dependent_slice_segments_enabled_flag : 1; 58 + u16 output_flag_present_flag : 1; 59 + u16 num_extra_slice_header_bits : 13; 60 + u16 
sign_data_hiding_enabled_flag : 1; 61 + u16 cabac_init_present_flag : 1; 62 + u16 num_ref_idx_l0_default_active : 4; 63 + u16 num_ref_idx_l1_default_active : 4; 64 + u16 init_qp_minus26 : 7; 65 + u16 constrained_intra_pred_flag : 1; 66 + u16 transform_skip_enabled_flag : 1; 67 + u16 cu_qp_delta_enabled_flag : 1; 68 + u16 log2_min_cb_size : 3; 69 + u16 pps_cb_qp_offset : 5; 70 + u16 pps_cr_qp_offset : 5; 71 + u16 pps_slice_chroma_qp_offsets_present_flag : 1; 72 + u16 weighted_pred_flag : 1; 73 + u16 weighted_bipred_flag : 1; 74 + u16 transquant_bypass_enabled_flag : 1; 75 + u16 tiles_enabled_flag : 1; 76 + u16 entropy_coding_sync_enabled_flag : 1; 77 + u16 pps_loop_filter_across_slices_enabled_flag : 1; 78 + u16 loop_filter_across_tiles_enabled_flag : 1; 79 + u16 deblocking_filter_override_enabled_flag : 1; 80 + u16 pps_deblocking_filter_disabled_flag : 1; 81 + u16 pps_beta_offset_div2 : 4; 82 + u16 pps_tc_offset_div2 : 4; 83 + u16 lists_modification_present_flag : 1; 84 + u16 log2_parallel_merge_level : 3; 85 + u16 slice_segment_header_extension_present_flag : 1; 86 + u16 zeroes : 3; 87 + u16 num_tile_columns : 5; 88 + u16 num_tile_rows : 5; 89 + u16 sps_pps_mode : 4; 90 + u16 reserved_bits : 14; 91 + u16 reserved; 92 + } __packed; 93 + 94 + struct rkvdec_hevc_tile { 95 + u16 value0 : 12; 96 + u16 value1 : 12; 97 + } __packed; 98 + 99 + struct rkvdec_sps_pps_packet { 100 + struct rkvdec_hevc_sps sps; 101 + struct rkvdec_hevc_pps pps; 102 + struct rkvdec_hevc_tile column_width[10]; 103 + struct rkvdec_hevc_tile row_height[11]; 104 + u32 zeroes[3]; 105 + u32 zeroes_bits : 6; 106 + u32 padding_bits : 2; 107 + u32 padding; 108 + } __packed; 109 + 110 + struct rkvdec_hevc_priv_tbl { 111 + struct rkvdec_sps_pps_packet param_set[64]; 112 + struct rkvdec_rps rps; 113 + struct scaling_factor scaling_list; 114 + u8 cabac_table[27456]; 115 + }; 116 + 117 + struct rkvdec_hevc_ctx { 118 + struct rkvdec_aux_buf priv_tbl; 119 + struct v4l2_ctrl_hevc_scaling_matrix 
scaling_matrix_cache; 120 + struct v4l2_ctrl_hevc_ext_sps_st_rps st_cache; 121 + struct rkvdec_vdpu381_regs_hevc regs; 122 + }; 123 + 124 + static void assemble_hw_pps(struct rkvdec_ctx *ctx, 125 + struct rkvdec_hevc_run *run) 126 + { 127 + struct rkvdec_hevc_ctx *hevc_ctx = ctx->priv; 128 + const struct v4l2_ctrl_hevc_sps *sps = run->sps; 129 + const struct v4l2_ctrl_hevc_pps *pps = run->pps; 130 + struct rkvdec_hevc_priv_tbl *priv_tbl = hevc_ctx->priv_tbl.cpu; 131 + struct rkvdec_sps_pps_packet *hw_ps; 132 + bool tiles_enabled; 133 + s32 max_cu_width; 134 + s32 pic_in_cts_width; 135 + s32 pic_in_cts_height; 136 + u16 log2_min_cb_size, width, height; 137 + u16 column_width[20]; 138 + u16 row_height[22]; 139 + u8 pcm_enabled; 140 + u32 i; 141 + 142 + /* 143 + * HW read the SPS/PPS information from PPS packet index by PPS id. 144 + * offset from the base can be calculated by PPS_id * 32 (size per PPS 145 + * packet unit). so the driver copy SPS/PPS information to the exact PPS 146 + * packet unit for HW accessing. 
147 + */ 148 + hw_ps = &priv_tbl->param_set[pps->pic_parameter_set_id]; 149 + memset(hw_ps, 0, sizeof(*hw_ps)); 150 + 151 + /* write sps */ 152 + hw_ps->sps.video_parameters_set_id = sps->video_parameter_set_id; 153 + hw_ps->sps.seq_parameters_set_id_sps = sps->seq_parameter_set_id; 154 + hw_ps->sps.chroma_format_idc = sps->chroma_format_idc; 155 + 156 + log2_min_cb_size = sps->log2_min_luma_coding_block_size_minus3 + 3; 157 + width = sps->pic_width_in_luma_samples; 158 + height = sps->pic_height_in_luma_samples; 159 + hw_ps->sps.width = width; 160 + hw_ps->sps.height = height; 161 + hw_ps->sps.bit_depth_luma = sps->bit_depth_luma_minus8 + 8; 162 + hw_ps->sps.bit_depth_chroma = sps->bit_depth_chroma_minus8 + 8; 163 + hw_ps->sps.max_pic_order_count_lsb = sps->log2_max_pic_order_cnt_lsb_minus4 + 4; 164 + hw_ps->sps.diff_max_min_luma_coding_block_size = 165 + sps->log2_diff_max_min_luma_coding_block_size; 166 + hw_ps->sps.min_luma_coding_block_size = sps->log2_min_luma_coding_block_size_minus3 + 3; 167 + hw_ps->sps.min_transform_block_size = sps->log2_min_luma_transform_block_size_minus2 + 2; 168 + hw_ps->sps.diff_max_min_transform_block_size = 169 + sps->log2_diff_max_min_luma_transform_block_size; 170 + hw_ps->sps.max_transform_hierarchy_depth_inter = sps->max_transform_hierarchy_depth_inter; 171 + hw_ps->sps.max_transform_hierarchy_depth_intra = sps->max_transform_hierarchy_depth_intra; 172 + hw_ps->sps.scaling_list_enabled_flag = 173 + !!(sps->flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED); 174 + hw_ps->sps.amp_enabled_flag = !!(sps->flags & V4L2_HEVC_SPS_FLAG_AMP_ENABLED); 175 + hw_ps->sps.sample_adaptive_offset_enabled_flag = 176 + !!(sps->flags & V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET); 177 + 178 + pcm_enabled = !!(sps->flags & V4L2_HEVC_SPS_FLAG_PCM_ENABLED); 179 + hw_ps->sps.pcm_enabled_flag = pcm_enabled; 180 + hw_ps->sps.pcm_sample_bit_depth_luma = 181 + pcm_enabled ? 
sps->pcm_sample_bit_depth_luma_minus1 + 1 : 0; 182 + hw_ps->sps.pcm_sample_bit_depth_chroma = 183 + pcm_enabled ? sps->pcm_sample_bit_depth_chroma_minus1 + 1 : 0; 184 + hw_ps->sps.pcm_loop_filter_disabled_flag = 185 + !!(sps->flags & V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED); 186 + hw_ps->sps.diff_max_min_pcm_luma_coding_block_size = 187 + sps->log2_diff_max_min_pcm_luma_coding_block_size; 188 + hw_ps->sps.min_pcm_luma_coding_block_size = 189 + pcm_enabled ? sps->log2_min_pcm_luma_coding_block_size_minus3 + 3 : 0; 190 + hw_ps->sps.num_short_term_ref_pic_sets = sps->num_short_term_ref_pic_sets; 191 + hw_ps->sps.long_term_ref_pics_present_flag = 192 + !!(sps->flags & V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT); 193 + hw_ps->sps.num_long_term_ref_pics_sps = sps->num_long_term_ref_pics_sps; 194 + hw_ps->sps.sps_temporal_mvp_enabled_flag = 195 + !!(sps->flags & V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED); 196 + hw_ps->sps.strong_intra_smoothing_enabled_flag = 197 + !!(sps->flags & V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED); 198 + hw_ps->sps.sps_max_dec_pic_buffering_minus1 = sps->sps_max_dec_pic_buffering_minus1; 199 + hw_ps->sps.reserved_f = 0xff; 200 + 201 + /* write pps */ 202 + hw_ps->pps.picture_parameters_set_id = pps->pic_parameter_set_id; 203 + hw_ps->pps.seq_parameters_set_id_pps = sps->seq_parameter_set_id; 204 + hw_ps->pps.dependent_slice_segments_enabled_flag = 205 + !!(pps->flags & V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED); 206 + hw_ps->pps.output_flag_present_flag = 207 + !!(pps->flags & V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT); 208 + hw_ps->pps.num_extra_slice_header_bits = pps->num_extra_slice_header_bits; 209 + hw_ps->pps.sign_data_hiding_enabled_flag = 210 + !!(pps->flags & V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED); 211 + hw_ps->pps.cabac_init_present_flag = 212 + !!(pps->flags & V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT); 213 + hw_ps->pps.num_ref_idx_l0_default_active = pps->num_ref_idx_l0_default_active_minus1 + 1; 214 + 
hw_ps->pps.num_ref_idx_l1_default_active = pps->num_ref_idx_l1_default_active_minus1 + 1; 215 + hw_ps->pps.init_qp_minus26 = pps->init_qp_minus26; 216 + hw_ps->pps.constrained_intra_pred_flag = 217 + !!(pps->flags & V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED); 218 + hw_ps->pps.transform_skip_enabled_flag = 219 + !!(pps->flags & V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED); 220 + hw_ps->pps.cu_qp_delta_enabled_flag = 221 + !!(pps->flags & V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED); 222 + hw_ps->pps.log2_min_cb_size = log2_min_cb_size + 223 + sps->log2_diff_max_min_luma_coding_block_size - 224 + pps->diff_cu_qp_delta_depth; 225 + hw_ps->pps.pps_cb_qp_offset = pps->pps_cb_qp_offset; 226 + hw_ps->pps.pps_cr_qp_offset = pps->pps_cr_qp_offset; 227 + hw_ps->pps.pps_slice_chroma_qp_offsets_present_flag = 228 + !!(pps->flags & V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT); 229 + hw_ps->pps.weighted_pred_flag = !!(pps->flags & V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED); 230 + hw_ps->pps.weighted_bipred_flag = !!(pps->flags & V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED); 231 + hw_ps->pps.transquant_bypass_enabled_flag = 232 + !!(pps->flags & V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED); 233 + 234 + tiles_enabled = !!(pps->flags & V4L2_HEVC_PPS_FLAG_TILES_ENABLED); 235 + hw_ps->pps.tiles_enabled_flag = tiles_enabled; 236 + hw_ps->pps.entropy_coding_sync_enabled_flag = 237 + !!(pps->flags & V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED); 238 + hw_ps->pps.pps_loop_filter_across_slices_enabled_flag = 239 + !!(pps->flags & V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED); 240 + hw_ps->pps.loop_filter_across_tiles_enabled_flag = 241 + !!(pps->flags & V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED); 242 + hw_ps->pps.deblocking_filter_override_enabled_flag = 243 + !!(pps->flags & V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED); 244 + hw_ps->pps.pps_deblocking_filter_disabled_flag = 245 + !!(pps->flags & V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER); 246 + 
hw_ps->pps.pps_beta_offset_div2 = pps->pps_beta_offset_div2; 247 + hw_ps->pps.pps_tc_offset_div2 = pps->pps_tc_offset_div2; 248 + hw_ps->pps.lists_modification_present_flag = 249 + !!(pps->flags & V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT); 250 + hw_ps->pps.log2_parallel_merge_level = pps->log2_parallel_merge_level_minus2 + 2; 251 + hw_ps->pps.slice_segment_header_extension_present_flag = 252 + !!(pps->flags & V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT); 253 + hw_ps->pps.num_tile_columns = tiles_enabled ? pps->num_tile_columns_minus1 + 1 : 0; 254 + hw_ps->pps.num_tile_rows = tiles_enabled ? pps->num_tile_rows_minus1 + 1 : 0; 255 + hw_ps->pps.sps_pps_mode = 0; 256 + hw_ps->pps.reserved_bits = 0x3fff; 257 + hw_ps->pps.reserved = 0xffff; 258 + 259 + // Setup tiles information 260 + memset(column_width, 0, sizeof(column_width)); 261 + memset(row_height, 0, sizeof(row_height)); 262 + 263 + max_cu_width = 1 << (sps->log2_diff_max_min_luma_coding_block_size + log2_min_cb_size); 264 + pic_in_cts_width = (width + max_cu_width - 1) / max_cu_width; 265 + pic_in_cts_height = (height + max_cu_width - 1) / max_cu_width; 266 + 267 + if (pps->flags & V4L2_HEVC_PPS_FLAG_TILES_ENABLED) { 268 + if (pps->flags & V4L2_HEVC_PPS_FLAG_UNIFORM_SPACING) { 269 + compute_tiles_uniform(run, log2_min_cb_size, width, height, 270 + pic_in_cts_width, pic_in_cts_height, 271 + column_width, row_height); 272 + } else { 273 + compute_tiles_non_uniform(run, log2_min_cb_size, width, height, 274 + pic_in_cts_width, pic_in_cts_height, 275 + column_width, row_height); 276 + } 277 + } else { 278 + column_width[0] = (width + max_cu_width - 1) / max_cu_width; 279 + row_height[0] = (height + max_cu_width - 1) / max_cu_width; 280 + } 281 + 282 + for (i = 0; i < 20; i++) { 283 + if (column_width[i] > 0) 284 + column_width[i]--; 285 + 286 + if (i & 1) 287 + hw_ps->column_width[i / 2].value1 = column_width[i]; 288 + else 289 + hw_ps->column_width[i / 2].value0 = column_width[i]; 290 + } 291 + 
292 + for (i = 0; i < 22; i++) { 293 + if (row_height[i] > 0) 294 + row_height[i]--; 295 + 296 + if (i & 1) 297 + hw_ps->row_height[i / 2].value1 = row_height[i]; 298 + else 299 + hw_ps->row_height[i / 2].value0 = row_height[i]; 300 + } 301 + 302 + hw_ps->padding = 0xffffffff; 303 + hw_ps->padding_bits = 0x3; 304 + } 305 + 306 + /* Store valid in the hevc_ref_valid_<id> bitfield of reg099_hevc_ref_valid. Only ids 0..14 have a case; any other id leaves regs untouched. */ static void set_ref_valid(struct rkvdec_vdpu381_regs_hevc *regs, int id, u32 valid) 307 + { 308 + switch (id) { 309 + case 0: 310 + regs->hevc_param.reg099_hevc_ref_valid.hevc_ref_valid_0 = valid; 311 + break; 312 + case 1: 313 + regs->hevc_param.reg099_hevc_ref_valid.hevc_ref_valid_1 = valid; 314 + break; 315 + case 2: 316 + regs->hevc_param.reg099_hevc_ref_valid.hevc_ref_valid_2 = valid; 317 + break; 318 + case 3: 319 + regs->hevc_param.reg099_hevc_ref_valid.hevc_ref_valid_3 = valid; 320 + break; 321 + case 4: 322 + regs->hevc_param.reg099_hevc_ref_valid.hevc_ref_valid_4 = valid; 323 + break; 324 + case 5: 325 + regs->hevc_param.reg099_hevc_ref_valid.hevc_ref_valid_5 = valid; 326 + break; 327 + case 6: 328 + regs->hevc_param.reg099_hevc_ref_valid.hevc_ref_valid_6 = valid; 329 + break; 330 + case 7: 331 + regs->hevc_param.reg099_hevc_ref_valid.hevc_ref_valid_7 = valid; 332 + break; 333 + case 8: 334 + regs->hevc_param.reg099_hevc_ref_valid.hevc_ref_valid_8 = valid; 335 + break; 336 + case 9: 337 + regs->hevc_param.reg099_hevc_ref_valid.hevc_ref_valid_9 = valid; 338 + break; 339 + case 10: 340 + regs->hevc_param.reg099_hevc_ref_valid.hevc_ref_valid_10 = valid; 341 + break; 342 + case 11: 343 + regs->hevc_param.reg099_hevc_ref_valid.hevc_ref_valid_11 = valid; 344 + break; 345 + case 12: 346 + regs->hevc_param.reg099_hevc_ref_valid.hevc_ref_valid_12 = valid; 347 + break; 348 + case 13: 349 + regs->hevc_param.reg099_hevc_ref_valid.hevc_ref_valid_13 = valid; 350 + break; 351 + case 14: 352 + regs->hevc_param.reg099_hevc_ref_valid.hevc_ref_valid_14 = valid; 353 + break; 354 + } 355 + } 356 + 357 + /* Copy each register group cached in hevc_ctx->regs (common, hevc_param, common_addr, hevc_addr, hevc_highpoc) to rkvdec->regs at its OFFSET_* offset with rkvdec_memcpy_toio(). */ static void rkvdec_write_regs(struct rkvdec_ctx
*ctx) 358 + { 359 + struct rkvdec_dev *rkvdec = ctx->dev; 360 + struct rkvdec_hevc_ctx *hevc_ctx = ctx->priv; 361 + 362 + rkvdec_memcpy_toio(rkvdec->regs + OFFSET_COMMON_REGS, 363 + &hevc_ctx->regs.common, 364 + sizeof(hevc_ctx->regs.common)); 365 + rkvdec_memcpy_toio(rkvdec->regs + OFFSET_CODEC_PARAMS_REGS, 366 + &hevc_ctx->regs.hevc_param, 367 + sizeof(hevc_ctx->regs.hevc_param)); 368 + rkvdec_memcpy_toio(rkvdec->regs + OFFSET_COMMON_ADDR_REGS, 369 + &hevc_ctx->regs.common_addr, 370 + sizeof(hevc_ctx->regs.common_addr)); 371 + rkvdec_memcpy_toio(rkvdec->regs + OFFSET_CODEC_ADDR_REGS, 372 + &hevc_ctx->regs.hevc_addr, 373 + sizeof(hevc_ctx->regs.hevc_addr)); 374 + rkvdec_memcpy_toio(rkvdec->regs + OFFSET_POC_HIGHBIT_REGS, 375 + &hevc_ctx->regs.hevc_highpoc, 376 + sizeof(hevc_ctx->regs.hevc_highpoc)); 377 + } 378 + 379 + /* Build the register image for one decode run in hevc_ctx->regs: zero it, then fill in decode mode, stream length, strides, clock gating, timeout, per-reference address/POC/valid entries, stream/output/RCB addresses and the private-table addresses, and finally push it out through rkvdec_write_regs(). */ static void config_registers(struct rkvdec_ctx *ctx, 380 + struct rkvdec_hevc_run *run) 381 + { 382 + const struct v4l2_ctrl_hevc_decode_params *dec_params = run->decode_params; 383 + const struct v4l2_hevc_dpb_entry *dpb = dec_params->dpb; 384 + struct rkvdec_hevc_ctx *hevc_ctx = ctx->priv; 385 + struct rkvdec_vdpu381_regs_hevc *regs = &hevc_ctx->regs; 386 + dma_addr_t priv_start_addr = hevc_ctx->priv_tbl.dma; 387 + const struct v4l2_pix_format_mplane *dst_fmt; 388 + struct vb2_v4l2_buffer *src_buf = run->base.bufs.src; 389 + struct vb2_v4l2_buffer *dst_buf = run->base.bufs.dst; 390 + const struct v4l2_format *f; 391 + dma_addr_t rlc_addr; 392 + u32 hor_virstride = 0; 393 + u32 ver_virstride = 0; 394 + u32 y_virstride = 0; 395 + u32 offset; 396 + u32 pixels; 397 + dma_addr_t dst_addr; 398 + u32 i; 399 + 400 + /* Nothing is carried over from the previous run: every field not set below stays 0 */ memset(regs, 0, sizeof(*regs)); 401 + 402 + /* Set HEVC mode */ 403 + regs->common.reg009_dec_mode.dec_mode = VDPU381_MODE_HEVC; 404 + 405 + /* Set config */ 406 + regs->common.reg011_important_en.buf_empty_en = 1; 407 + regs->common.reg011_important_en.dec_clkgate_e = 1; 408 + regs->common.reg011_important_en.dec_timeout_e = 1; 409 +
regs->common.reg011_important_en.pix_range_det_e = 1; 410 + 411 + /* Set IDR flag */ 412 + regs->common.reg013_en_mode_set.cur_pic_is_idr = 413 + !!(dec_params->flags & V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC); 414 + 415 + /* Set input stream length */ 416 + regs->common.reg016_stream_len = vb2_get_plane_payload(&src_buf->vb2_buf, 0); 417 + 418 + /* Set max slice number */ 419 + regs->common.reg017_slice_number.slice_num = 1; 420 + 421 + /* Set strides */ 422 + f = &ctx->decoded_fmt; 423 + dst_fmt = &f->fmt.pix_mp; 424 + hor_virstride = dst_fmt->plane_fmt[0].bytesperline; 425 + ver_virstride = dst_fmt->height; 426 + y_virstride = hor_virstride * ver_virstride; 427 + 428 + /* NOTE(review): all three stride values are divided by 16 before being programmed; presumably the registers count 16-byte units - confirm against the register documentation. Luma and chroma use the same horizontal stride. */ regs->common.reg018_y_hor_stride.y_hor_virstride = hor_virstride / 16; 429 + regs->common.reg019_uv_hor_stride.uv_hor_virstride = hor_virstride / 16; 430 + regs->common.reg020_y_stride.y_virstride = y_virstride / 16; 431 + 432 + /* Activate block gating */ 433 + regs->common.reg026_block_gating_en.inter_auto_gating_e = 1; 434 + regs->common.reg026_block_gating_en.filterd_auto_gating_e = 1; 435 + regs->common.reg026_block_gating_en.strmd_auto_gating_e = 1; 436 + regs->common.reg026_block_gating_en.mcp_auto_gating_e = 1; 437 + regs->common.reg026_block_gating_en.busifd_auto_gating_e = 0; 438 + regs->common.reg026_block_gating_en.dec_ctrl_auto_gating_e = 1; 439 + regs->common.reg026_block_gating_en.intra_auto_gating_e = 1; 440 + regs->common.reg026_block_gating_en.mc_auto_gating_e = 1; 441 + regs->common.reg026_block_gating_en.transd_auto_gating_e = 1; 442 + regs->common.reg026_block_gating_en.sram_auto_gating_e = 1; 443 + regs->common.reg026_block_gating_en.cru_auto_gating_e = 1; 444 + regs->common.reg026_block_gating_en.reg_cfg_gating_en = 1; 445 + 446 + /* Set timeout threshold, picked from the decoded picture area (width * height) */ 447 + pixels = dst_fmt->height * dst_fmt->width; 448 + if (pixels < RKVDEC_1080P_PIXELS) 449 + regs->common.reg032_timeout_threshold = RKVDEC_TIMEOUT_1080p; 450 + else if (pixels < RKVDEC_4K_PIXELS) 451 +
regs->common.reg032_timeout_threshold = RKVDEC_TIMEOUT_4K; 452 + else if (pixels < RKVDEC_8K_PIXELS) 453 + regs->common.reg032_timeout_threshold = RKVDEC_TIMEOUT_8K; 454 + else 455 + regs->common.reg032_timeout_threshold = RKVDEC_TIMEOUT_MAX; 456 + 457 + /* Set POC val */ 458 + regs->hevc_param.reg065_cur_top_poc = dec_params->pic_order_cnt_val; 459 + 460 + /* Set ref pic address & poc */ 461 + for (i = 0; i < ARRAY_SIZE(dec_params->dpb); i++) { 462 + struct vb2_buffer *vb_buf = get_ref_buf(ctx, run, i); 463 + dma_addr_t buf_dma = vb2_dma_contig_plane_dma_addr(vb_buf, 0); 464 + /* Entry i is flagged valid only while i < num_active_dpb_entries; address and POC are still written for every entry */ u32 valid = !!(dec_params->num_active_dpb_entries > i); 465 + 466 + /* Set reference addresses */ 467 + regs->hevc_addr.reg164_180_ref_base[i] = buf_dma; 468 + 469 + /* Set COLMV addresses */ 470 + regs->hevc_addr.reg182_198_colmv_base[i] = buf_dma + ctx->colmv_offset; 471 + 472 + regs->hevc_param.reg067_082_ref_poc[i] = 473 + dpb[i].pic_order_cnt_val; 474 + 475 + /* NOTE(review): set_ref_valid() only has cases for ids 0..14, so a larger i is silently ignored there */ set_ref_valid(regs, i, valid); 476 + regs->hevc_param.reg103_hevc_mvc0.ref_pic_layer_same_with_cur |= 1 << i; 477 + } 478 + 479 + /* Set rlc base address (input stream) */ 480 + rlc_addr = vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 0); 481 + regs->common_addr.rlc_base = rlc_addr; 482 + regs->common_addr.rlcwrite_base = rlc_addr; 483 + 484 + /* Set output base address; the error reference points at the same buffer */ 485 + dst_addr = vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0); 486 + regs->common_addr.decout_base = dst_addr; 487 + regs->common_addr.error_ref_base = dst_addr; 488 + 489 + /* Set colmv address */ 490 + regs->common_addr.colmv_cur_base = dst_addr + ctx->colmv_offset; 491 + 492 + /* Set RCB addresses */ 493 + for (i = 0; i < rkvdec_rcb_buf_count(ctx); i++) 494 + regs->common_addr.rcb_base[i] = rkvdec_rcb_buf_dma_addr(ctx, i); 495 + 496 + /* Set hw pps address */ 497 + offset = offsetof(struct rkvdec_hevc_priv_tbl, param_set); 498 + regs->hevc_addr.reg161_pps_base = priv_start_addr + offset; 499 + 500 + /* Set hw rps address */ 501 + offset =
offsetof(struct rkvdec_hevc_priv_tbl, rps); 502 + regs->hevc_addr.reg163_rps_base = priv_start_addr + offset; 503 + 504 + /* Set cabac table */ 505 + offset = offsetof(struct rkvdec_hevc_priv_tbl, cabac_table); 506 + regs->hevc_addr.reg199_cabactbl_base = priv_start_addr + offset; 507 + 508 + /* Set scaling matrix */ 509 + offset = offsetof(struct rkvdec_hevc_priv_tbl, scaling_list); 510 + regs->hevc_addr.reg181_scanlist_addr = priv_start_addr + offset; 511 + 512 + rkvdec_write_regs(ctx); 513 + } 514 + 515 + /* Check an SPS against what this code accepts: chroma_format_idc 1 (4:2:0), identical luma and chroma bit depth of 8 or 10 bits, and a picture no larger than the coded format. Returns 0 if acceptable, -EINVAL otherwise. */ static int rkvdec_hevc_validate_sps(struct rkvdec_ctx *ctx, 516 + const struct v4l2_ctrl_hevc_sps *sps) 517 + { 518 + if (sps->chroma_format_idc != 1) 519 + /* Only 4:2:0 is supported */ 520 + return -EINVAL; 521 + 522 + if (sps->bit_depth_luma_minus8 != sps->bit_depth_chroma_minus8) 523 + /* Luma and chroma bit depth mismatch */ 524 + return -EINVAL; 525 + 526 + if (sps->bit_depth_luma_minus8 != 0 && sps->bit_depth_luma_minus8 != 2) 527 + /* Only 8-bit and 10-bit are supported */ 528 + return -EINVAL; 529 + 530 + if (sps->pic_width_in_luma_samples > ctx->coded_fmt.fmt.pix_mp.width || 531 + sps->pic_height_in_luma_samples > ctx->coded_fmt.fmt.pix_mp.height) 532 + return -EINVAL; 533 + 534 + return 0; 535 + } 536 + 537 + /* Validate the p_new value of the HEVC SPS control, then allocate the per-context state and a DMA-coherent private table, copy the CABAC table into it and publish the state in ctx->priv. Returns 0 on success, -EINVAL if the SPS control is not found, the validation error, or -ENOMEM on allocation failure. */ static int rkvdec_hevc_start(struct rkvdec_ctx *ctx) 538 + { 539 + struct rkvdec_dev *rkvdec = ctx->dev; 540 + struct rkvdec_hevc_priv_tbl *priv_tbl; 541 + struct rkvdec_hevc_ctx *hevc_ctx; 542 + struct v4l2_ctrl *ctrl; 543 + int ret; 544 + 545 + ctrl = v4l2_ctrl_find(&ctx->ctrl_hdl, 546 + V4L2_CID_STATELESS_HEVC_SPS); 547 + if (!ctrl) 548 + return -EINVAL; 549 + 550 + ret = rkvdec_hevc_validate_sps(ctx, ctrl->p_new.p_hevc_sps); 551 + if (ret) 552 + return ret; 553 + 554 + hevc_ctx = kzalloc(sizeof(*hevc_ctx), GFP_KERNEL); 555 + if (!hevc_ctx) 556 + return -ENOMEM; 557 + 558 + priv_tbl = dma_alloc_coherent(rkvdec->dev, sizeof(*priv_tbl), 559 + &hevc_ctx->priv_tbl.dma, GFP_KERNEL); 560 + if (!priv_tbl) { 561 + ret = -ENOMEM; 562 + goto
err_free_ctx; 563 + } 564 + 565 + hevc_ctx->priv_tbl.size = sizeof(*priv_tbl); 566 + hevc_ctx->priv_tbl.cpu = priv_tbl; 567 + memcpy(priv_tbl->cabac_table, rkvdec_hevc_cabac_table, 568 + sizeof(rkvdec_hevc_cabac_table)); 569 + 570 + ctx->priv = hevc_ctx; 571 + return 0; 572 + 573 + err_free_ctx: 574 + kfree(hevc_ctx); 575 + return ret; 576 + } 577 + 578 + /* Release the DMA-coherent private table and the state stored in ctx->priv. ctx->priv itself is not reset here. */ static void rkvdec_hevc_stop(struct rkvdec_ctx *ctx) 579 + { 580 + struct rkvdec_hevc_ctx *hevc_ctx = ctx->priv; 581 + struct rkvdec_dev *rkvdec = ctx->dev; 582 + 583 + dma_free_coherent(rkvdec->dev, hevc_ctx->priv_tbl.size, 584 + hevc_ctx->priv_tbl.cpu, hevc_ctx->priv_tbl.dma); 585 + kfree(hevc_ctx); 586 + } 587 + 588 + /* Set up and launch one decode: run the preamble, assemble the scaling list, the PPS and (unless the RPS controls are missing) the RPS, program the registers, run the postamble, schedule the watchdog with the programmed timeout threshold and write the decode-enable bit. Always returns 0. */ static int rkvdec_hevc_run(struct rkvdec_ctx *ctx) 589 + { 590 + struct rkvdec_dev *rkvdec = ctx->dev; 591 + struct rkvdec_hevc_run run; 592 + struct rkvdec_hevc_ctx *hevc_ctx = ctx->priv; 593 + struct rkvdec_hevc_priv_tbl *tbl = hevc_ctx->priv_tbl.cpu; 594 + 595 + rkvdec_hevc_run_preamble(ctx, &run); 596 + 597 + rkvdec_hevc_assemble_hw_scaling_list(&run, 598 + &tbl->scaling_list, 599 + &hevc_ctx->scaling_matrix_cache); 600 + assemble_hw_pps(ctx, &run); 601 + 602 + /* 603 + * On vdpu381, not setting the long and short term ref sets will just output wrong frames. 604 + * Let's just warn about it and let the decoder run anyway. 605 + */ 606 + if ((!ctx->has_sps_lt_rps && run.sps->num_long_term_ref_pics_sps) || 607 + (!ctx->has_sps_st_rps && run.sps->num_short_term_ref_pic_sets)) { 608 + dev_warn_ratelimited(rkvdec->dev, "Long and short term RPS not set\n"); 609 + } else { 610 + rkvdec_hevc_assemble_hw_rps(&run, &tbl->rps, &hevc_ctx->st_cache); 611 + } 612 + 613 + config_registers(ctx, &run); 614 + 615 + rkvdec_run_postamble(ctx, &run.base); 616 + 617 + rkvdec_schedule_watchdog(rkvdec, hevc_ctx->regs.common.reg032_timeout_threshold); 618 + 619 + /* Start decoding!
*/ 620 + writel(VDPU381_DEC_E_BIT, rkvdec->regs + VDPU381_REG_DEC_E); 621 + 622 + return 0; 623 + } 624 + 625 + /* Validate a new SPS value with rkvdec_hevc_validate_sps(); every other control id returns 0 without any check. */ static int rkvdec_hevc_try_ctrl(struct rkvdec_ctx *ctx, struct v4l2_ctrl *ctrl) 626 + { 627 + if (ctrl->id == V4L2_CID_STATELESS_HEVC_SPS) 628 + return rkvdec_hevc_validate_sps(ctx, ctrl->p_new.p_hevc_sps); 629 + 630 + return 0; 631 + } 632 + 633 + /* HEVC coded-format callbacks for the VDPU381 variant; adjust_fmt and get_image_fmt point at helpers that are not defined in this hunk. */ const struct rkvdec_coded_fmt_ops rkvdec_vdpu381_hevc_fmt_ops = { 634 + .adjust_fmt = rkvdec_hevc_adjust_fmt, 635 + .start = rkvdec_hevc_start, 636 + .stop = rkvdec_hevc_stop, 637 + .run = rkvdec_hevc_run, 638 + .try_ctrl = rkvdec_hevc_try_ctrl, 639 + .get_image_fmt = rkvdec_hevc_get_image_fmt, 640 + };
+82
drivers/media/platform/rockchip/rkvdec/rkvdec.c
··· 153 153 enum rkvdec_image_fmt image_fmt; 154 154 struct vb2_queue *vq; 155 155 156 + /* The two EXT_SPS RPS controls only latch a flag in the context: the |= keeps it set once a changed value has been seen, and the early return skips the capture-format reset check below. */ if (ctrl->id == V4L2_CID_STATELESS_HEVC_EXT_SPS_ST_RPS) { 157 + ctx->has_sps_st_rps |= !!(ctrl->has_changed); 158 + return 0; 159 + } 160 + 161 + if (ctrl->id == V4L2_CID_STATELESS_HEVC_EXT_SPS_LT_RPS) { 162 + ctx->has_sps_lt_rps |= !!(ctrl->has_changed); 163 + return 0; 164 + } 165 + 156 166 /* Check if this change requires a capture format reset */ 157 167 if (!desc->ops->get_image_fmt) 158 168 return 0; ··· 234 224 static const struct rkvdec_ctrls rkvdec_hevc_ctrls = { 235 225 .ctrls = rkvdec_hevc_ctrl_descs, 236 226 .num_ctrls = ARRAY_SIZE(rkvdec_hevc_ctrl_descs), 227 + }; 228 + 229 + /* HEVC control descriptors for the vdpu38x variants: decode mode restricted to frame-based, start code restricted to Annex B, profiles Main..Main 10 with Main Still Picture masked out, levels 1..6.1, plus the EXT_SPS short- and long-term RPS controls (dims 65 each). Only the SPS and the two RPS controls are routed through rkvdec_ctrl_ops. */ static const struct rkvdec_ctrl_desc vdpu38x_hevc_ctrl_descs[] = { 230 + { 231 + .cfg.id = V4L2_CID_STATELESS_HEVC_DECODE_PARAMS, 232 + }, 233 + { 234 + .cfg.id = V4L2_CID_STATELESS_HEVC_SPS, 235 + .cfg.ops = &rkvdec_ctrl_ops, 236 + }, 237 + { 238 + .cfg.id = V4L2_CID_STATELESS_HEVC_PPS, 239 + }, 240 + { 241 + .cfg.id = V4L2_CID_STATELESS_HEVC_SCALING_MATRIX, 242 + }, 243 + { 244 + .cfg.id = V4L2_CID_STATELESS_HEVC_DECODE_MODE, 245 + .cfg.min = V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED, 246 + .cfg.max = V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED, 247 + .cfg.def = V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED, 248 + }, 249 + { 250 + .cfg.id = V4L2_CID_STATELESS_HEVC_START_CODE, 251 + .cfg.min = V4L2_STATELESS_HEVC_START_CODE_ANNEX_B, 252 + .cfg.def = V4L2_STATELESS_HEVC_START_CODE_ANNEX_B, 253 + .cfg.max = V4L2_STATELESS_HEVC_START_CODE_ANNEX_B, 254 + }, 255 + { 256 + .cfg.id = V4L2_CID_MPEG_VIDEO_HEVC_PROFILE, 257 + .cfg.min = V4L2_MPEG_VIDEO_HEVC_PROFILE_MAIN, 258 + .cfg.max = V4L2_MPEG_VIDEO_HEVC_PROFILE_MAIN_10, 259 + .cfg.menu_skip_mask = 260 + BIT(V4L2_MPEG_VIDEO_HEVC_PROFILE_MAIN_STILL_PICTURE), 261 + .cfg.def = V4L2_MPEG_VIDEO_HEVC_PROFILE_MAIN, 262 + }, 263 + { 264 + .cfg.id = V4L2_CID_MPEG_VIDEO_HEVC_LEVEL, 265 + .cfg.min = V4L2_MPEG_VIDEO_HEVC_LEVEL_1, 266 + .cfg.max =
V4L2_MPEG_VIDEO_HEVC_LEVEL_6_1, 267 + }, 268 + { 269 + .cfg.id = V4L2_CID_STATELESS_HEVC_EXT_SPS_ST_RPS, 270 + .cfg.ops = &rkvdec_ctrl_ops, 271 + .cfg.dims = { 65 }, 272 + }, 273 + { 274 + .cfg.id = V4L2_CID_STATELESS_HEVC_EXT_SPS_LT_RPS, 275 + .cfg.ops = &rkvdec_ctrl_ops, 276 + .cfg.dims = { 65 }, 277 + }, 278 + }; 279 + 280 + /* Bundles vdpu38x_hevc_ctrl_descs with its element count. */ static const struct rkvdec_ctrls vdpu38x_hevc_ctrls = { 281 + .ctrls = vdpu38x_hevc_ctrl_descs, 282 + .num_ctrls = ARRAY_SIZE(vdpu38x_hevc_ctrl_descs), 237 283 }; 238 284 239 285 static const struct rkvdec_decoded_fmt_desc rkvdec_hevc_decoded_fmts[] = { ··· 511 445 }; 512 446 513 447 static const struct rkvdec_coded_fmt_desc vdpu381_coded_fmts[] = { 448 + /* HEVC slice entry: 64..65472 in both dimensions, width in steps of 64 and height in steps of 16 */ { 449 + .fourcc = V4L2_PIX_FMT_HEVC_SLICE, 450 + .frmsize = { 451 + .min_width = 64, 452 + .max_width = 65472, 453 + .step_width = 64, 454 + .min_height = 64, 455 + .max_height = 65472, 456 + .step_height = 16, 457 + }, 458 + .ctrls = &vdpu38x_hevc_ctrls, 459 + .ops = &rkvdec_vdpu381_hevc_fmt_ops, 460 + .num_decoded_fmts = ARRAY_SIZE(rkvdec_hevc_decoded_fmts), 461 + .decoded_fmts = rkvdec_hevc_decoded_fmts, 462 + .subsystem_flags = VB2_V4L2_FL_SUPPORTS_M2M_HOLD_CAPTURE_BUF, 463 + }, 514 464 { 515 465 .fourcc = V4L2_PIX_FMT_H264_SLICE, 516 466 .frmsize = {
+3
drivers/media/platform/rockchip/rkvdec/rkvdec.h
··· 154 154 struct rkvdec_rcb_config *rcb_config; 155 155 u32 colmv_offset; 156 156 void *priv; 157 + /* One-bit flags latched by the control handler in rkvdec.c once the HEVC EXT_SPS_ST_RPS / EXT_SPS_LT_RPS control has been set with a changed value; the VDPU381 HEVC run path reads them to decide whether the RPS can be assembled. */ u8 has_sps_st_rps: 1; 158 + u8 has_sps_lt_rps: 1; 157 159 }; 158 160 159 161 static inline struct rkvdec_ctx *file_to_rkvdec_ctx(struct file *filp) ··· 189 187 190 188 /* VDPU381 ops */ 191 189 extern const struct rkvdec_coded_fmt_ops rkvdec_vdpu381_h264_fmt_ops; 190 + extern const struct rkvdec_coded_fmt_ops rkvdec_vdpu381_hevc_fmt_ops; 192 191 193 192 /* VDPU383 ops */ 194 193 extern const struct rkvdec_coded_fmt_ops rkvdec_vdpu383_h264_fmt_ops;