Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

media: rkvdec: Add HEVC support for the VDPU383 variant

The VDPU383 decoder is used on the RK3576 SoC and has support for HEVC.

This patch also moves the scaling matrix transpose helper out of
rkvdec-hevc-common.c into rkvdec.c, where it becomes the per-variant
flatten_matrices op, and adds a VDPU383-specific scaling matrix flatten
function.

The fluster score for JCT-VC-HEVC_V1 is 146/147.

Reviewed-by: Nicolas Dufresne <nicolas.dufresne@collabora.com>
Signed-off-by: Detlev Casanova <detlev.casanova@collabora.com>
Signed-off-by: Nicolas Dufresne <nicolas.dufresne@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil+cisco@kernel.org>

Authored by Detlev Casanova and committed by Hans Verkuil
e3b5b77e c9a59dc2

+765 -48
+1
drivers/media/platform/rockchip/rkvdec/Makefile
··· 11 11 rkvdec-vdpu381-h264.o \ 12 12 rkvdec-vdpu381-hevc.o \ 13 13 rkvdec-vdpu383-h264.o \ 14 + rkvdec-vdpu383-hevc.o \ 14 15 rkvdec-vp9.o
+15 -44
drivers/media/platform/rockchip/rkvdec/rkvdec-hevc-common.c
··· 140 140 } 141 141 } 142 142 143 - /* 144 - * Flip one or more matrices along their main diagonal and flatten them 145 - * before writing it to the memory. 146 - * Convert: 147 - * ABCD AEIM 148 - * EFGH => BFJN => AEIMBFJNCGKODHLP 149 - * IJKL CGKO 150 - * MNOP DHLP 151 - */ 152 - static void transpose_and_flatten_matrices(u8 *output, const u8 *input, 153 - int matrices, int row_length) 143 + static void assemble_scalingfactor0(struct rkvdec_ctx *ctx, u8 *output, 144 + const struct v4l2_ctrl_hevc_scaling_matrix *input) 154 145 { 155 - int i, j, row, x_offset, matrix_offset, rot_index, y_offset, matrix_size, new_value; 156 - 157 - matrix_size = row_length * row_length; 158 - for (i = 0; i < matrices; i++) { 159 - row = 0; 160 - x_offset = 0; 161 - matrix_offset = i * matrix_size; 162 - for (j = 0; j < matrix_size; j++) { 163 - y_offset = j - (row * row_length); 164 - rot_index = y_offset * row_length + x_offset; 165 - new_value = *(input + i * matrix_size + j); 166 - output[matrix_offset + rot_index] = new_value; 167 - if ((j + 1) % row_length == 0) { 168 - row += 1; 169 - x_offset += 1; 170 - } 171 - } 172 - } 173 - } 174 - 175 - static void assemble_scalingfactor0(u8 *output, const struct v4l2_ctrl_hevc_scaling_matrix *input) 176 - { 146 + const struct rkvdec_variant *variant = ctx->dev->variant; 177 147 int offset = 0; 178 148 179 - transpose_and_flatten_matrices(output, (const u8 *)input->scaling_list_4x4, 6, 4); 149 + variant->ops->flatten_matrices(output, (const u8 *)input->scaling_list_4x4, 6, 4); 180 150 offset = 6 * 16 * sizeof(u8); 181 - transpose_and_flatten_matrices(output + offset, (const u8 *)input->scaling_list_8x8, 6, 8); 151 + variant->ops->flatten_matrices(output + offset, (const u8 *)input->scaling_list_8x8, 6, 8); 182 152 offset += 6 * 64 * sizeof(u8); 183 - transpose_and_flatten_matrices(output + offset, 184 - (const u8 *)input->scaling_list_16x16, 6, 8); 153 + variant->ops->flatten_matrices(output + offset, (const u8 
*)input->scaling_list_16x16, 154 + 6, 8); 185 155 offset += 6 * 64 * sizeof(u8); 186 156 /* Add a 128 byte padding with 0s between the two 32x32 matrices */ 187 - transpose_and_flatten_matrices(output + offset, 188 - (const u8 *)input->scaling_list_32x32, 1, 8); 157 + variant->ops->flatten_matrices(output + offset, (const u8 *)input->scaling_list_32x32, 158 + 1, 8); 189 159 offset += 64 * sizeof(u8); 190 160 memset(output + offset, 0, 128); 191 161 offset += 128 * sizeof(u8); 192 - transpose_and_flatten_matrices(output + offset, 162 + variant->ops->flatten_matrices(output + offset, 193 163 (const u8 *)input->scaling_list_32x32 + (64 * sizeof(u8)), 194 164 1, 8); 195 165 offset += 64 * sizeof(u8); ··· 184 214 memcpy(output + 6 * sizeof(u8), list_32x32, 6 * sizeof(u8)); 185 215 } 186 216 187 - static void translate_scaling_list(struct scaling_factor *output, 217 + static void translate_scaling_list(struct rkvdec_ctx *ctx, struct scaling_factor *output, 188 218 const struct v4l2_ctrl_hevc_scaling_matrix *input) 189 219 { 190 - assemble_scalingfactor0(output->scalingfactor0, input); 220 + assemble_scalingfactor0(ctx, output->scalingfactor0, input); 191 221 memcpy(output->scalingfactor1, (const u8 *)input->scaling_list_4x4, 96); 192 222 assemble_scalingdc(output->scalingdc, input); 193 223 memset(output->reserved, 0, 4 * sizeof(u8)); 194 224 } 195 225 196 - void rkvdec_hevc_assemble_hw_scaling_list(struct rkvdec_hevc_run *run, 226 + void rkvdec_hevc_assemble_hw_scaling_list(struct rkvdec_ctx *ctx, 227 + struct rkvdec_hevc_run *run, 197 228 struct scaling_factor *scaling_factor, 198 229 struct v4l2_ctrl_hevc_scaling_matrix *cache) 199 230 { ··· 204 233 sizeof(struct v4l2_ctrl_hevc_scaling_matrix))) 205 234 return; 206 235 207 - translate_scaling_list(scaling_factor, scaling); 236 + translate_scaling_list(ctx, scaling_factor, scaling); 208 237 209 238 memcpy(cache, scaling, 210 239 sizeof(struct v4l2_ctrl_hevc_scaling_matrix));
+2 -1
drivers/media/platform/rockchip/rkvdec/rkvdec-hevc-common.h
··· 95 95 s32 pic_in_cts_height, u16 *column_width, u16 *row_height); 96 96 void rkvdec_hevc_assemble_hw_rps(struct rkvdec_hevc_run *run, struct rkvdec_rps *rps, 97 97 struct v4l2_ctrl_hevc_ext_sps_st_rps *st_cache); 98 - void rkvdec_hevc_assemble_hw_scaling_list(struct rkvdec_hevc_run *run, 98 + void rkvdec_hevc_assemble_hw_scaling_list(struct rkvdec_ctx *ctx, 99 + struct rkvdec_hevc_run *run, 99 100 struct scaling_factor *scaling_factor, 100 101 struct v4l2_ctrl_hevc_scaling_matrix *cache); 101 102 struct vb2_buffer *get_ref_buf(struct rkvdec_ctx *ctx,
+1 -1
drivers/media/platform/rockchip/rkvdec/rkvdec-hevc.c
··· 568 568 569 569 rkvdec_hevc_run_preamble(ctx, &run); 570 570 571 - rkvdec_hevc_assemble_hw_scaling_list(&run, &tbl->scaling_list, 571 + rkvdec_hevc_assemble_hw_scaling_list(ctx, &run, &tbl->scaling_list, 572 572 &hevc_ctx->scaling_matrix_cache); 573 573 assemble_hw_pps(ctx, &run); 574 574 assemble_sw_rps(ctx, &run);
+1 -2
drivers/media/platform/rockchip/rkvdec/rkvdec-vdpu381-hevc.c
··· 594 594 595 595 rkvdec_hevc_run_preamble(ctx, &run); 596 596 597 - rkvdec_hevc_assemble_hw_scaling_list(&run, 598 - &tbl->scaling_list, 597 + rkvdec_hevc_assemble_hw_scaling_list(ctx, &run, &tbl->scaling_list, 599 598 &hevc_ctx->scaling_matrix_cache); 600 599 assemble_hw_pps(ctx, &run); 601 600
+652
drivers/media/platform/rockchip/rkvdec/rkvdec-vdpu383-hevc.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Rockchip VDPU383 HEVC backend 4 + * 5 + * Copyright (C) 2025 Collabora, Ltd. 6 + * Detlev Casanova <detlev.casanova@collabora.com> 7 + */ 8 + 9 + #include <media/v4l2-mem2mem.h> 10 + 11 + #include "rkvdec.h" 12 + #include "rkvdec-cabac.h" 13 + #include "rkvdec-rcb.h" 14 + #include "rkvdec-hevc-common.h" 15 + #include "rkvdec-vdpu383-regs.h" 16 + 17 + struct rkvdec_hevc_sps_pps { 18 + // SPS 19 + u16 video_parameters_set_id : 4; 20 + u16 seq_parameters_set_id_sps : 4; 21 + u16 chroma_format_idc : 2; 22 + u16 width : 16; 23 + u16 height : 16; 24 + u16 bit_depth_luma : 3; 25 + u16 bit_depth_chroma : 3; 26 + u16 max_pic_order_count_lsb : 5; 27 + u16 diff_max_min_luma_coding_block_size : 2; 28 + u16 min_luma_coding_block_size : 3; 29 + u16 min_transform_block_size : 3; 30 + u16 diff_max_min_transform_block_size : 2; 31 + u16 max_transform_hierarchy_depth_inter : 3; 32 + u16 max_transform_hierarchy_depth_intra : 3; 33 + u16 scaling_list_enabled_flag : 1; 34 + u16 amp_enabled_flag : 1; 35 + u16 sample_adaptive_offset_enabled_flag : 1; 36 + u16 pcm_enabled_flag : 1; 37 + u16 pcm_sample_bit_depth_luma : 4; 38 + u16 pcm_sample_bit_depth_chroma : 4; 39 + u16 pcm_loop_filter_disabled_flag : 1; 40 + u16 diff_max_min_pcm_luma_coding_block_size : 3; 41 + u16 min_pcm_luma_coding_block_size : 3; 42 + u16 num_short_term_ref_pic_sets : 7; 43 + u16 long_term_ref_pics_present_flag : 1; 44 + u16 num_long_term_ref_pics_sps : 6; 45 + u16 sps_temporal_mvp_enabled_flag : 1; 46 + u16 strong_intra_smoothing_enabled_flag : 1; 47 + u16 reserved0 : 7; 48 + u16 sps_max_dec_pic_buffering_minus1 : 4; 49 + u16 separate_colour_plane_flag : 1; 50 + u16 high_precision_offsets_enabled_flag : 1; 51 + u16 persistent_rice_adaptation_enabled_flag : 1; 52 + 53 + // PPS 54 + u16 picture_parameters_set_id : 6; 55 + u16 seq_parameters_set_id_pps : 4; 56 + u16 dependent_slice_segments_enabled_flag : 1; 57 + u16 output_flag_present_flag : 1; 58 + u16 
num_extra_slice_header_bits : 13; 59 + u16 sign_data_hiding_enabled_flag : 1; 60 + u16 cabac_init_present_flag : 1; 61 + u16 num_ref_idx_l0_default_active : 4; 62 + u16 num_ref_idx_l1_default_active : 4; 63 + u16 init_qp_minus26 : 7; 64 + u16 constrained_intra_pred_flag : 1; 65 + u16 transform_skip_enabled_flag : 1; 66 + u16 cu_qp_delta_enabled_flag : 1; 67 + u16 log2_min_cb_size : 3; 68 + u16 pps_cb_qp_offset : 5; 69 + u16 pps_cr_qp_offset : 5; 70 + u16 pps_slice_chroma_qp_offsets_present_flag : 1; 71 + u16 weighted_pred_flag : 1; 72 + u16 weighted_bipred_flag : 1; 73 + u16 transquant_bypass_enabled_flag : 1; 74 + u16 tiles_enabled_flag : 1; 75 + u16 entropy_coding_sync_enabled_flag : 1; 76 + u16 pps_loop_filter_across_slices_enabled_flag : 1; 77 + u16 loop_filter_across_tiles_enabled_flag : 1; 78 + u16 deblocking_filter_override_enabled_flag : 1; 79 + u16 pps_deblocking_filter_disabled_flag : 1; 80 + u16 pps_beta_offset_div2 : 4; 81 + u16 pps_tc_offset_div2 : 4; 82 + u16 lists_modification_present_flag : 1; 83 + u16 log2_parallel_merge_level : 3; 84 + u16 slice_segment_header_extension_present_flag : 1; 85 + u16 reserved1 : 3; 86 + 87 + // pps extensions 88 + u16 log2_max_transform_skip_block_size : 2; 89 + u16 cross_component_prediction_enabled_flag : 1; 90 + u16 chroma_qp_offset_list_enabled_flag : 1; 91 + u16 log2_min_cu_chroma_qp_delta_size : 3; 92 + u16 cb_qp_offset_list0 : 5; 93 + u16 cb_qp_offset_list1 : 5; 94 + u16 cb_qp_offset_list2 : 5; 95 + u16 cb_qp_offset_list3 : 5; 96 + u16 cb_qp_offset_list4 : 5; 97 + u16 cb_qp_offset_list5 : 5; 98 + u16 cb_cr_offset_list0 : 5; 99 + u16 cb_cr_offset_list1 : 5; 100 + u16 cb_cr_offset_list2 : 5; 101 + u16 cb_cr_offset_list3 : 5; 102 + u16 cb_cr_offset_list4 : 5; 103 + u16 cb_cr_offset_list5 : 5; 104 + u16 chroma_qp_offset_list_len_minus1 : 3; 105 + 106 + /* mvc0 && mvc1 */ 107 + u16 mvc_ff : 16; 108 + u16 mvc_00 : 9; 109 + 110 + /* poc info */ 111 + u16 reserved2 : 3; 112 + u32 current_poc : 32; 113 + u32 
ref_pic_poc0 : 32; 114 + u32 ref_pic_poc1 : 32; 115 + u32 ref_pic_poc2 : 32; 116 + u32 ref_pic_poc3 : 32; 117 + u32 ref_pic_poc4 : 32; 118 + u32 ref_pic_poc5 : 32; 119 + u32 ref_pic_poc6 : 32; 120 + u32 ref_pic_poc7 : 32; 121 + u32 ref_pic_poc8 : 32; 122 + u32 ref_pic_poc9 : 32; 123 + u32 ref_pic_poc10 : 32; 124 + u32 ref_pic_poc11 : 32; 125 + u32 ref_pic_poc12 : 32; 126 + u32 ref_pic_poc13 : 32; 127 + u32 ref_pic_poc14 : 32; 128 + u32 reserved3 : 32; 129 + u32 ref_is_valid : 15; 130 + u32 reserved4 : 1; 131 + 132 + /* tile info*/ 133 + u16 num_tile_columns : 5; 134 + u16 num_tile_rows : 5; 135 + u32 column_width0 : 24; 136 + u32 column_width1 : 24; 137 + u32 column_width2 : 24; 138 + u32 column_width3 : 24; 139 + u32 column_width4 : 24; 140 + u32 column_width5 : 24; 141 + u32 column_width6 : 24; 142 + u32 column_width7 : 24; 143 + u32 column_width8 : 24; 144 + u32 column_width9 : 24; 145 + u32 row_height0 : 24; 146 + u32 row_height1 : 24; 147 + u32 row_height2 : 24; 148 + u32 row_height3 : 24; 149 + u32 row_height4 : 24; 150 + u32 row_height5 : 24; 151 + u32 row_height6 : 24; 152 + u32 row_height7 : 24; 153 + u32 row_height8 : 24; 154 + u32 row_height9 : 24; 155 + u32 row_height10 : 24; 156 + u32 reserved5 : 2; 157 + u32 padding; 158 + } __packed; 159 + 160 + struct rkvdec_hevc_priv_tbl { 161 + struct rkvdec_hevc_sps_pps param_set; 162 + struct rkvdec_rps rps; 163 + struct scaling_factor scaling_list; 164 + u8 cabac_table[27456]; 165 + } __packed; 166 + 167 + struct rkvdec_hevc_ctx { 168 + struct rkvdec_aux_buf priv_tbl; 169 + struct v4l2_ctrl_hevc_scaling_matrix scaling_matrix_cache; 170 + struct v4l2_ctrl_hevc_ext_sps_st_rps st_cache; 171 + struct vdpu383_regs_h26x regs; 172 + }; 173 + 174 + static void set_column_row(struct rkvdec_hevc_sps_pps *hw_ps, u16 *column, u16 *row) 175 + { 176 + hw_ps->column_width0 = column[0] | (column[1] << 12); 177 + hw_ps->row_height0 = row[0] | (row[1] << 12); 178 + hw_ps->column_width1 = column[2] | (column[3] << 12); 179 + 
hw_ps->row_height1 = row[2] | (row[3] << 12); 180 + hw_ps->column_width2 = column[4] | (column[5] << 12); 181 + hw_ps->row_height2 = row[4] | (row[5] << 12); 182 + hw_ps->column_width3 = column[6] | (column[7] << 12); 183 + hw_ps->row_height3 = row[6] | (row[7] << 12); 184 + hw_ps->column_width4 = column[8] | (column[9] << 12); 185 + hw_ps->row_height4 = row[8] | (row[9] << 12); 186 + hw_ps->column_width5 = column[10] | (column[11] << 12); 187 + hw_ps->row_height5 = row[10] | (row[11] << 12); 188 + hw_ps->column_width6 = column[12] | (column[13] << 12); 189 + hw_ps->row_height6 = row[12] | (row[13] << 12); 190 + hw_ps->column_width7 = column[14] | (column[15] << 12); 191 + hw_ps->row_height7 = row[14] | (row[15] << 12); 192 + hw_ps->column_width8 = column[16] | (column[17] << 12); 193 + hw_ps->row_height8 = row[16] | (row[17] << 12); 194 + hw_ps->column_width9 = column[18] | (column[19] << 12); 195 + hw_ps->row_height9 = row[18] | (row[19] << 12); 196 + 197 + hw_ps->row_height10 = row[20] | (row[21] << 12); 198 + } 199 + 200 + static void set_pps_ref_pic_poc(struct rkvdec_hevc_sps_pps *hw_ps, const struct v4l2_hevc_dpb_entry *dpb) 201 + { 202 + hw_ps->ref_pic_poc0 = dpb[0].pic_order_cnt_val; 203 + hw_ps->ref_pic_poc1 = dpb[1].pic_order_cnt_val; 204 + hw_ps->ref_pic_poc2 = dpb[2].pic_order_cnt_val; 205 + hw_ps->ref_pic_poc3 = dpb[3].pic_order_cnt_val; 206 + hw_ps->ref_pic_poc4 = dpb[4].pic_order_cnt_val; 207 + hw_ps->ref_pic_poc5 = dpb[5].pic_order_cnt_val; 208 + hw_ps->ref_pic_poc6 = dpb[6].pic_order_cnt_val; 209 + hw_ps->ref_pic_poc7 = dpb[7].pic_order_cnt_val; 210 + hw_ps->ref_pic_poc8 = dpb[8].pic_order_cnt_val; 211 + hw_ps->ref_pic_poc9 = dpb[9].pic_order_cnt_val; 212 + hw_ps->ref_pic_poc10 = dpb[10].pic_order_cnt_val; 213 + hw_ps->ref_pic_poc11 = dpb[11].pic_order_cnt_val; 214 + hw_ps->ref_pic_poc12 = dpb[12].pic_order_cnt_val; 215 + hw_ps->ref_pic_poc13 = dpb[13].pic_order_cnt_val; 216 + hw_ps->ref_pic_poc14 = dpb[14].pic_order_cnt_val; 217 + } 218 + 219 + 
static void assemble_hw_pps(struct rkvdec_ctx *ctx, 220 + struct rkvdec_hevc_run *run) 221 + { 222 + struct rkvdec_hevc_ctx *h264_ctx = ctx->priv; 223 + const struct v4l2_ctrl_hevc_sps *sps = run->sps; 224 + const struct v4l2_ctrl_hevc_pps *pps = run->pps; 225 + const struct v4l2_ctrl_hevc_decode_params *dec_params = run->decode_params; 226 + struct rkvdec_hevc_priv_tbl *priv_tbl = h264_ctx->priv_tbl.cpu; 227 + struct rkvdec_hevc_sps_pps *hw_ps; 228 + bool tiles_enabled; 229 + s32 max_cu_width; 230 + s32 pic_in_cts_width; 231 + s32 pic_in_cts_height; 232 + u16 log2_min_cb_size, width, height; 233 + u16 column_width[22]; 234 + u16 row_height[22]; 235 + u8 pcm_enabled; 236 + u32 i; 237 + 238 + /* 239 + * HW read the SPS/PPS information from PPS packet index by PPS id. 240 + * offset from the base can be calculated by PPS_id * 32 (size per PPS 241 + * packet unit). so the driver copy SPS/PPS information to the exact PPS 242 + * packet unit for HW accessing. 243 + */ 244 + hw_ps = &priv_tbl->param_set; 245 + memset(hw_ps, 0, sizeof(*hw_ps)); 246 + 247 + /* write sps */ 248 + hw_ps->video_parameters_set_id = sps->video_parameter_set_id; 249 + hw_ps->seq_parameters_set_id_sps = sps->seq_parameter_set_id; 250 + hw_ps->chroma_format_idc = sps->chroma_format_idc; 251 + 252 + log2_min_cb_size = sps->log2_min_luma_coding_block_size_minus3 + 3; 253 + width = sps->pic_width_in_luma_samples; 254 + height = sps->pic_height_in_luma_samples; 255 + hw_ps->width = width; 256 + hw_ps->height = height; 257 + hw_ps->bit_depth_luma = sps->bit_depth_luma_minus8 + 8; 258 + hw_ps->bit_depth_chroma = sps->bit_depth_chroma_minus8 + 8; 259 + hw_ps->max_pic_order_count_lsb = sps->log2_max_pic_order_cnt_lsb_minus4 + 4; 260 + hw_ps->diff_max_min_luma_coding_block_size = sps->log2_diff_max_min_luma_coding_block_size; 261 + hw_ps->min_luma_coding_block_size = sps->log2_min_luma_coding_block_size_minus3 + 3; 262 + hw_ps->min_transform_block_size = sps->log2_min_luma_transform_block_size_minus2 + 2; 
263 + hw_ps->diff_max_min_transform_block_size = 264 + sps->log2_diff_max_min_luma_transform_block_size; 265 + hw_ps->max_transform_hierarchy_depth_inter = sps->max_transform_hierarchy_depth_inter; 266 + hw_ps->max_transform_hierarchy_depth_intra = sps->max_transform_hierarchy_depth_intra; 267 + hw_ps->scaling_list_enabled_flag = 268 + !!(sps->flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED); 269 + hw_ps->amp_enabled_flag = !!(sps->flags & V4L2_HEVC_SPS_FLAG_AMP_ENABLED); 270 + hw_ps->sample_adaptive_offset_enabled_flag = 271 + !!(sps->flags & V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET); 272 + 273 + pcm_enabled = !!(sps->flags & V4L2_HEVC_SPS_FLAG_PCM_ENABLED); 274 + hw_ps->pcm_enabled_flag = pcm_enabled; 275 + hw_ps->pcm_sample_bit_depth_luma = 276 + pcm_enabled ? sps->pcm_sample_bit_depth_luma_minus1 + 1 : 0; 277 + hw_ps->pcm_sample_bit_depth_chroma = 278 + pcm_enabled ? sps->pcm_sample_bit_depth_chroma_minus1 + 1 : 0; 279 + hw_ps->pcm_loop_filter_disabled_flag = 280 + !!(sps->flags & V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED); 281 + hw_ps->diff_max_min_pcm_luma_coding_block_size = 282 + sps->log2_diff_max_min_pcm_luma_coding_block_size; 283 + hw_ps->min_pcm_luma_coding_block_size = 284 + pcm_enabled ? 
sps->log2_min_pcm_luma_coding_block_size_minus3 + 3 : 0; 285 + hw_ps->num_short_term_ref_pic_sets = sps->num_short_term_ref_pic_sets; 286 + hw_ps->long_term_ref_pics_present_flag = 287 + !!(sps->flags & V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT); 288 + hw_ps->num_long_term_ref_pics_sps = sps->num_long_term_ref_pics_sps; 289 + hw_ps->sps_temporal_mvp_enabled_flag = 290 + !!(sps->flags & V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED); 291 + hw_ps->strong_intra_smoothing_enabled_flag = 292 + !!(sps->flags & V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED); 293 + hw_ps->sps_max_dec_pic_buffering_minus1 = sps->sps_max_dec_pic_buffering_minus1; 294 + 295 + /* write pps */ 296 + hw_ps->picture_parameters_set_id = pps->pic_parameter_set_id; 297 + hw_ps->seq_parameters_set_id_pps = sps->seq_parameter_set_id; 298 + hw_ps->dependent_slice_segments_enabled_flag = 299 + !!(pps->flags & V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED); 300 + hw_ps->output_flag_present_flag = !!(pps->flags & V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT); 301 + hw_ps->num_extra_slice_header_bits = pps->num_extra_slice_header_bits; 302 + hw_ps->sign_data_hiding_enabled_flag = 303 + !!(pps->flags & V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED); 304 + hw_ps->cabac_init_present_flag = !!(pps->flags & V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT); 305 + hw_ps->num_ref_idx_l0_default_active = pps->num_ref_idx_l0_default_active_minus1 + 1; 306 + hw_ps->num_ref_idx_l1_default_active = pps->num_ref_idx_l1_default_active_minus1 + 1; 307 + hw_ps->init_qp_minus26 = pps->init_qp_minus26; 308 + hw_ps->constrained_intra_pred_flag = 309 + !!(pps->flags & V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED); 310 + hw_ps->transform_skip_enabled_flag = 311 + !!(pps->flags & V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED); 312 + hw_ps->cu_qp_delta_enabled_flag = !!(pps->flags & V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED); 313 + hw_ps->log2_min_cb_size = log2_min_cb_size + 314 + sps->log2_diff_max_min_luma_coding_block_size - 315 + 
pps->diff_cu_qp_delta_depth; 316 + hw_ps->pps_cb_qp_offset = pps->pps_cb_qp_offset; 317 + hw_ps->pps_cr_qp_offset = pps->pps_cr_qp_offset; 318 + hw_ps->pps_slice_chroma_qp_offsets_present_flag = 319 + !!(pps->flags & V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT); 320 + hw_ps->weighted_pred_flag = !!(pps->flags & V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED); 321 + hw_ps->weighted_bipred_flag = !!(pps->flags & V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED); 322 + hw_ps->transquant_bypass_enabled_flag = 323 + !!(pps->flags & V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED); 324 + tiles_enabled = !!(pps->flags & V4L2_HEVC_PPS_FLAG_TILES_ENABLED); 325 + hw_ps->tiles_enabled_flag = tiles_enabled; 326 + hw_ps->entropy_coding_sync_enabled_flag = 327 + !!(pps->flags & V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED); 328 + hw_ps->pps_loop_filter_across_slices_enabled_flag = 329 + !!(pps->flags & V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED); 330 + hw_ps->loop_filter_across_tiles_enabled_flag = 331 + !!(pps->flags & V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED); 332 + hw_ps->deblocking_filter_override_enabled_flag = 333 + !!(pps->flags & V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED); 334 + hw_ps->pps_deblocking_filter_disabled_flag = 335 + !!(pps->flags & V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER); 336 + hw_ps->pps_beta_offset_div2 = pps->pps_beta_offset_div2; 337 + hw_ps->pps_tc_offset_div2 = pps->pps_tc_offset_div2; 338 + hw_ps->lists_modification_present_flag = 339 + !!(pps->flags & V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT); 340 + hw_ps->log2_parallel_merge_level = pps->log2_parallel_merge_level_minus2 + 2; 341 + hw_ps->slice_segment_header_extension_present_flag = 342 + !!(pps->flags & V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT); 343 + hw_ps->num_tile_columns = tiles_enabled ? pps->num_tile_columns_minus1 + 1 : 1; 344 + hw_ps->num_tile_rows = tiles_enabled ? 
pps->num_tile_rows_minus1 + 1 : 1; 345 + hw_ps->mvc_ff = 0xffff; 346 + 347 + // Setup tiles information 348 + memset(column_width, 0, sizeof(column_width)); 349 + memset(row_height, 0, sizeof(row_height)); 350 + 351 + max_cu_width = 1 << (sps->log2_diff_max_min_luma_coding_block_size + log2_min_cb_size); 352 + pic_in_cts_width = (width + max_cu_width - 1) / max_cu_width; 353 + pic_in_cts_height = (height + max_cu_width - 1) / max_cu_width; 354 + 355 + if (tiles_enabled) { 356 + if (pps->flags & V4L2_HEVC_PPS_FLAG_UNIFORM_SPACING) { 357 + compute_tiles_uniform(run, log2_min_cb_size, width, height, 358 + pic_in_cts_width, pic_in_cts_height, 359 + column_width, row_height); 360 + } else { 361 + compute_tiles_non_uniform(run, log2_min_cb_size, width, height, 362 + pic_in_cts_width, pic_in_cts_height, 363 + column_width, row_height); 364 + } 365 + } else { 366 + column_width[0] = (width + max_cu_width - 1) / max_cu_width; 367 + row_height[0] = (height + max_cu_width - 1) / max_cu_width; 368 + } 369 + 370 + set_column_row(hw_ps, column_width, row_height); 371 + 372 + // Setup POC information 373 + hw_ps->current_poc = dec_params->pic_order_cnt_val; 374 + 375 + set_pps_ref_pic_poc(hw_ps, dec_params->dpb); 376 + for (i = 0; i < ARRAY_SIZE(dec_params->dpb); i++) { 377 + u32 valid = !!(dec_params->num_active_dpb_entries > i); 378 + hw_ps->ref_is_valid |= valid << i; 379 + } 380 + } 381 + 382 + static void rkvdec_write_regs(struct rkvdec_ctx *ctx) 383 + { 384 + struct rkvdec_dev *rkvdec = ctx->dev; 385 + struct rkvdec_hevc_ctx *h265_ctx = ctx->priv; 386 + 387 + rkvdec_memcpy_toio(rkvdec->regs + VDPU383_OFFSET_COMMON_REGS, 388 + &h265_ctx->regs.common, 389 + sizeof(h265_ctx->regs.common)); 390 + rkvdec_memcpy_toio(rkvdec->regs + VDPU383_OFFSET_COMMON_ADDR_REGS, 391 + &h265_ctx->regs.common_addr, 392 + sizeof(h265_ctx->regs.common_addr)); 393 + rkvdec_memcpy_toio(rkvdec->regs + VDPU383_OFFSET_CODEC_PARAMS_REGS, 394 + &h265_ctx->regs.h26x_params, 395 + 
sizeof(h265_ctx->regs.h26x_params)); 396 + rkvdec_memcpy_toio(rkvdec->regs + VDPU383_OFFSET_CODEC_ADDR_REGS, 397 + &h265_ctx->regs.h26x_addr, 398 + sizeof(h265_ctx->regs.h26x_addr)); 399 + } 400 + 401 + static void config_registers(struct rkvdec_ctx *ctx, 402 + struct rkvdec_hevc_run *run) 403 + { 404 + const struct v4l2_ctrl_hevc_decode_params *dec_params = run->decode_params; 405 + struct rkvdec_hevc_ctx *h265_ctx = ctx->priv; 406 + const struct v4l2_ctrl_hevc_sps *sps = run->sps; 407 + dma_addr_t priv_start_addr = h265_ctx->priv_tbl.dma; 408 + const struct v4l2_pix_format_mplane *dst_fmt; 409 + struct vb2_v4l2_buffer *src_buf = run->base.bufs.src; 410 + struct vb2_v4l2_buffer *dst_buf = run->base.bufs.dst; 411 + struct vdpu383_regs_h26x *regs = &h265_ctx->regs; 412 + const struct v4l2_format *f; 413 + dma_addr_t rlc_addr; 414 + dma_addr_t dst_addr; 415 + u32 hor_virstride; 416 + u32 ver_virstride; 417 + u32 y_virstride; 418 + u32 offset; 419 + u32 pixels; 420 + u32 i; 421 + 422 + memset(regs, 0, sizeof(*regs)); 423 + 424 + /* Set HEVC mode */ 425 + regs->common.reg008_dec_mode = VDPU383_MODE_HEVC; 426 + 427 + /* Set input stream length */ 428 + regs->h26x_params.reg066_stream_len = vb2_get_plane_payload(&src_buf->vb2_buf, 0); 429 + 430 + /* Set strides */ 431 + f = &ctx->decoded_fmt; 432 + dst_fmt = &f->fmt.pix_mp; 433 + hor_virstride = dst_fmt->plane_fmt[0].bytesperline; 434 + ver_virstride = dst_fmt->height; 435 + y_virstride = hor_virstride * ver_virstride; 436 + 437 + pixels = dst_fmt->height * dst_fmt->width; 438 + 439 + regs->h26x_params.reg068_hor_virstride = hor_virstride / 16; 440 + regs->h26x_params.reg069_raster_uv_hor_virstride = hor_virstride / 16; 441 + regs->h26x_params.reg070_y_virstride = y_virstride / 16; 442 + 443 + /* Activate block gating */ 444 + regs->common.reg010_block_gating_en.strmd_auto_gating_e = 1; 445 + regs->common.reg010_block_gating_en.inter_auto_gating_e = 1; 446 + regs->common.reg010_block_gating_en.intra_auto_gating_e = 1; 
447 + regs->common.reg010_block_gating_en.transd_auto_gating_e = 1; 448 + regs->common.reg010_block_gating_en.recon_auto_gating_e = 1; 449 + regs->common.reg010_block_gating_en.filterd_auto_gating_e = 1; 450 + regs->common.reg010_block_gating_en.bus_auto_gating_e = 1; 451 + regs->common.reg010_block_gating_en.ctrl_auto_gating_e = 1; 452 + regs->common.reg010_block_gating_en.rcb_auto_gating_e = 1; 453 + regs->common.reg010_block_gating_en.err_prc_auto_gating_e = 1; 454 + 455 + /* Set timeout threshold */ 456 + if (pixels < RKVDEC_1080P_PIXELS) 457 + regs->common.reg013_core_timeout_threshold = VDPU383_TIMEOUT_1080p; 458 + else if (pixels < RKVDEC_4K_PIXELS) 459 + regs->common.reg013_core_timeout_threshold = VDPU383_TIMEOUT_4K; 460 + else if (pixels < RKVDEC_8K_PIXELS) 461 + regs->common.reg013_core_timeout_threshold = VDPU383_TIMEOUT_8K; 462 + else 463 + regs->common.reg013_core_timeout_threshold = VDPU383_TIMEOUT_MAX; 464 + 465 + regs->common.reg016_error_ctrl_set.error_proc_disable = 1; 466 + 467 + /* Set ref pic address & poc */ 468 + for (i = 0; i < ARRAY_SIZE(dec_params->dpb) - 1; i++) { 469 + struct vb2_buffer *vb_buf = get_ref_buf(ctx, run, i); 470 + dma_addr_t buf_dma; 471 + 472 + buf_dma = vb2_dma_contig_plane_dma_addr(vb_buf, 0); 473 + 474 + /* Set reference addresses */ 475 + regs->h26x_addr.reg170_185_ref_base[i] = buf_dma; 476 + regs->h26x_addr.reg195_210_payload_st_ref_base[i] = buf_dma; 477 + 478 + /* Set COLMV addresses */ 479 + regs->h26x_addr.reg217_232_colmv_ref_base[i] = buf_dma + ctx->colmv_offset; 480 + } 481 + 482 + /* Set rlc base address (input stream) */ 483 + rlc_addr = vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 0); 484 + regs->common_addr.reg128_strm_base = rlc_addr; 485 + 486 + /* Set output base address */ 487 + dst_addr = vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0); 488 + regs->h26x_addr.reg168_decout_base = dst_addr; 489 + regs->h26x_addr.reg169_error_ref_base = dst_addr; 490 + regs->h26x_addr.reg192_payload_st_cur_base 
= dst_addr; 491 + 492 + /* Set colmv address */ 493 + regs->h26x_addr.reg216_colmv_cur_base = dst_addr + ctx->colmv_offset; 494 + 495 + /* Set RCB addresses */ 496 + for (i = 0; i < rkvdec_rcb_buf_count(ctx); i++) { 497 + regs->common_addr.reg140_162_rcb_info[i].offset = rkvdec_rcb_buf_dma_addr(ctx, i); 498 + regs->common_addr.reg140_162_rcb_info[i].size = rkvdec_rcb_buf_size(ctx, i); 499 + } 500 + 501 + if (sps->flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED) { 502 + /* Set scaling matrix */ 503 + offset = offsetof(struct rkvdec_hevc_priv_tbl, scaling_list); 504 + regs->common_addr.reg132_scanlist_addr = priv_start_addr + offset; 505 + } 506 + 507 + /* Set hw pps address */ 508 + offset = offsetof(struct rkvdec_hevc_priv_tbl, param_set); 509 + regs->common_addr.reg131_gbl_base = priv_start_addr + offset; 510 + regs->h26x_params.reg067_global_len = sizeof(struct rkvdec_hevc_sps_pps) / 16; 511 + 512 + /* Set hw rps address */ 513 + offset = offsetof(struct rkvdec_hevc_priv_tbl, rps); 514 + regs->common_addr.reg129_rps_base = priv_start_addr + offset; 515 + 516 + /* Set cabac table */ 517 + offset = offsetof(struct rkvdec_hevc_priv_tbl, cabac_table); 518 + regs->common_addr.reg130_cabactbl_base = priv_start_addr + offset; 519 + 520 + rkvdec_write_regs(ctx); 521 + } 522 + 523 + static int rkvdec_hevc_validate_sps(struct rkvdec_ctx *ctx, 524 + const struct v4l2_ctrl_hevc_sps *sps) 525 + { 526 + if (sps->chroma_format_idc != 1) 527 + /* Only 4:2:0 is supported */ 528 + return -EINVAL; 529 + 530 + if (sps->bit_depth_luma_minus8 != sps->bit_depth_chroma_minus8) 531 + /* Luma and chroma bit depth mismatch */ 532 + return -EINVAL; 533 + 534 + if (sps->bit_depth_luma_minus8 != 0 && sps->bit_depth_luma_minus8 != 2) 535 + /* Only 8-bit and 10-bit are supported */ 536 + return -EINVAL; 537 + 538 + if (sps->pic_width_in_luma_samples > ctx->coded_fmt.fmt.pix_mp.width || 539 + sps->pic_height_in_luma_samples > ctx->coded_fmt.fmt.pix_mp.height) 540 + return -EINVAL; 541 + 542 + 
return 0; 543 + } 544 + 545 + static int rkvdec_hevc_start(struct rkvdec_ctx *ctx) 546 + { 547 + struct rkvdec_dev *rkvdec = ctx->dev; 548 + struct rkvdec_hevc_priv_tbl *priv_tbl; 549 + struct rkvdec_hevc_ctx *hevc_ctx; 550 + struct v4l2_ctrl *ctrl; 551 + int ret; 552 + 553 + ctrl = v4l2_ctrl_find(&ctx->ctrl_hdl, 554 + V4L2_CID_STATELESS_HEVC_SPS); 555 + if (!ctrl) 556 + return -EINVAL; 557 + 558 + ret = rkvdec_hevc_validate_sps(ctx, ctrl->p_new.p_hevc_sps); 559 + if (ret) 560 + return ret; 561 + 562 + hevc_ctx = kzalloc(sizeof(*hevc_ctx), GFP_KERNEL); 563 + if (!hevc_ctx) 564 + return -ENOMEM; 565 + 566 + priv_tbl = dma_alloc_coherent(rkvdec->dev, sizeof(*priv_tbl), 567 + &hevc_ctx->priv_tbl.dma, GFP_KERNEL); 568 + if (!priv_tbl) { 569 + ret = -ENOMEM; 570 + goto err_free_ctx; 571 + } 572 + 573 + hevc_ctx->priv_tbl.size = sizeof(*priv_tbl); 574 + hevc_ctx->priv_tbl.cpu = priv_tbl; 575 + memcpy(priv_tbl->cabac_table, rkvdec_hevc_cabac_table, 576 + sizeof(rkvdec_hevc_cabac_table)); 577 + 578 + ctx->priv = hevc_ctx; 579 + return 0; 580 + 581 + err_free_ctx: 582 + kfree(hevc_ctx); 583 + return ret; 584 + } 585 + 586 + static void rkvdec_hevc_stop(struct rkvdec_ctx *ctx) 587 + { 588 + struct rkvdec_hevc_ctx *hevc_ctx = ctx->priv; 589 + struct rkvdec_dev *rkvdec = ctx->dev; 590 + 591 + dma_free_coherent(rkvdec->dev, hevc_ctx->priv_tbl.size, 592 + hevc_ctx->priv_tbl.cpu, hevc_ctx->priv_tbl.dma); 593 + kfree(hevc_ctx); 594 + } 595 + 596 + static int rkvdec_hevc_run(struct rkvdec_ctx *ctx) 597 + { 598 + struct rkvdec_dev *rkvdec = ctx->dev; 599 + struct rkvdec_hevc_run run; 600 + struct rkvdec_hevc_ctx *hevc_ctx = ctx->priv; 601 + struct rkvdec_hevc_priv_tbl *tbl = hevc_ctx->priv_tbl.cpu; 602 + u32 timeout_threshold; 603 + 604 + rkvdec_hevc_run_preamble(ctx, &run); 605 + 606 + /* 607 + * On vdpu383, not setting the long and short term ref sets leads to IOMMU page faults. 
608 + * To be on the safe side for this new v4l2 control, write an error in the log and mark 609 + * the buffer as failed by returning an error here. 610 + */ 611 + if ((!ctx->has_sps_lt_rps && run.sps->num_long_term_ref_pics_sps) || 612 + (!ctx->has_sps_st_rps && run.sps->num_short_term_ref_pic_sets)) { 613 + dev_err_ratelimited(rkvdec->dev, "Long and short term RPS not set\n"); 614 + return -EINVAL; 615 + } 616 + 617 + rkvdec_hevc_assemble_hw_scaling_list(ctx, &run, &tbl->scaling_list, 618 + &hevc_ctx->scaling_matrix_cache); 619 + assemble_hw_pps(ctx, &run); 620 + rkvdec_hevc_assemble_hw_rps(&run, &tbl->rps, &hevc_ctx->st_cache); 621 + 622 + config_registers(ctx, &run); 623 + 624 + rkvdec_run_postamble(ctx, &run.base); 625 + 626 + timeout_threshold = hevc_ctx->regs.common.reg013_core_timeout_threshold; 627 + rkvdec_schedule_watchdog(rkvdec, timeout_threshold); 628 + 629 + /* Start decoding! */ 630 + writel(timeout_threshold, rkvdec->link + VDPU383_LINK_TIMEOUT_THRESHOLD); 631 + writel(VDPU383_IP_CRU_MODE, rkvdec->link + VDPU383_LINK_IP_ENABLE); 632 + writel(VDPU383_DEC_E_BIT, rkvdec->link + VDPU383_LINK_DEC_ENABLE); 633 + 634 + return 0; 635 + } 636 + 637 + static int rkvdec_hevc_try_ctrl(struct rkvdec_ctx *ctx, struct v4l2_ctrl *ctrl) 638 + { 639 + if (ctrl->id == V4L2_CID_STATELESS_HEVC_SPS) 640 + return rkvdec_hevc_validate_sps(ctx, ctrl->p_new.p_hevc_sps); 641 + 642 + return 0; 643 + } 644 + 645 + const struct rkvdec_coded_fmt_ops rkvdec_vdpu383_hevc_fmt_ops = { 646 + .adjust_fmt = rkvdec_hevc_adjust_fmt, 647 + .start = rkvdec_hevc_start, 648 + .stop = rkvdec_hevc_stop, 649 + .run = rkvdec_hevc_run, 650 + .try_ctrl = rkvdec_hevc_try_ctrl, 651 + .get_image_fmt = rkvdec_hevc_get_image_fmt, 652 + };
+91
drivers/media/platform/rockchip/rkvdec/rkvdec.c
··· 547 547 548 548 static const struct rkvdec_coded_fmt_desc vdpu383_coded_fmts[] = { 549 549 { 550 + .fourcc = V4L2_PIX_FMT_HEVC_SLICE, 551 + .frmsize = { 552 + .min_width = 64, 553 + .max_width = 65472, 554 + .step_width = 64, 555 + .min_height = 64, 556 + .max_height = 65472, 557 + .step_height = 16, 558 + }, 559 + .ctrls = &vdpu38x_hevc_ctrls, 560 + .ops = &rkvdec_vdpu383_hevc_fmt_ops, 561 + .num_decoded_fmts = ARRAY_SIZE(rkvdec_hevc_decoded_fmts), 562 + .decoded_fmts = rkvdec_hevc_decoded_fmts, 563 + .subsystem_flags = VB2_V4L2_FL_SUPPORTS_M2M_HOLD_CAPTURE_BUF, 564 + }, 565 + { 550 566 .fourcc = V4L2_PIX_FMT_H264_SLICE, 551 567 .frmsize = { 552 568 .min_width = 64, ··· 1524 1508 return variant->ops->irq_handler(ctx); 1525 1509 } 1526 1510 1511 + /* 1512 + * Flip one or more matrices along their main diagonal and flatten them 1513 + * before writing them to memory. 1514 + * Convert: 1515 + * ABCD AEIM 1516 + * EFGH => BFJN => AEIMBFJNCGKODHLP 1517 + * IJKL CGKO 1518 + * MNOP DHLP 1519 + */ 1520 + static void transpose_and_flatten_matrices(u8 *output, const u8 *input, 1521 + int matrices, int row_length) 1522 + { 1523 + int i, j, row, x_offset, matrix_offset, rot_index, y_offset, matrix_size, new_value; 1524 + 1525 + matrix_size = row_length * row_length; 1526 + for (i = 0; i < matrices; i++) { 1527 + row = 0; 1528 + x_offset = 0; 1529 + matrix_offset = i * matrix_size; 1530 + for (j = 0; j < matrix_size; j++) { 1531 + y_offset = j - (row * row_length); 1532 + rot_index = y_offset * row_length + x_offset; 1533 + new_value = *(input + i * matrix_size + j); 1534 + output[matrix_offset + rot_index] = new_value; 1535 + if ((j + 1) % row_length == 0) { 1536 + row += 1; 1537 + x_offset += 1; 1538 + } 1539 + } 1540 + } 1541 + } 1542 + 1543 + /* 1544 + * VDPU383 needs a specific order: 1545 + * The 8x8 flattened matrix is based on 4x4 blocks. 1546 + * Each 4x4 block is written separately in order. 
1547 + * 1548 + * Base data => Transposed VDPU383 transposed 1549 + * 1550 + * ABCDEFGH AIQYaiqy AIQYBJRZ 1551 + * IJKLMNOP BJRZbjrz CKS0DLT1 1552 + * QRSTUVWX CKS0cks6 aiqybjrz 1553 + * YZ012345 => DLT1dlt7 cks6dlt7 1554 + * abcdefgh EMU2emu8 EMU2FNV3 1555 + * ijklmnop FNV3fnv9 GOW4HPX5 1556 + * qrstuvwx GOW4gow# emu8fnv9 1557 + * yz6789#$ HPX5hpx$ gow#hpx$ 1558 + * 1559 + * As the function reads blocks of 4x4 it can be used for both 4x4 and 8x8 matrices. 1560 + * 1561 + */ 1562 + static void vdpu383_flatten_matrices(u8 *output, const u8 *input, int matrices, int row_length) 1563 + { 1564 + u8 block; 1565 + int i, j, matrix_offset, matrix_size, new_value, input_idx, line_offset, block_offset; 1566 + 1567 + matrix_size = row_length * row_length; 1568 + for (i = 0; i < matrices; i++) { 1569 + matrix_offset = i * matrix_size; 1570 + for (j = 0; j < matrix_size; j++) { 1571 + block = j / 16; 1572 + line_offset = (j % 16) / 4; 1573 + block_offset = (block & 1) * 32 + (block & 2) * 2; 1574 + input_idx = ((j % 4) * row_length) + line_offset + block_offset; 1575 + 1576 + new_value = *(input + i * matrix_size + input_idx); 1577 + 1578 + output[matrix_offset + j] = new_value; 1579 + } 1580 + } 1581 + } 1582 + 1527 1583 static void rkvdec_watchdog_func(struct work_struct *work) 1528 1584 { 1529 1585 struct rkvdec_dev *rkvdec; ··· 1657 1569 static const struct rkvdec_variant_ops rk3399_variant_ops = { 1658 1570 .irq_handler = rk3399_irq_handler, 1659 1571 .colmv_size = rkvdec_colmv_size, 1572 + .flatten_matrices = transpose_and_flatten_matrices, 1660 1573 }; 1661 1574 1662 1575 static const struct rkvdec_variant rk3288_rkvdec_variant = { ··· 1701 1612 static const struct rkvdec_variant_ops vdpu381_variant_ops = { 1702 1613 .irq_handler = vdpu381_irq_handler, 1703 1614 .colmv_size = rkvdec_colmv_size, 1615 + .flatten_matrices = transpose_and_flatten_matrices, 1704 1616 }; 1705 1617 1706 1618 static const struct rkvdec_variant vdpu381_variant = { ··· 1728 1638 static const 
struct rkvdec_variant_ops vdpu383_variant_ops = { 1729 1639 .irq_handler = vdpu383_irq_handler, 1730 1640 .colmv_size = vdpu383_colmv_size, 1641 + .flatten_matrices = vdpu383_flatten_matrices, 1731 1642 }; 1732 1643 1733 1644 static const struct rkvdec_variant vdpu383_variant = {
+2
drivers/media/platform/rockchip/rkvdec/rkvdec.h
··· 74 74 struct rkvdec_variant_ops { 75 75 irqreturn_t (*irq_handler)(struct rkvdec_ctx *ctx); 76 76 u32 (*colmv_size)(u16 width, u16 height); 77 + void (*flatten_matrices)(u8 *output, const u8 *input, int matrices, int row_length); 77 78 }; 78 79 79 80 struct rkvdec_variant { ··· 194 193 195 194 /* VDPU383 ops */ 196 195 extern const struct rkvdec_coded_fmt_ops rkvdec_vdpu383_h264_fmt_ops; 196 + extern const struct rkvdec_coded_fmt_ops rkvdec_vdpu383_hevc_fmt_ops; 197 197 198 198 #endif /* RKVDEC_H_ */