Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

media: rkvdec: reduce excessive stack usage in assemble_hw_pps()

The rkvdec_pps has a large set of bitfields, all of which
are misaligned. This causes clang-21 and likely other versions to
produce absolutely awful object code and a warning about very
large stack usage, on targets without unaligned access:

drivers/media/platform/rockchip/rkvdec/rkvdec-vp9.c:966:12: error: stack frame size (1472) exceeds limit (1280) in 'rkvdec_vp9_start' [-Werror,-Wframe-larger-than]

Part of the problem here is how all the bitfield accesses are
inlined into a function that already has large structures on
the stack.

Mark set_field_order_cnt() as noinline_for_stack, and split out
the following accesses in assemble_hw_pps() into another noinline
function, both of which now use around 800 bytes of stack in the
same configuration.

There is clearly still something wrong with clang here, but
splitting it into multiple functions reduces the risk of stack
overflow.

Fixes: fde24907570d ("media: rkvdec: Add H264 support for the VDPU383 variant")
Link: https://godbolt.org/z/acP1eKeq9
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Nicolas Dufresne <nicolas.dufresne@collabora.com>
Signed-off-by: Nicolas Dufresne <nicolas.dufresne@collabora.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>

authored by

Arnd Bergmann and committed by
Mauro Carvalho Chehab
446c6a25 daa87ca4

+27 -23
+27 -23
drivers/media/platform/rockchip/rkvdec/rkvdec-vdpu383-h264.c
··· 130 130 struct vdpu383_regs_h26x regs; 131 131 }; 132 132 133 - static void set_field_order_cnt(struct rkvdec_pps *pps, const struct v4l2_h264_dpb_entry *dpb) 133 + static noinline_for_stack void set_field_order_cnt(struct rkvdec_pps *pps, const struct v4l2_h264_dpb_entry *dpb) 134 134 { 135 135 pps->top_field_order_cnt0 = dpb[0].top_field_order_cnt; 136 136 pps->bot_field_order_cnt0 = dpb[0].bottom_field_order_cnt; ··· 166 166 pps->bot_field_order_cnt15 = dpb[15].bottom_field_order_cnt; 167 167 } 168 168 169 + static noinline_for_stack void set_dec_params(struct rkvdec_pps *pps, const struct v4l2_ctrl_h264_decode_params *dec_params) 170 + { 171 + const struct v4l2_h264_dpb_entry *dpb = dec_params->dpb; 172 + 173 + for (int i = 0; i < ARRAY_SIZE(dec_params->dpb); i++) { 174 + if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM) 175 + pps->is_longterm |= (1 << i); 176 + pps->ref_field_flags |= 177 + (!!(dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_FIELD)) << i; 178 + pps->ref_colmv_use_flag |= 179 + (!!(dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)) << i; 180 + pps->ref_topfield_used |= 181 + (!!(dpb[i].fields & V4L2_H264_TOP_FIELD_REF)) << i; 182 + pps->ref_botfield_used |= 183 + (!!(dpb[i].fields & V4L2_H264_BOTTOM_FIELD_REF)) << i; 184 + } 185 + pps->pic_field_flag = 186 + !!(dec_params->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC); 187 + pps->pic_associated_flag = 188 + !!(dec_params->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD); 189 + 190 + pps->cur_top_field = dec_params->top_field_order_cnt; 191 + pps->cur_bot_field = dec_params->bottom_field_order_cnt; 192 + } 193 + 169 194 static void assemble_hw_pps(struct rkvdec_ctx *ctx, 170 195 struct rkvdec_h264_run *run) 171 196 { ··· 202 177 struct rkvdec_h264_priv_tbl *priv_tbl = h264_ctx->priv_tbl.cpu; 203 178 struct rkvdec_sps_pps *hw_ps; 204 179 u32 pic_width, pic_height; 205 - u32 i; 206 180 207 181 /* 208 182 * HW read the SPS/PPS information from PPS packet index by PPS id. 
··· 285 261 !!(pps->flags & V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT); 286 262 287 263 set_field_order_cnt(&hw_ps->pps, dpb); 264 + set_dec_params(&hw_ps->pps, dec_params); 288 265 289 - for (i = 0; i < ARRAY_SIZE(dec_params->dpb); i++) { 290 - if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM) 291 - hw_ps->pps.is_longterm |= (1 << i); 292 - 293 - hw_ps->pps.ref_field_flags |= 294 - (!!(dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_FIELD)) << i; 295 - hw_ps->pps.ref_colmv_use_flag |= 296 - (!!(dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)) << i; 297 - hw_ps->pps.ref_topfield_used |= 298 - (!!(dpb[i].fields & V4L2_H264_TOP_FIELD_REF)) << i; 299 - hw_ps->pps.ref_botfield_used |= 300 - (!!(dpb[i].fields & V4L2_H264_BOTTOM_FIELD_REF)) << i; 301 - } 302 - 303 - hw_ps->pps.pic_field_flag = 304 - !!(dec_params->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC); 305 - hw_ps->pps.pic_associated_flag = 306 - !!(dec_params->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD); 307 - 308 - hw_ps->pps.cur_top_field = dec_params->top_field_order_cnt; 309 - hw_ps->pps.cur_bot_field = dec_params->bottom_field_order_cnt; 310 266 } 311 267 312 268 static void rkvdec_write_regs(struct rkvdec_ctx *ctx)