Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

codel: annotate data-races in codel_dump_stats()

codel_dump_stats() runs with only RTNL held, so it reads fields
that can be changed concurrently in the qdisc fast path.

Add READ_ONCE()/WRITE_ONCE() annotations.

An alternative would be to acquire the qdisc spinlock, but our long-term
goal is to make qdisc dump operations lockless as much as we can.

tc_codel_xstats fields don't need to be latched atomically,
otherwise this bug would have been caught earlier.

No change in kernel size:

$ scripts/bloat-o-meter -t vmlinux.0 vmlinux
add/remove: 0/0 grow/shrink: 1/1 up/down: 3/-1 (2)
Function old new delta
codel_qdisc_dequeue 2462 2465 +3
codel_dump_stats 250 249 -1
Total: Before=29739919, After=29739921, chg +0.00%

Fixes: 76e3cc126bb2 ("codel: Controlled Delay AQM")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20260407143053.1570620-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

Authored by Eric Dumazet and committed by Jakub Kicinski
ea25e03d dbc2bb4e

+35 -32
+24 -21
include/net/codel_impl.h
··· 120 120 } 121 121 122 122 skb_len = skb_len_func(skb); 123 - vars->ldelay = now - skb_time_func(skb); 123 + WRITE_ONCE(vars->ldelay, now - skb_time_func(skb)); 124 124 125 125 if (unlikely(skb_len > stats->maxpacket)) 126 - stats->maxpacket = skb_len; 126 + WRITE_ONCE(stats->maxpacket, skb_len); 127 127 128 128 if (codel_time_before(vars->ldelay, params->target) || 129 129 *backlog <= params->mtu) { ··· 159 159 160 160 if (!skb) { 161 161 vars->first_above_time = 0; 162 - vars->dropping = false; 162 + WRITE_ONCE(vars->dropping, false); 163 163 return skb; 164 164 } 165 165 now = codel_get_time(); ··· 168 168 if (vars->dropping) { 169 169 if (!drop) { 170 170 /* sojourn time below target - leave dropping state */ 171 - vars->dropping = false; 171 + WRITE_ONCE(vars->dropping, false); 172 172 } else if (codel_time_after_eq(now, vars->drop_next)) { 173 173 /* It's time for the next drop. Drop the current 174 174 * packet and dequeue the next. The dequeue might ··· 180 180 */ 181 181 while (vars->dropping && 182 182 codel_time_after_eq(now, vars->drop_next)) { 183 - vars->count++; /* dont care of possible wrap 184 - * since there is no more divide 185 - */ 183 + /* dont care of possible wrap 184 + * since there is no more divide. 
185 + */ 186 + WRITE_ONCE(vars->count, vars->count + 1); 186 187 codel_Newton_step(vars); 187 188 if (params->ecn && INET_ECN_set_ce(skb)) { 188 - stats->ecn_mark++; 189 - vars->drop_next = 189 + WRITE_ONCE(stats->ecn_mark, 190 + stats->ecn_mark + 1); 191 + WRITE_ONCE(vars->drop_next, 190 192 codel_control_law(vars->drop_next, 191 193 params->interval, 192 - vars->rec_inv_sqrt); 194 + vars->rec_inv_sqrt)); 193 195 goto end; 194 196 } 195 197 stats->drop_len += skb_len_func(skb); ··· 204 202 skb_time_func, 205 203 backlog, now)) { 206 204 /* leave dropping state */ 207 - vars->dropping = false; 205 + WRITE_ONCE(vars->dropping, false); 208 206 } else { 209 207 /* and schedule the next drop */ 210 - vars->drop_next = 208 + WRITE_ONCE(vars->drop_next, 211 209 codel_control_law(vars->drop_next, 212 210 params->interval, 213 - vars->rec_inv_sqrt); 211 + vars->rec_inv_sqrt)); 214 212 } 215 213 } 216 214 } ··· 218 216 u32 delta; 219 217 220 218 if (params->ecn && INET_ECN_set_ce(skb)) { 221 - stats->ecn_mark++; 219 + WRITE_ONCE(stats->ecn_mark, stats->ecn_mark + 1); 222 220 } else { 223 221 stats->drop_len += skb_len_func(skb); 224 222 drop_func(skb, ctx); ··· 229 227 stats, skb_len_func, 230 228 skb_time_func, backlog, now); 231 229 } 232 - vars->dropping = true; 230 + WRITE_ONCE(vars->dropping, true); 233 231 /* if min went above target close to when we last went below it 234 232 * assume that the drop rate that controlled the queue on the 235 233 * last cycle is a good starting point to control it now. ··· 238 236 if (delta > 1 && 239 237 codel_time_before(now - vars->drop_next, 240 238 16 * params->interval)) { 241 - vars->count = delta; 239 + WRITE_ONCE(vars->count, delta); 242 240 /* we dont care if rec_inv_sqrt approximation 243 241 * is not very precise : 244 242 * Next Newton steps will correct it quadratically. 
245 243 */ 246 244 codel_Newton_step(vars); 247 245 } else { 248 - vars->count = 1; 246 + WRITE_ONCE(vars->count, 1); 249 247 vars->rec_inv_sqrt = ~0U >> REC_INV_SQRT_SHIFT; 250 248 } 251 - vars->lastcount = vars->count; 252 - vars->drop_next = codel_control_law(now, params->interval, 253 - vars->rec_inv_sqrt); 249 + WRITE_ONCE(vars->lastcount, vars->count); 250 + WRITE_ONCE(vars->drop_next, 251 + codel_control_law(now, params->interval, 252 + vars->rec_inv_sqrt)); 254 253 } 255 254 end: 256 255 if (skb && codel_time_after(vars->ldelay, params->ce_threshold)) { ··· 265 262 params->ce_threshold_selector)); 266 263 } 267 264 if (set_ce && INET_ECN_set_ce(skb)) 268 - stats->ce_mark++; 265 + WRITE_ONCE(stats->ce_mark, stats->ce_mark + 1); 269 266 } 270 267 return skb; 271 268 }
+11 -11
net/sched/sch_codel.c
··· 85 85 return qdisc_enqueue_tail(skb, sch); 86 86 } 87 87 q = qdisc_priv(sch); 88 - q->drop_overlimit++; 88 + WRITE_ONCE(q->drop_overlimit, q->drop_overlimit + 1); 89 89 return qdisc_drop_reason(skb, sch, to_free, QDISC_DROP_OVERLIMIT); 90 90 } 91 91 ··· 221 221 { 222 222 const struct codel_sched_data *q = qdisc_priv(sch); 223 223 struct tc_codel_xstats st = { 224 - .maxpacket = q->stats.maxpacket, 225 - .count = q->vars.count, 226 - .lastcount = q->vars.lastcount, 227 - .drop_overlimit = q->drop_overlimit, 228 - .ldelay = codel_time_to_us(q->vars.ldelay), 229 - .dropping = q->vars.dropping, 230 - .ecn_mark = q->stats.ecn_mark, 231 - .ce_mark = q->stats.ce_mark, 224 + .maxpacket = READ_ONCE(q->stats.maxpacket), 225 + .count = READ_ONCE(q->vars.count), 226 + .lastcount = READ_ONCE(q->vars.lastcount), 227 + .drop_overlimit = READ_ONCE(q->drop_overlimit), 228 + .ldelay = codel_time_to_us(READ_ONCE(q->vars.ldelay)), 229 + .dropping = READ_ONCE(q->vars.dropping), 230 + .ecn_mark = READ_ONCE(q->stats.ecn_mark), 231 + .ce_mark = READ_ONCE(q->stats.ce_mark), 232 232 }; 233 233 234 - if (q->vars.dropping) { 235 - codel_tdiff_t delta = q->vars.drop_next - codel_get_time(); 234 + if (st.dropping) { 235 + codel_tdiff_t delta = READ_ONCE(q->vars.drop_next) - codel_get_time(); 236 236 237 237 if (delta >= 0) 238 238 st.drop_next = codel_time_to_us(delta);