Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

net/sched: sch_pie: annotate data-races in pie_dump_stats()

pie_dump_stats() runs with only RTNL held (the qdisc spinlock is not
taken), while reading fields that can be changed concurrently in the
qdisc fast path.

Add READ_ONCE()/WRITE_ONCE() annotations.

An alternative would be to acquire the qdisc spinlock, but our long-term
goal is to make qdisc dump operations lockless as much as we can.

tc_pie_xstats fields don't need to be latched atomically,
otherwise this bug would have been caught earlier.

Fixes: edb09eb17ed8 ("net: sched: do not acquire qdisc spinlock in qdisc/class stats dump")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Jamal Hadi Salim <jhs@mojatatu.com>
Link: https://patch.msgid.link/20260421142944.4009941-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Eric Dumazet and committed by
Jakub Kicinski
5154561d a6edf2cd

+20 -20
+1 -1
include/net/pie.h
··· 104 104 vars->dq_tstamp = DTIME_INVALID; 105 105 vars->accu_prob = 0; 106 106 vars->dq_count = DQCOUNT_INVALID; 107 - vars->avg_dq_rate = 0; 107 + WRITE_ONCE(vars->avg_dq_rate, 0); 108 108 } 109 109 110 110 static inline struct pie_skb_cb *get_pie_cb(const struct sk_buff *skb)
+19 -19
net/sched/sch_pie.c
··· 90 90 bool enqueue = false; 91 91 92 92 if (unlikely(qdisc_qlen(sch) >= sch->limit)) { 93 - q->stats.overlimit++; 93 + WRITE_ONCE(q->stats.overlimit, q->stats.overlimit + 1); 94 94 goto out; 95 95 } 96 96 ··· 104 104 /* If packet is ecn capable, mark it if drop probability 105 105 * is lower than 10%, else drop it. 106 106 */ 107 - q->stats.ecn_mark++; 107 + WRITE_ONCE(q->stats.ecn_mark, q->stats.ecn_mark + 1); 108 108 enqueue = true; 109 109 } 110 110 ··· 114 114 if (!q->params.dq_rate_estimator) 115 115 pie_set_enqueue_time(skb); 116 116 117 - q->stats.packets_in++; 117 + WRITE_ONCE(q->stats.packets_in, q->stats.packets_in + 1); 118 118 if (qdisc_qlen(sch) > q->stats.maxq) 119 - q->stats.maxq = qdisc_qlen(sch); 119 + WRITE_ONCE(q->stats.maxq, qdisc_qlen(sch)); 120 120 121 121 return qdisc_enqueue_tail(skb, sch); 122 122 } 123 123 124 124 out: 125 - q->stats.dropped++; 125 + WRITE_ONCE(q->stats.dropped, q->stats.dropped + 1); 126 126 q->vars.accu_prob = 0; 127 127 return qdisc_drop_reason(skb, sch, to_free, reason); 128 128 } ··· 267 267 count = count / dtime; 268 268 269 269 if (vars->avg_dq_rate == 0) 270 - vars->avg_dq_rate = count; 270 + WRITE_ONCE(vars->avg_dq_rate, count); 271 271 else 272 - vars->avg_dq_rate = 272 + WRITE_ONCE(vars->avg_dq_rate, 273 273 (vars->avg_dq_rate - 274 - (vars->avg_dq_rate >> 3)) + (count >> 3); 274 + (vars->avg_dq_rate >> 3)) + (count >> 3)); 275 275 276 276 /* If the queue has receded below the threshold, we hold 277 277 * on to the last drain rate calculated, else we reset ··· 381 381 if (delta > 0) { 382 382 /* prevent overflow */ 383 383 if (vars->prob < oldprob) { 384 - vars->prob = MAX_PROB; 384 + WRITE_ONCE(vars->prob, MAX_PROB); 385 385 /* Prevent normalization error. 
If probability is at 386 386 * maximum value already, we normalize it here, and 387 387 * skip the check to do a non-linear drop in the next ··· 392 392 } else { 393 393 /* prevent underflow */ 394 394 if (vars->prob > oldprob) 395 - vars->prob = 0; 395 + WRITE_ONCE(vars->prob, 0); 396 396 } 397 397 398 398 /* Non-linear drop in probability: Reduce drop probability quickly if ··· 403 403 /* Reduce drop probability to 98.4% */ 404 404 vars->prob -= vars->prob / 64; 405 405 406 - vars->qdelay = qdelay; 406 + WRITE_ONCE(vars->qdelay, qdelay); 407 407 vars->backlog_old = backlog; 408 408 409 409 /* We restart the measurement cycle if the following conditions are met ··· 502 502 struct pie_sched_data *q = qdisc_priv(sch); 503 503 struct tc_pie_xstats st = { 504 504 .prob = q->vars.prob << BITS_PER_BYTE, 505 - .delay = ((u32)PSCHED_TICKS2NS(q->vars.qdelay)) / 505 + .delay = ((u32)PSCHED_TICKS2NS(READ_ONCE(q->vars.qdelay))) / 506 506 NSEC_PER_USEC, 507 - .packets_in = q->stats.packets_in, 508 - .overlimit = q->stats.overlimit, 509 - .maxq = q->stats.maxq, 510 - .dropped = q->stats.dropped, 511 - .ecn_mark = q->stats.ecn_mark, 507 + .packets_in = READ_ONCE(q->stats.packets_in), 508 + .overlimit = READ_ONCE(q->stats.overlimit), 509 + .maxq = READ_ONCE(q->stats.maxq), 510 + .dropped = READ_ONCE(q->stats.dropped), 511 + .ecn_mark = READ_ONCE(q->stats.ecn_mark), 512 512 }; 513 513 514 514 /* avg_dq_rate is only valid if dq_rate_estimator is enabled */ 515 515 st.dq_rate_estimating = q->params.dq_rate_estimator; 516 516 517 517 /* unscale and return dq_rate in bytes per sec */ 518 - if (q->params.dq_rate_estimator) 519 - st.avg_dq_rate = q->vars.avg_dq_rate * 518 + if (st.dq_rate_estimating) 519 + st.avg_dq_rate = READ_ONCE(q->vars.avg_dq_rate) * 520 520 (PSCHED_TICKS_PER_SEC) >> PIE_SCALE; 521 521 522 522 return gnet_stats_copy_app(d, &st, sizeof(st));