Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'avoid-compiler-and-iq-oq-reordering'

Vimlesh Kumar says:

====================
avoid compiler and IQ/OQ reordering

Use READ_ONCE() and WRITE_ONCE() on the IQ/OQ queue counters to
prevent the compiler from optimizing away or reordering those
accesses. Read back the IO queue IN_CNT/OUT_CNT registers after
writing them so that the writes are flushed. Move the IQ/OQ
IN_CNT/OUT_CNT updates so they occur before NAPI completion, and
replace napi_complete() with napi_complete_done().
====================

Link: https://patch.msgid.link/20260227091402.1773833-1-vimleshk@marvell.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>

+105 -48
+32 -16
drivers/net/ethernet/marvell/octeon_ep/octep_main.c
··· 554 554 } 555 555 556 556 /** 557 + * octep_update_pkt() - Update IQ/OQ IN/OUT_CNT registers. 558 + * 559 + * @iq: Octeon Tx queue data structure. 560 + * @oq: Octeon Rx queue data structure. 561 + */ 562 + static void octep_update_pkt(struct octep_iq *iq, struct octep_oq *oq) 563 + { 564 + u32 pkts_pend = READ_ONCE(oq->pkts_pending); 565 + u32 last_pkt_count = READ_ONCE(oq->last_pkt_count); 566 + u32 pkts_processed = READ_ONCE(iq->pkts_processed); 567 + u32 pkt_in_done = READ_ONCE(iq->pkt_in_done); 568 + 569 + netdev_dbg(iq->netdev, "enabling intr for Q-%u\n", iq->q_no); 570 + if (pkts_processed) { 571 + writel(pkts_processed, iq->inst_cnt_reg); 572 + readl(iq->inst_cnt_reg); 573 + WRITE_ONCE(iq->pkt_in_done, (pkt_in_done - pkts_processed)); 574 + WRITE_ONCE(iq->pkts_processed, 0); 575 + } 576 + if (last_pkt_count - pkts_pend) { 577 + writel(last_pkt_count - pkts_pend, oq->pkts_sent_reg); 578 + readl(oq->pkts_sent_reg); 579 + WRITE_ONCE(oq->last_pkt_count, pkts_pend); 580 + } 581 + 582 + /* Flush the previous wrties before writing to RESEND bit */ 583 + smp_wmb(); 584 + } 585 + 586 + /** 557 587 * octep_enable_ioq_irq() - Enable MSI-x interrupt of a Tx/Rx queue. 558 588 * 559 589 * @iq: Octeon Tx queue data structure. 
··· 591 561 */ 592 562 static void octep_enable_ioq_irq(struct octep_iq *iq, struct octep_oq *oq) 593 563 { 594 - u32 pkts_pend = oq->pkts_pending; 595 - 596 - netdev_dbg(iq->netdev, "enabling intr for Q-%u\n", iq->q_no); 597 - if (iq->pkts_processed) { 598 - writel(iq->pkts_processed, iq->inst_cnt_reg); 599 - iq->pkt_in_done -= iq->pkts_processed; 600 - iq->pkts_processed = 0; 601 - } 602 - if (oq->last_pkt_count - pkts_pend) { 603 - writel(oq->last_pkt_count - pkts_pend, oq->pkts_sent_reg); 604 - oq->last_pkt_count = pkts_pend; 605 - } 606 - 607 - /* Flush the previous wrties before writing to RESEND bit */ 608 - wmb(); 609 564 writeq(1UL << OCTEP_OQ_INTR_RESEND_BIT, oq->pkts_sent_reg); 610 565 writeq(1UL << OCTEP_IQ_INTR_RESEND_BIT, iq->inst_cnt_reg); 611 566 } ··· 616 601 if (tx_pending || rx_done >= budget) 617 602 return budget; 618 603 619 - napi_complete(napi); 604 + octep_update_pkt(ioq_vector->iq, ioq_vector->oq); 605 + napi_complete_done(napi, rx_done); 620 606 octep_enable_ioq_irq(ioq_vector->iq, ioq_vector->oq); 621 607 return rx_done; 622 608 }
+19 -8
drivers/net/ethernet/marvell/octeon_ep/octep_rx.c
··· 324 324 struct octep_oq *oq) 325 325 { 326 326 u32 pkt_count, new_pkts; 327 + u32 last_pkt_count, pkts_pending; 327 328 328 329 pkt_count = readl(oq->pkts_sent_reg); 329 - new_pkts = pkt_count - oq->last_pkt_count; 330 + last_pkt_count = READ_ONCE(oq->last_pkt_count); 331 + new_pkts = pkt_count - last_pkt_count; 330 332 333 + if (pkt_count < last_pkt_count) { 334 + dev_err(oq->dev, "OQ-%u pkt_count(%u) < oq->last_pkt_count(%u)\n", 335 + oq->q_no, pkt_count, last_pkt_count); 336 + } 331 337 /* Clear the hardware packets counter register if the rx queue is 332 338 * being processed continuously with-in a single interrupt and 333 339 * reached half its max value. ··· 344 338 pkt_count = readl(oq->pkts_sent_reg); 345 339 new_pkts += pkt_count; 346 340 } 347 - oq->last_pkt_count = pkt_count; 348 - oq->pkts_pending += new_pkts; 341 + WRITE_ONCE(oq->last_pkt_count, pkt_count); 342 + pkts_pending = READ_ONCE(oq->pkts_pending); 343 + WRITE_ONCE(oq->pkts_pending, (pkts_pending + new_pkts)); 349 344 return new_pkts; 350 345 } 351 346 ··· 421 414 u16 rx_ol_flags; 422 415 u32 read_idx; 423 416 424 - read_idx = oq->host_read_idx; 417 + read_idx = READ_ONCE(oq->host_read_idx); 425 418 rx_bytes = 0; 426 419 desc_used = 0; 427 420 for (pkt = 0; pkt < pkts_to_process; pkt++) { ··· 506 499 napi_gro_receive(oq->napi, skb); 507 500 } 508 501 509 - oq->host_read_idx = read_idx; 502 + WRITE_ONCE(oq->host_read_idx, read_idx); 510 503 oq->refill_count += desc_used; 511 504 oq->stats->packets += pkt; 512 505 oq->stats->bytes += rx_bytes; ··· 529 522 { 530 523 u32 pkts_available, pkts_processed, total_pkts_processed; 531 524 struct octep_device *oct = oq->octep_dev; 525 + u32 pkts_pending; 532 526 533 527 pkts_available = 0; 534 528 pkts_processed = 0; 535 529 total_pkts_processed = 0; 536 530 while (total_pkts_processed < budget) { 537 531 /* update pending count only when current one exhausted */ 538 - if (oq->pkts_pending == 0) 532 + pkts_pending = READ_ONCE(oq->pkts_pending); 533 + 
if (pkts_pending == 0) 539 534 octep_oq_check_hw_for_pkts(oct, oq); 535 + pkts_pending = READ_ONCE(oq->pkts_pending); 540 536 pkts_available = min(budget - total_pkts_processed, 541 - oq->pkts_pending); 537 + pkts_pending); 542 538 if (!pkts_available) 543 539 break; 544 540 545 541 pkts_processed = __octep_oq_process_rx(oct, oq, 546 542 pkts_available); 547 - oq->pkts_pending -= pkts_processed; 543 + pkts_pending = READ_ONCE(oq->pkts_pending); 544 + WRITE_ONCE(oq->pkts_pending, (pkts_pending - pkts_processed)); 548 545 total_pkts_processed += pkts_processed; 549 546 } 550 547
+34 -16
drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c
··· 286 286 } 287 287 288 288 /** 289 + * octep_vf_update_pkt() - Update IQ/OQ IN/OUT_CNT registers. 290 + * 291 + * @iq: Octeon Tx queue data structure. 292 + * @oq: Octeon Rx queue data structure. 293 + */ 294 + 295 + static void octep_vf_update_pkt(struct octep_vf_iq *iq, struct octep_vf_oq *oq) 296 + { 297 + u32 pkts_pend = READ_ONCE(oq->pkts_pending); 298 + u32 last_pkt_count = READ_ONCE(oq->last_pkt_count); 299 + u32 pkts_processed = READ_ONCE(iq->pkts_processed); 300 + u32 pkt_in_done = READ_ONCE(iq->pkt_in_done); 301 + 302 + netdev_dbg(iq->netdev, "enabling intr for Q-%u\n", iq->q_no); 303 + if (pkts_processed) { 304 + writel(pkts_processed, iq->inst_cnt_reg); 305 + readl(iq->inst_cnt_reg); 306 + WRITE_ONCE(iq->pkt_in_done, (pkt_in_done - pkts_processed)); 307 + WRITE_ONCE(iq->pkts_processed, 0); 308 + } 309 + if (last_pkt_count - pkts_pend) { 310 + writel(last_pkt_count - pkts_pend, oq->pkts_sent_reg); 311 + readl(oq->pkts_sent_reg); 312 + WRITE_ONCE(oq->last_pkt_count, pkts_pend); 313 + } 314 + 315 + /* Flush the previous wrties before writing to RESEND bit */ 316 + smp_wmb(); 317 + } 318 + 319 + /** 289 320 * octep_vf_enable_ioq_irq() - Enable MSI-x interrupt of a Tx/Rx queue. 290 321 * 291 322 * @iq: Octeon Tx queue data structure. 292 323 * @oq: Octeon Rx queue data structure. 
293 324 */ 294 - static void octep_vf_enable_ioq_irq(struct octep_vf_iq *iq, struct octep_vf_oq *oq) 325 + static void octep_vf_enable_ioq_irq(struct octep_vf_iq *iq, 326 + struct octep_vf_oq *oq) 295 327 { 296 - u32 pkts_pend = oq->pkts_pending; 297 - 298 - netdev_dbg(iq->netdev, "enabling intr for Q-%u\n", iq->q_no); 299 - if (iq->pkts_processed) { 300 - writel(iq->pkts_processed, iq->inst_cnt_reg); 301 - iq->pkt_in_done -= iq->pkts_processed; 302 - iq->pkts_processed = 0; 303 - } 304 - if (oq->last_pkt_count - pkts_pend) { 305 - writel(oq->last_pkt_count - pkts_pend, oq->pkts_sent_reg); 306 - oq->last_pkt_count = pkts_pend; 307 - } 308 - 309 - /* Flush the previous wrties before writing to RESEND bit */ 310 - smp_wmb(); 311 328 writeq(1UL << OCTEP_VF_OQ_INTR_RESEND_BIT, oq->pkts_sent_reg); 312 329 writeq(1UL << OCTEP_VF_IQ_INTR_RESEND_BIT, iq->inst_cnt_reg); 313 330 } ··· 350 333 if (tx_pending || rx_done >= budget) 351 334 return budget; 352 335 336 + octep_vf_update_pkt(ioq_vector->iq, ioq_vector->oq); 353 337 if (likely(napi_complete_done(napi, rx_done))) 354 338 octep_vf_enable_ioq_irq(ioq_vector->iq, ioq_vector->oq); 355 339
+20 -8
drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c
··· 325 325 struct octep_vf_oq *oq) 326 326 { 327 327 u32 pkt_count, new_pkts; 328 + u32 last_pkt_count, pkts_pending; 328 329 329 330 pkt_count = readl(oq->pkts_sent_reg); 330 - new_pkts = pkt_count - oq->last_pkt_count; 331 + last_pkt_count = READ_ONCE(oq->last_pkt_count); 332 + new_pkts = pkt_count - last_pkt_count; 333 + 334 + if (pkt_count < last_pkt_count) { 335 + dev_err(oq->dev, "OQ-%u pkt_count(%u) < oq->last_pkt_count(%u)\n", 336 + oq->q_no, pkt_count, last_pkt_count); 337 + } 331 338 332 339 /* Clear the hardware packets counter register if the rx queue is 333 340 * being processed continuously with-in a single interrupt and ··· 346 339 pkt_count = readl(oq->pkts_sent_reg); 347 340 new_pkts += pkt_count; 348 341 } 349 - oq->last_pkt_count = pkt_count; 350 - oq->pkts_pending += new_pkts; 342 + WRITE_ONCE(oq->last_pkt_count, pkt_count); 343 + pkts_pending = READ_ONCE(oq->pkts_pending); 344 + WRITE_ONCE(oq->pkts_pending, (pkts_pending + new_pkts)); 351 345 return new_pkts; 352 346 } 353 347 ··· 377 369 struct sk_buff *skb; 378 370 u32 read_idx; 379 371 380 - read_idx = oq->host_read_idx; 372 + read_idx = READ_ONCE(oq->host_read_idx); 381 373 rx_bytes = 0; 382 374 desc_used = 0; 383 375 for (pkt = 0; pkt < pkts_to_process; pkt++) { ··· 471 463 napi_gro_receive(oq->napi, skb); 472 464 } 473 465 474 - oq->host_read_idx = read_idx; 466 + WRITE_ONCE(oq->host_read_idx, read_idx); 475 467 oq->refill_count += desc_used; 476 468 oq->stats->packets += pkt; 477 469 oq->stats->bytes += rx_bytes; ··· 494 486 { 495 487 u32 pkts_available, pkts_processed, total_pkts_processed; 496 488 struct octep_vf_device *oct = oq->octep_vf_dev; 489 + u32 pkts_pending; 497 490 498 491 pkts_available = 0; 499 492 pkts_processed = 0; 500 493 total_pkts_processed = 0; 501 494 while (total_pkts_processed < budget) { 502 495 /* update pending count only when current one exhausted */ 503 - if (oq->pkts_pending == 0) 496 + pkts_pending = READ_ONCE(oq->pkts_pending); 497 + if (pkts_pending == 
0) 504 498 octep_vf_oq_check_hw_for_pkts(oct, oq); 499 + pkts_pending = READ_ONCE(oq->pkts_pending); 505 500 pkts_available = min(budget - total_pkts_processed, 506 - oq->pkts_pending); 501 + pkts_pending); 507 502 if (!pkts_available) 508 503 break; 509 504 510 505 pkts_processed = __octep_vf_oq_process_rx(oct, oq, 511 506 pkts_available); 512 - oq->pkts_pending -= pkts_processed; 507 + pkts_pending = READ_ONCE(oq->pkts_pending); 508 + WRITE_ONCE(oq->pkts_pending, (pkts_pending - pkts_processed)); 513 509 total_pkts_processed += pkts_processed; 514 510 } 515 511