Merge branch 'eliminate-config_nr_cpus-dependency-in-dpaa-eth-and-enable-compile_test-in-fsl_qbman'

Vladimir Oltean says:

====================
Eliminate CONFIG_NR_CPUS dependency in dpaa-eth and enable COMPILE_TEST in fsl_qbman

Breno's previous attempt at enabling COMPILE_TEST for the fsl_qbman
driver (now included here as patch 5/5) triggered compilation warnings
for large CONFIG_NR_CPUS values:
https://lore.kernel.org/all/202406261920.l5pzM1rj-lkp@intel.com/

Patch 1/5 switches two NR_CPUS arrays in the dpaa-eth driver to dynamic
allocation to avoid that warning. There is more NR_CPUS usage in the
fsl-qbman driver, but that looks relatively harmless and I couldn't find
a good reason to change it.

I noticed, while testing, that the driver doesn't actually work properly
with high CONFIG_NR_CPUS values, and patch 2/5 addresses that.

During code analysis, I identified two places which handle conditions
that can never happen. Patches 3/5 and 4/5 simplify the probing code -
dpaa_fq_setup() - just a little bit.

Finally, patch 5/5 is the one that triggered all of this. Herbert has
given his okay to take it via netdev, despite it touching soc/qbman:
https://lore.kernel.org/all/Zns%2FeVVBc7pdv0yM@gondor.apana.org.au/

Link to v1:
https://lore.kernel.org/netdev/20240710230025.46487-1-vladimir.oltean@nxp.com/
====================

Link: https://patch.msgid.link/20240713225336.1746343-1-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

Jakub Kicinski 2 years ago e6c29506 62fdd170

+69 -39

4 changed files

expand all

drivers

net

ethernet

freescale

dpaa

dpaa_eth.c

dpaa_eth.h

dpaa_ethtool.c

soc

fsl

qbman

Kconfig

+45 -31

drivers/net/ethernet/freescale/dpaa/dpaa_eth.c

··· 371 371 void *type_data) 372 372 { 373 373 struct dpaa_priv *priv = netdev_priv(net_dev); 374 + int num_txqs_per_tc = dpaa_num_txqs_per_tc(); 374 375 struct tc_mqprio_qopt *mqprio = type_data; 375 376 u8 num_tc; 376 377 int i; ··· 399 398 netdev_set_num_tc(net_dev, num_tc); 400 399 401 400 for (i = 0; i < num_tc; i++) 402 - netdev_set_tc_queue(net_dev, i, DPAA_TC_TXQ_NUM, 403 - i * DPAA_TC_TXQ_NUM); 401 + netdev_set_tc_queue(net_dev, i, num_txqs_per_tc, 402 + i * num_txqs_per_tc); 404 403 405 404 out: 406 405 priv->num_tc = num_tc ? : 1; 407 - netif_set_real_num_tx_queues(net_dev, priv->num_tc * DPAA_TC_TXQ_NUM); 406 + netif_set_real_num_tx_queues(net_dev, priv->num_tc * num_txqs_per_tc); 408 407 return 0; 409 408 } 410 409 ··· 650 649 fq->wq = 6; 651 650 break; 652 651 case FQ_TYPE_TX: 653 - switch (idx / DPAA_TC_TXQ_NUM) { 652 + switch (idx / dpaa_num_txqs_per_tc()) { 654 653 case 0: 655 654 /* Low priority (best effort) */ 656 655 fq->wq = 6; ··· 668 667 fq->wq = 0; 669 668 break; 670 669 default: 671 - WARN(1, "Too many TX FQs: more than %d!\n", 672 - DPAA_ETH_TXQ_NUM); 670 + WARN(1, "Too many TX FQs: more than %zu!\n", 671 + dpaa_max_num_txqs()); 673 672 } 674 673 break; 675 674 default: ··· 741 740 742 741 port_fqs->rx_pcdq = &dpaa_fq[0]; 743 742 744 - if (!dpaa_fq_alloc(dev, 0, DPAA_ETH_TXQ_NUM, list, FQ_TYPE_TX_CONF_MQ)) 743 + if (!dpaa_fq_alloc(dev, 0, dpaa_max_num_txqs(), list, 744 + FQ_TYPE_TX_CONF_MQ)) 745 745 goto fq_alloc_failed; 746 746 747 747 dpaa_fq = dpaa_fq_alloc(dev, 0, 1, list, FQ_TYPE_TX_ERROR); ··· 757 755 758 756 port_fqs->tx_defq = &dpaa_fq[0]; 759 757 760 - if (!dpaa_fq_alloc(dev, 0, DPAA_ETH_TXQ_NUM, list, FQ_TYPE_TX)) 758 + if (!dpaa_fq_alloc(dev, 0, dpaa_max_num_txqs(), list, FQ_TYPE_TX)) 761 759 goto fq_alloc_failed; 762 760 763 761 return 0; ··· 933 931 } 934 932 } 935 933 936 - static void dpaa_fq_setup(struct dpaa_priv *priv, 937 - const struct dpaa_fq_cbs *fq_cbs, 938 - struct fman_port *tx_port) 934 + static int 
dpaa_fq_setup(struct dpaa_priv *priv, 935 + const struct dpaa_fq_cbs *fq_cbs, 936 + struct fman_port *tx_port) 939 937 { 940 938 int egress_cnt = 0, conf_cnt = 0, num_portals = 0, portal_cnt = 0, cpu; 941 939 const cpumask_t *affine_cpus = qman_affine_cpus(); 942 - u16 channels[NR_CPUS]; 943 940 struct dpaa_fq *fq; 941 + u16 *channels; 942 + 943 + channels = kcalloc(num_possible_cpus(), sizeof(u16), GFP_KERNEL); 944 + if (!channels) 945 + return -ENOMEM; 944 946 945 947 for_each_cpu_and(cpu, affine_cpus, cpu_online_mask) 946 948 channels[num_portals++] = qman_affine_channel(cpu); ··· 971 965 case FQ_TYPE_TX: 972 966 dpaa_setup_egress(priv, fq, tx_port, 973 967 &fq_cbs->egress_ern); 974 - /* If we have more Tx queues than the number of cores, 975 - * just ignore the extra ones. 976 - */ 977 - if (egress_cnt < DPAA_ETH_TXQ_NUM) 978 - priv->egress_fqs[egress_cnt++] = &fq->fq_base; 968 + priv->egress_fqs[egress_cnt++] = &fq->fq_base; 979 969 break; 980 970 case FQ_TYPE_TX_CONF_MQ: 981 971 priv->conf_fqs[conf_cnt++] = &fq->fq_base; ··· 989 987 } 990 988 } 991 989 992 - /* Make sure all CPUs receive a corresponding Tx queue. 
*/ 993 - while (egress_cnt < DPAA_ETH_TXQ_NUM) { 994 - list_for_each_entry(fq, &priv->dpaa_fq_list, list) { 995 - if (fq->fq_type != FQ_TYPE_TX) 996 - continue; 997 - priv->egress_fqs[egress_cnt++] = &fq->fq_base; 998 - if (egress_cnt == DPAA_ETH_TXQ_NUM) 999 - break; 1000 - } 1001 - } 990 + kfree(channels); 991 + 992 + return 0; 1002 993 } 1003 994 1004 995 static inline int dpaa_tx_fq_to_id(const struct dpaa_priv *priv, ··· 999 1004 { 1000 1005 int i; 1001 1006 1002 - for (i = 0; i < DPAA_ETH_TXQ_NUM; i++) 1007 + for (i = 0; i < dpaa_max_num_txqs(); i++) 1003 1008 if (priv->egress_fqs[i] == tx_fq) 1004 1009 return i; 1005 1010 ··· 3319 3324 /* Allocate this early, so we can store relevant information in 3320 3325 * the private area 3321 3326 */ 3322 - net_dev = alloc_etherdev_mq(sizeof(*priv), DPAA_ETH_TXQ_NUM); 3327 + net_dev = alloc_etherdev_mq(sizeof(*priv), dpaa_max_num_txqs()); 3323 3328 if (!net_dev) { 3324 3329 dev_err(dev, "alloc_etherdev_mq() failed\n"); 3325 3330 return -ENOMEM; ··· 3333 3338 priv->net_dev = net_dev; 3334 3339 3335 3340 priv->msg_enable = netif_msg_init(debug, DPAA_MSG_DEFAULT); 3341 + 3342 + priv->egress_fqs = devm_kcalloc(dev, dpaa_max_num_txqs(), 3343 + sizeof(*priv->egress_fqs), 3344 + GFP_KERNEL); 3345 + if (!priv->egress_fqs) { 3346 + err = -ENOMEM; 3347 + goto free_netdev; 3348 + } 3349 + 3350 + priv->conf_fqs = devm_kcalloc(dev, dpaa_max_num_txqs(), 3351 + sizeof(*priv->conf_fqs), 3352 + GFP_KERNEL); 3353 + if (!priv->conf_fqs) { 3354 + err = -ENOMEM; 3355 + goto free_netdev; 3356 + } 3336 3357 3337 3358 mac_dev = dpaa_mac_dev_get(pdev); 3338 3359 if (IS_ERR(mac_dev)) { ··· 3427 3416 */ 3428 3417 dpaa_eth_add_channel(priv->channel, &pdev->dev); 3429 3418 3430 - dpaa_fq_setup(priv, &dpaa_fq_cbs, priv->mac_dev->port[TX]); 3419 + err = dpaa_fq_setup(priv, &dpaa_fq_cbs, priv->mac_dev->port[TX]); 3420 + if (err) 3421 + goto free_dpaa_bps; 3431 3422 3432 3423 /* Create a congestion group for this netdev, with 3433 3424 * 
dynamically-allocated CGR ID. ··· 3475 3462 } 3476 3463 3477 3464 priv->num_tc = 1; 3478 - netif_set_real_num_tx_queues(net_dev, priv->num_tc * DPAA_TC_TXQ_NUM); 3465 + netif_set_real_num_tx_queues(net_dev, 3466 + priv->num_tc * dpaa_num_txqs_per_tc()); 3479 3467 3480 3468 /* Initialize NAPI */ 3481 3469 err = dpaa_napi_add(net_dev);

+14 -6

drivers/net/ethernet/freescale/dpaa/dpaa_eth.h

··· 18 18 19 19 /* Number of prioritised traffic classes */ 20 20 #define DPAA_TC_NUM 4 21 - /* Number of Tx queues per traffic class */ 22 - #define DPAA_TC_TXQ_NUM NR_CPUS 23 - /* Total number of Tx queues */ 24 - #define DPAA_ETH_TXQ_NUM (DPAA_TC_NUM * DPAA_TC_TXQ_NUM) 25 21 26 22 /* More detailed FQ types - used for fine-grained WQ assignments */ 27 23 enum dpaa_fq_type { ··· 138 142 struct mac_device *mac_dev; 139 143 struct device *rx_dma_dev; 140 144 struct device *tx_dma_dev; 141 - struct qman_fq *egress_fqs[DPAA_ETH_TXQ_NUM]; 142 - struct qman_fq *conf_fqs[DPAA_ETH_TXQ_NUM]; 145 + struct qman_fq **egress_fqs; 146 + struct qman_fq **conf_fqs; 143 147 144 148 u16 channel; 145 149 struct list_head dpaa_fq_list; ··· 181 185 /* from dpaa_eth_sysfs.c */ 182 186 void dpaa_eth_sysfs_remove(struct device *dev); 183 187 void dpaa_eth_sysfs_init(struct device *dev); 188 + 189 + static inline size_t dpaa_num_txqs_per_tc(void) 190 + { 191 + return num_possible_cpus(); 192 + } 193 + 194 + /* Total number of Tx queues */ 195 + static inline size_t dpaa_max_num_txqs(void) 196 + { 197 + return DPAA_TC_NUM * dpaa_num_txqs_per_tc(); 198 + } 199 + 184 200 #endif /* __DPAA_H */

+9 -1

drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c

··· 457 457 struct netlink_ext_ack *extack) 458 458 { 459 459 const cpumask_t *cpus = qman_affine_cpus(); 460 - bool needs_revert[NR_CPUS] = {false}; 461 460 struct qman_portal *portal; 462 461 u32 period, prev_period; 463 462 u8 thresh, prev_thresh; 463 + bool *needs_revert; 464 464 int cpu, res; 465 + 466 + needs_revert = kcalloc(num_possible_cpus(), sizeof(bool), GFP_KERNEL); 467 + if (!needs_revert) 468 + return -ENOMEM; 465 469 466 470 period = c->rx_coalesce_usecs; 467 471 thresh = c->rx_max_coalesced_frames; ··· 489 485 needs_revert[cpu] = true; 490 486 } 491 487 488 + kfree(needs_revert); 489 + 492 490 return 0; 493 491 494 492 revert_values: ··· 503 497 qman_portal_set_iperiod(portal, prev_period); 504 498 qman_dqrr_set_ithresh(portal, prev_thresh); 505 499 } 500 + 501 + kfree(needs_revert); 506 502 507 503 return res; 508 504 }

+1 -1

drivers/soc/fsl/qbman/Kconfig

··· 1 1 # SPDX-License-Identifier: GPL-2.0-only 2 2 menuconfig FSL_DPAA 3 3 bool "QorIQ DPAA1 framework support" 4 - depends on ((FSL_SOC_BOOKE || ARCH_LAYERSCAPE) && ARCH_DMA_ADDR_T_64BIT) 4 + depends on ((FSL_SOC_BOOKE || ARCH_LAYERSCAPE || COMPILE_TEST) && ARCH_DMA_ADDR_T_64BIT) 5 5 select GENERIC_ALLOCATOR 6 6 help 7 7 The Freescale Data Path Acceleration Architecture (DPAA) is a set of

Configure Feed

Configure Feed