Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'gve-support-larger-ring-sizes-in-dqo-qpl-mode'

Max Yuan says:

====================
gve: Support larger ring sizes in DQO-QPL mode

This patch series updates the gve driver to improve Queue Page List
(QPL) management and enable support for larger ring sizes when using the
DQO-QPL queue format.

Previously, the driver used hardcoded multipliers to determine the
number of pages to register for QPLs (e.g., 2x ring size for RX). This
rigid approach made it difficult to support larger ring sizes without
potentially exceeding the "max_registered_pages" limit reported by the
device.

The first patch introduces a unified and flexible logic for calculating
QPL page requirements. It balances TX and RX page allocations based on
the configured ring sizes and scales the total count down proportionally
if it would otherwise exceed the device's global registration limit.

The second patch leverages this new flexibility to stop ignoring the
maximum ring size supported by the device in DQO-QPL mode. Users can now
configure ring sizes up to the device-reported maximum, as the driver
will automatically adjust the QPL size to stay within allowed memory
bounds.
====================

Link: https://patch.msgid.link/20260225182342.1049816-1-joshwash@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+58 -41
+7 -11
drivers/net/ethernet/google/gve/gve.h
··· 79 79 80 80 #define GVE_DEFAULT_HEADER_BUFFER_SIZE 128 81 81 82 - #define DQO_QPL_DEFAULT_TX_PAGES 512 83 - 84 82 /* Maximum TSO size supported on DQO */ 85 83 #define GVE_DQO_TX_MAX 0x3FFFF 86 84 ··· 709 711 /* Parameters for allocating resources for tx queues */ 710 712 struct gve_tx_alloc_rings_cfg { 711 713 struct gve_tx_queue_config *qcfg; 714 + u16 pages_per_qpl; 712 715 713 716 u16 num_xdp_rings; 714 717 ··· 725 726 /* tx config is also needed to determine QPL ids */ 726 727 struct gve_rx_queue_config *qcfg_rx; 727 728 struct gve_tx_queue_config *qcfg_tx; 729 + u16 pages_per_qpl; 728 730 729 731 u16 ring_size; 730 732 u16 packet_buffer_size; ··· 816 816 u16 min_rx_desc_cnt; 817 817 bool modify_ring_size_enabled; 818 818 bool default_min_ring_size; 819 - u16 tx_pages_per_qpl; /* Suggested number of pages per qpl for TX queues by NIC */ 819 + u16 tx_pages_per_qpl; 820 + u16 rx_pages_per_qpl; 820 821 u64 max_registered_pages; 821 822 u64 num_registered_pages; /* num pages registered with NIC */ 822 823 struct bpf_prog *xdp_prog; /* XDP BPF program */ ··· 1151 1150 return gve_get_rx_qpl_id(tx_cfg, 0); 1152 1151 } 1153 1152 1154 - static inline u32 gve_get_rx_pages_per_qpl_dqo(u32 rx_desc_cnt) 1155 - { 1156 - /* For DQO, page count should be more than ring size for 1157 - * out-of-order completions. Set it to two times of ring size. 1158 - */ 1159 - return 2 * rx_desc_cnt; 1160 - } 1161 - 1162 1153 /* Returns the correct dma direction for tx and rx qpls */ 1163 1154 static inline enum dma_data_direction gve_qpl_dma_dir(struct gve_priv *priv, 1164 1155 int id) ··· 1301 1308 void gve_get_curr_alloc_cfgs(struct gve_priv *priv, 1302 1309 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, 1303 1310 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg); 1311 + void gve_update_num_qpl_pages(struct gve_priv *priv, 1312 + struct gve_rx_alloc_rings_cfg *rx_alloc_cfg, 1313 + struct gve_tx_alloc_rings_cfg *tx_alloc_cfg); 1304 1314 int gve_adjust_config(struct gve_priv *priv, 1305 1315 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, 1306 1316 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg);
+4 -14
drivers/net/ethernet/google/gve/gve_adminq.c
··· 970 970 priv->dev->max_mtu = be16_to_cpu(dev_op_jumbo_frames->max_mtu); 971 971 } 972 972 973 - /* Override pages for qpl for DQO-QPL */ 974 - if (dev_op_dqo_qpl) { 975 - priv->tx_pages_per_qpl = 976 - be16_to_cpu(dev_op_dqo_qpl->tx_pages_per_qpl); 977 - if (priv->tx_pages_per_qpl == 0) 978 - priv->tx_pages_per_qpl = DQO_QPL_DEFAULT_TX_PAGES; 979 - } 980 - 981 973 if (dev_op_buffer_sizes && 982 974 (supported_features_mask & GVE_SUP_BUFFER_SIZES_MASK)) { 983 975 priv->max_rx_buffer_size = ··· 989 997 if (dev_op_modify_ring && 990 998 (supported_features_mask & GVE_SUP_MODIFY_RING_MASK)) { 991 999 priv->modify_ring_size_enabled = true; 992 - 993 - /* max ring size for DQO QPL should not be overwritten because of device limit */ 994 - if (priv->queue_format != GVE_DQO_QPL_FORMAT) { 995 - priv->max_rx_desc_cnt = be16_to_cpu(dev_op_modify_ring->max_rx_ring_size); 996 - priv->max_tx_desc_cnt = be16_to_cpu(dev_op_modify_ring->max_tx_ring_size); 997 - } 1000 + priv->max_rx_desc_cnt = 1001 + be16_to_cpu(dev_op_modify_ring->max_rx_ring_size); 1002 + priv->max_tx_desc_cnt = 1003 + be16_to_cpu(dev_op_modify_ring->max_tx_ring_size); 998 1004 if (priv->default_min_ring_size) { 999 1005 /* If device hasn't provided minimums, use default minimums */ 1000 1006 priv->min_tx_desc_cnt = GVE_DEFAULT_MIN_TX_RING_SIZE;
+1 -1
drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c
··· 133 133 u32 idx; 134 134 135 135 idx = rx->dqo.next_qpl_page_idx; 136 - if (idx >= gve_get_rx_pages_per_qpl_dqo(priv->rx_desc_cnt)) { 136 + if (idx >= priv->rx_pages_per_qpl) { 137 137 net_err_ratelimited("%s: Out of QPL pages\n", 138 138 priv->dev->name); 139 139 return -ENOMEM;
+41
drivers/net/ethernet/google/gve/gve_main.c
··· 11 11 #include <linux/filter.h> 12 12 #include <linux/interrupt.h> 13 13 #include <linux/irq.h> 14 + #include <linux/math64.h> 14 15 #include <linux/module.h> 15 16 #include <linux/pci.h> 16 17 #include <linux/sched.h> ··· 967 966 cfg->qcfg = &priv->tx_cfg; 968 967 cfg->raw_addressing = !gve_is_qpl(priv); 969 968 cfg->ring_size = priv->tx_desc_cnt; 969 + cfg->pages_per_qpl = priv->tx_pages_per_qpl; 970 970 cfg->num_xdp_rings = cfg->qcfg->num_xdp_queues; 971 971 cfg->tx = priv->tx; 972 972 } ··· 999 997 } 1000 998 } 1001 999 1000 + void gve_update_num_qpl_pages(struct gve_priv *priv, 1001 + struct gve_rx_alloc_rings_cfg *rx_alloc_cfg, 1002 + struct gve_tx_alloc_rings_cfg *tx_alloc_cfg) 1003 + { 1004 + u64 ideal_tx_pages, ideal_rx_pages; 1005 + u16 tx_num_queues, rx_num_queues; 1006 + u64 max_pages, tx_pages; 1007 + 1008 + if (priv->queue_format == GVE_GQI_QPL_FORMAT) { 1009 + rx_alloc_cfg->pages_per_qpl = rx_alloc_cfg->ring_size; 1010 + } else if (priv->queue_format == GVE_DQO_QPL_FORMAT) { 1011 + /* 1012 + * We want 2 pages per RX descriptor and half a page per TX 1013 + * descriptor, which means the fraction ideal_tx_pages / 1014 + * (ideal_tx_pages + ideal_rx_pages) of the pages we allocate 1015 + * should be for TX. Shrink proportionally as necessary to avoid 1016 + * allocating more than max_registered_pages total pages. 1017 + */ 1018 + tx_num_queues = tx_alloc_cfg->qcfg->num_queues; 1019 + rx_num_queues = rx_alloc_cfg->qcfg_rx->num_queues; 1020 + 1021 + ideal_tx_pages = tx_alloc_cfg->ring_size * tx_num_queues / 2; 1022 + ideal_rx_pages = rx_alloc_cfg->ring_size * rx_num_queues * 2; 1023 + max_pages = min(priv->max_registered_pages, 1024 + ideal_tx_pages + ideal_rx_pages); 1025 + 1026 + tx_pages = div64_u64(max_pages * ideal_tx_pages, 1027 + ideal_tx_pages + ideal_rx_pages); 1028 + tx_alloc_cfg->pages_per_qpl = div_u64(tx_pages, tx_num_queues); 1029 + rx_alloc_cfg->pages_per_qpl = div_u64(max_pages - tx_pages, 1030 + rx_num_queues); 1031 + } 1032 + } 1033 + 1002 1034 static int gve_queues_mem_alloc(struct gve_priv *priv, 1003 1035 struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, 1004 1036 struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) 1005 1037 { 1006 1038 int err; 1039 + 1040 + gve_update_num_qpl_pages(priv, rx_alloc_cfg, tx_alloc_cfg); 1007 1041 1008 1042 if (gve_is_gqi(priv)) 1009 1043 err = gve_tx_alloc_rings_gqi(priv, tx_alloc_cfg); ··· 1331 1293 cfg->raw_addressing = !gve_is_qpl(priv); 1332 1294 cfg->enable_header_split = priv->header_split_enabled; 1333 1295 cfg->ring_size = priv->rx_desc_cnt; 1296 + cfg->pages_per_qpl = priv->rx_pages_per_qpl; 1334 1297 cfg->packet_buffer_size = priv->rx_cfg.packet_buffer_size; 1335 1298 cfg->rx = priv->rx; 1336 1299 cfg->xdp = !!cfg->qcfg_tx->num_xdp_queues; ··· 1411 1372 priv->rx_cfg = *rx_alloc_cfg->qcfg_rx; 1412 1373 priv->tx_desc_cnt = tx_alloc_cfg->ring_size; 1413 1374 priv->rx_desc_cnt = rx_alloc_cfg->ring_size; 1375 + priv->tx_pages_per_qpl = tx_alloc_cfg->pages_per_qpl; 1376 + priv->rx_pages_per_qpl = rx_alloc_cfg->pages_per_qpl; 1414 1377 1415 1378 gve_tx_start_rings(priv, gve_num_tx_queues(priv)); 1416 1379 gve_rx_start_rings(priv, rx_alloc_cfg->qcfg_rx->num_queues);
+1 -4
drivers/net/ethernet/google/gve/gve_rx.c
··· 278 278 struct device *hdev = &priv->pdev->dev; 279 279 u32 slots = cfg->ring_size; 280 280 int filled_pages; 281 - int qpl_page_cnt; 282 281 u32 qpl_id = 0; 283 282 size_t bytes; 284 283 int err; ··· 313 314 314 315 if (!rx->data.raw_addressing) { 315 316 qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx->q_num); 316 - qpl_page_cnt = cfg->ring_size; 317 - 318 317 rx->data.qpl = gve_alloc_queue_page_list(priv, qpl_id, 319 - qpl_page_cnt); 318 + cfg->pages_per_qpl); 320 319 if (!rx->data.qpl) { 321 320 err = -ENOMEM; 322 321 goto abort_with_copy_pool;
+2 -4
drivers/net/ethernet/google/gve/gve_rx_dqo.c
··· 218 218 { 219 219 struct device *hdev = &priv->pdev->dev; 220 220 struct page_pool *pool; 221 - int qpl_page_cnt; 222 221 size_t size; 223 222 u32 qpl_id; 224 223 ··· 245 246 XSK_CHECK_PRIV_TYPE(struct gve_xdp_buff); 246 247 247 248 rx->dqo.num_buf_states = cfg->raw_addressing ? buffer_queue_slots : 248 - gve_get_rx_pages_per_qpl_dqo(cfg->ring_size); 249 + cfg->pages_per_qpl; 249 250 rx->dqo.buf_states = kvcalloc_node(rx->dqo.num_buf_states, 250 251 sizeof(rx->dqo.buf_states[0]), 251 252 GFP_KERNEL, priv->numa_node); ··· 280 281 rx->dqo.page_pool = pool; 281 282 } else { 282 283 qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx->q_num); 283 - qpl_page_cnt = gve_get_rx_pages_per_qpl_dqo(cfg->ring_size); 284 284 285 285 rx->dqo.qpl = gve_alloc_queue_page_list(priv, qpl_id, 286 - qpl_page_cnt); 286 + cfg->pages_per_qpl); 287 287 if (!rx->dqo.qpl) 288 288 goto err; 289 289 rx->dqo.next_qpl_page_idx = 0;
+1 -4
drivers/net/ethernet/google/gve/gve_tx.c
··· 264 264 int idx) 265 265 { 266 266 struct device *hdev = &priv->pdev->dev; 267 - int qpl_page_cnt; 268 267 u32 qpl_id = 0; 269 268 size_t bytes; 270 269 ··· 290 291 tx->dev = hdev; 291 292 if (!tx->raw_addressing) { 292 293 qpl_id = gve_tx_qpl_id(priv, tx->q_num); 293 - qpl_page_cnt = priv->tx_pages_per_qpl; 294 - 295 294 tx->tx_fifo.qpl = gve_alloc_queue_page_list(priv, qpl_id, 296 - qpl_page_cnt); 295 + cfg->pages_per_qpl); 297 296 if (!tx->tx_fifo.qpl) 298 297 goto abort_with_desc; 299 298
+1 -3
drivers/net/ethernet/google/gve/gve_tx_dqo.c
··· 311 311 { 312 312 struct device *hdev = &priv->pdev->dev; 313 313 int num_pending_packets; 314 - int qpl_page_cnt; 315 314 size_t bytes; 316 315 u32 qpl_id; 317 316 int i; ··· 391 392 392 393 if (!cfg->raw_addressing) { 393 394 qpl_id = gve_tx_qpl_id(priv, tx->q_num); 394 - qpl_page_cnt = priv->tx_pages_per_qpl; 395 395 396 396 tx->dqo.qpl = gve_alloc_queue_page_list(priv, qpl_id, 397 - qpl_page_cnt); 397 + cfg->pages_per_qpl); 398 398 if (!tx->dqo.qpl) 399 399 goto err; 400 400