Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'gve-af_xdp-zero-copy-for-dqo-rda'

Joshua Washington says:

====================
gve: AF_XDP zero-copy for DQO RDA

This patch series adds support for AF_XDP zero-copy in the DQO RDA queue
format.

XSK infrastructure is updated to re-post buffers when adding XSK pools
because XSK umem will be posted directly to the NIC, a departure from
the bounce buffer model used in GQI QPL. A registry of XSK pools is
introduced to prevent the usage of XSK pools when in copy mode.

v1: https://lore.kernel.org/netdev/20250714160451.124671-1-jeroendb@google.com/
====================

Link: https://patch.msgid.link/20250717152839.973004-1-jeroendb@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>

Paolo Abeni cdb79400 3fc89472

+423 -101
+20 -4
drivers/net/ethernet/google/gve/gve.h
··· 190 190 /* The page posted to HW. */ 191 191 struct gve_rx_slot_page_info page_info; 192 192 193 + /* XSK buffer */ 194 + struct xdp_buff *xsk_buff; 195 + 193 196 /* The DMA address corresponding to `page_info`. */ 194 197 dma_addr_t addr; 195 198 ··· 334 331 335 332 /* XDP stuff */ 336 333 struct xdp_rxq_info xdp_rxq; 337 - struct xdp_rxq_info xsk_rxq; 338 334 struct xsk_buff_pool *xsk_pool; 339 335 struct page_frag_cache page_cache; /* Page cache to allocate XDP frames */ 340 336 }; ··· 402 400 GVE_PACKET_STATE_PENDING_REINJECT_COMPL, 403 401 /* No valid completion received within the specified timeout. */ 404 402 GVE_PACKET_STATE_TIMED_OUT_COMPL, 403 + /* XSK pending packet has received a packet/reinjection completion, or 404 + * has timed out. At this point, the pending packet can be counted by 405 + * xsk_tx_complete and freed. 406 + */ 407 + GVE_PACKET_STATE_XSK_COMPLETE, 405 408 }; 406 409 407 410 enum gve_tx_pending_packet_dqo_type { 408 411 GVE_TX_PENDING_PACKET_DQO_SKB, 409 - GVE_TX_PENDING_PACKET_DQO_XDP_FRAME 412 + GVE_TX_PENDING_PACKET_DQO_XDP_FRAME, 413 + GVE_TX_PENDING_PACKET_DQO_XSK, 410 414 }; 411 415 412 416 struct gve_tx_pending_packet_dqo { ··· 449 441 /* Identifies the current state of the packet as defined in 450 442 * `enum gve_packet_state`. 451 443 */ 452 - u8 state : 2; 444 + u8 state : 3; 453 445 454 446 /* gve_tx_pending_packet_dqo_type */ 455 - u8 type : 1; 447 + u8 type : 2; 456 448 457 449 /* If packet is an outstanding miss completion, then the packet is 458 450 * freed if the corresponding re-injection completion is not received ··· 521 513 /* Cached value of `dqo_compl.free_tx_qpl_buf_cnt` */ 522 514 u32 free_tx_qpl_buf_cnt; 523 515 }; 516 + 517 + atomic_t xsk_reorder_queue_tail; 524 518 } dqo_tx; 525 519 }; 526 520 ··· 555 545 556 546 /* Last TX ring index fetched by HW */ 557 547 atomic_t hw_tx_head; 548 + 549 + u16 xsk_reorder_queue_head; 550 + u16 xsk_reorder_queue_tail; 558 551 559 552 /* List to track pending packets which received a miss 560 553 * completion but not a corresponding reinjection. ··· 611 598 612 599 struct gve_tx_pending_packet_dqo *pending_packets; 613 600 s16 num_pending_packets; 601 + 602 + u16 *xsk_reorder_queue; 614 603 615 604 u32 complq_mask; /* complq size is complq_mask + 1 */ 616 605 ··· 818 803 819 804 struct gve_tx_queue_config tx_cfg; 820 805 struct gve_rx_queue_config rx_cfg; 806 + unsigned long *xsk_pools; /* bitmap of RX queues with XSK pools */ 821 807 u32 num_ntfy_blks; /* split between TX and RX so must be even */ 822 808 int numa_node; 823 809
+23 -1
drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c
··· 4 4 * Copyright (C) 2015-2024 Google, Inc. 5 5 */ 6 6 7 + #include <net/xdp_sock_drv.h> 7 8 #include "gve.h" 8 9 #include "gve_utils.h" 9 10 ··· 29 28 30 29 /* Point buf_state to itself to mark it as allocated */ 31 30 buf_state->next = buffer_id; 31 + 32 + /* Clear the buffer pointers */ 33 + buf_state->page_info.page = NULL; 34 + buf_state->xsk_buff = NULL; 32 35 33 36 return buf_state; 34 37 } ··· 291 286 { 292 287 struct gve_rx_buf_state_dqo *buf_state; 293 288 294 - if (rx->dqo.page_pool) { 289 + if (rx->xsk_pool) { 290 + buf_state = gve_alloc_buf_state(rx); 291 + if (unlikely(!buf_state)) 292 + return -ENOMEM; 293 + 294 + buf_state->xsk_buff = xsk_buff_alloc(rx->xsk_pool); 295 + if (unlikely(!buf_state->xsk_buff)) { 296 + xsk_set_rx_need_wakeup(rx->xsk_pool); 297 + gve_free_buf_state(rx, buf_state); 298 + return -ENOMEM; 299 + } 300 + /* Allocated xsk buffer. Clear wakeup in case it was set. */ 301 + xsk_clear_rx_need_wakeup(rx->xsk_pool); 302 + desc->buf_id = cpu_to_le16(buf_state - rx->dqo.buf_states); 303 + desc->buf_addr = 304 + cpu_to_le64(xsk_buff_xdp_get_dma(buf_state->xsk_buff)); 305 + return 0; 306 + } else if (rx->dqo.page_pool) { 295 307 buf_state = gve_alloc_buf_state(rx); 296 308 if (WARN_ON_ONCE(!buf_state)) 297 309 return -ENOMEM;
+1
drivers/net/ethernet/google/gve/gve_dqo.h
··· 38 38 netdev_features_t features); 39 39 bool gve_tx_poll_dqo(struct gve_notify_block *block, bool do_clean); 40 40 bool gve_xdp_poll_dqo(struct gve_notify_block *block); 41 + bool gve_xsk_tx_poll_dqo(struct gve_notify_block *block, int budget); 41 42 int gve_rx_poll_dqo(struct gve_notify_block *block, int budget); 42 43 int gve_tx_alloc_rings_dqo(struct gve_priv *priv, 43 44 struct gve_tx_alloc_rings_cfg *cfg);
+140 -93
drivers/net/ethernet/google/gve/gve_main.c
··· 4 4 * Copyright (C) 2015-2024 Google LLC 5 5 */ 6 6 7 + #include <linux/bitmap.h> 7 8 #include <linux/bpf.h> 8 9 #include <linux/cpumask.h> 9 10 #include <linux/etherdevice.h> ··· 427 426 428 427 if (block->rx) { 429 428 work_done = gve_rx_poll_dqo(block, budget); 429 + 430 + /* Poll XSK TX as part of RX NAPI. Setup re-poll based on if 431 + * either datapath has more work to do. 432 + */ 433 + if (priv->xdp_prog) 434 + reschedule |= gve_xsk_tx_poll_dqo(block, budget); 430 435 reschedule |= work_done == budget; 431 436 } 432 437 ··· 1165 1158 static void gve_turndown(struct gve_priv *priv); 1166 1159 static void gve_turnup(struct gve_priv *priv); 1167 1160 1161 + static void gve_unreg_xsk_pool(struct gve_priv *priv, u16 qid) 1162 + { 1163 + struct gve_rx_ring *rx; 1164 + 1165 + if (!priv->rx) 1166 + return; 1167 + 1168 + rx = &priv->rx[qid]; 1169 + rx->xsk_pool = NULL; 1170 + if (xdp_rxq_info_is_reg(&rx->xdp_rxq)) 1171 + xdp_rxq_info_unreg_mem_model(&rx->xdp_rxq); 1172 + 1173 + if (!priv->tx) 1174 + return; 1175 + priv->tx[gve_xdp_tx_queue_id(priv, qid)].xsk_pool = NULL; 1176 + } 1177 + 1178 + static int gve_reg_xsk_pool(struct gve_priv *priv, struct net_device *dev, 1179 + struct xsk_buff_pool *pool, u16 qid) 1180 + { 1181 + struct gve_rx_ring *rx; 1182 + u16 tx_qid; 1183 + int err; 1184 + 1185 + rx = &priv->rx[qid]; 1186 + err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq, 1187 + MEM_TYPE_XSK_BUFF_POOL, pool); 1188 + if (err) { 1189 + gve_unreg_xsk_pool(priv, qid); 1190 + return err; 1191 + } 1192 + 1193 + rx->xsk_pool = pool; 1194 + 1195 + tx_qid = gve_xdp_tx_queue_id(priv, qid); 1196 + priv->tx[tx_qid].xsk_pool = pool; 1197 + 1198 + return 0; 1199 + } 1200 + 1201 + static void gve_unreg_xdp_info(struct gve_priv *priv) 1202 + { 1203 + int i; 1204 + 1205 + if (!priv->tx_cfg.num_xdp_queues || !priv->rx) 1206 + return; 1207 + 1208 + for (i = 0; i < priv->rx_cfg.num_queues; i++) { 1209 + struct gve_rx_ring *rx = &priv->rx[i]; 1210 + 1211 + if (xdp_rxq_info_is_reg(&rx->xdp_rxq)) 1212 + xdp_rxq_info_unreg(&rx->xdp_rxq); 1213 + 1214 + gve_unreg_xsk_pool(priv, i); 1215 + } 1216 + } 1217 + 1218 + static struct xsk_buff_pool *gve_get_xsk_pool(struct gve_priv *priv, int qid) 1219 + { 1220 + if (!test_bit(qid, priv->xsk_pools)) 1221 + return NULL; 1222 + 1223 + return xsk_get_pool_from_qid(priv->dev, qid); 1224 + } 1225 + 1168 1226 static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev) 1169 1227 { 1170 1228 struct napi_struct *napi; 1171 1229 struct gve_rx_ring *rx; 1172 1230 int err = 0; 1173 - int i, j; 1174 - u32 tx_qid; 1231 + int i; 1175 1232 1176 1233 if (!priv->tx_cfg.num_xdp_queues) 1177 1234 return 0; 1178 1235 1179 1236 for (i = 0; i < priv->rx_cfg.num_queues; i++) { 1237 + struct xsk_buff_pool *xsk_pool; 1238 + 1180 1239 rx = &priv->rx[i]; 1181 1240 napi = &priv->ntfy_blocks[rx->ntfy_id].napi; 1182 1241 ··· 1250 1177 napi->napi_id); 1251 1178 if (err) 1252 1179 goto err; 1253 - if (gve_is_qpl(priv)) 1180 + 1181 + xsk_pool = gve_get_xsk_pool(priv, i); 1182 + if (xsk_pool) 1183 + err = gve_reg_xsk_pool(priv, dev, xsk_pool, i); 1184 + else if (gve_is_qpl(priv)) 1254 1185 err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq, 1255 1186 MEM_TYPE_PAGE_SHARED, 1256 1187 NULL); ··· 1264 1187 rx->dqo.page_pool); 1265 1188 if (err) 1266 1189 goto err; 1267 - rx->xsk_pool = xsk_get_pool_from_qid(dev, i); 1268 - if (rx->xsk_pool) { 1269 - err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, i, 1270 - napi->napi_id); 1271 - if (err) 1272 - goto err; 1273 - err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq, 1274 - MEM_TYPE_XSK_BUFF_POOL, NULL); 1275 - if (err) 1276 - goto err; 1277 - xsk_pool_set_rxq_info(rx->xsk_pool, 1278 - &rx->xsk_rxq); 1279 - } 1280 - } 1281 - 1282 - for (i = 0; i < priv->tx_cfg.num_xdp_queues; i++) { 1283 - tx_qid = gve_xdp_tx_queue_id(priv, i); 1284 - priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i); 1285 1190 } 1286 1191 return 0; 1287 1192 1288 1193 err: 1289 - for (j = i; j >= 0; j--) { 1290 - rx = &priv->rx[j]; 1291 - if (xdp_rxq_info_is_reg(&rx->xdp_rxq)) 1292 - xdp_rxq_info_unreg(&rx->xdp_rxq); 1293 - if (xdp_rxq_info_is_reg(&rx->xsk_rxq)) 1294 - xdp_rxq_info_unreg(&rx->xsk_rxq); 1295 - } 1194 + gve_unreg_xdp_info(priv); 1296 1195 return err; 1297 1196 } 1298 1197 1299 - static void gve_unreg_xdp_info(struct gve_priv *priv) 1300 - { 1301 - int i, tx_qid; 1302 - 1303 - if (!priv->tx_cfg.num_xdp_queues || !priv->rx || !priv->tx) 1304 - return; 1305 - 1306 - for (i = 0; i < priv->rx_cfg.num_queues; i++) { 1307 - struct gve_rx_ring *rx = &priv->rx[i]; 1308 - 1309 - xdp_rxq_info_unreg(&rx->xdp_rxq); 1310 - if (rx->xsk_pool) { 1311 - xdp_rxq_info_unreg(&rx->xsk_rxq); 1312 - rx->xsk_pool = NULL; 1313 - } 1314 - } 1315 - 1316 - for (i = 0; i < priv->tx_cfg.num_xdp_queues; i++) { 1317 - tx_qid = gve_xdp_tx_queue_id(priv, i); 1318 - priv->tx[tx_qid].xsk_pool = NULL; 1319 - } 1320 - } 1321 1198 1322 1199 static void gve_drain_page_cache(struct gve_priv *priv) 1323 1200 { ··· 1586 1555 u16 qid) 1587 1556 { 1588 1557 struct gve_priv *priv = netdev_priv(dev); 1589 - struct napi_struct *napi; 1590 - struct gve_rx_ring *rx; 1591 - int tx_qid; 1592 1558 int err; 1593 1559 1594 1560 if (qid >= priv->rx_cfg.num_queues) { ··· 1603 1575 if (err) 1604 1576 return err; 1605 1577 1578 + set_bit(qid, priv->xsk_pools); 1579 + 1606 1580 /* If XDP prog is not installed or interface is down, return. */ 1607 1581 if (!priv->xdp_prog || !netif_running(dev)) 1608 1582 return 0; 1609 1583 1610 - rx = &priv->rx[qid]; 1611 - napi = &priv->ntfy_blocks[rx->ntfy_id].napi; 1612 - err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, qid, napi->napi_id); 1584 + err = gve_reg_xsk_pool(priv, dev, pool, qid); 1613 1585 if (err) 1614 - goto err; 1586 + goto err_xsk_pool_dma_mapped; 1615 1587 1616 - err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq, 1617 - MEM_TYPE_XSK_BUFF_POOL, NULL); 1618 - if (err) 1619 - goto err; 1620 - 1621 - xsk_pool_set_rxq_info(pool, &rx->xsk_rxq); 1622 - rx->xsk_pool = pool; 1623 - 1624 - tx_qid = gve_xdp_tx_queue_id(priv, qid); 1625 - priv->tx[tx_qid].xsk_pool = pool; 1626 - 1588 + /* Stop and start RDA queues to repost buffers. */ 1589 + if (!gve_is_qpl(priv)) { 1590 + err = gve_configure_rings_xdp(priv, priv->rx_cfg.num_queues); 1591 + if (err) 1592 + goto err_xsk_pool_registered; 1593 + } 1627 1594 return 0; 1628 - err: 1629 - if (xdp_rxq_info_is_reg(&rx->xsk_rxq)) 1630 - xdp_rxq_info_unreg(&rx->xsk_rxq); 1631 1595 1596 + err_xsk_pool_registered: 1597 + gve_unreg_xsk_pool(priv, qid); 1598 + err_xsk_pool_dma_mapped: 1599 + clear_bit(qid, priv->xsk_pools); 1632 1600 xsk_pool_dma_unmap(pool, 1633 - DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 1601 + DMA_ATTR_SKIP_CPU_SYNC | 1602 + DMA_ATTR_WEAK_ORDERING); 1634 1603 return err; 1635 1604 } 1636 1605 ··· 1639 1614 struct napi_struct *napi_tx; 1640 1615 struct xsk_buff_pool *pool; 1641 1616 int tx_qid; 1617 + int err; 1642 1618 1643 - pool = xsk_get_pool_from_qid(dev, qid); 1644 - if (!pool) 1645 - return -EINVAL; 1646 1619 if (qid >= priv->rx_cfg.num_queues) 1647 1620 return -EINVAL; 1648 1621 1649 - /* If XDP prog is not installed or interface is down, unmap DMA and 1650 - * return. 1651 - */ 1652 - if (!priv->xdp_prog || !netif_running(dev)) 1653 - goto done; 1622 + clear_bit(qid, priv->xsk_pools); 1623 + 1624 + pool = xsk_get_pool_from_qid(dev, qid); 1625 + if (pool) 1626 + xsk_pool_dma_unmap(pool, 1627 + DMA_ATTR_SKIP_CPU_SYNC | 1628 + DMA_ATTR_WEAK_ORDERING); 1629 + 1630 + if (!netif_running(dev) || !priv->tx_cfg.num_xdp_queues) 1631 + return 0; 1632 + 1633 + /* Stop and start RDA queues to repost buffers. */ 1634 + if (!gve_is_qpl(priv) && priv->xdp_prog) { 1635 + err = gve_configure_rings_xdp(priv, priv->rx_cfg.num_queues); 1636 + if (err) 1637 + return err; 1638 + } 1654 1639 1655 1640 napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi; 1656 1641 napi_disable(napi_rx); /* make sure current rx poll is done */ ··· 1669 1634 napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi; 1670 1635 napi_disable(napi_tx); /* make sure current tx poll is done */ 1671 1636 1672 - priv->rx[qid].xsk_pool = NULL; 1673 - xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq); 1674 - priv->tx[tx_qid].xsk_pool = NULL; 1637 + gve_unreg_xsk_pool(priv, qid); 1675 1638 smp_mb(); /* Make sure it is visible to the workers on datapath */ 1676 1639 1677 1640 napi_enable(napi_rx); 1678 - if (gve_rx_work_pending(&priv->rx[qid])) 1679 - napi_schedule(napi_rx); 1680 - 1681 1641 napi_enable(napi_tx); 1682 - if (gve_tx_clean_pending(priv, &priv->tx[tx_qid])) 1683 - napi_schedule(napi_tx); 1642 + if (gve_is_gqi(priv)) { 1643 + if (gve_rx_work_pending(&priv->rx[qid])) 1644 + napi_schedule(napi_rx); 1684 1645 1685 - done: 1686 - xsk_pool_dma_unmap(pool, 1687 - DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); 1646 + if (gve_tx_clean_pending(priv, &priv->tx[tx_qid])) 1647 + napi_schedule(napi_tx); 1648 + } 1649 + 1688 1650 return 0; 1689 1651 } 1690 1652 ··· 2307 2275 } else if (priv->queue_format == GVE_DQO_RDA_FORMAT) { 2308 2276 xdp_features = NETDEV_XDP_ACT_BASIC; 2309 2277 xdp_features |= NETDEV_XDP_ACT_REDIRECT; 2278 + xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY; 2310 2279 } else { 2311 2280 xdp_features = 0; 2312 2281 } ··· 2403 2370 priv->ts_config.rx_filter = HWTSTAMP_FILTER_NONE; 2404 2371 2405 2372 setup_device: 2373 + priv->xsk_pools = bitmap_zalloc(priv->rx_cfg.max_queues, GFP_KERNEL); 2374 + if (!priv->xsk_pools) { 2375 + err = -ENOMEM; 2376 + goto err; 2377 + } 2378 + 2406 2379 gve_set_netdev_xdp_features(priv); 2407 2380 err = gve_setup_device_resources(priv); 2408 - if (!err) 2409 - return 0; 2381 + if (err) 2382 + goto err_free_xsk_bitmap; 2383 + 2384 + return 0; 2385 + 2386 + err_free_xsk_bitmap: 2387 + bitmap_free(priv->xsk_pools); 2388 + priv->xsk_pools = NULL; 2410 2389 err: 2411 2390 gve_adminq_free(&priv->pdev->dev, priv); 2412 2391 return err; ··· 2428 2383 { 2429 2384 gve_teardown_device_resources(priv); 2430 2385 gve_adminq_free(&priv->pdev->dev, priv); 2386 + bitmap_free(priv->xsk_pools); 2387 + priv->xsk_pools = NULL; 2431 2388 } 2432 2389 2433 2390 static void gve_trigger_reset(struct gve_priv *priv)
+91 -3
drivers/net/ethernet/google/gve/gve_rx_dqo.c
··· 16 16 #include <net/ip6_checksum.h> 17 17 #include <net/ipv6.h> 18 18 #include <net/tcp.h> 19 + #include <net/xdp_sock_drv.h> 19 20 20 21 static void gve_rx_free_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx) 21 22 { ··· 150 149 gve_free_to_page_pool(rx, bs, false); 151 150 else 152 151 gve_free_qpl_page_dqo(bs); 152 + if (gve_buf_state_is_allocated(rx, bs) && bs->xsk_buff) { 153 + xsk_buff_free(bs->xsk_buff); 154 + bs->xsk_buff = NULL; 155 + } 153 156 } 154 157 155 158 if (rx->dqo.qpl) { ··· 585 580 int err; 586 581 587 582 xdpf = xdp_convert_buff_to_frame(xdp); 588 - if (unlikely(!xdpf)) 583 + if (unlikely(!xdpf)) { 584 + if (rx->xsk_pool) 585 + xsk_buff_free(xdp); 589 586 return -ENOSPC; 587 + } 590 588 591 589 tx_qid = gve_xdp_tx_queue_id(priv, rx->q_num); 592 590 tx = &priv->tx[tx_qid]; ··· 598 590 spin_unlock(&tx->dqo_tx.xdp_lock); 599 591 600 592 return err; 593 + } 594 + 595 + static void gve_xsk_done_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, 596 + struct xdp_buff *xdp, struct bpf_prog *xprog, 597 + int xdp_act) 598 + { 599 + switch (xdp_act) { 600 + case XDP_ABORTED: 601 + case XDP_DROP: 602 + default: 603 + xsk_buff_free(xdp); 604 + break; 605 + case XDP_TX: 606 + if (unlikely(gve_xdp_tx_dqo(priv, rx, xdp))) 607 + goto err; 608 + break; 609 + case XDP_REDIRECT: 610 + if (unlikely(xdp_do_redirect(priv->dev, xdp, xprog))) 611 + goto err; 612 + break; 613 + } 614 + 615 + u64_stats_update_begin(&rx->statss); 616 + if ((u32)xdp_act < GVE_XDP_ACTIONS) 617 + rx->xdp_actions[xdp_act]++; 618 + u64_stats_update_end(&rx->statss); 619 + return; 620 + 621 + err: 622 + u64_stats_update_begin(&rx->statss); 623 + if (xdp_act == XDP_TX) 624 + rx->xdp_tx_errors++; 625 + if (xdp_act == XDP_REDIRECT) 626 + rx->xdp_redirect_errors++; 627 + u64_stats_update_end(&rx->statss); 601 628 } 602 629 603 630 static void gve_xdp_done_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, ··· 676 633 return; 677 634 } 678 635 636 + static int gve_rx_xsk_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, 637 + struct gve_rx_buf_state_dqo *buf_state, int buf_len, 638 + struct bpf_prog *xprog) 639 + { 640 + struct xdp_buff *xdp = buf_state->xsk_buff; 641 + struct gve_priv *priv = rx->gve; 642 + int xdp_act; 643 + 644 + xdp->data_end = xdp->data + buf_len; 645 + xsk_buff_dma_sync_for_cpu(xdp); 646 + 647 + if (xprog) { 648 + xdp_act = bpf_prog_run_xdp(xprog, xdp); 649 + buf_len = xdp->data_end - xdp->data; 650 + if (xdp_act != XDP_PASS) { 651 + gve_xsk_done_dqo(priv, rx, xdp, xprog, xdp_act); 652 + gve_free_buf_state(rx, buf_state); 653 + return 0; 654 + } 655 + } 656 + 657 + /* Copy the data to skb */ 658 + rx->ctx.skb_head = gve_rx_copy_data(priv->dev, napi, 659 + xdp->data, buf_len); 660 + if (unlikely(!rx->ctx.skb_head)) { 661 + xsk_buff_free(xdp); 662 + gve_free_buf_state(rx, buf_state); 663 + return -ENOMEM; 664 + } 665 + rx->ctx.skb_tail = rx->ctx.skb_head; 666 + 667 + /* Free XSK buffer and Buffer state */ 668 + xsk_buff_free(xdp); 669 + gve_free_buf_state(rx, buf_state); 670 + 671 + /* Update Stats */ 672 + u64_stats_update_begin(&rx->statss); 673 + rx->xdp_actions[XDP_PASS]++; 674 + u64_stats_update_end(&rx->statss); 675 + return 0; 676 + } 677 + 679 678 /* Returns 0 if descriptor is completed successfully. 680 679 * Returns -EINVAL if descriptor is invalid. 681 680 * Returns -ENOMEM if data cannot be copied to skb. ··· 756 671 buf_len = compl_desc->packet_len; 757 672 hdr_len = compl_desc->header_len; 758 673 759 - /* Page might have not been used for a while and was likely last written 674 + xprog = READ_ONCE(priv->xdp_prog); 675 + if (buf_state->xsk_buff) 676 + return gve_rx_xsk_dqo(napi, rx, buf_state, buf_len, xprog); 677 + 678 + /* Page might have not been used for awhile and was likely last written 760 679 * by a different thread. 761 680 */ 762 681 if (rx->dqo.page_pool) { ··· 810 721 return 0; 811 722 } 812 723 813 - xprog = READ_ONCE(priv->xdp_prog); 814 724 if (xprog) { 815 725 struct xdp_buff xdp; 816 726 void *old_data;
+148
drivers/net/ethernet/google/gve/gve_tx_dqo.c
··· 13 13 #include <linux/tcp.h> 14 14 #include <linux/slab.h> 15 15 #include <linux/skbuff.h> 16 + #include <net/xdp_sock_drv.h> 16 17 17 18 /* Returns true if tx_bufs are available. */ 18 19 static bool gve_has_free_tx_qpl_bufs(struct gve_tx_ring *tx, int count) ··· 242 241 tx->dqo.tx_ring = NULL; 243 242 } 244 243 244 + kvfree(tx->dqo.xsk_reorder_queue); 245 + tx->dqo.xsk_reorder_queue = NULL; 246 + 245 247 kvfree(tx->dqo.pending_packets); 246 248 tx->dqo.pending_packets = NULL; 247 249 ··· 349 345 350 346 tx->dqo.pending_packets[tx->dqo.num_pending_packets - 1].next = -1; 351 347 atomic_set_release(&tx->dqo_compl.free_pending_packets, -1); 348 + 349 + /* Only alloc xsk pool for XDP queues */ 350 + if (idx >= cfg->qcfg->num_queues && cfg->num_xdp_rings) { 351 + tx->dqo.xsk_reorder_queue = 352 + kvcalloc(tx->dqo.complq_mask + 1, 353 + sizeof(tx->dqo.xsk_reorder_queue[0]), 354 + GFP_KERNEL); 355 + if (!tx->dqo.xsk_reorder_queue) 356 + goto err; 357 + } 358 + 352 359 tx->dqo_compl.miss_completions.head = -1; 353 360 tx->dqo_compl.miss_completions.tail = -1; 354 361 tx->dqo_compl.timed_out_completions.head = -1; ··· 1007 992 return 0; 1008 993 } 1009 994 995 + static void gve_xsk_reorder_queue_push_dqo(struct gve_tx_ring *tx, 996 + u16 completion_tag) 997 + { 998 + u32 tail = atomic_read(&tx->dqo_tx.xsk_reorder_queue_tail); 999 + 1000 + tx->dqo.xsk_reorder_queue[tail] = completion_tag; 1001 + tail = (tail + 1) & tx->dqo.complq_mask; 1002 + atomic_set_release(&tx->dqo_tx.xsk_reorder_queue_tail, tail); 1003 + } 1004 + 1005 + static struct gve_tx_pending_packet_dqo * 1006 + gve_xsk_reorder_queue_head(struct gve_tx_ring *tx) 1007 + { 1008 + u32 head = tx->dqo_compl.xsk_reorder_queue_head; 1009 + 1010 + if (head == tx->dqo_compl.xsk_reorder_queue_tail) { 1011 + tx->dqo_compl.xsk_reorder_queue_tail = 1012 + atomic_read_acquire(&tx->dqo_tx.xsk_reorder_queue_tail); 1013 + 1014 + if (head == tx->dqo_compl.xsk_reorder_queue_tail) 1015 + return NULL; 1016 + } 1017 + 1018 + return &tx->dqo.pending_packets[tx->dqo.xsk_reorder_queue[head]]; 1019 + } 1020 + 1021 + static void gve_xsk_reorder_queue_pop_dqo(struct gve_tx_ring *tx) 1022 + { 1023 + tx->dqo_compl.xsk_reorder_queue_head++; 1024 + tx->dqo_compl.xsk_reorder_queue_head &= tx->dqo.complq_mask; 1025 + } 1026 + 1010 1027 /* Transmit a given skb and ring the doorbell. */ 1011 1028 netdev_tx_t gve_tx_dqo(struct sk_buff *skb, struct net_device *dev) 1012 1029 { ··· 1060 1013 1061 1014 gve_tx_put_doorbell_dqo(priv, tx->q_resources, tx->dqo_tx.tail); 1062 1015 return NETDEV_TX_OK; 1016 + } 1017 + 1018 + static bool gve_xsk_tx_dqo(struct gve_priv *priv, struct gve_tx_ring *tx, 1019 + int budget) 1020 + { 1021 + struct xsk_buff_pool *pool = tx->xsk_pool; 1022 + struct xdp_desc desc; 1023 + bool repoll = false; 1024 + int sent = 0; 1025 + 1026 + spin_lock(&tx->dqo_tx.xdp_lock); 1027 + for (; sent < budget; sent++) { 1028 + struct gve_tx_pending_packet_dqo *pkt; 1029 + s16 completion_tag; 1030 + dma_addr_t addr; 1031 + u32 desc_idx; 1032 + 1033 + if (unlikely(!gve_has_avail_slots_tx_dqo(tx, 1, 1))) { 1034 + repoll = true; 1035 + break; 1036 + } 1037 + 1038 + if (!xsk_tx_peek_desc(pool, &desc)) 1039 + break; 1040 + 1041 + pkt = gve_alloc_pending_packet(tx); 1042 + pkt->type = GVE_TX_PENDING_PACKET_DQO_XSK; 1043 + pkt->num_bufs = 0; 1044 + completion_tag = pkt - tx->dqo.pending_packets; 1045 + 1046 + addr = xsk_buff_raw_get_dma(pool, desc.addr); 1047 + xsk_buff_raw_dma_sync_for_device(pool, addr, desc.len); 1048 + 1049 + desc_idx = tx->dqo_tx.tail; 1050 + gve_tx_fill_pkt_desc_dqo(tx, &desc_idx, 1051 + true, desc.len, 1052 + addr, completion_tag, true, 1053 + false); 1054 + ++pkt->num_bufs; 1055 + gve_tx_update_tail(tx, desc_idx); 1056 + tx->dqo_tx.posted_packet_desc_cnt += pkt->num_bufs; 1057 + gve_xsk_reorder_queue_push_dqo(tx, completion_tag); 1058 + } 1059 + 1060 + if (sent) { 1061 + gve_tx_put_doorbell_dqo(priv, tx->q_resources, tx->dqo_tx.tail); 1062 + xsk_tx_release(pool); 1063 + } 1064 + 1065 + spin_unlock(&tx->dqo_tx.xdp_lock); 1066 + 1067 + u64_stats_update_begin(&tx->statss); 1068 + tx->xdp_xsk_sent += sent; 1069 + u64_stats_update_end(&tx->statss); 1070 + 1071 + return (sent == budget) || repoll; 1063 1072 } 1064 1073 1065 1074 static void add_to_list(struct gve_tx_ring *tx, struct gve_index_list *list, ··· 1255 1152 pending_packet->xdpf = NULL; 1256 1153 gve_free_pending_packet(tx, pending_packet); 1257 1154 break; 1155 + case GVE_TX_PENDING_PACKET_DQO_XSK: 1156 + pending_packet->state = GVE_PACKET_STATE_XSK_COMPLETE; 1157 + break; 1258 1158 default: 1259 1159 WARN_ON_ONCE(1); 1260 1160 } ··· 1357 1251 1358 1252 remove_from_list(tx, &tx->dqo_compl.timed_out_completions, 1359 1253 pending_packet); 1254 + 1255 + /* Need to count XSK packets in xsk_tx_completed. */ 1256 + if (pending_packet->type == GVE_TX_PENDING_PACKET_DQO_XSK) 1257 + pending_packet->state = GVE_PACKET_STATE_XSK_COMPLETE; 1258 + else 1259 + gve_free_pending_packet(tx, pending_packet); 1260 + } 1261 + } 1262 + 1263 + static void gve_tx_process_xsk_completions(struct gve_tx_ring *tx) 1264 + { 1265 + u32 num_xsks = 0; 1266 + 1267 + while (true) { 1268 + struct gve_tx_pending_packet_dqo *pending_packet = 1269 + gve_xsk_reorder_queue_head(tx); 1270 + 1271 + if (!pending_packet || 1272 + pending_packet->state != GVE_PACKET_STATE_XSK_COMPLETE) 1273 + break; 1274 + 1275 + num_xsks++; 1276 + gve_xsk_reorder_queue_pop_dqo(tx); 1360 1277 gve_free_pending_packet(tx, pending_packet); 1361 1278 } 1279 + 1280 + if (num_xsks) 1281 + xsk_tx_completed(tx->xsk_pool, num_xsks); 1362 1282 } 1363 1283 1364 1284 int gve_clean_tx_done_dqo(struct gve_priv *priv, struct gve_tx_ring *tx, ··· 1465 1333 remove_miss_completions(priv, tx); 1466 1334 remove_timed_out_completions(priv, tx); 1467 1335 1336 + if (tx->xsk_pool) 1337 + gve_tx_process_xsk_completions(tx); 1338 + 1468 1339 u64_stats_update_begin(&tx->statss); 1469 1340 tx->bytes_done += pkt_compl_bytes + reinject_compl_bytes; 1470 1341 tx->pkt_done += pkt_compl_pkts + reinject_compl_pkts; ··· 1498 1363 /* Return true if we still have work. */ 1499 1364 compl_desc = &tx->dqo.compl_ring[tx->dqo_compl.head]; 1500 1365 return compl_desc->generation != tx->dqo_compl.cur_gen_bit; 1366 + } 1367 + 1368 + bool gve_xsk_tx_poll_dqo(struct gve_notify_block *rx_block, int budget) 1369 + { 1370 + struct gve_rx_ring *rx = rx_block->rx; 1371 + struct gve_priv *priv = rx->gve; 1372 + struct gve_tx_ring *tx; 1373 + 1374 + tx = &priv->tx[gve_xdp_tx_queue_id(priv, rx->q_num)]; 1375 + if (tx->xsk_pool) 1376 + return gve_xsk_tx_dqo(priv, tx, budget); 1377 + 1378 + return 0; 1501 1379 } 1502 1380 1503 1381 bool gve_xdp_poll_dqo(struct gve_notify_block *block)