Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'gve-add-support-for-non-4k-page-sizes'

John Fraker says:

====================
gve: Add support for non-4k page sizes.

This patch series adds support for non-4k page sizes to the driver. Prior
to this patch series, the driver assumed a 4k page size in many small
ways, and would crash in a kernel compiled for a different page size.

This changeset aims to be a minimal change that unblocks certain Arm
platforms with large page sizes.
====================

Link: https://lore.kernel.org/r/20231128002648.320892-1-jfraker@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+81 -52
+7 -1
drivers/net/ethernet/google/gve/gve.h
··· 8 8 #define _GVE_H_ 9 9 10 10 #include <linux/dma-mapping.h> 11 + #include <linux/dmapool.h> 11 12 #include <linux/netdevice.h> 12 13 #include <linux/pci.h> 13 14 #include <linux/u64_stats_sync.h> ··· 42 41 #define NIC_TX_STATS_REPORT_NUM 0 43 42 #define NIC_RX_STATS_REPORT_NUM 4 44 43 44 + #define GVE_ADMINQ_BUFFER_SIZE 4096 45 + 45 46 #define GVE_DATA_SLOT_ADDR_PAGE_MASK (~(PAGE_SIZE - 1)) 46 47 47 48 /* PTYPEs are always 10 bits. */ 48 49 #define GVE_NUM_PTYPES 1024 49 50 50 - #define GVE_RX_BUFFER_SIZE_DQO 2048 51 + #define GVE_DEFAULT_RX_BUFFER_SIZE 2048 52 + 53 + #define GVE_DEFAULT_RX_BUFFER_OFFSET 2048 51 54 52 55 #define GVE_XDP_ACTIONS 5 53 56 ··· 677 672 /* Admin queue - see gve_adminq.h*/ 678 673 union gve_adminq_command *adminq; 679 674 dma_addr_t adminq_bus_addr; 675 + struct dma_pool *adminq_pool; 680 676 u32 adminq_mask; /* masks prod_cnt to adminq size */ 681 677 u32 adminq_prod_cnt; /* free-running count of AQ cmds executed */ 682 678 u32 adminq_cmd_fail; /* free-running count of AQ cmds failed */
+54 -34
drivers/net/ethernet/google/gve/gve_adminq.c
··· 194 194 195 195 int gve_adminq_alloc(struct device *dev, struct gve_priv *priv) 196 196 { 197 - priv->adminq = dma_alloc_coherent(dev, PAGE_SIZE, 198 - &priv->adminq_bus_addr, GFP_KERNEL); 199 - if (unlikely(!priv->adminq)) 197 + priv->adminq_pool = dma_pool_create("adminq_pool", dev, 198 + GVE_ADMINQ_BUFFER_SIZE, 0, 0); 199 + if (unlikely(!priv->adminq_pool)) 200 200 return -ENOMEM; 201 + priv->adminq = dma_pool_alloc(priv->adminq_pool, GFP_KERNEL, 202 + &priv->adminq_bus_addr); 203 + if (unlikely(!priv->adminq)) { 204 + dma_pool_destroy(priv->adminq_pool); 205 + return -ENOMEM; 206 + } 201 207 202 - priv->adminq_mask = (PAGE_SIZE / sizeof(union gve_adminq_command)) - 1; 208 + priv->adminq_mask = 209 + (GVE_ADMINQ_BUFFER_SIZE / sizeof(union gve_adminq_command)) - 1; 203 210 priv->adminq_prod_cnt = 0; 204 211 priv->adminq_cmd_fail = 0; 205 212 priv->adminq_timeouts = 0; ··· 225 218 priv->adminq_get_ptype_map_cnt = 0; 226 219 227 220 /* Setup Admin queue with the device */ 228 - iowrite32be(priv->adminq_bus_addr / PAGE_SIZE, 229 - &priv->reg_bar0->adminq_pfn); 230 - 221 + if (priv->pdev->revision < 0x1) { 222 + iowrite32be(priv->adminq_bus_addr / PAGE_SIZE, 223 + &priv->reg_bar0->adminq_pfn); 224 + } else { 225 + iowrite16be(GVE_ADMINQ_BUFFER_SIZE, 226 + &priv->reg_bar0->adminq_length); 227 + #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT 228 + iowrite32be(priv->adminq_bus_addr >> 32, 229 + &priv->reg_bar0->adminq_base_address_hi); 230 + #endif 231 + iowrite32be(priv->adminq_bus_addr, 232 + &priv->reg_bar0->adminq_base_address_lo); 233 + iowrite32be(GVE_DRIVER_STATUS_RUN_MASK, &priv->reg_bar0->driver_status); 234 + } 231 235 gve_set_admin_queue_ok(priv); 232 236 return 0; 233 237 } ··· 248 230 int i = 0; 249 231 250 232 /* Tell the device the adminq is leaving */ 251 - iowrite32be(0x0, &priv->reg_bar0->adminq_pfn); 252 - while (ioread32be(&priv->reg_bar0->adminq_pfn)) { 253 - /* If this is reached the device is unrecoverable and still 254 - * holding memory. 
Continue looping to avoid memory corruption, 255 - * but WARN so it is visible what is going on. 256 - */ 257 - if (i == GVE_MAX_ADMINQ_RELEASE_CHECK) 258 - WARN(1, "Unrecoverable platform error!"); 259 - i++; 260 - msleep(GVE_ADMINQ_SLEEP_LEN); 233 + if (priv->pdev->revision < 0x1) { 234 + iowrite32be(0x0, &priv->reg_bar0->adminq_pfn); 235 + while (ioread32be(&priv->reg_bar0->adminq_pfn)) { 236 + /* If this is reached the device is unrecoverable and still 237 + * holding memory. Continue looping to avoid memory corruption, 238 + * but WARN so it is visible what is going on. 239 + */ 240 + if (i == GVE_MAX_ADMINQ_RELEASE_CHECK) 241 + WARN(1, "Unrecoverable platform error!"); 242 + i++; 243 + msleep(GVE_ADMINQ_SLEEP_LEN); 244 + } 245 + } else { 246 + iowrite32be(GVE_DRIVER_STATUS_RESET_MASK, &priv->reg_bar0->driver_status); 247 + while (!(ioread32be(&priv->reg_bar0->device_status) 248 + & GVE_DEVICE_STATUS_DEVICE_IS_RESET)) { 249 + if (i == GVE_MAX_ADMINQ_RELEASE_CHECK) 250 + WARN(1, "Unrecoverable platform error!"); 251 + i++; 252 + msleep(GVE_ADMINQ_SLEEP_LEN); 253 + } 261 254 } 262 255 gve_clear_device_rings_ok(priv); 263 256 gve_clear_device_resources_ok(priv); ··· 280 251 if (!gve_get_admin_queue_ok(priv)) 281 252 return; 282 253 gve_adminq_release(priv); 283 - dma_free_coherent(dev, PAGE_SIZE, priv->adminq, priv->adminq_bus_addr); 254 + dma_pool_free(priv->adminq_pool, priv->adminq, priv->adminq_bus_addr); 255 + dma_pool_destroy(priv->adminq_pool); 284 256 gve_clear_admin_queue_ok(priv); 285 257 } 286 258 ··· 727 697 struct gve_device_descriptor *descriptor) 728 698 { 729 699 priv->tx_desc_cnt = be16_to_cpu(descriptor->tx_queue_entries); 730 - if (priv->tx_desc_cnt * sizeof(priv->tx->desc[0]) < PAGE_SIZE) { 731 - dev_err(&priv->pdev->dev, "Tx desc count %d too low\n", 732 - priv->tx_desc_cnt); 733 - return -EINVAL; 734 - } 735 700 priv->rx_desc_cnt = be16_to_cpu(descriptor->rx_queue_entries); 736 - if (priv->rx_desc_cnt * sizeof(priv->rx->desc.desc_ring[0]) 
737 - < PAGE_SIZE) { 738 - dev_err(&priv->pdev->dev, "Rx desc count %d too low\n", 739 - priv->rx_desc_cnt); 740 - return -EINVAL; 741 - } 742 701 return 0; 743 702 } 744 703 ··· 797 778 u16 mtu; 798 779 799 780 memset(&cmd, 0, sizeof(cmd)); 800 - descriptor = dma_alloc_coherent(&priv->pdev->dev, PAGE_SIZE, 801 - &descriptor_bus, GFP_KERNEL); 781 + descriptor = dma_pool_alloc(priv->adminq_pool, GFP_KERNEL, 782 + &descriptor_bus); 802 783 if (!descriptor) 803 784 return -ENOMEM; 804 785 cmd.opcode = cpu_to_be32(GVE_ADMINQ_DESCRIBE_DEVICE); ··· 806 787 cpu_to_be64(descriptor_bus); 807 788 cmd.describe_device.device_descriptor_version = 808 789 cpu_to_be32(GVE_ADMINQ_DEVICE_DESCRIPTOR_VERSION); 809 - cmd.describe_device.available_length = cpu_to_be32(PAGE_SIZE); 790 + cmd.describe_device.available_length = 791 + cpu_to_be32(GVE_ADMINQ_BUFFER_SIZE); 810 792 811 793 err = gve_adminq_execute_cmd(priv, &cmd); 812 794 if (err) ··· 888 868 dev_op_jumbo_frames, dev_op_dqo_qpl); 889 869 890 870 free_device_descriptor: 891 - dma_free_coherent(&priv->pdev->dev, PAGE_SIZE, descriptor, 892 - descriptor_bus); 871 + dma_pool_free(priv->adminq_pool, descriptor, descriptor_bus); 893 872 return err; 894 873 } 895 874 ··· 917 898 .page_list_id = cpu_to_be32(qpl->id), 918 899 .num_pages = cpu_to_be32(num_entries), 919 900 .page_address_list_addr = cpu_to_be64(page_list_bus), 901 + .page_size = cpu_to_be64(PAGE_SIZE), 920 902 }; 921 903 922 904 err = gve_adminq_execute_cmd(priv, &cmd);
+2 -1
drivers/net/ethernet/google/gve/gve_adminq.h
··· 219 219 __be32 page_list_id; 220 220 __be32 num_pages; 221 221 __be64 page_address_list_addr; 222 + __be64 page_size; 222 223 }; 223 224 224 - static_assert(sizeof(struct gve_adminq_register_page_list) == 16); 225 + static_assert(sizeof(struct gve_adminq_register_page_list) == 24); 225 226 226 227 struct gve_adminq_unregister_page_list { 227 228 __be32 page_list_id;
+1 -1
drivers/net/ethernet/google/gve/gve_ethtool.c
··· 519 519 case ETHTOOL_RX_COPYBREAK: 520 520 { 521 521 u32 max_copybreak = gve_is_gqi(priv) ? 522 - (PAGE_SIZE / 2) : priv->data_buffer_size_dqo; 522 + GVE_DEFAULT_RX_BUFFER_SIZE : priv->data_buffer_size_dqo; 523 523 524 524 len = *(u32 *)value; 525 525 if (len > max_copybreak)
+2 -2
drivers/net/ethernet/google/gve/gve_main.c
··· 1328 1328 /* Hard code this for now. This may be tuned in the future for 1329 1329 * performance. 1330 1330 */ 1331 - priv->data_buffer_size_dqo = GVE_RX_BUFFER_SIZE_DQO; 1331 + priv->data_buffer_size_dqo = GVE_DEFAULT_RX_BUFFER_SIZE; 1332 1332 } 1333 1333 err = gve_create_rings(priv); 1334 1334 if (err) ··· 1664 1664 return -EOPNOTSUPP; 1665 1665 } 1666 1666 1667 - if (dev->mtu > (PAGE_SIZE / 2) - sizeof(struct ethhdr) - GVE_RX_PAD) { 1667 + if (dev->mtu > GVE_DEFAULT_RX_BUFFER_SIZE - sizeof(struct ethhdr) - GVE_RX_PAD) { 1668 1668 netdev_warn(dev, "XDP is not supported for mtu %d.\n", 1669 1669 dev->mtu); 1670 1670 return -EOPNOTSUPP;
+9
drivers/net/ethernet/google/gve/gve_register.h
··· 18 18 __be32 adminq_event_counter; 19 19 u8 reserved[3]; 20 20 u8 driver_version; 21 + __be32 adminq_base_address_hi; 22 + __be32 adminq_base_address_lo; 23 + __be16 adminq_length; 21 24 }; 22 25 23 26 enum gve_device_status_flags { 24 27 GVE_DEVICE_STATUS_RESET_MASK = BIT(1), 25 28 GVE_DEVICE_STATUS_LINK_STATUS_MASK = BIT(2), 26 29 GVE_DEVICE_STATUS_REPORT_STATS_MASK = BIT(3), 30 + GVE_DEVICE_STATUS_DEVICE_IS_RESET = BIT(4), 31 + }; 32 + 33 + enum gve_driver_status_flags { 34 + GVE_DRIVER_STATUS_RUN_MASK = BIT(0), 35 + GVE_DRIVER_STATUS_RESET_MASK = BIT(1), 27 36 }; 28 37 #endif /* _GVE_REGISTER_H_ */
+5 -12
drivers/net/ethernet/google/gve/gve_rx.c
··· 211 211 { 212 212 struct gve_rx_ring *rx = &priv->rx[idx]; 213 213 struct device *hdev = &priv->pdev->dev; 214 - u32 slots, npages; 215 214 int filled_pages; 216 215 size_t bytes; 216 + u32 slots; 217 217 int err; 218 218 219 219 netif_dbg(priv, drv, priv->dev, "allocating rx ring\n"); ··· 270 270 271 271 /* alloc rx desc ring */ 272 272 bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt; 273 - npages = bytes / PAGE_SIZE; 274 - if (npages * PAGE_SIZE != bytes) { 275 - err = -EIO; 276 - goto abort_with_q_resources; 277 - } 278 - 279 273 rx->desc.desc_ring = dma_alloc_coherent(hdev, bytes, &rx->desc.bus, 280 274 GFP_KERNEL); 281 275 if (!rx->desc.desc_ring) { ··· 283 289 /* Allocating half-page buffers allows page-flipping which is faster 284 290 * than copying or allocating new pages. 285 291 */ 286 - rx->packet_buffer_size = PAGE_SIZE / 2; 292 + rx->packet_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE; 287 293 gve_rx_ctx_clear(&rx->ctx); 288 294 gve_rx_add_to_block(priv, idx); 289 295 ··· 399 405 400 406 static void gve_rx_flip_buff(struct gve_rx_slot_page_info *page_info, __be64 *slot_addr) 401 407 { 402 - const __be64 offset = cpu_to_be64(PAGE_SIZE / 2); 408 + const __be64 offset = cpu_to_be64(GVE_DEFAULT_RX_BUFFER_OFFSET); 403 409 404 410 /* "flip" to other packet buffer on this page */ 405 - page_info->page_offset ^= PAGE_SIZE / 2; 411 + page_info->page_offset ^= GVE_DEFAULT_RX_BUFFER_OFFSET; 406 412 *(slot_addr) ^= offset; 407 413 } 408 414 ··· 507 513 return NULL; 508 514 509 515 gve_dec_pagecnt_bias(copy_page_info); 510 - copy_page_info->page_offset += rx->packet_buffer_size; 511 - copy_page_info->page_offset &= (PAGE_SIZE - 1); 516 + copy_page_info->page_offset ^= GVE_DEFAULT_RX_BUFFER_OFFSET; 512 517 513 518 if (copy_page_info->can_flip) { 514 519 /* We have used both halves of this copy page, it
+1 -1
drivers/net/ethernet/google/gve/gve_tx.c
··· 819 819 return 0; 820 820 } 821 821 822 - #define GVE_TX_START_THRESH PAGE_SIZE 822 + #define GVE_TX_START_THRESH 4096 823 823 824 824 static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx, 825 825 u32 to_do, bool try_to_wake)