Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'ntb-7.0' of https://github.com/jonmason/ntb

Pull NTB (PCIe non-transparent bridge) updates from Jon Mason:
"NTB updates include debugfs improvements, correctness fixes, cleanups,
and new hardware support:

ntb_transport QP stats are converted to seq_file, a tx_memcpy_offload
module parameter is introduced with associated ordering fixes, and a
debugfs queue name truncation bug is corrected.

Additional fixes address format specifier mismatches in ntb_tool and
boundary conditions in the Switchtec driver, while unused MSI helpers
are removed and the codebase migrates to dma_map_phys().

Intel Gen6 (Diamond Rapids) NTB support is also added."

* tag 'ntb-7.0' of https://github.com/jonmason/ntb:
NTB: ntb_transport: Use seq_file for QP stats debugfs
NTB: ntb_transport: Fix too small buffer for debugfs_name
ntb/ntb_tool: correct sscanf format for u64 and size_t in tool_peer_mw_trans_write
ntb: intel: Add Intel Gen6 NTB support for DiamondRapids
NTB/msi: Remove unused functions
ntb: ntb_hw_switchtec: Increase MAX_MWS limit to 256
ntb: ntb_hw_switchtec: Fix array-index-out-of-bounds access
ntb: ntb_hw_switchtec: Fix shift-out-of-bounds for 0 mw lut
NTB: epf: allow built-in build
ntb: migrate to dma_map_phys instead of map_page
NTB: ntb_transport: Add 'tx_memcpy_offload' module option
NTB: ntb_transport: Remove unused 'retries' field from ntb_queue_entry

+192 -202
-1
drivers/ntb/hw/epf/Kconfig
··· 1 1 config NTB_EPF 2 2 tristate "Generic EPF Non-Transparent Bridge support" 3 - depends on m 4 3 help 5 4 This driver supports EPF NTB on configurable endpoint. 6 5 If unsure, say N.
+10 -4
drivers/ntb/hw/intel/ntb_hw_gen1.c
··· 763 763 return ndev_ntb_debugfs_read(filp, ubuf, count, offp); 764 764 else if (pdev_is_gen3(ndev->ntb.pdev)) 765 765 return ndev_ntb3_debugfs_read(filp, ubuf, count, offp); 766 - else if (pdev_is_gen4(ndev->ntb.pdev) || pdev_is_gen5(ndev->ntb.pdev)) 766 + else if (pdev_is_gen4(ndev->ntb.pdev) || pdev_is_gen5(ndev->ntb.pdev) || 767 + pdev_is_gen6(ndev->ntb.pdev)) 767 768 return ndev_ntb4_debugfs_read(filp, ubuf, count, offp); 768 769 769 770 return -ENXIO; ··· 1873 1872 rc = gen3_init_dev(ndev); 1874 1873 if (rc) 1875 1874 goto err_init_dev; 1876 - } else if (pdev_is_gen4(pdev) || pdev_is_gen5(pdev)) { 1875 + } else if (pdev_is_gen4(pdev) || pdev_is_gen5(pdev) || 1876 + pdev_is_gen6(pdev)) { 1877 1877 ndev->ntb.ops = &intel_ntb4_ops; 1878 1878 rc = intel_ntb_init_pci(ndev, pdev); 1879 1879 if (rc) ··· 1905 1903 err_register: 1906 1904 ndev_deinit_debugfs(ndev); 1907 1905 if (pdev_is_gen1(pdev) || pdev_is_gen3(pdev) || 1908 - pdev_is_gen4(pdev) || pdev_is_gen5(pdev)) 1906 + pdev_is_gen4(pdev) || pdev_is_gen5(pdev) || 1907 + pdev_is_gen6(pdev)) 1909 1908 xeon_deinit_dev(ndev); 1910 1909 err_init_dev: 1911 1910 intel_ntb_deinit_pci(ndev); ··· 1923 1920 ntb_unregister_device(&ndev->ntb); 1924 1921 ndev_deinit_debugfs(ndev); 1925 1922 if (pdev_is_gen1(pdev) || pdev_is_gen3(pdev) || 1926 - pdev_is_gen4(pdev) || pdev_is_gen5(pdev)) 1923 + pdev_is_gen4(pdev) || pdev_is_gen5(pdev) || 1924 + pdev_is_gen6(pdev)) 1927 1925 xeon_deinit_dev(ndev); 1928 1926 intel_ntb_deinit_pci(ndev); 1929 1927 kfree(ndev); ··· 2053 2049 {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_ICX)}, 2054 2050 /* GEN5 PCIe */ 2055 2051 {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_GNR)}, 2052 + /* GEN6 PCIe */ 2053 + {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_DMR)}, 2056 2054 {0} 2057 2055 }; 2058 2056 MODULE_DEVICE_TABLE(pci, intel_ntb_pci_tbl);
+17 -5
drivers/ntb/hw/intel/ntb_hw_gen4.c
··· 46 46 .spad = GEN4_EM_SPAD_OFFSET, 47 47 }; 48 48 49 + static u64 get_ppd0(struct pci_dev *pdev) 50 + { 51 + if (pdev_is_gen4(pdev) || pdev_is_gen5(pdev)) 52 + return GEN4_PPD0_OFFSET; 53 + else if (pdev_is_gen6(pdev)) 54 + return GEN6_PPD0_OFFSET; 55 + 56 + return ULLONG_MAX; 57 + } 58 + 49 59 static int gen4_poll_link(struct intel_ntb_dev *ndev) 50 60 { 51 61 u16 reg_val; ··· 193 183 int gen4_init_dev(struct intel_ntb_dev *ndev) 194 184 { 195 185 struct pci_dev *pdev = ndev->ntb.pdev; 196 - u32 ppd1/*, ppd0*/; 186 + u32 ppd1; 197 187 u16 lnkctl; 198 188 int rc; 199 189 ··· 207 197 ppd1 = ioread32(ndev->self_mmio + GEN4_PPD1_OFFSET); 208 198 if (pdev_is_ICX(pdev)) 209 199 ndev->ntb.topo = gen4_ppd_topo(ndev, ppd1); 210 - else if (pdev_is_SPR(pdev) || pdev_is_gen5(pdev)) 200 + else if (pdev_is_SPR(pdev) || pdev_is_gen5(pdev) || pdev_is_gen6(pdev)) 211 201 ndev->ntb.topo = spr_ppd_topo(ndev, ppd1); 212 202 dev_dbg(&pdev->dev, "ppd %#x topo %s\n", ppd1, 213 203 ntb_topo_string(ndev->ntb.topo)); ··· 442 432 enum ntb_speed max_speed, enum ntb_width max_width) 443 433 { 444 434 struct intel_ntb_dev *ndev; 435 + struct pci_dev *pdev; 445 436 u32 ntb_ctl, ppd0; 446 437 u16 lnkctl; 447 438 448 439 ndev = container_of(ntb, struct intel_ntb_dev, ntb); 440 + pdev = ntb->pdev; 449 441 450 442 dev_dbg(&ntb->pdev->dev, 451 443 "Enabling link with max_speed %d max_width %d\n", ··· 488 476 iowrite16(lnkctl, ndev->self_mmio + GEN4_LINK_CTRL_OFFSET); 489 477 490 478 /* start link training in PPD0 */ 491 - ppd0 = ioread32(ndev->self_mmio + GEN4_PPD0_OFFSET); 479 + ppd0 = ioread32(ndev->self_mmio + get_ppd0(pdev)); 492 480 ppd0 |= GEN4_PPD_LINKTRN; 493 - iowrite32(ppd0, ndev->self_mmio + GEN4_PPD0_OFFSET); 481 + iowrite32(ppd0, ndev->self_mmio + get_ppd0(pdev)); 494 482 495 483 /* make sure link training has started */ 496 - ppd0 = ioread32(ndev->self_mmio + GEN4_PPD0_OFFSET); 484 + ppd0 = ioread32(ndev->self_mmio + get_ppd0(pdev)); 497 485 if (!(ppd0 & GEN4_PPD_LINKTRN)) { 498 
486 dev_warn(&ntb->pdev->dev, "Link is not training\n"); 499 487 return -ENXIO;
+2
drivers/ntb/hw/intel/ntb_hw_gen4.h
··· 103 103 #define NTB_LTR_IDLE_LATSCALE 0x0800 /* 1us scale */ 104 104 #define NTB_LTR_IDLE_REQMNT 0x8000 /* snoop req enable */ 105 105 106 + #define GEN6_PPD0_OFFSET 0xf0d4 107 + 106 108 ssize_t ndev_ntb4_debugfs_read(struct file *filp, char __user *ubuf, 107 109 size_t count, loff_t *offp); 108 110 int gen4_init_dev(struct intel_ntb_dev *ndev);
+6
drivers/ntb/hw/intel/ntb_hw_intel.h
··· 71 71 #define PCI_DEVICE_ID_INTEL_NTB_B2B_SKX 0x201C 72 72 #define PCI_DEVICE_ID_INTEL_NTB_B2B_ICX 0x347e 73 73 #define PCI_DEVICE_ID_INTEL_NTB_B2B_GNR 0x0db4 74 + #define PCI_DEVICE_ID_INTEL_NTB_B2B_DMR 0x7868 74 75 75 76 /* Ntb control and link status */ 76 77 #define NTB_CTL_CFG_LOCK BIT(0) ··· 234 233 static inline int pdev_is_gen5(struct pci_dev *pdev) 235 234 { 236 235 return pdev->device == PCI_DEVICE_ID_INTEL_NTB_B2B_GNR; 236 + } 237 + 238 + static inline int pdev_is_gen6(struct pci_dev *pdev) 239 + { 240 + return pdev->device == PCI_DEVICE_ID_INTEL_NTB_B2B_DMR; 237 241 } 238 242 239 243 #endif
+11 -3
drivers/ntb/hw/mscc/ntb_hw_switchtec.c
··· 29 29 "Enable the use of the LUT based memory windows"); 30 30 31 31 #define SWITCHTEC_NTB_MAGIC 0x45CC0001 32 - #define MAX_MWS 128 32 + #define MAX_MWS 256 33 33 34 34 struct shared_mw { 35 35 u32 magic; ··· 1202 1202 sndev->mmio_self_ctrl); 1203 1203 1204 1204 sndev->nr_lut_mw = ioread16(&sndev->mmio_self_ctrl->lut_table_entries); 1205 - sndev->nr_lut_mw = rounddown_pow_of_two(sndev->nr_lut_mw); 1205 + if (sndev->nr_lut_mw) 1206 + sndev->nr_lut_mw = rounddown_pow_of_two(sndev->nr_lut_mw); 1206 1207 1207 1208 dev_dbg(&sndev->stdev->dev, "MWs: %d direct, %d lut\n", 1208 1209 sndev->nr_direct_mw, sndev->nr_lut_mw); ··· 1213 1212 1214 1213 sndev->peer_nr_lut_mw = 1215 1214 ioread16(&sndev->mmio_peer_ctrl->lut_table_entries); 1216 - sndev->peer_nr_lut_mw = rounddown_pow_of_two(sndev->peer_nr_lut_mw); 1215 + if (sndev->peer_nr_lut_mw) 1216 + sndev->peer_nr_lut_mw = rounddown_pow_of_two(sndev->peer_nr_lut_mw); 1217 1217 1218 1218 dev_dbg(&sndev->stdev->dev, "Peer MWs: %d direct, %d lut\n", 1219 1219 sndev->peer_nr_direct_mw, sndev->peer_nr_lut_mw); ··· 1315 1313 1316 1314 for (i = 0; i < sndev->nr_lut_mw; i++) { 1317 1315 int idx = sndev->nr_direct_mw + i; 1316 + 1317 + if (idx >= MAX_MWS) { 1318 + dev_err(&sndev->stdev->dev, 1319 + "Total number of MW cannot be bigger than %d", MAX_MWS); 1320 + break; 1321 + } 1318 1322 1319 1323 sndev->self_shared->mw_sizes[idx] = LUT_SIZE; 1320 1324 }
-64
drivers/ntb/msi.c
··· 315 315 } 316 316 EXPORT_SYMBOL(ntbm_msi_request_threaded_irq); 317 317 318 - static int ntbm_msi_callback_match(struct device *dev, void *res, void *data) 319 - { 320 - struct ntb_dev *ntb = dev_ntb(dev); 321 - struct ntb_msi_devres *dr = res; 322 - 323 - return dr->ntb == ntb && dr->entry == data; 324 - } 325 - 326 - /** 327 - * ntbm_msi_free_irq() - free an interrupt 328 - * @ntb: NTB device context 329 - * @irq: Interrupt line to free 330 - * @dev_id: Device identity to free 331 - * 332 - * This function should be used to manually free IRQs allocated with 333 - * ntbm_request_[threaded_]irq(). 334 - */ 335 - void ntbm_msi_free_irq(struct ntb_dev *ntb, unsigned int irq, void *dev_id) 336 - { 337 - struct msi_desc *entry = irq_get_msi_desc(irq); 338 - 339 - entry->write_msi_msg = NULL; 340 - entry->write_msi_msg_data = NULL; 341 - 342 - WARN_ON(devres_destroy(&ntb->dev, ntbm_msi_callback_release, 343 - ntbm_msi_callback_match, entry)); 344 - 345 - devm_free_irq(&ntb->dev, irq, dev_id); 346 - } 347 - EXPORT_SYMBOL(ntbm_msi_free_irq); 348 - 349 318 /** 350 319 * ntb_msi_peer_trigger() - Trigger an interrupt handler on a peer 351 320 * @ntb: NTB device context ··· 342 373 return 0; 343 374 } 344 375 EXPORT_SYMBOL(ntb_msi_peer_trigger); 345 - 346 - /** 347 - * ntb_msi_peer_addr() - Get the DMA address to trigger a peer's MSI interrupt 348 - * @ntb: NTB device context 349 - * @peer: Peer index 350 - * @desc: MSI descriptor data which triggers the interrupt 351 - * @msi_addr: Physical address to trigger the interrupt 352 - * 353 - * This function allows using DMA engines to trigger an interrupt 354 - * (for example, trigger an interrupt to process the data after 355 - * sending it). To trigger the interrupt, write @desc.data to the address 356 - * returned in @msi_addr 357 - * 358 - * Return: Zero on success, otherwise a negative error number. 
359 - */ 360 - int ntb_msi_peer_addr(struct ntb_dev *ntb, int peer, 361 - struct ntb_msi_desc *desc, 362 - phys_addr_t *msi_addr) 363 - { 364 - int peer_widx = ntb_peer_mw_count(ntb) - 1 - peer; 365 - phys_addr_t mw_phys_addr; 366 - int ret; 367 - 368 - ret = ntb_peer_mw_get_addr(ntb, peer_widx, &mw_phys_addr, NULL); 369 - if (ret) 370 - return ret; 371 - 372 - if (msi_addr) 373 - *msi_addr = mw_phys_addr + desc->addr_offset; 374 - 375 - return 0; 376 - } 377 - EXPORT_SYMBOL(ntb_msi_peer_addr);
+145 -110
drivers/ntb/ntb_transport.c
··· 54 54 #include <linux/errno.h> 55 55 #include <linux/export.h> 56 56 #include <linux/interrupt.h> 57 + #include <linux/kthread.h> 57 58 #include <linux/module.h> 58 59 #include <linux/pci.h> 59 60 #include <linux/slab.h> 61 + #include <linux/seq_file.h> 60 62 #include <linux/types.h> 61 63 #include <linux/uaccess.h> 62 64 #include <linux/mutex.h> 65 + #include <linux/wait.h> 63 66 #include "linux/ntb.h" 64 67 #include "linux/ntb_transport.h" 65 68 ··· 103 100 MODULE_PARM_DESC(use_msi, "Use MSI interrupts instead of doorbells"); 104 101 #endif 105 102 103 + static bool tx_memcpy_offload; 104 + module_param(tx_memcpy_offload, bool, 0644); 105 + MODULE_PARM_DESC(tx_memcpy_offload, "Offload TX memcpy_toio() to a kernel thread"); 106 + 106 107 static struct dentry *nt_debugfs_dir; 107 108 108 109 /* Only two-ports NTB devices are supported */ ··· 120 113 void *buf; 121 114 unsigned int len; 122 115 unsigned int flags; 123 - int retries; 124 116 int errors; 125 117 unsigned int tx_index; 126 118 unsigned int rx_index; ··· 155 149 void (*tx_handler)(struct ntb_transport_qp *qp, void *qp_data, 156 150 void *data, int len); 157 151 struct list_head tx_free_q; 152 + struct list_head tx_offl_q; 158 153 spinlock_t ntb_tx_free_q_lock; 154 + spinlock_t ntb_tx_offl_q_lock; 159 155 void __iomem *tx_mw; 160 156 phys_addr_t tx_mw_phys; 161 157 size_t tx_mw_size; ··· 208 200 int msi_irq; 209 201 struct ntb_msi_desc msi_desc; 210 202 struct ntb_msi_desc peer_msi_desc; 203 + 204 + struct task_struct *tx_offload_thread; 205 + wait_queue_head_t tx_offload_wq; 211 206 }; 212 207 213 208 struct ntb_transport_mw { ··· 296 285 static void ntb_memcpy_tx(struct ntb_queue_entry *entry, void __iomem *offset); 297 286 static int ntb_async_rx_submit(struct ntb_queue_entry *entry, void *offset); 298 287 static void ntb_memcpy_rx(struct ntb_queue_entry *entry, void *offset); 288 + static int ntb_tx_memcpy_kthread(void *data); 299 289 290 + 291 + static inline bool ntb_tx_offload_enabled(struct 
ntb_transport_qp *qp) 292 + { 293 + return tx_memcpy_offload && qp && qp->tx_offload_thread; 294 + } 300 295 301 296 static int ntb_transport_bus_match(struct device *dev, 302 297 const struct device_driver *drv) ··· 483 466 } 484 467 EXPORT_SYMBOL_GPL(ntb_transport_unregister_client); 485 468 486 - static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count, 487 - loff_t *offp) 469 + static int ntb_qp_debugfs_stats_show(struct seq_file *s, void *v) 488 470 { 489 - struct ntb_transport_qp *qp; 490 - char *buf; 491 - ssize_t ret, out_offset, out_count; 492 - 493 - qp = filp->private_data; 471 + struct ntb_transport_qp *qp = s->private; 494 472 495 473 if (!qp || !qp->link_is_up) 496 474 return 0; 497 475 498 - out_count = 1000; 476 + seq_puts(s, "\nNTB QP stats:\n\n"); 499 477 500 - buf = kmalloc(out_count, GFP_KERNEL); 501 - if (!buf) 502 - return -ENOMEM; 478 + seq_printf(s, "rx_bytes - \t%llu\n", qp->rx_bytes); 479 + seq_printf(s, "rx_pkts - \t%llu\n", qp->rx_pkts); 480 + seq_printf(s, "rx_memcpy - \t%llu\n", qp->rx_memcpy); 481 + seq_printf(s, "rx_async - \t%llu\n", qp->rx_async); 482 + seq_printf(s, "rx_ring_empty - %llu\n", qp->rx_ring_empty); 483 + seq_printf(s, "rx_err_no_buf - %llu\n", qp->rx_err_no_buf); 484 + seq_printf(s, "rx_err_oflow - \t%llu\n", qp->rx_err_oflow); 485 + seq_printf(s, "rx_err_ver - \t%llu\n", qp->rx_err_ver); 486 + seq_printf(s, "rx_buff - \t0x%p\n", qp->rx_buff); 487 + seq_printf(s, "rx_index - \t%u\n", qp->rx_index); 488 + seq_printf(s, "rx_max_entry - \t%u\n", qp->rx_max_entry); 489 + seq_printf(s, "rx_alloc_entry - \t%u\n\n", qp->rx_alloc_entry); 503 490 504 - out_offset = 0; 505 - out_offset += scnprintf(buf + out_offset, out_count - out_offset, 506 - "\nNTB QP stats:\n\n"); 507 - out_offset += scnprintf(buf + out_offset, out_count - out_offset, 508 - "rx_bytes - \t%llu\n", qp->rx_bytes); 509 - out_offset += scnprintf(buf + out_offset, out_count - out_offset, 510 - "rx_pkts - \t%llu\n", qp->rx_pkts); 511 - 
out_offset += scnprintf(buf + out_offset, out_count - out_offset, 512 - "rx_memcpy - \t%llu\n", qp->rx_memcpy); 513 - out_offset += scnprintf(buf + out_offset, out_count - out_offset, 514 - "rx_async - \t%llu\n", qp->rx_async); 515 - out_offset += scnprintf(buf + out_offset, out_count - out_offset, 516 - "rx_ring_empty - %llu\n", qp->rx_ring_empty); 517 - out_offset += scnprintf(buf + out_offset, out_count - out_offset, 518 - "rx_err_no_buf - %llu\n", qp->rx_err_no_buf); 519 - out_offset += scnprintf(buf + out_offset, out_count - out_offset, 520 - "rx_err_oflow - \t%llu\n", qp->rx_err_oflow); 521 - out_offset += scnprintf(buf + out_offset, out_count - out_offset, 522 - "rx_err_ver - \t%llu\n", qp->rx_err_ver); 523 - out_offset += scnprintf(buf + out_offset, out_count - out_offset, 524 - "rx_buff - \t0x%p\n", qp->rx_buff); 525 - out_offset += scnprintf(buf + out_offset, out_count - out_offset, 526 - "rx_index - \t%u\n", qp->rx_index); 527 - out_offset += scnprintf(buf + out_offset, out_count - out_offset, 528 - "rx_max_entry - \t%u\n", qp->rx_max_entry); 529 - out_offset += scnprintf(buf + out_offset, out_count - out_offset, 530 - "rx_alloc_entry - \t%u\n\n", qp->rx_alloc_entry); 491 + seq_printf(s, "tx_bytes - \t%llu\n", qp->tx_bytes); 492 + seq_printf(s, "tx_pkts - \t%llu\n", qp->tx_pkts); 493 + seq_printf(s, "tx_memcpy - \t%llu\n", qp->tx_memcpy); 494 + seq_printf(s, "tx_async - \t%llu\n", qp->tx_async); 495 + seq_printf(s, "tx_ring_full - \t%llu\n", qp->tx_ring_full); 496 + seq_printf(s, "tx_err_no_buf - %llu\n", qp->tx_err_no_buf); 497 + seq_printf(s, "tx_mw - \t0x%p\n", qp->tx_mw); 498 + seq_printf(s, "tx_index (H) - \t%u\n", qp->tx_index); 499 + seq_printf(s, "RRI (T) - \t%u\n", qp->remote_rx_info->entry); 500 + seq_printf(s, "tx_max_entry - \t%u\n", qp->tx_max_entry); 501 + seq_printf(s, "free tx - \t%u\n", ntb_transport_tx_free_entry(qp)); 502 + seq_putc(s, '\n'); 531 503 532 - out_offset += scnprintf(buf + out_offset, out_count - out_offset, 533 - 
"tx_bytes - \t%llu\n", qp->tx_bytes); 534 - out_offset += scnprintf(buf + out_offset, out_count - out_offset, 535 - "tx_pkts - \t%llu\n", qp->tx_pkts); 536 - out_offset += scnprintf(buf + out_offset, out_count - out_offset, 537 - "tx_memcpy - \t%llu\n", qp->tx_memcpy); 538 - out_offset += scnprintf(buf + out_offset, out_count - out_offset, 539 - "tx_async - \t%llu\n", qp->tx_async); 540 - out_offset += scnprintf(buf + out_offset, out_count - out_offset, 541 - "tx_ring_full - \t%llu\n", qp->tx_ring_full); 542 - out_offset += scnprintf(buf + out_offset, out_count - out_offset, 543 - "tx_err_no_buf - %llu\n", qp->tx_err_no_buf); 544 - out_offset += scnprintf(buf + out_offset, out_count - out_offset, 545 - "tx_mw - \t0x%p\n", qp->tx_mw); 546 - out_offset += scnprintf(buf + out_offset, out_count - out_offset, 547 - "tx_index (H) - \t%u\n", qp->tx_index); 548 - out_offset += scnprintf(buf + out_offset, out_count - out_offset, 549 - "RRI (T) - \t%u\n", 550 - qp->remote_rx_info->entry); 551 - out_offset += scnprintf(buf + out_offset, out_count - out_offset, 552 - "tx_max_entry - \t%u\n", qp->tx_max_entry); 553 - out_offset += scnprintf(buf + out_offset, out_count - out_offset, 554 - "free tx - \t%u\n", 555 - ntb_transport_tx_free_entry(qp)); 504 + seq_printf(s, "Using TX DMA - \t%s\n", qp->tx_dma_chan ? "Yes" : "No"); 505 + seq_printf(s, "Using RX DMA - \t%s\n", qp->rx_dma_chan ? "Yes" : "No"); 506 + seq_printf(s, "QP Link - \t%s\n", qp->link_is_up ? "Up" : "Down"); 507 + seq_putc(s, '\n'); 556 508 557 - out_offset += scnprintf(buf + out_offset, out_count - out_offset, 558 - "\n"); 559 - out_offset += scnprintf(buf + out_offset, out_count - out_offset, 560 - "Using TX DMA - \t%s\n", 561 - qp->tx_dma_chan ? "Yes" : "No"); 562 - out_offset += scnprintf(buf + out_offset, out_count - out_offset, 563 - "Using RX DMA - \t%s\n", 564 - qp->rx_dma_chan ? 
"Yes" : "No"); 565 - out_offset += scnprintf(buf + out_offset, out_count - out_offset, 566 - "QP Link - \t%s\n", 567 - qp->link_is_up ? "Up" : "Down"); 568 - out_offset += scnprintf(buf + out_offset, out_count - out_offset, 569 - "\n"); 570 - 571 - if (out_offset > out_count) 572 - out_offset = out_count; 573 - 574 - ret = simple_read_from_buffer(ubuf, count, offp, buf, out_offset); 575 - kfree(buf); 576 - return ret; 509 + return 0; 577 510 } 578 - 579 - static const struct file_operations ntb_qp_debugfs_stats = { 580 - .owner = THIS_MODULE, 581 - .open = simple_open, 582 - .read = debugfs_read, 583 - }; 511 + DEFINE_SHOW_ATTRIBUTE(ntb_qp_debugfs_stats); 584 512 585 513 static void ntb_list_add(spinlock_t *lock, struct list_head *entry, 586 514 struct list_head *list) ··· 1198 1236 qp->tx_max_entry = tx_size / qp->tx_max_frame; 1199 1237 1200 1238 if (nt->debugfs_node_dir) { 1201 - char debugfs_name[4]; 1239 + char debugfs_name[8]; 1202 1240 1203 - snprintf(debugfs_name, 4, "qp%d", qp_num); 1241 + snprintf(debugfs_name, sizeof(debugfs_name), "qp%d", qp_num); 1204 1242 qp->debugfs_dir = debugfs_create_dir(debugfs_name, 1205 1243 nt->debugfs_node_dir); 1206 1244 1207 1245 qp->debugfs_stats = debugfs_create_file("stats", S_IRUSR, 1208 1246 qp->debugfs_dir, qp, 1209 - &ntb_qp_debugfs_stats); 1247 + &ntb_qp_debugfs_stats_fops); 1210 1248 } else { 1211 1249 qp->debugfs_dir = NULL; 1212 1250 qp->debugfs_stats = NULL; ··· 1217 1255 1218 1256 spin_lock_init(&qp->ntb_rx_q_lock); 1219 1257 spin_lock_init(&qp->ntb_tx_free_q_lock); 1258 + spin_lock_init(&qp->ntb_tx_offl_q_lock); 1220 1259 1221 1260 INIT_LIST_HEAD(&qp->rx_post_q); 1222 1261 INIT_LIST_HEAD(&qp->rx_pend_q); 1223 1262 INIT_LIST_HEAD(&qp->rx_free_q); 1224 1263 INIT_LIST_HEAD(&qp->tx_free_q); 1264 + INIT_LIST_HEAD(&qp->tx_offl_q); 1225 1265 1226 1266 tasklet_init(&qp->rxc_db_work, ntb_transport_rxc_db, 1227 1267 (unsigned long)qp); ··· 1535 1571 goto err; 1536 1572 1537 1573 unmap->len = len; 1538 - unmap->addr[0] = 
dma_map_page(device->dev, virt_to_page(offset), 1539 - pay_off, len, DMA_TO_DEVICE); 1574 + unmap->addr[0] = dma_map_phys(device->dev, virt_to_phys(offset), 1575 + len, DMA_TO_DEVICE, 0); 1540 1576 if (dma_mapping_error(device->dev, unmap->addr[0])) 1541 1577 goto err_get_unmap; 1542 1578 1543 1579 unmap->to_cnt = 1; 1544 1580 1545 - unmap->addr[1] = dma_map_page(device->dev, virt_to_page(buf), 1546 - buff_off, len, DMA_FROM_DEVICE); 1581 + unmap->addr[1] = dma_map_phys(device->dev, virt_to_phys(buf), 1582 + len, DMA_FROM_DEVICE, 0); 1547 1583 if (dma_mapping_error(device->dev, unmap->addr[1])) 1548 1584 goto err_get_unmap; 1549 1585 ··· 1595 1631 if (res < 0) 1596 1632 goto err; 1597 1633 1598 - if (!entry->retries) 1599 - qp->rx_async++; 1600 - 1634 + qp->rx_async++; 1601 1635 return; 1602 1636 1603 1637 err: ··· 1750 1788 1751 1789 iowrite32(entry->flags | DESC_DONE_FLAG, &hdr->flags); 1752 1790 1791 + /* 1792 + * Make DONE flag visible before DB/MSI. WC + posted MWr may reorder 1793 + * across iATU/bridge (platform-dependent). Order and flush here. 
1794 + */ 1795 + dma_mb(); 1796 + ioread32(&hdr->flags); 1797 + 1753 1798 if (qp->use_msi) 1754 1799 ntb_msi_peer_trigger(qp->ndev, PIDX, &qp->peer_msi_desc); 1755 1800 else ··· 1777 1808 ntb_list_add(&qp->ntb_tx_free_q_lock, &entry->entry, &qp->tx_free_q); 1778 1809 } 1779 1810 1780 - static void ntb_memcpy_tx(struct ntb_queue_entry *entry, void __iomem *offset) 1811 + static void ntb_memcpy_tx_on_stack(struct ntb_queue_entry *entry, void __iomem *offset) 1781 1812 { 1782 1813 #ifdef ARCH_HAS_NOCACHE_UACCESS 1783 1814 /* ··· 1793 1824 wmb(); 1794 1825 1795 1826 ntb_tx_copy_callback(entry, NULL); 1827 + } 1828 + 1829 + static int ntb_tx_memcpy_kthread(void *data) 1830 + { 1831 + struct ntb_transport_qp *qp = data; 1832 + struct ntb_queue_entry *entry, *tmp; 1833 + const int resched_nr = 64; 1834 + LIST_HEAD(local_list); 1835 + void __iomem *offset; 1836 + int processed = 0; 1837 + 1838 + while (!kthread_should_stop()) { 1839 + spin_lock_irq(&qp->ntb_tx_offl_q_lock); 1840 + wait_event_interruptible_lock_irq_timeout(qp->tx_offload_wq, 1841 + kthread_should_stop() || 1842 + !list_empty(&qp->tx_offl_q), 1843 + qp->ntb_tx_offl_q_lock, 5*HZ); 1844 + list_splice_tail_init(&qp->tx_offl_q, &local_list); 1845 + spin_unlock_irq(&qp->ntb_tx_offl_q_lock); 1846 + 1847 + list_for_each_entry_safe(entry, tmp, &local_list, entry) { 1848 + list_del(&entry->entry); 1849 + offset = qp->tx_mw + qp->tx_max_frame * entry->tx_index; 1850 + ntb_memcpy_tx_on_stack(entry, offset); 1851 + if (++processed >= resched_nr) { 1852 + cond_resched(); 1853 + processed = 0; 1854 + } 1855 + } 1856 + cond_resched(); 1857 + } 1858 + 1859 + return 0; 1860 + } 1861 + 1862 + static void ntb_memcpy_tx(struct ntb_queue_entry *entry, void __iomem *offset) 1863 + { 1864 + struct ntb_transport_qp *qp = entry->qp; 1865 + 1866 + if (WARN_ON_ONCE(!qp)) 1867 + return; 1868 + 1869 + if (ntb_tx_offload_enabled(qp)) { 1870 + ntb_list_add(&qp->ntb_tx_offl_q_lock, &entry->entry, 1871 + &qp->tx_offl_q); 1872 + 
wake_up(&qp->tx_offload_wq); 1873 + } else 1874 + ntb_memcpy_tx_on_stack(entry, offset); 1796 1875 } 1797 1876 1798 1877 static int ntb_async_tx_submit(struct ntb_transport_qp *qp, ··· 1869 1852 goto err; 1870 1853 1871 1854 unmap->len = len; 1872 - unmap->addr[0] = dma_map_page(device->dev, virt_to_page(buf), 1873 - buff_off, len, DMA_TO_DEVICE); 1855 + unmap->addr[0] = dma_map_phys(device->dev, virt_to_phys(buf), 1856 + len, DMA_TO_DEVICE, 0); 1874 1857 if (dma_mapping_error(device->dev, unmap->addr[0])) 1875 1858 goto err_get_unmap; 1876 1859 ··· 1915 1898 hdr = offset + qp->tx_max_frame - sizeof(struct ntb_payload_header); 1916 1899 entry->tx_hdr = hdr; 1917 1900 1901 + WARN_ON_ONCE(!ntb_transport_tx_free_entry(qp)); 1902 + WRITE_ONCE(qp->tx_index, (qp->tx_index + 1) % qp->tx_max_entry); 1903 + 1918 1904 iowrite32(entry->len, &hdr->len); 1919 1905 iowrite32((u32)qp->tx_pkts, &hdr->ver); 1920 1906 ··· 1931 1911 if (res < 0) 1932 1912 goto err; 1933 1913 1934 - if (!entry->retries) 1935 - qp->tx_async++; 1936 - 1914 + qp->tx_async++; 1937 1915 return; 1938 1916 1939 1917 err: ··· 1957 1939 } 1958 1940 1959 1941 ntb_async_tx(qp, entry); 1960 - 1961 - qp->tx_index++; 1962 - qp->tx_index %= qp->tx_max_entry; 1963 1942 1964 1943 qp->tx_pkts++; 1965 1944 ··· 2053 2038 qp->rx_handler = handlers->rx_handler; 2054 2039 qp->tx_handler = handlers->tx_handler; 2055 2040 qp->event_handler = handlers->event_handler; 2041 + 2042 + init_waitqueue_head(&qp->tx_offload_wq); 2043 + if (tx_memcpy_offload) { 2044 + qp->tx_offload_thread = kthread_run(ntb_tx_memcpy_kthread, qp, 2045 + "ntb-txcpy/%s/%u", 2046 + pci_name(ndev->pdev), qp->qp_num); 2047 + if (IS_ERR(qp->tx_offload_thread)) { 2048 + dev_warn(&nt->ndev->dev, 2049 + "tx memcpy offload thread creation failed: %ld; falling back to inline copy\n", 2050 + PTR_ERR(qp->tx_offload_thread)); 2051 + qp->tx_offload_thread = NULL; 2052 + } 2053 + } else 2054 + qp->tx_offload_thread = NULL; 2056 2055 2057 2056 dma_cap_zero(dma_mask); 
2058 2057 dma_cap_set(DMA_MEMCPY, dma_mask); ··· 2175 2146 2176 2147 qp->active = false; 2177 2148 2149 + if (qp->tx_offload_thread) { 2150 + kthread_stop(qp->tx_offload_thread); 2151 + qp->tx_offload_thread = NULL; 2152 + } 2153 + 2178 2154 if (qp->tx_dma_chan) { 2179 2155 struct dma_chan *chan = qp->tx_dma_chan; 2180 2156 /* Putting the dma_chan to NULL will force any new traffic to be ··· 2241 2207 } 2242 2208 2243 2209 while ((entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q))) 2210 + kfree(entry); 2211 + 2212 + while ((entry = ntb_list_rm(&qp->ntb_tx_offl_q_lock, &qp->tx_offl_q))) 2244 2213 kfree(entry); 2245 2214 2246 2215 qp->transport->qp_bitmap_free |= qp_bit; ··· 2311 2274 entry->buf = data; 2312 2275 entry->len = len; 2313 2276 entry->flags = 0; 2314 - entry->retries = 0; 2315 2277 entry->errors = 0; 2316 2278 entry->rx_index = 0; 2317 2279 ··· 2360 2324 entry->len = len; 2361 2325 entry->flags = 0; 2362 2326 entry->errors = 0; 2363 - entry->retries = 0; 2364 2327 entry->tx_index = 0; 2365 2328 2366 2329 rc = ntb_process_tx(qp, entry);
+1 -1
drivers/ntb/test/ntb_tool.c
··· 936 936 937 937 buf[buf_size] = '\0'; 938 938 939 - n = sscanf(buf, "%lli:%zi", &addr, &wsize); 939 + n = sscanf(buf, "%llu:%zu", &addr, &wsize); 940 940 if (n != 2) 941 941 return -EINVAL; 942 942
-14
include/linux/ntb.h
··· 1647 1647 irq_handler_t thread_fn, 1648 1648 const char *name, void *dev_id, 1649 1649 struct ntb_msi_desc *msi_desc); 1650 - void ntbm_msi_free_irq(struct ntb_dev *ntb, unsigned int irq, void *dev_id); 1651 1650 int ntb_msi_peer_trigger(struct ntb_dev *ntb, int peer, 1652 1651 struct ntb_msi_desc *desc); 1653 - int ntb_msi_peer_addr(struct ntb_dev *ntb, int peer, 1654 - struct ntb_msi_desc *desc, 1655 - phys_addr_t *msi_addr); 1656 1652 1657 1653 #else /* not CONFIG_NTB_MSI */ 1658 1654 ··· 1670 1674 { 1671 1675 return -EOPNOTSUPP; 1672 1676 } 1673 - static inline void ntbm_msi_free_irq(struct ntb_dev *ntb, unsigned int irq, 1674 - void *dev_id) {} 1675 1677 static inline int ntb_msi_peer_trigger(struct ntb_dev *ntb, int peer, 1676 1678 struct ntb_msi_desc *desc) 1677 1679 { 1678 1680 return -EOPNOTSUPP; 1679 1681 } 1680 - static inline int ntb_msi_peer_addr(struct ntb_dev *ntb, int peer, 1681 - struct ntb_msi_desc *desc, 1682 - phys_addr_t *msi_addr) 1683 - { 1684 - return -EOPNOTSUPP; 1685 - 1686 - } 1687 - 1688 1682 #endif /* CONFIG_NTB_MSI */ 1689 1683 1690 1684 static inline int ntbm_msi_request_irq(struct ntb_dev *ntb,