Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge patch series "can: m_can: Optimizations for m_can/tcan part 2"

Markus Schneider-Pargmann <msp@baylibre.com> says:

The series implements many small and bigger throughput improvements and
adds rx/tx coalescing at the end.

Changes in v7:
- Rebased to v6.8-rc1
- Fixed NULL pointer dereference in m_can_clean() on am62 that happened
when doing ip link up, ip link down, ip link up
- Fixed a race condition on am62 observed in high-throughput tests.
netdev_completed_queue() was called before netdev_sent_queue() because
the interrupt was processed so quickly. netdev_sent_queue() is now
called before the frame is actually sent.
- Fixed an initialization issue on am62 where active interrupts were
getting lost between runs. Fixed by resetting cdev->active_interrupts
in m_can_disable_all_interrupts()
- Removed m_can_start_fast_xmit() because of a reordering of operations
due to the above-mentioned race condition

Changes in v6:
- Rebased to v6.6-rc2
- Added two small changes for the newly integrated polling feature
- Reuse the polling hrtimer for coalescing, as the timer used for
coalescing has a similar purpose to the one used for polling. Also,
polling and coalescing will never be active at the same time.

Changes in v5:
- Add back parentheses in m_can_set_coalesce(). This will make
checkpatch unhappy but gcc happy.
- Remove unused fifo_header variable in m_can_tx_handler().
- Rebased to v6.5-rc1

Changes in v4:
- Create and use struct m_can_fifo_element in m_can_tx_handler
- Fix memcpy_and_pad to copy the full buffer
- Fixed a few checkpatch warnings
- Change putidx to be unsigned
- Print hard_xmit error only once when TX FIFO is full

Changes in v3:
- Remove parentheses in error messages
- Use memcpy_and_pad for buffer copy in 'can: m_can: Write transmit
header and data in one transaction'.
- Replace spin_lock with spin_lock_irqsave. I got a report of an
interrupt that called start_xmit just after the netqueue was woken up,
before the locked region was exited. spin_lock_irqsave should
fix this. I attached the full stack at the end of the mail in case
someone wants to see it.
- Rebased to v6.3-rc1.
- Removed tcan4x5x patches from this series.

Changes in v2:
- Rebased on v6.2-rc5
- Fixed missing/broken accounting for non-peripheral m_can devices.

previous versions:
v1 - https://lore.kernel.org/lkml/20221221152537.751564-1-msp@baylibre.com
v2 - https://lore.kernel.org/lkml/20230125195059.630377-1-msp@baylibre.com
v3 - https://lore.kernel.org/lkml/20230315110546.2518305-1-msp@baylibre.com
v4 - https://lore.kernel.org/lkml/20230621092350.3130866-1-msp@baylibre.com
v5 - https://lore.kernel.org/lkml/20230718075708.958094-1-msp@baylibre.com
v6 - https://lore.kernel.org/lkml/20230929141304.3934380-1-msp@baylibre.com

Link: https://lore.kernel.org/all/20240207093220.2681425-1-msp@baylibre.com
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>

+439 -150
+407 -144
drivers/net/can/m_can/m_can.c
··· 255 255 #define TXESC_TBDS_64B 0x7 256 256 257 257 /* Tx Event FIFO Configuration (TXEFC) */ 258 + #define TXEFC_EFWM_MASK GENMASK(29, 24) 258 259 #define TXEFC_EFS_MASK GENMASK(21, 16) 259 260 260 261 /* Tx Event FIFO Status (TXEFS) */ ··· 321 320 u32 dlc; 322 321 }; 323 322 323 + struct m_can_fifo_element { 324 + u32 id; 325 + u32 dlc; 326 + u8 data[CANFD_MAX_DLEN]; 327 + }; 328 + 324 329 static inline u32 m_can_read(struct m_can_classdev *cdev, enum m_can_reg reg) 325 330 { 326 331 return cdev->ops->read_reg(cdev, reg); ··· 379 372 return cdev->ops->read_fifo(cdev, addr_offset, val, 1); 380 373 } 381 374 382 - static inline bool _m_can_tx_fifo_full(u32 txfqs) 383 - { 384 - return !!(txfqs & TXFQS_TFQF); 385 - } 386 - 387 - static inline bool m_can_tx_fifo_full(struct m_can_classdev *cdev) 388 - { 389 - return _m_can_tx_fifo_full(m_can_read(cdev, M_CAN_TXFQS)); 390 - } 391 - 392 375 static void m_can_config_endisable(struct m_can_classdev *cdev, bool enable) 393 376 { 394 377 u32 cccr = m_can_read(cdev, M_CAN_CCCR); ··· 413 416 } 414 417 } 415 418 419 + static void m_can_interrupt_enable(struct m_can_classdev *cdev, u32 interrupts) 420 + { 421 + if (cdev->active_interrupts == interrupts) 422 + return; 423 + cdev->ops->write_reg(cdev, M_CAN_IE, interrupts); 424 + cdev->active_interrupts = interrupts; 425 + } 426 + 427 + static void m_can_coalescing_disable(struct m_can_classdev *cdev) 428 + { 429 + u32 new_interrupts = cdev->active_interrupts | IR_RF0N | IR_TEFN; 430 + 431 + if (!cdev->net->irq) 432 + return; 433 + 434 + hrtimer_cancel(&cdev->hrtimer); 435 + m_can_interrupt_enable(cdev, new_interrupts); 436 + } 437 + 416 438 static inline void m_can_enable_all_interrupts(struct m_can_classdev *cdev) 417 439 { 440 + if (!cdev->net->irq) { 441 + dev_dbg(cdev->dev, "Start hrtimer\n"); 442 + hrtimer_start(&cdev->hrtimer, 443 + ms_to_ktime(HRTIMER_POLL_INTERVAL_MS), 444 + HRTIMER_MODE_REL_PINNED); 445 + } 446 + 418 447 /* Only interrupt line 0 is used in this 
driver */ 419 448 m_can_write(cdev, M_CAN_ILE, ILE_EINT0); 420 449 } 421 450 422 451 static inline void m_can_disable_all_interrupts(struct m_can_classdev *cdev) 423 452 { 453 + m_can_coalescing_disable(cdev); 424 454 m_can_write(cdev, M_CAN_ILE, 0x0); 455 + cdev->active_interrupts = 0x0; 456 + 457 + if (!cdev->net->irq) { 458 + dev_dbg(cdev->dev, "Stop hrtimer\n"); 459 + hrtimer_cancel(&cdev->hrtimer); 460 + } 425 461 } 426 462 427 463 /* Retrieve internal timestamp counter from TSCV.TSC, and shift it to 32-bit ··· 474 444 static void m_can_clean(struct net_device *net) 475 445 { 476 446 struct m_can_classdev *cdev = netdev_priv(net); 447 + unsigned long irqflags; 477 448 478 - if (cdev->tx_skb) { 479 - int putidx = 0; 449 + if (cdev->tx_ops) { 450 + for (int i = 0; i != cdev->tx_fifo_size; ++i) { 451 + if (!cdev->tx_ops[i].skb) 452 + continue; 480 453 481 - net->stats.tx_errors++; 482 - if (cdev->version > 30) 483 - putidx = FIELD_GET(TXFQS_TFQPI_MASK, 484 - m_can_read(cdev, M_CAN_TXFQS)); 485 - 486 - can_free_echo_skb(cdev->net, putidx, NULL); 487 - cdev->tx_skb = NULL; 454 + net->stats.tx_errors++; 455 + cdev->tx_ops[i].skb = NULL; 456 + } 488 457 } 458 + 459 + for (int i = 0; i != cdev->can.echo_skb_max; ++i) 460 + can_free_echo_skb(cdev->net, i, NULL); 461 + 462 + netdev_reset_queue(cdev->net); 463 + 464 + spin_lock_irqsave(&cdev->tx_handling_spinlock, irqflags); 465 + cdev->tx_fifo_in_flight = 0; 466 + spin_unlock_irqrestore(&cdev->tx_handling_spinlock, irqflags); 489 467 } 490 468 491 469 /* For peripherals, pass skb to rx-offload, which will push skb from ··· 1045 1007 * echo. timestamp is used for peripherals to ensure correct ordering 1046 1008 * by rx-offload, and is ignored for non-peripherals. 
1047 1009 */ 1048 - static void m_can_tx_update_stats(struct m_can_classdev *cdev, 1049 - unsigned int msg_mark, 1050 - u32 timestamp) 1010 + static unsigned int m_can_tx_update_stats(struct m_can_classdev *cdev, 1011 + unsigned int msg_mark, u32 timestamp) 1051 1012 { 1052 1013 struct net_device *dev = cdev->net; 1053 1014 struct net_device_stats *stats = &dev->stats; 1015 + unsigned int frame_len; 1054 1016 1055 1017 if (cdev->is_peripheral) 1056 1018 stats->tx_bytes += 1057 1019 can_rx_offload_get_echo_skb_queue_timestamp(&cdev->offload, 1058 1020 msg_mark, 1059 1021 timestamp, 1060 - NULL); 1022 + &frame_len); 1061 1023 else 1062 - stats->tx_bytes += can_get_echo_skb(dev, msg_mark, NULL); 1024 + stats->tx_bytes += can_get_echo_skb(dev, msg_mark, &frame_len); 1063 1025 1064 1026 stats->tx_packets++; 1027 + 1028 + return frame_len; 1029 + } 1030 + 1031 + static void m_can_finish_tx(struct m_can_classdev *cdev, int transmitted, 1032 + unsigned int transmitted_frame_len) 1033 + { 1034 + unsigned long irqflags; 1035 + 1036 + netdev_completed_queue(cdev->net, transmitted, transmitted_frame_len); 1037 + 1038 + spin_lock_irqsave(&cdev->tx_handling_spinlock, irqflags); 1039 + if (cdev->tx_fifo_in_flight >= cdev->tx_fifo_size && transmitted > 0) 1040 + netif_wake_queue(cdev->net); 1041 + cdev->tx_fifo_in_flight -= transmitted; 1042 + spin_unlock_irqrestore(&cdev->tx_handling_spinlock, irqflags); 1043 + } 1044 + 1045 + static netdev_tx_t m_can_start_tx(struct m_can_classdev *cdev) 1046 + { 1047 + unsigned long irqflags; 1048 + int tx_fifo_in_flight; 1049 + 1050 + spin_lock_irqsave(&cdev->tx_handling_spinlock, irqflags); 1051 + tx_fifo_in_flight = cdev->tx_fifo_in_flight + 1; 1052 + if (tx_fifo_in_flight >= cdev->tx_fifo_size) { 1053 + netif_stop_queue(cdev->net); 1054 + if (tx_fifo_in_flight > cdev->tx_fifo_size) { 1055 + netdev_err_once(cdev->net, "hard_xmit called while TX FIFO full\n"); 1056 + spin_unlock_irqrestore(&cdev->tx_handling_spinlock, irqflags); 1057 + return 
NETDEV_TX_BUSY; 1058 + } 1059 + } 1060 + cdev->tx_fifo_in_flight = tx_fifo_in_flight; 1061 + spin_unlock_irqrestore(&cdev->tx_handling_spinlock, irqflags); 1062 + 1063 + return NETDEV_TX_OK; 1065 1064 } 1066 1065 1067 1066 static int m_can_echo_tx_event(struct net_device *dev) ··· 1110 1035 int i = 0; 1111 1036 int err = 0; 1112 1037 unsigned int msg_mark; 1038 + int processed = 0; 1039 + unsigned int processed_frame_len = 0; 1113 1040 1114 1041 struct m_can_classdev *cdev = netdev_priv(dev); 1115 1042 ··· 1140 1063 fgi = (++fgi >= cdev->mcfg[MRAM_TXE].num ? 0 : fgi); 1141 1064 1142 1065 /* update stats */ 1143 - m_can_tx_update_stats(cdev, msg_mark, timestamp); 1066 + processed_frame_len += m_can_tx_update_stats(cdev, msg_mark, 1067 + timestamp); 1068 + 1069 + ++processed; 1144 1070 } 1145 1071 1146 1072 if (ack_fgi != -1) 1147 1073 m_can_write(cdev, M_CAN_TXEFA, FIELD_PREP(TXEFA_EFAI_MASK, 1148 1074 ack_fgi)); 1149 1075 1076 + m_can_finish_tx(cdev, processed, processed_frame_len); 1077 + 1150 1078 return err; 1079 + } 1080 + 1081 + static void m_can_coalescing_update(struct m_can_classdev *cdev, u32 ir) 1082 + { 1083 + u32 new_interrupts = cdev->active_interrupts; 1084 + bool enable_rx_timer = false; 1085 + bool enable_tx_timer = false; 1086 + 1087 + if (!cdev->net->irq) 1088 + return; 1089 + 1090 + if (cdev->rx_coalesce_usecs_irq > 0 && (ir & (IR_RF0N | IR_RF0W))) { 1091 + enable_rx_timer = true; 1092 + new_interrupts &= ~IR_RF0N; 1093 + } 1094 + if (cdev->tx_coalesce_usecs_irq > 0 && (ir & (IR_TEFN | IR_TEFW))) { 1095 + enable_tx_timer = true; 1096 + new_interrupts &= ~IR_TEFN; 1097 + } 1098 + if (!enable_rx_timer && !hrtimer_active(&cdev->hrtimer)) 1099 + new_interrupts |= IR_RF0N; 1100 + if (!enable_tx_timer && !hrtimer_active(&cdev->hrtimer)) 1101 + new_interrupts |= IR_TEFN; 1102 + 1103 + m_can_interrupt_enable(cdev, new_interrupts); 1104 + if (enable_rx_timer | enable_tx_timer) 1105 + hrtimer_start(&cdev->hrtimer, cdev->irq_timer_wait, 1106 + 
HRTIMER_MODE_REL); 1151 1107 } 1152 1108 1153 1109 static irqreturn_t m_can_isr(int irq, void *dev_id) ··· 1189 1079 struct m_can_classdev *cdev = netdev_priv(dev); 1190 1080 u32 ir; 1191 1081 1192 - if (pm_runtime_suspended(cdev->dev)) 1082 + if (pm_runtime_suspended(cdev->dev)) { 1083 + m_can_coalescing_disable(cdev); 1193 1084 return IRQ_NONE; 1085 + } 1086 + 1194 1087 ir = m_can_read(cdev, M_CAN_IR); 1088 + m_can_coalescing_update(cdev, ir); 1195 1089 if (!ir) 1196 1090 return IRQ_NONE; 1197 1091 ··· 1210 1096 * - state change IRQ 1211 1097 * - bus error IRQ and bus error reporting 1212 1098 */ 1213 - if ((ir & IR_RF0N) || (ir & IR_ERR_ALL_30X)) { 1099 + if (ir & (IR_RF0N | IR_RF0W | IR_ERR_ALL_30X)) { 1214 1100 cdev->irqstatus = ir; 1215 1101 if (!cdev->is_peripheral) { 1216 1102 m_can_disable_all_interrupts(cdev); 1217 1103 napi_schedule(&cdev->napi); 1218 - } else if (m_can_rx_peripheral(dev, ir) < 0) { 1219 - goto out_fail; 1104 + } else { 1105 + int pkts; 1106 + 1107 + pkts = m_can_rx_peripheral(dev, ir); 1108 + if (pkts < 0) 1109 + goto out_fail; 1220 1110 } 1221 1111 } 1222 1112 ··· 1228 1110 if (ir & IR_TC) { 1229 1111 /* Transmission Complete Interrupt*/ 1230 1112 u32 timestamp = 0; 1113 + unsigned int frame_len; 1231 1114 1232 1115 if (cdev->is_peripheral) 1233 1116 timestamp = m_can_get_timestamp(cdev); 1234 - m_can_tx_update_stats(cdev, 0, timestamp); 1235 - netif_wake_queue(dev); 1117 + frame_len = m_can_tx_update_stats(cdev, 0, timestamp); 1118 + m_can_finish_tx(cdev, 1, frame_len); 1236 1119 } 1237 1120 } else { 1238 - if (ir & IR_TEFN) { 1121 + if (ir & (IR_TEFN | IR_TEFW)) { 1239 1122 /* New TX FIFO Element arrived */ 1240 1123 if (m_can_echo_tx_event(dev) != 0) 1241 1124 goto out_fail; 1242 - 1243 - if (netif_queue_stopped(dev) && 1244 - !m_can_tx_fifo_full(cdev)) 1245 - netif_wake_queue(dev); 1246 1125 } 1247 1126 } 1248 1127 ··· 1251 1136 out_fail: 1252 1137 m_can_disable_all_interrupts(cdev); 1253 1138 return IRQ_HANDLED; 1139 + } 1140 + 
1141 + static enum hrtimer_restart m_can_coalescing_timer(struct hrtimer *timer) 1142 + { 1143 + struct m_can_classdev *cdev = container_of(timer, struct m_can_classdev, hrtimer); 1144 + 1145 + irq_wake_thread(cdev->net->irq, cdev->net); 1146 + 1147 + return HRTIMER_NORESTART; 1254 1148 } 1255 1149 1256 1150 static const struct can_bittiming_const m_can_bittiming_const_30X = { ··· 1400 1276 } 1401 1277 1402 1278 /* Disable unused interrupts */ 1403 - interrupts &= ~(IR_ARA | IR_ELO | IR_DRX | IR_TEFF | IR_TEFW | IR_TFE | 1404 - IR_TCF | IR_HPM | IR_RF1F | IR_RF1W | IR_RF1N | 1405 - IR_RF0F | IR_RF0W); 1279 + interrupts &= ~(IR_ARA | IR_ELO | IR_DRX | IR_TEFF | IR_TFE | IR_TCF | 1280 + IR_HPM | IR_RF1F | IR_RF1W | IR_RF1N | IR_RF0F); 1406 1281 1407 1282 m_can_config_endisable(cdev, true); 1408 1283 ··· 1438 1315 } else { 1439 1316 /* Full TX Event FIFO is used */ 1440 1317 m_can_write(cdev, M_CAN_TXEFC, 1318 + FIELD_PREP(TXEFC_EFWM_MASK, 1319 + cdev->tx_max_coalesced_frames_irq) | 1441 1320 FIELD_PREP(TXEFC_EFS_MASK, 1442 1321 cdev->mcfg[MRAM_TXE].num) | 1443 1322 cdev->mcfg[MRAM_TXE].off); ··· 1447 1322 1448 1323 /* rx fifo configuration, blocking mode, fifo size 1 */ 1449 1324 m_can_write(cdev, M_CAN_RXF0C, 1325 + FIELD_PREP(RXFC_FWM_MASK, cdev->rx_max_coalesced_frames_irq) | 1450 1326 FIELD_PREP(RXFC_FS_MASK, cdev->mcfg[MRAM_RXF0].num) | 1451 1327 cdev->mcfg[MRAM_RXF0].off); 1452 1328 ··· 1506 1380 else 1507 1381 interrupts &= ~(IR_ERR_LEC_31X); 1508 1382 } 1509 - m_can_write(cdev, M_CAN_IE, interrupts); 1383 + m_can_interrupt_enable(cdev, interrupts); 1510 1384 1511 1385 /* route all interrupts to INT0 */ 1512 1386 m_can_write(cdev, M_CAN_ILS, ILS_ALL_INT0); ··· 1539 1413 if (ret) 1540 1414 return ret; 1541 1415 1416 + netdev_queue_set_dql_min_limit(netdev_get_tx_queue(cdev->net, 0), 1417 + cdev->tx_max_coalesced_frames); 1418 + 1542 1419 cdev->can.state = CAN_STATE_ERROR_ACTIVE; 1543 1420 1544 1421 m_can_enable_all_interrupts(cdev); 1545 1422 1546 - if 
(!dev->irq) { 1547 - dev_dbg(cdev->dev, "Start hrtimer\n"); 1548 - hrtimer_start(&cdev->hrtimer, ms_to_ktime(HRTIMER_POLL_INTERVAL_MS), 1549 - HRTIMER_MODE_REL_PINNED); 1550 - } 1423 + if (cdev->version > 30) 1424 + cdev->tx_fifo_putidx = FIELD_GET(TXFQS_TFQPI_MASK, 1425 + m_can_read(cdev, M_CAN_TXFQS)); 1551 1426 1552 1427 return 0; 1553 1428 } ··· 1704 1577 { 1705 1578 struct m_can_classdev *cdev = netdev_priv(dev); 1706 1579 1707 - if (!dev->irq) { 1708 - dev_dbg(cdev->dev, "Stop hrtimer\n"); 1709 - hrtimer_cancel(&cdev->hrtimer); 1710 - } 1711 - 1712 1580 /* disable all interrupts */ 1713 1581 m_can_disable_all_interrupts(cdev); 1714 1582 ··· 1727 1605 m_can_clk_stop(cdev); 1728 1606 free_irq(dev->irq, dev); 1729 1607 1608 + m_can_clean(dev); 1609 + 1730 1610 if (cdev->is_peripheral) { 1731 - cdev->tx_skb = NULL; 1732 1611 destroy_workqueue(cdev->tx_wq); 1733 1612 cdev->tx_wq = NULL; 1734 1613 can_rx_offload_disable(&cdev->offload); ··· 1742 1619 return 0; 1743 1620 } 1744 1621 1745 - static int m_can_next_echo_skb_occupied(struct net_device *dev, int putidx) 1622 + static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev, 1623 + struct sk_buff *skb) 1746 1624 { 1747 - struct m_can_classdev *cdev = netdev_priv(dev); 1748 - /*get wrap around for loopback skb index */ 1749 - unsigned int wrap = cdev->can.echo_skb_max; 1750 - int next_idx; 1751 - 1752 - /* calculate next index */ 1753 - next_idx = (++putidx >= wrap ? 
0 : putidx); 1754 - 1755 - /* check if occupied */ 1756 - return !!cdev->can.echo_skb[next_idx]; 1757 - } 1758 - 1759 - static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev) 1760 - { 1761 - struct canfd_frame *cf = (struct canfd_frame *)cdev->tx_skb->data; 1625 + struct canfd_frame *cf = (struct canfd_frame *)skb->data; 1626 + u8 len_padded = DIV_ROUND_UP(cf->len, 4); 1627 + struct m_can_fifo_element fifo_element; 1762 1628 struct net_device *dev = cdev->net; 1763 - struct sk_buff *skb = cdev->tx_skb; 1764 - struct id_and_dlc fifo_header; 1765 1629 u32 cccr, fdflags; 1766 - u32 txfqs; 1767 1630 int err; 1768 - int putidx; 1769 - 1770 - cdev->tx_skb = NULL; 1631 + u32 putidx; 1632 + unsigned int frame_len = can_skb_get_frame_len(skb); 1771 1633 1772 1634 /* Generate ID field for TX buffer Element */ 1773 1635 /* Common to all supported M_CAN versions */ 1774 1636 if (cf->can_id & CAN_EFF_FLAG) { 1775 - fifo_header.id = cf->can_id & CAN_EFF_MASK; 1776 - fifo_header.id |= TX_BUF_XTD; 1637 + fifo_element.id = cf->can_id & CAN_EFF_MASK; 1638 + fifo_element.id |= TX_BUF_XTD; 1777 1639 } else { 1778 - fifo_header.id = ((cf->can_id & CAN_SFF_MASK) << 18); 1640 + fifo_element.id = ((cf->can_id & CAN_SFF_MASK) << 18); 1779 1641 } 1780 1642 1781 1643 if (cf->can_id & CAN_RTR_FLAG) 1782 - fifo_header.id |= TX_BUF_RTR; 1644 + fifo_element.id |= TX_BUF_RTR; 1783 1645 1784 1646 if (cdev->version == 30) { 1785 1647 netif_stop_queue(dev); 1786 1648 1787 - fifo_header.dlc = can_fd_len2dlc(cf->len) << 16; 1649 + fifo_element.dlc = can_fd_len2dlc(cf->len) << 16; 1788 1650 1789 1651 /* Write the frame ID, DLC, and payload to the FIFO element. 
*/ 1790 - err = m_can_fifo_write(cdev, 0, M_CAN_FIFO_ID, &fifo_header, 2); 1652 + err = m_can_fifo_write(cdev, 0, M_CAN_FIFO_ID, &fifo_element, 2); 1791 1653 if (err) 1792 1654 goto out_fail; 1793 1655 1794 1656 err = m_can_fifo_write(cdev, 0, M_CAN_FIFO_DATA, 1795 - cf->data, DIV_ROUND_UP(cf->len, 4)); 1657 + cf->data, len_padded); 1796 1658 if (err) 1797 1659 goto out_fail; 1798 1660 ··· 1798 1690 } 1799 1691 m_can_write(cdev, M_CAN_TXBTIE, 0x1); 1800 1692 1801 - can_put_echo_skb(skb, dev, 0, 0); 1693 + can_put_echo_skb(skb, dev, 0, frame_len); 1802 1694 1803 1695 m_can_write(cdev, M_CAN_TXBAR, 0x1); 1804 1696 /* End of xmit function for version 3.0.x */ 1805 1697 } else { 1806 1698 /* Transmit routine for version >= v3.1.x */ 1807 1699 1808 - txfqs = m_can_read(cdev, M_CAN_TXFQS); 1809 - 1810 - /* Check if FIFO full */ 1811 - if (_m_can_tx_fifo_full(txfqs)) { 1812 - /* This shouldn't happen */ 1813 - netif_stop_queue(dev); 1814 - netdev_warn(dev, 1815 - "TX queue active although FIFO is full."); 1816 - 1817 - if (cdev->is_peripheral) { 1818 - kfree_skb(skb); 1819 - dev->stats.tx_dropped++; 1820 - return NETDEV_TX_OK; 1821 - } else { 1822 - return NETDEV_TX_BUSY; 1823 - } 1824 - } 1825 - 1826 1700 /* get put index for frame */ 1827 - putidx = FIELD_GET(TXFQS_TFQPI_MASK, txfqs); 1701 + putidx = cdev->tx_fifo_putidx; 1828 1702 1829 1703 /* Construct DLC Field, with CAN-FD configuration. 
1830 1704 * Use the put index of the fifo as the message marker, ··· 1821 1731 fdflags |= TX_BUF_BRS; 1822 1732 } 1823 1733 1824 - fifo_header.dlc = FIELD_PREP(TX_BUF_MM_MASK, putidx) | 1734 + fifo_element.dlc = FIELD_PREP(TX_BUF_MM_MASK, putidx) | 1825 1735 FIELD_PREP(TX_BUF_DLC_MASK, can_fd_len2dlc(cf->len)) | 1826 1736 fdflags | TX_BUF_EFC; 1827 - err = m_can_fifo_write(cdev, putidx, M_CAN_FIFO_ID, &fifo_header, 2); 1828 - if (err) 1829 - goto out_fail; 1830 1737 1831 - err = m_can_fifo_write(cdev, putidx, M_CAN_FIFO_DATA, 1832 - cf->data, DIV_ROUND_UP(cf->len, 4)); 1738 + memcpy_and_pad(fifo_element.data, CANFD_MAX_DLEN, &cf->data, 1739 + cf->len, 0); 1740 + 1741 + err = m_can_fifo_write(cdev, putidx, M_CAN_FIFO_ID, 1742 + &fifo_element, 2 + len_padded); 1833 1743 if (err) 1834 1744 goto out_fail; 1835 1745 1836 1746 /* Push loopback echo. 1837 1747 * Will be looped back on TX interrupt based on message marker 1838 1748 */ 1839 - can_put_echo_skb(skb, dev, putidx, 0); 1749 + can_put_echo_skb(skb, dev, putidx, frame_len); 1840 1750 1841 - /* Enable TX FIFO element to start transfer */ 1842 - m_can_write(cdev, M_CAN_TXBAR, (1 << putidx)); 1843 - 1844 - /* stop network queue if fifo full */ 1845 - if (m_can_tx_fifo_full(cdev) || 1846 - m_can_next_echo_skb_occupied(dev, putidx)) 1847 - netif_stop_queue(dev); 1751 + if (cdev->is_peripheral) { 1752 + /* Delay enabling TX FIFO element */ 1753 + cdev->tx_peripheral_submit |= BIT(putidx); 1754 + } else { 1755 + /* Enable TX FIFO element to start transfer */ 1756 + m_can_write(cdev, M_CAN_TXBAR, BIT(putidx)); 1757 + } 1758 + cdev->tx_fifo_putidx = (++cdev->tx_fifo_putidx >= cdev->can.echo_skb_max ? 
1759 + 0 : cdev->tx_fifo_putidx); 1848 1760 } 1849 1761 1850 1762 return NETDEV_TX_OK; ··· 1857 1765 return NETDEV_TX_BUSY; 1858 1766 } 1859 1767 1768 + static void m_can_tx_submit(struct m_can_classdev *cdev) 1769 + { 1770 + if (cdev->version == 30) 1771 + return; 1772 + if (!cdev->is_peripheral) 1773 + return; 1774 + 1775 + m_can_write(cdev, M_CAN_TXBAR, cdev->tx_peripheral_submit); 1776 + cdev->tx_peripheral_submit = 0; 1777 + } 1778 + 1860 1779 static void m_can_tx_work_queue(struct work_struct *ws) 1861 1780 { 1862 - struct m_can_classdev *cdev = container_of(ws, struct m_can_classdev, 1863 - tx_work); 1781 + struct m_can_tx_op *op = container_of(ws, struct m_can_tx_op, work); 1782 + struct m_can_classdev *cdev = op->cdev; 1783 + struct sk_buff *skb = op->skb; 1864 1784 1865 - m_can_tx_handler(cdev); 1785 + op->skb = NULL; 1786 + m_can_tx_handler(cdev, skb); 1787 + if (op->submit) 1788 + m_can_tx_submit(cdev); 1789 + } 1790 + 1791 + static void m_can_tx_queue_skb(struct m_can_classdev *cdev, struct sk_buff *skb, 1792 + bool submit) 1793 + { 1794 + cdev->tx_ops[cdev->next_tx_op].skb = skb; 1795 + cdev->tx_ops[cdev->next_tx_op].submit = submit; 1796 + queue_work(cdev->tx_wq, &cdev->tx_ops[cdev->next_tx_op].work); 1797 + 1798 + ++cdev->next_tx_op; 1799 + if (cdev->next_tx_op >= cdev->tx_fifo_size) 1800 + cdev->next_tx_op = 0; 1801 + } 1802 + 1803 + static netdev_tx_t m_can_start_peripheral_xmit(struct m_can_classdev *cdev, 1804 + struct sk_buff *skb) 1805 + { 1806 + bool submit; 1807 + 1808 + ++cdev->nr_txs_without_submit; 1809 + if (cdev->nr_txs_without_submit >= cdev->tx_max_coalesced_frames || 1810 + !netdev_xmit_more()) { 1811 + cdev->nr_txs_without_submit = 0; 1812 + submit = true; 1813 + } else { 1814 + submit = false; 1815 + } 1816 + m_can_tx_queue_skb(cdev, skb, submit); 1817 + 1818 + return NETDEV_TX_OK; 1866 1819 } 1867 1820 1868 1821 static netdev_tx_t m_can_start_xmit(struct sk_buff *skb, 1869 1822 struct net_device *dev) 1870 1823 { 1871 1824 struct 
m_can_classdev *cdev = netdev_priv(dev); 1825 + unsigned int frame_len; 1826 + netdev_tx_t ret; 1872 1827 1873 1828 if (can_dev_dropped_skb(dev, skb)) 1874 1829 return NETDEV_TX_OK; 1875 1830 1876 - if (cdev->is_peripheral) { 1877 - if (cdev->tx_skb) { 1878 - netdev_err(dev, "hard_xmit called while tx busy\n"); 1879 - return NETDEV_TX_BUSY; 1880 - } 1831 + frame_len = can_skb_get_frame_len(skb); 1881 1832 1882 - if (cdev->can.state == CAN_STATE_BUS_OFF) { 1883 - m_can_clean(dev); 1884 - } else { 1885 - /* Need to stop the queue to avoid numerous requests 1886 - * from being sent. Suggested improvement is to create 1887 - * a queueing mechanism that will queue the skbs and 1888 - * process them in order. 1889 - */ 1890 - cdev->tx_skb = skb; 1891 - netif_stop_queue(cdev->net); 1892 - queue_work(cdev->tx_wq, &cdev->tx_work); 1893 - } 1894 - } else { 1895 - cdev->tx_skb = skb; 1896 - return m_can_tx_handler(cdev); 1833 + if (cdev->can.state == CAN_STATE_BUS_OFF) { 1834 + m_can_clean(cdev->net); 1835 + return NETDEV_TX_OK; 1897 1836 } 1898 1837 1899 - return NETDEV_TX_OK; 1838 + ret = m_can_start_tx(cdev); 1839 + if (ret != NETDEV_TX_OK) 1840 + return ret; 1841 + 1842 + netdev_sent_queue(dev, frame_len); 1843 + 1844 + if (cdev->is_peripheral) 1845 + ret = m_can_start_peripheral_xmit(cdev, skb); 1846 + else 1847 + ret = m_can_tx_handler(cdev, skb); 1848 + 1849 + if (ret != NETDEV_TX_OK) 1850 + netdev_completed_queue(dev, 1, frame_len); 1851 + 1852 + return ret; 1900 1853 } 1901 1854 1902 1855 static enum hrtimer_restart hrtimer_callback(struct hrtimer *timer) ··· 1981 1844 1982 1845 /* register interrupt handler */ 1983 1846 if (cdev->is_peripheral) { 1984 - cdev->tx_skb = NULL; 1985 - cdev->tx_wq = alloc_workqueue("mcan_wq", 1986 - WQ_FREEZABLE | WQ_MEM_RECLAIM, 0); 1847 + cdev->tx_wq = alloc_ordered_workqueue("mcan_wq", 1848 + WQ_FREEZABLE | WQ_MEM_RECLAIM); 1987 1849 if (!cdev->tx_wq) { 1988 1850 err = -ENOMEM; 1989 1851 goto out_wq_fail; 1990 1852 } 1991 1853 1992 - 
INIT_WORK(&cdev->tx_work, m_can_tx_work_queue); 1854 + for (int i = 0; i != cdev->tx_fifo_size; ++i) { 1855 + cdev->tx_ops[i].cdev = cdev; 1856 + INIT_WORK(&cdev->tx_ops[i].work, m_can_tx_work_queue); 1857 + } 1993 1858 1994 1859 err = request_threaded_irq(dev->irq, NULL, m_can_isr, 1995 1860 IRQF_ONESHOT, ··· 2039 1900 .ndo_change_mtu = can_change_mtu, 2040 1901 }; 2041 1902 1903 + static int m_can_get_coalesce(struct net_device *dev, 1904 + struct ethtool_coalesce *ec, 1905 + struct kernel_ethtool_coalesce *kec, 1906 + struct netlink_ext_ack *ext_ack) 1907 + { 1908 + struct m_can_classdev *cdev = netdev_priv(dev); 1909 + 1910 + ec->rx_max_coalesced_frames_irq = cdev->rx_max_coalesced_frames_irq; 1911 + ec->rx_coalesce_usecs_irq = cdev->rx_coalesce_usecs_irq; 1912 + ec->tx_max_coalesced_frames = cdev->tx_max_coalesced_frames; 1913 + ec->tx_max_coalesced_frames_irq = cdev->tx_max_coalesced_frames_irq; 1914 + ec->tx_coalesce_usecs_irq = cdev->tx_coalesce_usecs_irq; 1915 + 1916 + return 0; 1917 + } 1918 + 1919 + static int m_can_set_coalesce(struct net_device *dev, 1920 + struct ethtool_coalesce *ec, 1921 + struct kernel_ethtool_coalesce *kec, 1922 + struct netlink_ext_ack *ext_ack) 1923 + { 1924 + struct m_can_classdev *cdev = netdev_priv(dev); 1925 + 1926 + if (cdev->can.state != CAN_STATE_STOPPED) { 1927 + netdev_err(dev, "Device is in use, please shut it down first\n"); 1928 + return -EBUSY; 1929 + } 1930 + 1931 + if (ec->rx_max_coalesced_frames_irq > cdev->mcfg[MRAM_RXF0].num) { 1932 + netdev_err(dev, "rx-frames-irq %u greater than the RX FIFO %u\n", 1933 + ec->rx_max_coalesced_frames_irq, 1934 + cdev->mcfg[MRAM_RXF0].num); 1935 + return -EINVAL; 1936 + } 1937 + if ((ec->rx_max_coalesced_frames_irq == 0) != (ec->rx_coalesce_usecs_irq == 0)) { 1938 + netdev_err(dev, "rx-frames-irq and rx-usecs-irq can only be set together\n"); 1939 + return -EINVAL; 1940 + } 1941 + if (ec->tx_max_coalesced_frames_irq > cdev->mcfg[MRAM_TXE].num) { 1942 + netdev_err(dev, 
"tx-frames-irq %u greater than the TX event FIFO %u\n", 1943 + ec->tx_max_coalesced_frames_irq, 1944 + cdev->mcfg[MRAM_TXE].num); 1945 + return -EINVAL; 1946 + } 1947 + if (ec->tx_max_coalesced_frames_irq > cdev->mcfg[MRAM_TXB].num) { 1948 + netdev_err(dev, "tx-frames-irq %u greater than the TX FIFO %u\n", 1949 + ec->tx_max_coalesced_frames_irq, 1950 + cdev->mcfg[MRAM_TXB].num); 1951 + return -EINVAL; 1952 + } 1953 + if ((ec->tx_max_coalesced_frames_irq == 0) != (ec->tx_coalesce_usecs_irq == 0)) { 1954 + netdev_err(dev, "tx-frames-irq and tx-usecs-irq can only be set together\n"); 1955 + return -EINVAL; 1956 + } 1957 + if (ec->tx_max_coalesced_frames > cdev->mcfg[MRAM_TXE].num) { 1958 + netdev_err(dev, "tx-frames %u greater than the TX event FIFO %u\n", 1959 + ec->tx_max_coalesced_frames, 1960 + cdev->mcfg[MRAM_TXE].num); 1961 + return -EINVAL; 1962 + } 1963 + if (ec->tx_max_coalesced_frames > cdev->mcfg[MRAM_TXB].num) { 1964 + netdev_err(dev, "tx-frames %u greater than the TX FIFO %u\n", 1965 + ec->tx_max_coalesced_frames, 1966 + cdev->mcfg[MRAM_TXB].num); 1967 + return -EINVAL; 1968 + } 1969 + if (ec->rx_coalesce_usecs_irq != 0 && ec->tx_coalesce_usecs_irq != 0 && 1970 + ec->rx_coalesce_usecs_irq != ec->tx_coalesce_usecs_irq) { 1971 + netdev_err(dev, "rx-usecs-irq %u needs to be equal to tx-usecs-irq %u if both are enabled\n", 1972 + ec->rx_coalesce_usecs_irq, 1973 + ec->tx_coalesce_usecs_irq); 1974 + return -EINVAL; 1975 + } 1976 + 1977 + cdev->rx_max_coalesced_frames_irq = ec->rx_max_coalesced_frames_irq; 1978 + cdev->rx_coalesce_usecs_irq = ec->rx_coalesce_usecs_irq; 1979 + cdev->tx_max_coalesced_frames = ec->tx_max_coalesced_frames; 1980 + cdev->tx_max_coalesced_frames_irq = ec->tx_max_coalesced_frames_irq; 1981 + cdev->tx_coalesce_usecs_irq = ec->tx_coalesce_usecs_irq; 1982 + 1983 + if (cdev->rx_coalesce_usecs_irq) 1984 + cdev->irq_timer_wait = 1985 + ns_to_ktime(cdev->rx_coalesce_usecs_irq * NSEC_PER_USEC); 1986 + else 1987 + cdev->irq_timer_wait = 1988 + 
ns_to_ktime(cdev->tx_coalesce_usecs_irq * NSEC_PER_USEC); 1989 + 1990 + return 0; 1991 + } 1992 + 2042 1993 static const struct ethtool_ops m_can_ethtool_ops = { 1994 + .supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS_IRQ | 1995 + ETHTOOL_COALESCE_RX_MAX_FRAMES_IRQ | 1996 + ETHTOOL_COALESCE_TX_USECS_IRQ | 1997 + ETHTOOL_COALESCE_TX_MAX_FRAMES | 1998 + ETHTOOL_COALESCE_TX_MAX_FRAMES_IRQ, 1999 + .get_ts_info = ethtool_op_get_ts_info, 2000 + .get_coalesce = m_can_get_coalesce, 2001 + .set_coalesce = m_can_set_coalesce, 2002 + }; 2003 + 2004 + static const struct ethtool_ops m_can_ethtool_ops_polling = { 2043 2005 .get_ts_info = ethtool_op_get_ts_info, 2044 2006 }; 2045 2007 ··· 2148 1908 { 2149 1909 dev->flags |= IFF_ECHO; /* we support local echo */ 2150 1910 dev->netdev_ops = &m_can_netdev_ops; 2151 - dev->ethtool_ops = &m_can_ethtool_ops; 1911 + if (dev->irq) 1912 + dev->ethtool_ops = &m_can_ethtool_ops; 1913 + else 1914 + dev->ethtool_ops = &m_can_ethtool_ops_polling; 2152 1915 2153 1916 return register_candev(dev); 2154 1917 } ··· 2299 2056 { 2300 2057 int ret; 2301 2058 2059 + cdev->tx_fifo_size = max(1, min(cdev->mcfg[MRAM_TXB].num, 2060 + cdev->mcfg[MRAM_TXE].num)); 2061 + if (cdev->is_peripheral) { 2062 + cdev->tx_ops = 2063 + devm_kzalloc(cdev->dev, 2064 + cdev->tx_fifo_size * sizeof(*cdev->tx_ops), 2065 + GFP_KERNEL); 2066 + if (!cdev->tx_ops) { 2067 + dev_err(cdev->dev, "Failed to allocate tx_ops for workqueue\n"); 2068 + return -ENOMEM; 2069 + } 2070 + } 2071 + 2302 2072 if (cdev->pm_clock_support) { 2303 2073 ret = m_can_clk_start(cdev); 2304 2074 if (ret) ··· 2325 2069 goto clk_disable; 2326 2070 } 2327 2071 2328 - if (!cdev->net->irq) 2072 + if (!cdev->net->irq) { 2073 + dev_dbg(cdev->dev, "Polling enabled, initialize hrtimer"); 2074 + hrtimer_init(&cdev->hrtimer, CLOCK_MONOTONIC, 2075 + HRTIMER_MODE_REL_PINNED); 2329 2076 cdev->hrtimer.function = &hrtimer_callback; 2077 + } else { 2078 + hrtimer_init(&cdev->hrtimer, CLOCK_MONOTONIC, 
HRTIMER_MODE_REL); 2079 + cdev->hrtimer.function = m_can_coalescing_timer; 2080 + } 2330 2081 2331 2082 ret = m_can_dev_setup(cdev); 2332 2083 if (ret)
+32 -2
drivers/net/can/m_can/m_can.h
··· 70 70 int (*init)(struct m_can_classdev *cdev); 71 71 }; 72 72 73 + struct m_can_tx_op { 74 + struct m_can_classdev *cdev; 75 + struct work_struct work; 76 + struct sk_buff *skb; 77 + bool submit; 78 + }; 79 + 73 80 struct m_can_classdev { 74 81 struct can_priv can; 75 82 struct can_rx_offload offload; ··· 87 80 struct clk *cclk; 88 81 89 82 struct workqueue_struct *tx_wq; 90 - struct work_struct tx_work; 91 - struct sk_buff *tx_skb; 92 83 struct phy *transceiver; 84 + 85 + ktime_t irq_timer_wait; 93 86 94 87 struct m_can_ops *ops; 95 88 ··· 98 91 99 92 int pm_clock_support; 100 93 int is_peripheral; 94 + 95 + // Cached M_CAN_IE register content 96 + u32 active_interrupts; 97 + u32 rx_max_coalesced_frames_irq; 98 + u32 rx_coalesce_usecs_irq; 99 + u32 tx_max_coalesced_frames; 100 + u32 tx_max_coalesced_frames_irq; 101 + u32 tx_coalesce_usecs_irq; 102 + 103 + // Store this internally to avoid fetch delays on peripheral chips 104 + u32 tx_fifo_putidx; 105 + 106 + /* Protects shared state between start_xmit and m_can_isr */ 107 + spinlock_t tx_handling_spinlock; 108 + int tx_fifo_in_flight; 109 + 110 + struct m_can_tx_op *tx_ops; 111 + int tx_fifo_size; 112 + int next_tx_op; 113 + 114 + int nr_txs_without_submit; 115 + /* bitfield of fifo elements that will be submitted together */ 116 + u32 tx_peripheral_submit; 101 117 102 118 struct mram_cfg mcfg[MRAM_CFG_NUM]; 103 119
-4
drivers/net/can/m_can/m_can_platform.c
··· 109 109 ret = irq; 110 110 goto probe_fail; 111 111 } 112 - } else { 113 - dev_dbg(mcan_class->dev, "Polling enabled, initialize hrtimer"); 114 - hrtimer_init(&mcan_class->hrtimer, CLOCK_MONOTONIC, 115 - HRTIMER_MODE_REL_PINNED); 116 112 } 117 113 118 114 /* message ram could be shared */