Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch '1GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/net-queue

Tony Nguyen says:

====================
igc: Fix PTM timeout

Christopher S M Hall says:

There have been sporadic reports of PTM timeouts using i225/i226 devices

These timeouts have been root caused to:

1) Manipulating the PTM status register while PTM is enabled
and triggered
2) The hardware retrying too quickly when an inappropriate response
is received from the upstream device

The issue can be reproduced with the following:

$ sudo phc2sys -R 1000 -O 0 -i tsn0 -m

Note: 1000 Hz (-R 1000) is unrealistically large, but provides a way to
quickly reproduce the issue.

PHC2SYS exits with:

"ioctl PTP_OFFSET_PRECISE: Connection timed out" when the PTM transaction
fails

The first patch in this series also resolves an issue reported by Corinna
Vinschen relating to kdump:

This patch also fixes a hang in igc_probe() when loading the igc
driver in the kdump kernel on systems supporting PTM.

The igc driver running in the base kernel enables PTM trigger in
igc_probe(). Therefore the driver is always in PTM trigger mode,
except in brief periods when manually triggering a PTM cycle.

When a crash occurs, the NIC is reset while PTM trigger is enabled.
Due to a hardware problem, the NIC is subsequently in a bad busmaster
state and doesn't handle register reads/writes. When running
igc_probe() in the kdump kernel, the first register access to a NIC
register hangs driver probing and ultimately breaks kdump.

With this patch, igc has PTM trigger disabled most of the time,
and the trigger is only enabled for very brief (10 - 100 us) periods
when manually triggering a PTM cycle. Chances that a crash occurs
during a PTM trigger are not zero, but extremly reduced.

* '1GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/net-queue:
igc: add lock preventing multiple simultaneous PTM transactions
igc: cleanup PTP module if probe fails
igc: handle the IGC_PTP_ENABLED flag correctly
igc: move ktime snapshot into PTM retry loop
igc: increase wait time before retrying PTM
igc: fix PTM cycle trigger logic
====================

Link: https://patch.msgid.link/20250411162857.2754883-1-anthony.l.nguyen@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+81 -40
+1
drivers/net/ethernet/intel/igc/igc.h
··· 319 319 struct timespec64 prev_ptp_time; /* Pre-reset PTP clock */ 320 320 ktime_t ptp_reset_start; /* Reset time in clock mono */ 321 321 struct system_time_snapshot snapshot; 322 + struct mutex ptm_lock; /* Only allow one PTM transaction at a time */ 322 323 323 324 char fw_version[32]; 324 325
+5 -1
drivers/net/ethernet/intel/igc/igc_defines.h
··· 574 574 #define IGC_PTM_CTRL_SHRT_CYC(usec) (((usec) & 0x3f) << 2) 575 575 #define IGC_PTM_CTRL_PTM_TO(usec) (((usec) & 0xff) << 8) 576 576 577 - #define IGC_PTM_SHORT_CYC_DEFAULT 1 /* Default short cycle interval */ 577 + /* A short cycle time of 1us theoretically should work, but appears to be too 578 + * short in practice. 579 + */ 580 + #define IGC_PTM_SHORT_CYC_DEFAULT 4 /* Default short cycle interval */ 578 581 #define IGC_PTM_CYC_TIME_DEFAULT 5 /* Default PTM cycle time */ 579 582 #define IGC_PTM_TIMEOUT_DEFAULT 255 /* Default timeout for PTM errors */ 580 583 ··· 596 593 #define IGC_PTM_STAT_T4M1_OVFL BIT(3) /* T4 minus T1 overflow */ 597 594 #define IGC_PTM_STAT_ADJUST_1ST BIT(4) /* 1588 timer adjusted during 1st PTM cycle */ 598 595 #define IGC_PTM_STAT_ADJUST_CYC BIT(5) /* 1588 timer adjusted during non-1st PTM cycle */ 596 + #define IGC_PTM_STAT_ALL GENMASK(5, 0) /* Used to clear all status */ 599 597 600 598 /* PCIe PTM Cycle Control */ 601 599 #define IGC_PTM_CYCLE_CTRL_CYC_TIME(msec) ((msec) & 0x3ff) /* PTM Cycle Time (msec) */
+1
drivers/net/ethernet/intel/igc/igc_main.c
··· 7231 7231 7232 7232 err_register: 7233 7233 igc_release_hw_control(adapter); 7234 + igc_ptp_stop(adapter); 7234 7235 err_eeprom: 7235 7236 if (!igc_check_reset_block(hw)) 7236 7237 igc_reset_phy(hw);
+74 -39
drivers/net/ethernet/intel/igc/igc_ptp.c
··· 974 974 } 975 975 } 976 976 977 + /* The PTM lock: adapter->ptm_lock must be held when calling igc_ptm_trigger() */ 978 + static void igc_ptm_trigger(struct igc_hw *hw) 979 + { 980 + u32 ctrl; 981 + 982 + /* To "manually" start the PTM cycle we need to set the 983 + * trigger (TRIG) bit 984 + */ 985 + ctrl = rd32(IGC_PTM_CTRL); 986 + ctrl |= IGC_PTM_CTRL_TRIG; 987 + wr32(IGC_PTM_CTRL, ctrl); 988 + /* Perform flush after write to CTRL register otherwise 989 + * transaction may not start 990 + */ 991 + wrfl(); 992 + } 993 + 994 + /* The PTM lock: adapter->ptm_lock must be held when calling igc_ptm_reset() */ 995 + static void igc_ptm_reset(struct igc_hw *hw) 996 + { 997 + u32 ctrl; 998 + 999 + ctrl = rd32(IGC_PTM_CTRL); 1000 + ctrl &= ~IGC_PTM_CTRL_TRIG; 1001 + wr32(IGC_PTM_CTRL, ctrl); 1002 + /* Write to clear all status */ 1003 + wr32(IGC_PTM_STAT, IGC_PTM_STAT_ALL); 1004 + } 1005 + 977 1006 static int igc_phc_get_syncdevicetime(ktime_t *device, 978 1007 struct system_counterval_t *system, 979 1008 void *ctx) 980 1009 { 981 - u32 stat, t2_curr_h, t2_curr_l, ctrl; 982 1010 struct igc_adapter *adapter = ctx; 983 1011 struct igc_hw *hw = &adapter->hw; 1012 + u32 stat, t2_curr_h, t2_curr_l; 984 1013 int err, count = 100; 985 1014 ktime_t t1, t2_curr; 986 1015 987 - /* Get a snapshot of system clocks to use as historic value. */ 988 - ktime_get_snapshot(&adapter->snapshot); 989 - 1016 + /* Doing this in a loop because in the event of a 1017 + * badly timed (ha!) system clock adjustment, we may 1018 + * get PTM errors from the PCI root, but these errors 1019 + * are transitory. Repeating the process returns valid 1020 + * data eventually. 1021 + */ 990 1022 do { 991 - /* Doing this in a loop because in the event of a 992 - * badly timed (ha!) system clock adjustment, we may 993 - * get PTM errors from the PCI root, but these errors 994 - * are transitory. Repeating the process returns valid 995 - * data eventually. 996 - */ 1023 + /* Get a snapshot of system clocks to use as historic value. */ 1024 + ktime_get_snapshot(&adapter->snapshot); 997 1025 998 - /* To "manually" start the PTM cycle we need to clear and 999 - * then set again the TRIG bit. 1000 - */ 1001 - ctrl = rd32(IGC_PTM_CTRL); 1002 - ctrl &= ~IGC_PTM_CTRL_TRIG; 1003 - wr32(IGC_PTM_CTRL, ctrl); 1004 - ctrl |= IGC_PTM_CTRL_TRIG; 1005 - wr32(IGC_PTM_CTRL, ctrl); 1006 - 1007 - /* The cycle only starts "for real" when software notifies 1008 - * that it has read the registers, this is done by setting 1009 - * VALID bit. 1010 - */ 1011 - wr32(IGC_PTM_STAT, IGC_PTM_STAT_VALID); 1026 + igc_ptm_trigger(hw); 1012 1027 1013 1028 err = readx_poll_timeout(rd32, IGC_PTM_STAT, stat, 1014 1029 stat, IGC_PTM_STAT_SLEEP, 1015 1030 IGC_PTM_STAT_TIMEOUT); 1031 + igc_ptm_reset(hw); 1032 + 1016 1033 if (err < 0) { 1017 1034 netdev_err(adapter->netdev, "Timeout reading IGC_PTM_STAT register\n"); 1018 1035 return err; ··· 1038 1021 if ((stat & IGC_PTM_STAT_VALID) == IGC_PTM_STAT_VALID) 1039 1022 break; 1040 1023 1041 - if (stat & ~IGC_PTM_STAT_VALID) { 1042 - /* An error occurred, log it. */ 1043 - igc_ptm_log_error(adapter, stat); 1044 - /* The STAT register is write-1-to-clear (W1C), 1045 - * so write the previous error status to clear it. 1046 - */ 1047 - wr32(IGC_PTM_STAT, stat); 1048 - continue; 1049 - } 1024 + igc_ptm_log_error(adapter, stat); 1050 1025 } while (--count); 1051 1026 1052 1027 if (!count) { ··· 1070 1061 { 1071 1062 struct igc_adapter *adapter = container_of(ptp, struct igc_adapter, 1072 1063 ptp_caps); 1064 + int ret; 1073 1065 1074 - return get_device_system_crosststamp(igc_phc_get_syncdevicetime, 1075 - adapter, &adapter->snapshot, cts); 1066 + /* This blocks until any in progress PTM transactions complete */ 1067 + mutex_lock(&adapter->ptm_lock); 1068 + 1069 + ret = get_device_system_crosststamp(igc_phc_get_syncdevicetime, 1070 + adapter, &adapter->snapshot, cts); 1071 + mutex_unlock(&adapter->ptm_lock); 1072 + 1073 + return ret; 1076 1074 } 1077 1075 1078 1076 static int igc_ptp_getcyclesx64(struct ptp_clock_info *ptp, ··· 1178 1162 spin_lock_init(&adapter->ptp_tx_lock); 1179 1163 spin_lock_init(&adapter->free_timer_lock); 1180 1164 spin_lock_init(&adapter->tmreg_lock); 1165 + mutex_init(&adapter->ptm_lock); 1181 1166 1182 1167 adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE; 1183 1168 adapter->tstamp_config.tx_type = HWTSTAMP_TX_OFF; ··· 1191 1174 if (IS_ERR(adapter->ptp_clock)) { 1192 1175 adapter->ptp_clock = NULL; 1193 1176 netdev_err(netdev, "ptp_clock_register failed\n"); 1177 + mutex_destroy(&adapter->ptm_lock); 1194 1178 } else if (adapter->ptp_clock) { 1195 1179 netdev_info(netdev, "PHC added\n"); 1196 1180 adapter->ptp_flags |= IGC_PTP_ENABLED; ··· 1221 1203 struct igc_hw *hw = &adapter->hw; 1222 1204 u32 ctrl; 1223 1205 1206 + mutex_lock(&adapter->ptm_lock); 1224 1207 ctrl = rd32(IGC_PTM_CTRL); 1225 1208 ctrl &= ~IGC_PTM_CTRL_EN; 1226 1209 1227 1210 wr32(IGC_PTM_CTRL, ctrl); 1211 + mutex_unlock(&adapter->ptm_lock); 1228 1212 } 1229 1213 1230 1214 /** ··· 1257 1237 **/ 1258 1238 void igc_ptp_stop(struct igc_adapter *adapter) 1259 1239 { 1240 + if (!(adapter->ptp_flags & IGC_PTP_ENABLED)) 1241 + return; 1242 + 1260 1243 igc_ptp_suspend(adapter); 1261 1244 1245 + adapter->ptp_flags &= ~IGC_PTP_ENABLED; 1262 1246 if (adapter->ptp_clock) { 1263 1247 ptp_clock_unregister(adapter->ptp_clock); 1264 1248 netdev_info(adapter->netdev, "PHC removed\n"); 1265 1249 adapter->ptp_flags &= ~IGC_PTP_ENABLED; 1266 1250 } 1251 + mutex_destroy(&adapter->ptm_lock); 1267 1252 } 1268 1253 1269 1254 /** ··· 1280 1255 void igc_ptp_reset(struct igc_adapter *adapter) 1281 1256 { 1282 1257 struct igc_hw *hw = &adapter->hw; 1283 - u32 cycle_ctrl, ctrl; 1258 + u32 cycle_ctrl, ctrl, stat; 1284 1259 unsigned long flags; 1285 1260 u32 timadj; 1261 + 1262 + if (!(adapter->ptp_flags & IGC_PTP_ENABLED)) 1263 + return; 1286 1264 1287 1265 /* reset the tstamp_config */ 1288 1266 igc_ptp_set_timestamp_mode(adapter, &adapter->tstamp_config); ··· 1308 1280 if (!igc_is_crosststamp_supported(adapter)) 1309 1281 break; 1310 1282 1283 + mutex_lock(&adapter->ptm_lock); 1311 1284 wr32(IGC_PCIE_DIG_DELAY, IGC_PCIE_DIG_DELAY_DEFAULT); 1312 1285 wr32(IGC_PCIE_PHY_DELAY, IGC_PCIE_PHY_DELAY_DEFAULT); 1313 1286 ··· 1319 1290 ctrl = IGC_PTM_CTRL_EN | 1320 1291 IGC_PTM_CTRL_START_NOW | 1321 1292 IGC_PTM_CTRL_SHRT_CYC(IGC_PTM_SHORT_CYC_DEFAULT) | 1322 - IGC_PTM_CTRL_PTM_TO(IGC_PTM_TIMEOUT_DEFAULT) | 1323 - IGC_PTM_CTRL_TRIG; 1293 + IGC_PTM_CTRL_PTM_TO(IGC_PTM_TIMEOUT_DEFAULT); 1324 1294 1325 1295 wr32(IGC_PTM_CTRL, ctrl); 1326 1296 1327 1297 /* Force the first cycle to run. */ 1328 - wr32(IGC_PTM_STAT, IGC_PTM_STAT_VALID); 1298 + igc_ptm_trigger(hw); 1329 1299 1300 + if (readx_poll_timeout_atomic(rd32, IGC_PTM_STAT, stat, 1301 + stat, IGC_PTM_STAT_SLEEP, 1302 + IGC_PTM_STAT_TIMEOUT)) 1303 + netdev_err(adapter->netdev, "Timeout reading IGC_PTM_STAT register\n"); 1304 + 1305 + igc_ptm_reset(hw); 1306 + mutex_unlock(&adapter->ptm_lock); 1330 1307 break; 1331 1308 default: 1332 1309 /* No work to do. */