Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

accel/habanalabs: disable device access after CPLD_SHUTDOWN

After a CPLD shutdown event the device becomes unusable. Prevent further
device access once this event is received.

Signed-off-by: Konstantin Sinyuk <konstantin.sinyuk@intel.com>
Reviewed-by: Koby Elbaz <koby.elbaz@intel.com>
Signed-off-by: Koby Elbaz <koby.elbaz@intel.com>

Authored by Konstantin Sinyuk and committed by Koby Elbaz
083c53a8 cade027e

+28
+25
drivers/accel/habanalabs/common/device.c
··· 1630 1630 from_watchdog_thread = !!(flags & HL_DRV_RESET_FROM_WD_THR); 1631 1631 reset_upon_device_release = hdev->reset_upon_device_release && from_dev_release; 1632 1632 1633 + if (hdev->cpld_shutdown) { 1634 + dev_err(hdev->dev, "Cannot reset device, cpld is shutdown! Device is NOT usable\n"); 1635 + return -EIO; 1636 + } 1637 + 1633 1638 if (!hard_reset && (hl_device_status(hdev) == HL_DEVICE_STATUS_MALFUNCTION)) { 1634 1639 dev_dbg(hdev->dev, "soft-reset isn't supported on a malfunctioning device\n"); 1635 1640 return 0; ··· 2581 2576 if (rc) 2582 2577 dev_err(hdev->dev, "hw_fini failed in device fini while removing device %d\n", rc); 2583 2578 2579 + /* Reset the H/W (if it accessible). It will be in idle state after this returns */ 2580 + if (!hdev->cpld_shutdown) { 2581 + rc = hdev->asic_funcs->hw_fini(hdev, true, false); 2582 + if (rc) 2583 + dev_err(hdev->dev, 2584 + "hw_fini failed in device fini while removing device %d\n", rc); 2585 + } 2586 + 2584 2587 hdev->fw_loader.fw_comp_loaded = FW_TYPE_NONE; 2585 2588 2586 2589 /* Release kernel context */ ··· 2955 2942 } 2956 2943 2957 2944 mutex_unlock(&clk_throttle->lock); 2945 + } 2946 + 2947 + void hl_eq_cpld_shutdown_event_handle(struct hl_device *hdev, u16 event_id, u64 *event_mask) 2948 + { 2949 + hl_handle_critical_hw_err(hdev, event_id, event_mask); 2950 + *event_mask |= HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE; 2951 + 2952 + /* Avoid any new accesses to the H/W */ 2953 + hdev->disabled = true; 2954 + hdev->cpld_shutdown = true; 2955 + hl_cn_hard_reset_prepare(hdev); 2956 + hl_cn_stop(hdev); 2958 2957 }
+3
drivers/accel/habanalabs/common/habanalabs.h
··· 3386 3386 * addresses. 3387 3387 * @is_in_dram_scrub: true if dram scrub operation is on going. 3388 3388 * @disabled: is device disabled. 3389 + * @cpld_shutdown: is cpld shutdown. 3389 3390 * @late_init_done: is late init stage was done during initialization. 3390 3391 * @hwmon_initialized: is H/W monitor sensors was initialized. 3391 3392 * @reset_on_lockup: true if a reset should be done in case of stuck CS, false ··· 3563 3562 u16 cpu_pci_msb_addr; 3564 3563 u8 is_in_dram_scrub; 3565 3564 u8 disabled; 3565 + u8 cpld_shutdown; 3566 3566 u8 late_init_done; 3567 3567 u8 hwmon_initialized; 3568 3568 u8 reset_on_lockup; ··· 4121 4119 void hl_set_irq_affinity(struct hl_device *hdev, int irq); 4122 4120 void hl_eq_heartbeat_event_handle(struct hl_device *hdev); 4123 4121 void hl_handle_clk_change_event(struct hl_device *hdev, u16 event_type, u64 *event_mask); 4122 + void hl_eq_cpld_shutdown_event_handle(struct hl_device *hdev, u16 event_id, u64 *event_mask); 4124 4123 4125 4124 #ifdef CONFIG_DEBUG_FS 4126 4125