Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'char-misc-5.8-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc into master

Pull char/misc driver fixes from Greg KH:
"Here are a few small driver fixes for 5.8-rc7

They include:

- habanalabs fixes

- tiny fpga driver fixes

- /dev/mem fixup from previous changes

- interconnect driver fixes

- binder fix

All of these have been in linux-next for a while with no reported
issues"

* tag 'char-misc-5.8-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc:
interconnect: msm8916: Fix buswidth of pcnoc_s nodes
interconnect: Do not skip aggregation for disabled paths
/dev/mem: Add missing memory barriers for devmem_inode
binder: Don't use mmput() from shrinker function.
habanalabs: prevent possible out-of-bounds array access
fpga: dfl: fix bug in port reset handshake
fpga: dfl: pci: reduce the scope of variable 'ret'
habanalabs: set 4s timeout for message to device CPU
habanalabs: set clock gating per engine
habanalabs: block WREG_BULK packet on PDMA

+186 -112
+10 -1
Documentation/ABI/testing/debugfs-driver-habanalabs
··· 16 16 gating mechanism in Gaudi. Due to how Gaudi is built, the 17 17 clock gating needs to be disabled in order to access the 18 18 registers of the TPC and MME engines. This is sometimes needed 19 - during debug by the user and hence the user needs this option 19 + during debug by the user and hence the user needs this option. 20 + The user can supply a bitmask value, each bit represents 21 + a different engine to disable/enable its clock gating feature. 22 + The bitmask is composed of 20 bits: 23 + 0 - 7 : DMA channels 24 + 8 - 11 : MME engines 25 + 12 - 19 : TPC engines 26 + The bit's location of a specific engine can be determined 27 + using (1 << GAUDI_ENGINE_ID_*). GAUDI_ENGINE_ID_* values 28 + are defined in uapi habanalabs.h file in enum gaudi_engine_id 20 29 21 30 What: /sys/kernel/debug/habanalabs/hl<n>/command_buffers 22 31 Date: Jan 2019
+1 -1
drivers/android/binder_alloc.c
··· 947 947 trace_binder_unmap_user_end(alloc, index); 948 948 } 949 949 mmap_read_unlock(mm); 950 - mmput(mm); 950 + mmput_async(mm); 951 951 952 952 trace_binder_unmap_kernel_start(alloc, index); 953 953
+7 -3
drivers/char/mem.c
··· 814 814 #ifdef CONFIG_IO_STRICT_DEVMEM 815 815 void revoke_devmem(struct resource *res) 816 816 { 817 - struct inode *inode = READ_ONCE(devmem_inode); 817 + /* pairs with smp_store_release() in devmem_init_inode() */ 818 + struct inode *inode = smp_load_acquire(&devmem_inode); 818 819 819 820 /* 820 821 * Check that the initialization has completed. Losing the race ··· 1029 1028 return rc; 1030 1029 } 1031 1030 1032 - /* publish /dev/mem initialized */ 1033 - WRITE_ONCE(devmem_inode, inode); 1031 + /* 1032 + * Publish /dev/mem initialized. 1033 + * Pairs with smp_load_acquire() in revoke_devmem(). 1034 + */ 1035 + smp_store_release(&devmem_inode, inode); 1034 1036 1035 1037 return 0; 1036 1038 }
+2 -1
drivers/fpga/dfl-afu-main.c
··· 83 83 * on this port and minimum soft reset pulse width has elapsed. 84 84 * Driver polls port_soft_reset_ack to determine if reset done by HW. 85 85 */ 86 - if (readq_poll_timeout(base + PORT_HDR_CTRL, v, v & PORT_CTRL_SFTRST, 86 + if (readq_poll_timeout(base + PORT_HDR_CTRL, v, 87 + v & PORT_CTRL_SFTRST_ACK, 87 88 RST_POLL_INVL, RST_POLL_TIMEOUT)) { 88 89 dev_err(&pdev->dev, "timeout, fail to reset device\n"); 89 90 return -ETIMEDOUT;
+2 -1
drivers/fpga/dfl-pci.c
··· 227 227 { 228 228 struct cci_drvdata *drvdata = pci_get_drvdata(pcidev); 229 229 struct dfl_fpga_cdev *cdev = drvdata->cdev; 230 - int ret = 0; 231 230 232 231 if (!num_vfs) { 233 232 /* ··· 238 239 dfl_fpga_cdev_config_ports_pf(cdev); 239 240 240 241 } else { 242 + int ret; 243 + 241 244 /* 242 245 * before enable SRIOV, put released ports into VF access mode 243 246 * first of all.
+9 -3
drivers/interconnect/core.c
··· 243 243 { 244 244 struct icc_provider *p = node->provider; 245 245 struct icc_req *r; 246 + u32 avg_bw, peak_bw; 246 247 247 248 node->avg_bw = 0; 248 249 node->peak_bw = 0; ··· 252 251 p->pre_aggregate(node); 253 252 254 253 hlist_for_each_entry(r, &node->req_list, req_node) { 255 - if (!r->enabled) 256 - continue; 257 - p->aggregate(node, r->tag, r->avg_bw, r->peak_bw, 254 + if (r->enabled) { 255 + avg_bw = r->avg_bw; 256 + peak_bw = r->peak_bw; 257 + } else { 258 + avg_bw = 0; 259 + peak_bw = 0; 260 + } 261 + p->aggregate(node, r->tag, avg_bw, peak_bw, 258 262 &node->avg_bw, &node->peak_bw); 259 263 } 260 264
+7 -7
drivers/interconnect/qcom/msm8916.c
··· 197 197 DEFINE_QNODE(pcnoc_int_1, MSM8916_PNOC_INT_1, 8, -1, -1, MSM8916_PNOC_SNOC_MAS); 198 198 DEFINE_QNODE(pcnoc_m_0, MSM8916_PNOC_MAS_0, 8, -1, -1, MSM8916_PNOC_INT_0); 199 199 DEFINE_QNODE(pcnoc_m_1, MSM8916_PNOC_MAS_1, 8, -1, -1, MSM8916_PNOC_SNOC_MAS); 200 - DEFINE_QNODE(pcnoc_s_0, MSM8916_PNOC_SLV_0, 8, -1, -1, MSM8916_SLAVE_CLK_CTL, MSM8916_SLAVE_TLMM, MSM8916_SLAVE_TCSR, MSM8916_SLAVE_SECURITY, MSM8916_SLAVE_MSS); 201 - DEFINE_QNODE(pcnoc_s_1, MSM8916_PNOC_SLV_1, 8, -1, -1, MSM8916_SLAVE_IMEM_CFG, MSM8916_SLAVE_CRYPTO_0_CFG, MSM8916_SLAVE_MSG_RAM, MSM8916_SLAVE_PDM, MSM8916_SLAVE_PRNG); 202 - DEFINE_QNODE(pcnoc_s_2, MSM8916_PNOC_SLV_2, 8, -1, -1, MSM8916_SLAVE_SPDM, MSM8916_SLAVE_BOOT_ROM, MSM8916_SLAVE_BIMC_CFG, MSM8916_SLAVE_PNOC_CFG, MSM8916_SLAVE_PMIC_ARB); 203 - DEFINE_QNODE(pcnoc_s_3, MSM8916_PNOC_SLV_3, 8, -1, -1, MSM8916_SLAVE_MPM, MSM8916_SLAVE_SNOC_CFG, MSM8916_SLAVE_RBCPR_CFG, MSM8916_SLAVE_QDSS_CFG, MSM8916_SLAVE_DEHR_CFG); 204 - DEFINE_QNODE(pcnoc_s_4, MSM8916_PNOC_SLV_4, 8, -1, -1, MSM8916_SLAVE_VENUS_CFG, MSM8916_SLAVE_CAMERA_CFG, MSM8916_SLAVE_DISPLAY_CFG); 205 - DEFINE_QNODE(pcnoc_s_8, MSM8916_PNOC_SLV_8, 8, -1, -1, MSM8916_SLAVE_USB_HS, MSM8916_SLAVE_SDCC_1, MSM8916_SLAVE_BLSP_1); 206 - DEFINE_QNODE(pcnoc_s_9, MSM8916_PNOC_SLV_9, 8, -1, -1, MSM8916_SLAVE_SDCC_2, MSM8916_SLAVE_LPASS, MSM8916_SLAVE_GRAPHICS_3D_CFG); 200 + DEFINE_QNODE(pcnoc_s_0, MSM8916_PNOC_SLV_0, 4, -1, -1, MSM8916_SLAVE_CLK_CTL, MSM8916_SLAVE_TLMM, MSM8916_SLAVE_TCSR, MSM8916_SLAVE_SECURITY, MSM8916_SLAVE_MSS); 201 + DEFINE_QNODE(pcnoc_s_1, MSM8916_PNOC_SLV_1, 4, -1, -1, MSM8916_SLAVE_IMEM_CFG, MSM8916_SLAVE_CRYPTO_0_CFG, MSM8916_SLAVE_MSG_RAM, MSM8916_SLAVE_PDM, MSM8916_SLAVE_PRNG); 202 + DEFINE_QNODE(pcnoc_s_2, MSM8916_PNOC_SLV_2, 4, -1, -1, MSM8916_SLAVE_SPDM, MSM8916_SLAVE_BOOT_ROM, MSM8916_SLAVE_BIMC_CFG, MSM8916_SLAVE_PNOC_CFG, MSM8916_SLAVE_PMIC_ARB); 203 + DEFINE_QNODE(pcnoc_s_3, MSM8916_PNOC_SLV_3, 4, -1, -1, MSM8916_SLAVE_MPM, MSM8916_SLAVE_SNOC_CFG, MSM8916_SLAVE_RBCPR_CFG, MSM8916_SLAVE_QDSS_CFG, MSM8916_SLAVE_DEHR_CFG); 204 + DEFINE_QNODE(pcnoc_s_4, MSM8916_PNOC_SLV_4, 4, -1, -1, MSM8916_SLAVE_VENUS_CFG, MSM8916_SLAVE_CAMERA_CFG, MSM8916_SLAVE_DISPLAY_CFG); 205 + DEFINE_QNODE(pcnoc_s_8, MSM8916_PNOC_SLV_8, 4, -1, -1, MSM8916_SLAVE_USB_HS, MSM8916_SLAVE_SDCC_1, MSM8916_SLAVE_BLSP_1); 206 + DEFINE_QNODE(pcnoc_s_9, MSM8916_PNOC_SLV_9, 4, -1, -1, MSM8916_SLAVE_SDCC_2, MSM8916_SLAVE_LPASS, MSM8916_SLAVE_GRAPHICS_3D_CFG); 207 207 DEFINE_QNODE(pcnoc_snoc_mas, MSM8916_PNOC_SNOC_MAS, 8, 29, -1, MSM8916_PNOC_SNOC_SLV); 208 208 DEFINE_QNODE(pcnoc_snoc_slv, MSM8916_PNOC_SNOC_SLV, 8, -1, 45, MSM8916_SNOC_INT_0, MSM8916_SNOC_INT_BIMC, MSM8916_SNOC_INT_1); 209 209 DEFINE_QNODE(qdss_int, MSM8916_SNOC_QDSS_INT, 8, -1, -1, MSM8916_SNOC_INT_0, MSM8916_SNOC_INT_BIMC);
+11 -3
drivers/misc/habanalabs/command_submission.c
··· 499 499 struct asic_fixed_properties *asic = &hdev->asic_prop; 500 500 struct hw_queue_properties *hw_queue_prop; 501 501 502 + /* This must be checked here to prevent out-of-bounds access to 503 + * hw_queues_props array 504 + */ 505 + if (chunk->queue_index >= HL_MAX_QUEUES) { 506 + dev_err(hdev->dev, "Queue index %d is invalid\n", 507 + chunk->queue_index); 508 + return -EINVAL; 509 + } 510 + 502 511 hw_queue_prop = &asic->hw_queues_props[chunk->queue_index]; 503 512 504 - if ((chunk->queue_index >= HL_MAX_QUEUES) || 505 - (hw_queue_prop->type == QUEUE_TYPE_NA)) { 506 - dev_err(hdev->dev, "Queue index %d is invalid\n", 513 + if (hw_queue_prop->type == QUEUE_TYPE_NA) { 514 + dev_err(hdev->dev, "Queue index %d is not applicable\n", 507 515 chunk->queue_index); 508 516 return -EINVAL; 509 517 }
+8 -15
drivers/misc/habanalabs/debugfs.c
··· 36 36 pkt.i2c_reg = i2c_reg; 37 37 38 38 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 39 - HL_DEVICE_TIMEOUT_USEC, (long *) val); 39 + 0, (long *) val); 40 40 41 41 if (rc) 42 42 dev_err(hdev->dev, "Failed to read from I2C, error %d\n", rc); ··· 63 63 pkt.value = cpu_to_le64(val); 64 64 65 65 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 66 - HL_DEVICE_TIMEOUT_USEC, NULL); 66 + 0, NULL); 67 67 68 68 if (rc) 69 69 dev_err(hdev->dev, "Failed to write to I2C, error %d\n", rc); ··· 87 87 pkt.value = cpu_to_le64(state); 88 88 89 89 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 90 - HL_DEVICE_TIMEOUT_USEC, NULL); 90 + 0, NULL); 91 91 92 92 if (rc) 93 93 dev_err(hdev->dev, "Failed to set LED %d, error %d\n", led, rc); ··· 981 981 if (*ppos) 982 982 return 0; 983 983 984 - sprintf(tmp_buf, "%d\n", hdev->clock_gating); 984 + sprintf(tmp_buf, "0x%llx\n", hdev->clock_gating_mask); 985 985 rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf, 986 986 strlen(tmp_buf) + 1); 987 987 ··· 993 993 { 994 994 struct hl_dbg_device_entry *entry = file_inode(f)->i_private; 995 995 struct hl_device *hdev = entry->hdev; 996 - u32 value; 996 + u64 value; 997 997 ssize_t rc; 998 998 999 999 if (atomic_read(&hdev->in_reset)) { ··· 1002 1002 return 0; 1003 1003 } 1004 1004 1005 - rc = kstrtouint_from_user(buf, count, 10, &value); 1005 + rc = kstrtoull_from_user(buf, count, 16, &value); 1006 1006 if (rc) 1007 1007 return rc; 1008 1008 1009 - if (value) { 1010 - hdev->clock_gating = 1; 1011 - if (hdev->asic_funcs->enable_clock_gating) 1012 - hdev->asic_funcs->enable_clock_gating(hdev); 1013 - } else { 1014 - if (hdev->asic_funcs->disable_clock_gating) 1015 - hdev->asic_funcs->disable_clock_gating(hdev); 1016 - hdev->clock_gating = 0; 1017 - } 1009 + hdev->clock_gating_mask = value; 1010 + hdev->asic_funcs->set_clock_gating(hdev); 1018 1011 1019 1012 return count; 1020 1013 }
+1 -1
drivers/misc/habanalabs/device.c
··· 608 608 hdev->in_debug = 0; 609 609 610 610 if (!hdev->hard_reset_pending) 611 - hdev->asic_funcs->enable_clock_gating(hdev); 611 + hdev->asic_funcs->set_clock_gating(hdev); 612 612 613 613 goto out; 614 614 }
+5 -5
drivers/misc/habanalabs/firmware_if.c
··· 61 61 pkt.ctl = cpu_to_le32(opcode << ARMCP_PKT_CTL_OPCODE_SHIFT); 62 62 63 63 return hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, 64 - sizeof(pkt), HL_DEVICE_TIMEOUT_USEC, NULL); 64 + sizeof(pkt), 0, NULL); 65 65 } 66 66 67 67 int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, ··· 144 144 pkt.value = cpu_to_le64(event_type); 145 145 146 146 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 147 - HL_DEVICE_TIMEOUT_USEC, &result); 147 + 0, &result); 148 148 149 149 if (rc) 150 150 dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type); ··· 183 183 ARMCP_PKT_CTL_OPCODE_SHIFT); 184 184 185 185 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt, 186 - total_pkt_size, HL_DEVICE_TIMEOUT_USEC, &result); 186 + total_pkt_size, 0, &result); 187 187 188 188 if (rc) 189 189 dev_err(hdev->dev, "failed to unmask IRQ array\n"); ··· 204 204 test_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL); 205 205 206 206 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &test_pkt, 207 - sizeof(test_pkt), HL_DEVICE_TIMEOUT_USEC, &result); 207 + sizeof(test_pkt), 0, &result); 208 208 209 209 if (!rc) { 210 210 if (result != ARMCP_PACKET_FENCE_VAL) ··· 248 248 hb_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL); 249 249 250 250 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &hb_pkt, 251 - sizeof(hb_pkt), HL_DEVICE_TIMEOUT_USEC, &result); 251 + sizeof(hb_pkt), 0, &result); 252 252 253 253 if ((rc) || (result != ARMCP_PACKET_FENCE_VAL)) 254 254 rc = -EIO;
+84 -39
drivers/misc/habanalabs/gaudi/gaudi.c
··· 80 80 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30) 81 81 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30) 82 82 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 1000000 /* 1s */ 83 + #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */ 83 84 84 85 #define GAUDI_QMAN0_FENCE_VAL 0x72E91AB9 85 86 ··· 99 98 100 99 #define GAUDI_ARB_WDT_TIMEOUT 0x1000000 101 100 101 + #define GAUDI_CLK_GATE_DEBUGFS_MASK (\ 102 + BIT(GAUDI_ENGINE_ID_MME_0) |\ 103 + BIT(GAUDI_ENGINE_ID_MME_2) |\ 104 + GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0)) 105 + 102 106 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = { 103 107 "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3", 104 108 "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3", ··· 112 106 }; 113 107 114 108 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = { 115 - [GAUDI_PCI_DMA_1] = 0, 116 - [GAUDI_PCI_DMA_2] = 1, 117 - [GAUDI_PCI_DMA_3] = 5, 118 - [GAUDI_HBM_DMA_1] = 2, 119 - [GAUDI_HBM_DMA_2] = 3, 120 - [GAUDI_HBM_DMA_3] = 4, 121 - [GAUDI_HBM_DMA_4] = 6, 122 - [GAUDI_HBM_DMA_5] = 7 109 + [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0, 110 + [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1, 111 + [GAUDI_PCI_DMA_3] = GAUDI_ENGINE_ID_DMA_5, 112 + [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2, 113 + [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3, 114 + [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4, 115 + [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_6, 116 + [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_7 123 117 }; 124 118 125 119 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = { ··· 1825 1819 1826 1820 gaudi_init_rate_limiter(hdev); 1827 1821 1828 - gaudi_disable_clock_gating(hdev); 1822 + hdev->asic_funcs->disable_clock_gating(hdev); 1829 1823 1830 1824 for (tpc_id = 0, tpc_offset = 0; 1831 1825 tpc_id < TPC_NUMBER_OF_ENGINES; ··· 2537 2531 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 2538 2532 } 2539 2533 2540 - static void gaudi_enable_clock_gating(struct hl_device *hdev) 2534 + static void gaudi_set_clock_gating(struct hl_device *hdev) 2541 2535 { 2542 2536 struct gaudi_device *gaudi = hdev->asic_specific; 2543 2537 u32 qman_offset; 2544 2538 int i; 2545 - 2546 - if (!hdev->clock_gating) 2547 - return; 2548 - 2549 - if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) 2550 - return; 2551 2539 2552 2540 /* In case we are during debug session, don't enable the clock gate 2553 2541 * as it may interfere ··· 2549 2549 if (hdev->in_debug) 2550 2550 return; 2551 2551 2552 - for (i = 0, qman_offset = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) { 2552 + for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) { 2553 + if (!(hdev->clock_gating_mask & 2554 + (BIT_ULL(gaudi_dma_assignment[i])))) 2555 + continue; 2556 + 2553 2557 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET; 2554 2558 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, QMAN_CGM1_PWR_GATE_EN); 2555 2559 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 2556 2560 QMAN_UPPER_CP_CGM_PWR_GATE_EN); 2557 2561 } 2558 2562 2559 - for (; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) { 2563 + for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) { 2564 + if (!(hdev->clock_gating_mask & 2565 + (BIT_ULL(gaudi_dma_assignment[i])))) 2566 + continue; 2567 + 2560 2568 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET; 2561 2569 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, QMAN_CGM1_PWR_GATE_EN); 2562 2570 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 2563 2571 QMAN_COMMON_CP_CGM_PWR_GATE_EN); 2564 2572 } 2565 2573 2566 - WREG32(mmMME0_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN); 2567 - WREG32(mmMME0_QM_CGM_CFG, 2568 - QMAN_COMMON_CP_CGM_PWR_GATE_EN); 2569 - WREG32(mmMME2_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN); 2570 - WREG32(mmMME2_QM_CGM_CFG, 2571 - QMAN_COMMON_CP_CGM_PWR_GATE_EN); 2574 + if (hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0))) { 2575 + WREG32(mmMME0_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN); 2576 + WREG32(mmMME0_QM_CGM_CFG, QMAN_COMMON_CP_CGM_PWR_GATE_EN); 2577 + } 2578 + 2579 + if (hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2))) { 2580 + WREG32(mmMME2_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN); 2581 + WREG32(mmMME2_QM_CGM_CFG, QMAN_COMMON_CP_CGM_PWR_GATE_EN); 2582 + } 2572 2583 2573 2584 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) { 2585 + if (!(hdev->clock_gating_mask & 2586 + (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)))) 2587 + continue; 2588 + 2574 2589 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 2575 2590 QMAN_CGM1_PWR_GATE_EN); 2576 2591 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, ··· 2678 2663 gaudi_stop_hbm_dma_qmans(hdev); 2679 2664 gaudi_stop_pci_dma_qmans(hdev); 2680 2665 2681 - gaudi_disable_clock_gating(hdev); 2666 + hdev->asic_funcs->disable_clock_gating(hdev); 2682 2667 2683 2668 msleep(wait_timeout_ms); 2684 2669 ··· 3018 3003 3019 3004 gaudi_init_tpc_qmans(hdev); 3020 3005 3021 - gaudi_enable_clock_gating(hdev); 3006 + hdev->asic_funcs->set_clock_gating(hdev); 3022 3007 3023 3008 gaudi_enable_timestamp(hdev); 3024 3009 ··· 3127 3112 HW_CAP_HBM_DMA | HW_CAP_PLL | 3128 3113 HW_CAP_MMU | 3129 3114 HW_CAP_SRAM_SCRAMBLER | 3130 - HW_CAP_HBM_SCRAMBLER); 3115 + HW_CAP_HBM_SCRAMBLER | 3116 + HW_CAP_CLK_GATE); 3117 + 3131 3118 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat)); 3132 3119 } 3133 3120 ··· 3479 3462 *result = 0; 3480 3463 return 0; 3481 3464 } 3465 + 3466 + if (!timeout) 3467 + timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC; 3482 3468 3483 3469 return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len, 3484 3470 timeout, result); ··· 3885 3865 rc = -EPERM; 3886 3866 break; 3887 3867 3868 + case PACKET_WREG_BULK: 3869 + dev_err(hdev->dev, 3870 + "User not allowed to use WREG_BULK\n"); 3871 + rc = -EPERM; 3872 + break; 3873 + 3888 3874 case PACKET_LOAD_AND_EXE: 3889 3875 rc = gaudi_validate_load_and_exe_pkt(hdev, parser, 3890 3876 (struct packet_load_and_exe *) user_pkt); ··· 3906 3880 break; 3907 3881 3908 3882 case PACKET_WREG_32: 3909 - case PACKET_WREG_BULK: 3910 3883 case PACKET_MSG_LONG: 3911 3884 case PACKET_MSG_SHORT: 3912 3885 case PACKET_REPEAT: ··· 4546 4521 int rc = 0; 4547 4522 4548 4523 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) { 4549 - if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) { 4524 + 4525 + if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) && 4526 + (hdev->clock_gating_mask & 4527 + GAUDI_CLK_GATE_DEBUGFS_MASK)) { 4528 + 4550 4529 dev_err_ratelimited(hdev->dev, 4551 4530 "Can't read register - clock gating is enabled!\n"); 4552 4531 rc = -EFAULT; 4553 4532 } else { 4554 4533 *val = RREG32(addr - CFG_BASE); 4555 4534 } 4535 + 4556 4536 } else if ((addr >= SRAM_BASE_ADDR) && 4557 4537 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) { 4558 4538 *val = readl(hdev->pcie_bar[SRAM_BAR_ID] + ··· 4593 4563 int rc = 0; 4594 4564 4595 4565 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) { 4596 - if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) { 4566 + 4567 + if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) && 4568 + (hdev->clock_gating_mask & 4569 + GAUDI_CLK_GATE_DEBUGFS_MASK)) { 4570 + 4597 4571 dev_err_ratelimited(hdev->dev, 4598 4572 "Can't write register - clock gating is enabled!\n"); 4599 4573 rc = -EFAULT; 4600 4574 } else { 4601 4575 WREG32(addr - CFG_BASE, val); 4602 4576 } 4577 + 4603 4578 } else if ((addr >= SRAM_BASE_ADDR) && 4604 4579 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) { 4605 4580 writel(val, hdev->pcie_bar[SRAM_BAR_ID] + ··· 4640 4605 int rc = 0; 4641 4606 4642 4607 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) { 4643 - if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) { 4608 + 4609 + if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) && 4610 + (hdev->clock_gating_mask & 4611 + GAUDI_CLK_GATE_DEBUGFS_MASK)) { 4612 + 4644 4613 dev_err_ratelimited(hdev->dev, 4645 4614 "Can't read register - clock gating is enabled!\n"); 4646 4615 rc = -EFAULT; ··· 4654 4615 4655 4616 *val = (((u64) val_h) << 32) | val_l; 4656 4617 } 4618 + 4657 4619 } else if ((addr >= SRAM_BASE_ADDR) && 4658 4620 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) { 4659 4621 *val = readq(hdev->pcie_bar[SRAM_BAR_ID] + ··· 4691 4651 int rc = 0; 4692 4652 4693 4653 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) { 4694 - if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) { 4654 + 4655 + if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) && 4656 + (hdev->clock_gating_mask & 4657 + GAUDI_CLK_GATE_DEBUGFS_MASK)) { 4658 + 4695 4659 dev_err_ratelimited(hdev->dev, 4696 4660 "Can't write register - clock gating is enabled!\n"); 4697 4661 rc = -EFAULT; ··· 4704 4660 WREG32(addr + sizeof(u32) - CFG_BASE, 4705 4661 upper_32_bits(val)); 4706 4662 } 4663 + 4707 4664 } else if ((addr >= SRAM_BASE_ADDR) && 4708 4665 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) { 4709 4666 writeq(val, hdev->pcie_bar[SRAM_BAR_ID] + ··· 4926 4881 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid); 4927 4882 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid); 4928 4883 4929 - hdev->asic_funcs->enable_clock_gating(hdev); 4884 + hdev->asic_funcs->set_clock_gating(hdev); 4930 4885 4931 4886 mutex_unlock(&gaudi->clk_gate_mutex); 4932 4887 } ··· 5307 5262 } 5308 5263 5309 5264 if (disable_clock_gating) { 5310 - hdev->asic_funcs->enable_clock_gating(hdev); 5265 + hdev->asic_funcs->set_clock_gating(hdev); 5311 5266 mutex_unlock(&gaudi->clk_gate_mutex); 5312 5267 } 5313 5268 } ··· 5794 5749 /* Clear interrupts */ 5795 5750 WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0); 5796 5751 5797 - hdev->asic_funcs->enable_clock_gating(hdev); 5752 + hdev->asic_funcs->set_clock_gating(hdev); 5798 5753 5799 5754 mutex_unlock(&gaudi->clk_gate_mutex); 5800 5755 ··· 6310 6265 if (s) 6311 6266 seq_puts(s, "\n"); 6312 6267 6313 - hdev->asic_funcs->enable_clock_gating(hdev); 6268 + hdev->asic_funcs->set_clock_gating(hdev); 6314 6269 6315 6270 mutex_unlock(&gaudi->clk_gate_mutex); 6316 6271 ··· 6411 6366 dev_err(hdev->dev, 6412 6367 "Timeout while waiting for TPC%d icache prefetch\n", 6413 6368 tpc_id); 6414 - hdev->asic_funcs->enable_clock_gating(hdev); 6369 + hdev->asic_funcs->set_clock_gating(hdev); 6415 6370 mutex_unlock(&gaudi->clk_gate_mutex); 6416 6371 return -EIO; 6417 6372 } ··· 6440 6395 1000, 6441 6396 kernel_timeout); 6442 6397 6443 - hdev->asic_funcs->enable_clock_gating(hdev); 6398 + hdev->asic_funcs->set_clock_gating(hdev); 6444 6399 mutex_unlock(&gaudi->clk_gate_mutex); 6445 6400 6446 6401 if (rc) { ··· 6781 6736 .mmu_invalidate_cache = gaudi_mmu_invalidate_cache, 6782 6737 .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range, 6783 6738 .send_heartbeat = gaudi_send_heartbeat, 6784 - .enable_clock_gating = gaudi_enable_clock_gating, 6739 + .set_clock_gating = gaudi_set_clock_gating, 6785 6740 .disable_clock_gating = gaudi_disable_clock_gating, 6786 6741 .debug_coresight = gaudi_debug_coresight, 6787 6742 .is_device_idle = gaudi_is_device_idle,
+12 -8
drivers/misc/habanalabs/goya/goya.c
··· 88 88 #define GOYA_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100) 89 89 #define GOYA_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30) 90 90 #define GOYA_BOOT_FIT_REQ_TIMEOUT_USEC 1000000 /* 1s */ 91 + #define GOYA_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */ 91 92 92 93 #define GOYA_QMAN0_FENCE_VAL 0xD169B243 93 94 ··· 2831 2830 return 0; 2832 2831 } 2833 2832 2833 + if (!timeout) 2834 + timeout = GOYA_MSG_TO_CPU_TIMEOUT_USEC; 2835 + 2834 2836 return hl_fw_send_cpu_message(hdev, GOYA_QUEUE_ID_CPU_PQ, msg, len, 2835 2837 timeout, result); 2836 2838 } ··· 4435 4431 pkt->armcp_pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY << 4436 4432 ARMCP_PKT_CTL_OPCODE_SHIFT); 4437 4433 4438 - rc = goya_send_cpu_message(hdev, (u32 *) pkt, total_pkt_size, 4439 - HL_DEVICE_TIMEOUT_USEC, &result); 4434 + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt, 4435 + total_pkt_size, 0, &result); 4440 4436 4441 4437 if (rc) 4442 4438 dev_err(hdev->dev, "failed to unmask IRQ array\n"); ··· 4468 4464 ARMCP_PKT_CTL_OPCODE_SHIFT); 4469 4465 pkt.value = cpu_to_le64(event_type); 4470 4466 4471 - rc = goya_send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 4472 - HL_DEVICE_TIMEOUT_USEC, &result); 4467 + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 4468 + 0, &result); 4473 4469 4474 4470 if (rc) 4475 4471 dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type); ··· 5032 5028 return 0; 5033 5029 } 5034 5030 5035 - static void goya_enable_clock_gating(struct hl_device *hdev) 5031 + static void goya_set_clock_gating(struct hl_device *hdev) 5036 5032 { 5037 - 5033 + /* clock gating not supported in Goya */ 5038 5034 } 5039 5035 5040 5036 static void goya_disable_clock_gating(struct hl_device *hdev) 5041 5037 { 5042 - 5038 + /* clock gating not supported in Goya */ 5043 5039 } 5044 5040 5045 5041 static bool goya_is_device_idle(struct hl_device *hdev, u32 *mask, ··· 5263 5259 .mmu_invalidate_cache = goya_mmu_invalidate_cache, 5264 5260 .mmu_invalidate_cache_range = goya_mmu_invalidate_cache_range, 5265 5261 .send_heartbeat = goya_send_heartbeat, 5266 - .enable_clock_gating = goya_enable_clock_gating, 5262 + .set_clock_gating = goya_set_clock_gating, 5267 5263 .disable_clock_gating = goya_disable_clock_gating, 5268 5264 .debug_coresight = goya_debug_coresight, 5269 5265 .is_device_idle = goya_is_device_idle,
+13 -6
drivers/misc/habanalabs/habanalabs.h
··· 578 578 * @mmu_invalidate_cache_range: flush specific MMU STLB cache lines with 579 579 * ASID-VA-size mask. 580 580 * @send_heartbeat: send is-alive packet to ArmCP and verify response. 581 - * @enable_clock_gating: enable clock gating for reducing power consumption. 582 - * @disable_clock_gating: disable clock for accessing registers on HBW. 581 + * @set_clock_gating: enable/disable clock gating per engine according to 582 + * clock gating mask in hdev 583 + * @disable_clock_gating: disable clock gating completely 583 584 * @debug_coresight: perform certain actions on Coresight for debugging. 584 585 * @is_device_idle: return true if device is idle, false otherwise. 585 586 * @soft_reset_late_init: perform certain actions needed after soft reset. ··· 588 587 * @hw_queues_unlock: release H/W queues lock. 589 588 * @get_pci_id: retrieve PCI ID. 590 589 * @get_eeprom_data: retrieve EEPROM data from F/W. 591 - * @send_cpu_message: send buffer to ArmCP. 590 + * @send_cpu_message: send message to F/W. If the message is timedout, the 591 + * driver will eventually reset the device. The timeout can 592 + * be determined by the calling function or it can be 0 and 593 + * then the timeout is the default timeout for the specific 594 + * ASIC 592 595 * @get_hw_state: retrieve the H/W state 593 596 * @pci_bars_map: Map PCI BARs. 594 597 * @set_dram_bar_base: Set DRAM BAR to map specific device address. Returns ··· 685 680 int (*mmu_invalidate_cache_range)(struct hl_device *hdev, bool is_hard, 686 681 u32 asid, u64 va, u64 size); 687 682 int (*send_heartbeat)(struct hl_device *hdev); 688 - void (*enable_clock_gating)(struct hl_device *hdev); 683 + void (*set_clock_gating)(struct hl_device *hdev); 689 684 void (*disable_clock_gating)(struct hl_device *hdev); 690 685 int (*debug_coresight)(struct hl_device *hdev, void *data); 691 686 bool (*is_device_idle)(struct hl_device *hdev, u32 *mask, ··· 1403 1398 * @max_power: the max power of the device, as configured by the sysadmin. This 1404 1399 * value is saved so in case of hard-reset, the driver will restore 1405 1400 * this value and update the F/W after the re-initialization 1401 + * @clock_gating_mask: is clock gating enabled. bitmask that represents the 1402 + * different engines. See debugfs-driver-habanalabs for 1403 + * details. 1406 1404 * @in_reset: is device in reset flow. 1407 1405 * @curr_pll_profile: current PLL profile. 1408 1406 * @cs_active_cnt: number of active command submissions on this device (active ··· 1433 1425 * @init_done: is the initialization of the device done. 1434 1426 * @mmu_enable: is MMU enabled. 1435 1427 * @mmu_huge_page_opt: is MMU huge pages optimization enabled. 1436 - * @clock_gating: is clock gating enabled. 1437 1428 * @device_cpu_disabled: is the device CPU disabled (due to timeouts) 1438 1429 * @dma_mask: the dma mask that was set for this device 1439 1430 * @in_debug: is device under debug. This, together with fpriv_list, enforces ··· 1500 1493 atomic64_t dram_used_mem; 1501 1494 u64 timeout_jiffies; 1502 1495 u64 max_power; 1496 + u64 clock_gating_mask; 1503 1497 atomic_t in_reset; 1504 1498 enum hl_pll_frequency curr_pll_profile; 1505 1499 int cs_active_cnt; ··· 1522 1514 u8 dram_default_page_mapping; 1523 1515 u8 pmmu_huge_range; 1524 1516 u8 init_done; 1525 - u8 clock_gating; 1526 1517 u8 device_cpu_disabled; 1527 1518 u8 dma_mask; 1528 1519 u8 in_debug;
+1 -1
drivers/misc/habanalabs/habanalabs_drv.c
··· 232 232 hdev->fw_loading = 1; 233 233 hdev->cpu_queues_enable = 1; 234 234 hdev->heartbeat = 1; 235 - hdev->clock_gating = 1; 235 + hdev->clock_gating_mask = ULONG_MAX; 236 236 237 237 hdev->reset_pcilink = 0; 238 238 hdev->axi_drain = 0;
+9 -10
drivers/misc/habanalabs/hwmon.c
··· 10 10 #include <linux/pci.h> 11 11 #include <linux/hwmon.h> 12 12 13 - #define SENSORS_PKT_TIMEOUT 1000000 /* 1s */ 14 13 #define HWMON_NR_SENSOR_TYPES (hwmon_pwm + 1) 15 14 16 15 int hl_build_hwmon_channel_info(struct hl_device *hdev, ··· 322 323 pkt.type = __cpu_to_le16(attr); 323 324 324 325 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 325 - SENSORS_PKT_TIMEOUT, value); 326 + 0, value); 326 327 327 328 if (rc) { 328 329 dev_err(hdev->dev, ··· 349 350 pkt.value = __cpu_to_le64(value); 350 351 351 352 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 352 - SENSORS_PKT_TIMEOUT, NULL); 353 + 0, NULL); 353 354 354 355 if (rc) 355 356 dev_err(hdev->dev, ··· 373 374 pkt.type = __cpu_to_le16(attr); 374 375 375 376 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 376 - SENSORS_PKT_TIMEOUT, value); 377 + 0, value); 377 378 378 379 if (rc) { 379 380 dev_err(hdev->dev, ··· 399 400 pkt.type = __cpu_to_le16(attr); 400 401 401 402 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 402 - SENSORS_PKT_TIMEOUT, value); 403 + 0, value); 403 404 404 405 if (rc) { 405 406 dev_err(hdev->dev, ··· 425 426 pkt.type = __cpu_to_le16(attr); 426 427 427 428 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 428 - SENSORS_PKT_TIMEOUT, value); 429 + 0, value); 429 430 430 431 if (rc) { 431 432 dev_err(hdev->dev, ··· 451 452 pkt.type = __cpu_to_le16(attr); 452 453 453 454 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 454 - SENSORS_PKT_TIMEOUT, value); 455 + 0, value); 455 456 456 457 if (rc) { 457 458 dev_err(hdev->dev, ··· 478 479 pkt.value = cpu_to_le64(value); 479 480 480 481 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 481 - SENSORS_PKT_TIMEOUT, NULL); 482 + 0, NULL); 482 483 483 484 if (rc) 484 485 dev_err(hdev->dev, ··· 501 502 pkt.value = __cpu_to_le64(value); 502 503 503 504 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 504 - SENSORS_PKT_TIMEOUT, NULL); 505 + 0, NULL); 505 506 506 507 if (rc) 507 508 dev_err(hdev->dev, ··· 526 527 pkt.value = __cpu_to_le64(value); 527 528 528 529 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 529 - SENSORS_PKT_TIMEOUT, NULL); 530 + 0, NULL); 530 531 531 532 if (rc) 532 533 dev_err(hdev->dev,
+4 -7
drivers/misc/habanalabs/sysfs.c
··· 9 9 10 10 #include <linux/pci.h> 11 11 12 - #define SET_CLK_PKT_TIMEOUT 1000000 /* 1s */ 13 - #define SET_PWR_PKT_TIMEOUT 1000000 /* 1s */ 14 - 15 12 long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr) 16 13 { 17 14 struct armcp_packet pkt; ··· 26 29 pkt.pll_index = cpu_to_le32(pll_index); 27 30 28 31 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 29 - SET_CLK_PKT_TIMEOUT, &result); 32 + 0, &result); 30 33 31 34 if (rc) { 32 35 dev_err(hdev->dev, ··· 51 54 pkt.value = cpu_to_le64(freq); 52 55 53 56 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 54 - SET_CLK_PKT_TIMEOUT, NULL); 57 + 0, NULL); 55 58 56 59 if (rc) 57 60 dev_err(hdev->dev, ··· 71 74 ARMCP_PKT_CTL_OPCODE_SHIFT); 72 75 73 76 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 74 - SET_PWR_PKT_TIMEOUT, &result); 77 + 0, &result); 75 78 76 79 if (rc) { 77 80 dev_err(hdev->dev, "Failed to get max power, error %d\n", rc); ··· 93 96 pkt.value = cpu_to_le64(value); 94 97 95 98 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 96 - SET_PWR_PKT_TIMEOUT, NULL); 99 + 0, NULL); 97 100 98 101 if (rc) 99 102 dev_err(hdev->dev, "Failed to set max power, error %d\n", rc);