Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

drm/amdkfd: KFD interrupt access ih_fifo data in-place

When handling 40000 to 80000 interrupts per second in CPX mode with 4
streams/queues per KFD node, the KFD interrupt handler becomes the
performance bottleneck.

Remove the kfifo_out memcpy overhead by accessing ih_fifo data in-place
and updating rptr with kfifo_skip_count.

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

Authored by Philip Yang and committed by Alex Deucher
f607b2b8 11815bb0

+14 -21
+14 -21
drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
@@ -114,50 +114,43 @@
  */
 bool enqueue_ih_ring_entry(struct kfd_node *node, const void *ih_ring_entry)
 {
-	int count;
-
-	count = kfifo_in(&node->ih_fifo, ih_ring_entry,
-			node->kfd->device_info.ih_ring_entry_size);
-	if (count != node->kfd->device_info.ih_ring_entry_size) {
+	if (kfifo_is_full(&node->ih_fifo)) {
 		dev_dbg_ratelimited(node->adev->dev,
-			"Interrupt ring overflow, dropping interrupt %d\n",
-			count);
+			"Interrupt ring overflow, dropping interrupt\n");
 		return false;
 	}
 
+	kfifo_in(&node->ih_fifo, ih_ring_entry, node->kfd->device_info.ih_ring_entry_size);
 	return true;
 }
 
 /*
  * Assumption: single reader/writer. This function is not re-entrant
  */
-static bool dequeue_ih_ring_entry(struct kfd_node *node, void *ih_ring_entry)
+static bool dequeue_ih_ring_entry(struct kfd_node *node, u32 **ih_ring_entry)
 {
 	int count;
 
-	count = kfifo_out(&node->ih_fifo, ih_ring_entry,
-			node->kfd->device_info.ih_ring_entry_size);
+	if (kfifo_is_empty(&node->ih_fifo))
+		return false;
 
-	WARN_ON(count && count != node->kfd->device_info.ih_ring_entry_size);
-
+	count = kfifo_out_linear_ptr(&node->ih_fifo, ih_ring_entry,
+			node->kfd->device_info.ih_ring_entry_size);
+	WARN_ON(count != node->kfd->device_info.ih_ring_entry_size);
 	return count == node->kfd->device_info.ih_ring_entry_size;
 }
 
 static void interrupt_wq(struct work_struct *work)
 {
-	struct kfd_node *dev = container_of(work, struct kfd_node,
-			interrupt_work);
-	uint32_t ih_ring_entry[KFD_MAX_RING_ENTRY_SIZE];
+	struct kfd_node *dev = container_of(work, struct kfd_node, interrupt_work);
+	uint32_t *ih_ring_entry;
 	unsigned long start_jiffies = jiffies;
 
-	if (dev->kfd->device_info.ih_ring_entry_size > sizeof(ih_ring_entry)) {
-		dev_err_once(dev->adev->dev, "Ring entry too small\n");
-		return;
-	}
-
-	while (dequeue_ih_ring_entry(dev, ih_ring_entry)) {
+	while (dequeue_ih_ring_entry(dev, &ih_ring_entry)) {
 		dev->kfd->device_info.event_interrupt_class->interrupt_wq(dev,
 				ih_ring_entry);
+		kfifo_skip_count(&dev->ih_fifo, dev->kfd->device_info.ih_ring_entry_size);
+
 		if (time_is_before_jiffies(start_jiffies + HZ)) {
 			/* If we spent more than a second processing signals,
 			 * reschedule the worker to avoid soft-lockup warnings