Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

drm/amdkfd: Improve concurrency of event handling

Use rcu_read_lock to read p->event_idr concurrently with other readers
and writers. Use p->event_mutex only for creating and destroying events
and in kfd_wait_on_events.

Protect the contents of the kfd_event structure with a per-event
spinlock that can be taken inside the rcu_read_lock critical section.

This eliminates contention on p->event_mutex in set_event, which tends
to be on the critical path for dispatch latency even when busy waiting
is used. It also eliminates lock contention in event interrupt handlers.
Since the p->event_mutex is now used much less, the impact of requiring
it in kfd_wait_on_events should also be much smaller.

This should improve event handling latency for processes using multiple
GPUs concurrently.

v2: Reschedule the worker periodically to avoid soft lockup warnings

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Sean Keely <Sean.Keely@amd.com> # v1
Tested-by: Sanjay Tripathi <sanjay.tripathi@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Felix Kuehling and committed by
Alex Deucher
5273e82c 8d2aad98

+88 -43
+77 -42
drivers/gpu/drm/amd/amdkfd/kfd_events.c
··· 128 128 } 129 129 130 130 /* 131 - * Assumes that p->event_mutex is held and of course that p is not going 132 - * away (current or locked). 131 + * Assumes that p->event_mutex or rcu_readlock is held and of course that p is 132 + * not going away. 133 133 */ 134 134 static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id) 135 135 { ··· 251 251 struct kfd_event_waiter *waiter; 252 252 253 253 /* Wake up pending waiters. They will return failure */ 254 + spin_lock(&ev->lock); 254 255 list_for_each_entry(waiter, &ev->wq.head, wait.entry) 255 - waiter->event = NULL; 256 + WRITE_ONCE(waiter->event, NULL); 256 257 wake_up_all(&ev->wq); 258 + spin_unlock(&ev->lock); 257 259 258 260 if (ev->type == KFD_EVENT_TYPE_SIGNAL || 259 261 ev->type == KFD_EVENT_TYPE_DEBUG) 260 262 p->signal_event_count--; 261 263 262 264 idr_remove(&p->event_idr, ev->event_id); 265 + synchronize_rcu(); 263 266 kfree(ev); 264 267 } 265 268 ··· 395 392 ev->auto_reset = auto_reset; 396 393 ev->signaled = false; 397 394 395 + spin_lock_init(&ev->lock); 398 396 init_waitqueue_head(&ev->wq); 399 397 400 398 *event_page_offset = 0; ··· 470 466 ev->auto_reset = ev_priv->auto_reset; 471 467 ev->signaled = ev_priv->signaled; 472 468 469 + spin_lock_init(&ev->lock); 473 470 init_waitqueue_head(&ev->wq); 474 471 475 472 mutex_lock(&p->event_mutex); ··· 614 609 615 610 /* Auto reset if the list is non-empty and we're waking 616 611 * someone. waitqueue_active is safe here because we're 617 - * protected by the p->event_mutex, which is also held when 612 + * protected by the ev->lock, which is also held when 618 613 * updating the wait queues in kfd_wait_on_events. 
619 614 */ 620 615 ev->signaled = !ev->auto_reset || !waitqueue_active(&ev->wq); 621 616 622 617 list_for_each_entry(waiter, &ev->wq.head, wait.entry) 623 - waiter->activated = true; 618 + WRITE_ONCE(waiter->activated, true); 624 619 625 620 wake_up_all(&ev->wq); 626 621 } ··· 631 626 int ret = 0; 632 627 struct kfd_event *ev; 633 628 634 - mutex_lock(&p->event_mutex); 629 + rcu_read_lock(); 635 630 636 631 ev = lookup_event_by_id(p, event_id); 632 + spin_lock(&ev->lock); 637 633 638 634 if (ev && event_can_be_cpu_signaled(ev)) 639 635 set_event(ev); 640 636 else 641 637 ret = -EINVAL; 642 638 643 - mutex_unlock(&p->event_mutex); 639 + spin_unlock(&ev->lock); 640 + rcu_read_unlock(); 644 641 return ret; 645 642 } 646 643 ··· 657 650 int ret = 0; 658 651 struct kfd_event *ev; 659 652 660 - mutex_lock(&p->event_mutex); 653 + rcu_read_lock(); 661 654 662 655 ev = lookup_event_by_id(p, event_id); 656 + spin_lock(&ev->lock); 663 657 664 658 if (ev && event_can_be_cpu_signaled(ev)) 665 659 reset_event(ev); 666 660 else 667 661 ret = -EINVAL; 668 662 669 - mutex_unlock(&p->event_mutex); 663 + spin_unlock(&ev->lock); 664 + rcu_read_unlock(); 670 665 return ret; 671 666 672 667 } 673 668 674 669 static void acknowledge_signal(struct kfd_process *p, struct kfd_event *ev) 675 670 { 676 - page_slots(p->signal_page)[ev->event_id] = UNSIGNALED_EVENT_SLOT; 671 + WRITE_ONCE(page_slots(p->signal_page)[ev->event_id], UNSIGNALED_EVENT_SLOT); 677 672 } 678 673 679 674 static void set_event_from_interrupt(struct kfd_process *p, ··· 683 674 { 684 675 if (ev && event_can_be_gpu_signaled(ev)) { 685 676 acknowledge_signal(p, ev); 677 + spin_lock(&ev->lock); 686 678 set_event(ev); 679 + spin_unlock(&ev->lock); 687 680 } 688 681 } 689 682 ··· 704 693 if (!p) 705 694 return; /* Presumably process exited. 
*/ 706 695 707 - mutex_lock(&p->event_mutex); 696 + rcu_read_lock(); 708 697 709 698 if (valid_id_bits) 710 699 ev = lookup_signaled_event_by_partial_id(p, partial_id, ··· 732 721 if (id >= KFD_SIGNAL_EVENT_LIMIT) 733 722 break; 734 723 735 - if (slots[id] != UNSIGNALED_EVENT_SLOT) 724 + if (READ_ONCE(slots[id]) != UNSIGNALED_EVENT_SLOT) 736 725 set_event_from_interrupt(p, ev); 737 726 } 738 727 } else { ··· 741 730 * only signaled events from the IDR. 742 731 */ 743 732 for (id = 0; id < KFD_SIGNAL_EVENT_LIMIT; id++) 744 - if (slots[id] != UNSIGNALED_EVENT_SLOT) { 733 + if (READ_ONCE(slots[id]) != UNSIGNALED_EVENT_SLOT) { 745 734 ev = lookup_event_by_id(p, id); 746 735 set_event_from_interrupt(p, ev); 747 736 } 748 737 } 749 738 } 750 739 751 - mutex_unlock(&p->event_mutex); 740 + rcu_read_unlock(); 752 741 kfd_unref_process(p); 753 742 } 754 743 ··· 780 769 if (!ev) 781 770 return -EINVAL; 782 771 772 + spin_lock(&ev->lock); 783 773 waiter->event = ev; 784 774 waiter->activated = ev->signaled; 785 775 ev->signaled = ev->signaled && !ev->auto_reset; 776 + spin_unlock(&ev->lock); 786 777 787 778 return 0; 788 779 } ··· 796 783 /* Only add to the wait list if we actually need to 797 784 * wait on this event. 
798 785 */ 799 - if (!waiter->activated) 786 + if (!waiter->activated) { 787 + spin_lock(&ev->lock); 800 788 add_wait_queue(&ev->wq, &waiter->wait); 789 + spin_unlock(&ev->lock); 790 + } 801 791 } 802 792 803 793 /* test_event_condition - Test condition of events being waited for ··· 820 804 uint32_t activated_count = 0; 821 805 822 806 for (i = 0; i < num_events; i++) { 823 - if (!event_waiters[i].event) 807 + if (!READ_ONCE(event_waiters[i].event)) 824 808 return KFD_IOC_WAIT_RESULT_FAIL; 825 809 826 - if (event_waiters[i].activated) { 810 + if (READ_ONCE(event_waiters[i].activated)) { 827 811 if (!all) 828 812 return KFD_IOC_WAIT_RESULT_COMPLETE; 829 813 ··· 852 836 for (i = 0; i < num_events; i++) { 853 837 waiter = &event_waiters[i]; 854 838 event = waiter->event; 839 + if (!event) 840 + return -EINVAL; /* event was destroyed */ 855 841 if (waiter->activated && event->type == KFD_EVENT_TYPE_MEMORY) { 856 842 dst = &data[i].memory_exception_data; 857 843 src = &event->memory_exception_data; ··· 864 846 } 865 847 866 848 return 0; 867 - 868 849 } 869 - 870 - 871 850 872 851 static long user_timeout_to_jiffies(uint32_t user_timeout_ms) 873 852 { ··· 889 874 uint32_t i; 890 875 891 876 for (i = 0; i < num_events; i++) 892 - if (waiters[i].event) 877 + if (waiters[i].event) { 878 + spin_lock(&waiters[i].event->lock); 893 879 remove_wait_queue(&waiters[i].event->wq, 894 880 &waiters[i].wait); 881 + spin_unlock(&waiters[i].event->lock); 882 + } 895 883 896 884 kfree(waiters); 897 885 } ··· 918 900 goto out; 919 901 } 920 902 903 + /* Use p->event_mutex here to protect against concurrent creation and 904 + * destruction of events while we initialize event_waiters. 905 + */ 921 906 mutex_lock(&p->event_mutex); 922 907 923 908 for (i = 0; i < num_events; i++) { ··· 999 978 } 1000 979 __set_current_state(TASK_RUNNING); 1001 980 981 + mutex_lock(&p->event_mutex); 1002 982 /* copy_signaled_event_data may sleep. 
So this has to happen 1003 983 * after the task state is set back to RUNNING. 984 + * 985 + * The event may also have been destroyed after signaling. So 986 + * copy_signaled_event_data also must confirm that the event 987 + * still exists. Therefore this must be under the p->event_mutex 988 + * which is also held when events are destroyed. 1004 989 */ 1005 990 if (!ret && *wait_result == KFD_IOC_WAIT_RESULT_COMPLETE) 1006 991 ret = copy_signaled_event_data(num_events, 1007 992 event_waiters, events); 1008 993 1009 - mutex_lock(&p->event_mutex); 1010 994 out_unlock: 1011 995 free_waiters(num_events, event_waiters); 1012 996 mutex_unlock(&p->event_mutex); ··· 1070 1044 } 1071 1045 1072 1046 /* 1073 - * Assumes that p->event_mutex is held and of course 1074 - * that p is not going away (current or locked). 1047 + * Assumes that p is not going away. 1075 1048 */ 1076 1049 static void lookup_events_by_type_and_signal(struct kfd_process *p, 1077 1050 int type, void *event_data) ··· 1082 1057 1083 1058 ev_data = (struct kfd_hsa_memory_exception_data *) event_data; 1084 1059 1060 + rcu_read_lock(); 1061 + 1085 1062 id = KFD_FIRST_NONSIGNAL_EVENT_ID; 1086 1063 idr_for_each_entry_continue(&p->event_idr, ev, id) 1087 1064 if (ev->type == type) { ··· 1091 1064 dev_dbg(kfd_device, 1092 1065 "Event found: id %X type %d", 1093 1066 ev->event_id, ev->type); 1067 + spin_lock(&ev->lock); 1094 1068 set_event(ev); 1095 1069 if (ev->type == KFD_EVENT_TYPE_MEMORY && ev_data) 1096 1070 ev->memory_exception_data = *ev_data; 1071 + spin_unlock(&ev->lock); 1097 1072 } 1098 1073 1099 1074 if (type == KFD_EVENT_TYPE_MEMORY) { ··· 1118 1089 p->lead_thread->pid, p->pasid); 1119 1090 } 1120 1091 } 1092 + 1093 + rcu_read_unlock(); 1121 1094 } 1122 1095 1123 1096 #ifdef KFD_SUPPORT_IOMMU_V2 ··· 1195 1164 1196 1165 if (KFD_GC_VERSION(dev) != IP_VERSION(9, 1, 0) && 1197 1166 KFD_GC_VERSION(dev) != IP_VERSION(9, 2, 2) && 1198 - KFD_GC_VERSION(dev) != IP_VERSION(9, 3, 0)) { 1199 - 
mutex_lock(&p->event_mutex); 1200 - 1201 - /* Lookup events by type and signal them */ 1167 + KFD_GC_VERSION(dev) != IP_VERSION(9, 3, 0)) 1202 1168 lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_MEMORY, 1203 1169 &memory_exception_data); 1204 - 1205 - mutex_unlock(&p->event_mutex); 1206 - } 1207 1170 1208 1171 kfd_unref_process(p); 1209 1172 } ··· 1215 1190 if (!p) 1216 1191 return; /* Presumably process exited. */ 1217 1192 1218 - mutex_lock(&p->event_mutex); 1219 - 1220 - /* Lookup events by type and signal them */ 1221 1193 lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_HW_EXCEPTION, NULL); 1222 - 1223 - mutex_unlock(&p->event_mutex); 1224 1194 kfd_unref_process(p); 1225 1195 } 1226 1196 ··· 1251 1231 info->prot_write ? 1 : 0; 1252 1232 memory_exception_data.failure.imprecise = 0; 1253 1233 } 1254 - mutex_lock(&p->event_mutex); 1234 + 1235 + rcu_read_lock(); 1255 1236 1256 1237 id = KFD_FIRST_NONSIGNAL_EVENT_ID; 1257 1238 idr_for_each_entry_continue(&p->event_idr, ev, id) 1258 1239 if (ev->type == KFD_EVENT_TYPE_MEMORY) { 1240 + spin_lock(&ev->lock); 1259 1241 ev->memory_exception_data = memory_exception_data; 1260 1242 set_event(ev); 1243 + spin_unlock(&ev->lock); 1261 1244 } 1262 1245 1263 - mutex_unlock(&p->event_mutex); 1246 + rcu_read_unlock(); 1264 1247 kfd_unref_process(p); 1265 1248 } 1266 1249 ··· 1297 1274 continue; 1298 1275 } 1299 1276 1300 - mutex_lock(&p->event_mutex); 1277 + rcu_read_lock(); 1278 + 1301 1279 id = KFD_FIRST_NONSIGNAL_EVENT_ID; 1302 1280 idr_for_each_entry_continue(&p->event_idr, ev, id) { 1303 1281 if (ev->type == KFD_EVENT_TYPE_HW_EXCEPTION) { 1282 + spin_lock(&ev->lock); 1304 1283 ev->hw_exception_data = hw_exception_data; 1305 1284 ev->hw_exception_data.gpu_id = user_gpu_id; 1306 1285 set_event(ev); 1286 + spin_unlock(&ev->lock); 1307 1287 } 1308 1288 if (ev->type == KFD_EVENT_TYPE_MEMORY && 1309 1289 reset_cause == KFD_HW_EXCEPTION_ECC) { 1290 + spin_lock(&ev->lock); 1310 1291 ev->memory_exception_data = 
memory_exception_data; 1311 1292 ev->memory_exception_data.gpu_id = user_gpu_id; 1312 1293 set_event(ev); 1294 + spin_unlock(&ev->lock); 1313 1295 } 1314 1296 } 1315 - mutex_unlock(&p->event_mutex); 1297 + 1298 + rcu_read_unlock(); 1316 1299 } 1317 1300 srcu_read_unlock(&kfd_processes_srcu, idx); 1318 1301 } ··· 1351 1322 memory_exception_data.gpu_id = user_gpu_id; 1352 1323 memory_exception_data.failure.imprecise = true; 1353 1324 1354 - mutex_lock(&p->event_mutex); 1325 + rcu_read_lock(); 1326 + 1355 1327 idr_for_each_entry_continue(&p->event_idr, ev, id) { 1356 1328 if (ev->type == KFD_EVENT_TYPE_HW_EXCEPTION) { 1329 + spin_lock(&ev->lock); 1357 1330 ev->hw_exception_data = hw_exception_data; 1358 1331 set_event(ev); 1332 + spin_unlock(&ev->lock); 1359 1333 } 1360 1334 1361 1335 if (ev->type == KFD_EVENT_TYPE_MEMORY) { 1336 + spin_lock(&ev->lock); 1362 1337 ev->memory_exception_data = memory_exception_data; 1363 1338 set_event(ev); 1339 + spin_unlock(&ev->lock); 1364 1340 } 1365 1341 } 1366 - mutex_unlock(&p->event_mutex); 1342 + 1343 + rcu_read_unlock(); 1367 1344 1368 1345 /* user application will handle SIGBUS signal */ 1369 1346 send_sig(SIGBUS, p->lead_thread, 0);
+1
drivers/gpu/drm/amd/amdkfd/kfd_events.h
··· 59 59 60 60 int type; 61 61 62 + spinlock_t lock; 62 63 wait_queue_head_t wq; /* List of event waiters. */ 63 64 64 65 /* Only for signal events. */
+10 -1
drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
··· 146 146 struct kfd_dev *dev = container_of(work, struct kfd_dev, 147 147 interrupt_work); 148 148 uint32_t ih_ring_entry[KFD_MAX_RING_ENTRY_SIZE]; 149 + long start_jiffies = jiffies; 149 150 150 151 if (dev->device_info.ih_ring_entry_size > sizeof(ih_ring_entry)) { 151 152 dev_err_once(dev->adev->dev, "Ring entry too small\n"); 152 153 return; 153 154 } 154 155 155 - while (dequeue_ih_ring_entry(dev, ih_ring_entry)) 156 + while (dequeue_ih_ring_entry(dev, ih_ring_entry)) { 156 157 dev->device_info.event_interrupt_class->interrupt_wq(dev, 157 158 ih_ring_entry); 159 + if (jiffies - start_jiffies > HZ) { 160 + /* If we spent more than a second processing signals, 161 + * reschedule the worker to avoid soft-lockup warnings 162 + */ 163 + queue_work(dev->ih_wq, &dev->interrupt_work); 164 + break; 165 + } 166 + } 158 167 } 159 168 160 169 bool interrupt_is_wanted(struct kfd_dev *dev,