drm/xe/guc: Add test for G2G communications

+2

drivers/gpu/drm/xe/abi/guc_actions_abi.h

··· 155 155 XE_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE = 0x8003, 156 156 XE_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED = 0x8004, 157 157 XE_GUC_ACTION_NOTIFY_EXCEPTION = 0x8005, 158 + XE_GUC_ACTION_TEST_G2G_SEND = 0xF001, 159 + XE_GUC_ACTION_TEST_G2G_RECV = 0xF002, 158 160 XE_GUC_ACTION_LIMIT 159 161 }; 160 162

+776

drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c

··· 1 + // SPDX-License-Identifier: GPL-2.0 AND MIT 2 + /* 3 + * Copyright © 2025 Intel Corporation 4 + */ 5 + 6 + #include <linux/delay.h> 7 + 8 + #include <kunit/test.h> 9 + #include <kunit/visibility.h> 10 + 11 + #include "tests/xe_kunit_helpers.h" 12 + #include "tests/xe_pci_test.h" 13 + #include "tests/xe_test.h" 14 + 15 + #include "xe_bo.h" 16 + #include "xe_device.h" 17 + #include "xe_pm.h" 18 + 19 + /* 20 + * There are different ways to allocate the G2G buffers. The plan for this test 21 + * is to make sure that all the possible options work. The particular option 22 + * chosen by the driver may vary from one platform to another, it may also change 23 + * with time. So to ensure consistency of testing, the relevant driver code is 24 + * replicated here to guarantee it won't change without the test being updated 25 + * to keep testing the other options. 26 + * 27 + * In order to test the actual code being used by the driver, there is also the 28 + * 'default' scheme. That will use the official driver routines to test whatever 29 + * method the driver is using on the current platform at the current time. 30 + */ 31 + enum { 32 + /* Driver defined allocation scheme */ 33 + G2G_CTB_TYPE_DEFAULT, 34 + /* Single buffer in host memory */ 35 + G2G_CTB_TYPE_HOST, 36 + /* Single buffer in a specific tile, loops across all tiles */ 37 + G2G_CTB_TYPE_TILE, 38 + }; 39 + 40 + /* 41 + * Payload is opaque to GuC. So KMD can define any structure or size it wants. 42 + */ 43 + struct g2g_test_payload { 44 + u32 tx_dev; 45 + u32 tx_tile; 46 + u32 rx_dev; 47 + u32 rx_tile; 48 + u32 seqno; 49 + }; 50 + 51 + static void g2g_test_send(struct kunit *test, struct xe_guc *guc, 52 + u32 far_tile, u32 far_dev, 53 + struct g2g_test_payload *payload) 54 + { 55 + struct xe_device *xe = guc_to_xe(guc); 56 + struct xe_gt *gt = guc_to_gt(guc); 57 + u32 *action, total; 58 + size_t payload_len; 59 + int ret; 60 + 61 + static_assert(IS_ALIGNED(sizeof(*payload), sizeof(u32))); 62 + payload_len = sizeof(*payload) / sizeof(u32); 63 + 64 + total = 4 + payload_len; 65 + action = kunit_kmalloc_array(test, total, sizeof(*action), GFP_KERNEL); 66 + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, action); 67 + 68 + action[0] = XE_GUC_ACTION_TEST_G2G_SEND; 69 + action[1] = far_tile; 70 + action[2] = far_dev; 71 + action[3] = payload_len; 72 + memcpy(action + 4, payload, payload_len * sizeof(u32)); 73 + 74 + atomic_inc(&xe->g2g_test_count); 75 + 76 + /* 77 + * Should specify the expected response notification here. Problem is that 78 + * the response will be coming from a different GuC. By the end, it should 79 + * all add up as long as an equal number of messages are sent from each GuC 80 + * and to each GuC. However, in the middle negative reservation space errors 81 + * and such like can occur. Rather than add intrusive changes to the CT layer 82 + * it is simpler to just not bother counting it at all. The system should be 83 + * idle when running the selftest, and the selftest's notification total size 84 + * is well within the G2H allocation size. So there should be no issues with 85 + * needing to block for space, which is all the tracking code is really for. 86 + */ 87 + ret = xe_guc_ct_send(&guc->ct, action, total, 0, 0); 88 + kunit_kfree(test, action); 89 + KUNIT_ASSERT_EQ_MSG(test, 0, ret, "G2G send failed: %d [%d:%d -> %d:%d]\n", ret, 90 + gt_to_tile(gt)->id, G2G_DEV(gt), far_tile, far_dev); 91 + } 92 + 93 + /* 94 + * NB: Can't use KUNIT_ASSERT and friends in here as this is called asynchronously 95 + * from the G2H notification handler. Need that to actually complete rather than 96 + * thread-abort in order to keep the rest of the driver alive! 97 + */ 98 + int xe_guc_g2g_test_notification(struct xe_guc *guc, u32 *msg, u32 len) 99 + { 100 + struct xe_device *xe = guc_to_xe(guc); 101 + struct xe_gt *rx_gt = guc_to_gt(guc), *test_gt, *tx_gt = NULL; 102 + u32 tx_tile, tx_dev, rx_tile, rx_dev, idx, got_len; 103 + struct g2g_test_payload *payload; 104 + size_t payload_len; 105 + int ret = 0, i; 106 + 107 + payload_len = sizeof(*payload) / sizeof(u32); 108 + 109 + if (unlikely(len != (G2H_LEN_DW_G2G_NOTIFY_MIN + payload_len))) { 110 + xe_gt_err(rx_gt, "G2G test notification invalid length %u", len); 111 + ret = -EPROTO; 112 + goto done; 113 + } 114 + 115 + tx_tile = msg[0]; 116 + tx_dev = msg[1]; 117 + got_len = msg[2]; 118 + payload = (struct g2g_test_payload *)(msg + 3); 119 + 120 + rx_tile = gt_to_tile(rx_gt)->id; 121 + rx_dev = G2G_DEV(rx_gt); 122 + 123 + if (got_len != payload_len) { 124 + xe_gt_err(rx_gt, "G2G: Invalid payload length: %u vs %zu\n", got_len, payload_len); 125 + ret = -EPROTO; 126 + goto done; 127 + } 128 + 129 + if (payload->tx_dev != tx_dev || payload->tx_tile != tx_tile || 130 + payload->rx_dev != rx_dev || payload->rx_tile != rx_tile) { 131 + xe_gt_err(rx_gt, "G2G: Invalid payload: %d:%d -> %d:%d vs %d:%d -> %d:%d! [%d]\n", 132 + payload->tx_tile, payload->tx_dev, payload->rx_tile, payload->rx_dev, 133 + tx_tile, tx_dev, rx_tile, rx_dev, payload->seqno); 134 + ret = -EPROTO; 135 + goto done; 136 + } 137 + 138 + if (!xe->g2g_test_array) { 139 + xe_gt_err(rx_gt, "G2G: Missing test array!\n"); 140 + ret = -ENOMEM; 141 + goto done; 142 + } 143 + 144 + for_each_gt(test_gt, xe, i) { 145 + if (gt_to_tile(test_gt)->id != tx_tile) 146 + continue; 147 + 148 + if (G2G_DEV(test_gt) != tx_dev) 149 + continue; 150 + 151 + if (tx_gt) { 152 + xe_gt_err(rx_gt, "G2G: Got duplicate TX GTs: %d vs %d for %d:%d!\n", 153 + tx_gt->info.id, test_gt->info.id, tx_tile, tx_dev); 154 + ret = -EINVAL; 155 + goto done; 156 + } 157 + 158 + tx_gt = test_gt; 159 + } 160 + if (!tx_gt) { 161 + xe_gt_err(rx_gt, "G2G: Failed to find a TX GT for %d:%d!\n", tx_tile, tx_dev); 162 + ret = -EINVAL; 163 + goto done; 164 + } 165 + 166 + idx = (tx_gt->info.id * xe->info.gt_count) + rx_gt->info.id; 167 + 168 + if (xe->g2g_test_array[idx] != payload->seqno - 1) { 169 + xe_gt_err(rx_gt, "G2G: Seqno mismatch %d vs %d for %d:%d -> %d:%d!\n", 170 + xe->g2g_test_array[idx], payload->seqno - 1, 171 + tx_tile, tx_dev, rx_tile, rx_dev); 172 + ret = -EINVAL; 173 + goto done; 174 + } 175 + 176 + xe->g2g_test_array[idx] = payload->seqno; 177 + 178 + done: 179 + atomic_dec(&xe->g2g_test_count); 180 + return ret; 181 + } 182 + 183 + /* 184 + * Send the given seqno from all GuCs to all other GuCs in tile/GT order 185 + */ 186 + static void g2g_test_in_order(struct kunit *test, struct xe_device *xe, u32 seqno) 187 + { 188 + struct xe_gt *near_gt, *far_gt; 189 + int i, j; 190 + 191 + for_each_gt(near_gt, xe, i) { 192 + u32 near_tile = gt_to_tile(near_gt)->id; 193 + u32 near_dev = G2G_DEV(near_gt); 194 + 195 + for_each_gt(far_gt, xe, j) { 196 + u32 far_tile = gt_to_tile(far_gt)->id; 197 + u32 far_dev = G2G_DEV(far_gt); 198 + struct g2g_test_payload payload; 199 + 200 + if (far_gt->info.id == near_gt->info.id) 201 + continue; 202 + 203 + payload.tx_dev = near_dev; 204 + payload.tx_tile = near_tile; 205 + payload.rx_dev = far_dev; 206 + payload.rx_tile = far_tile; 207 + payload.seqno = seqno; 208 + g2g_test_send(test, &near_gt->uc.guc, far_tile, far_dev, &payload); 209 + } 210 + } 211 + } 212 + 213 + #define WAIT_TIME_MS 100 214 + #define WAIT_COUNT (1000 / WAIT_TIME_MS) 215 + 216 + static void g2g_wait_for_complete(void *_xe) 217 + { 218 + struct xe_device *xe = (struct xe_device *)_xe; 219 + struct kunit *test = kunit_get_current_test(); 220 + int wait = 0; 221 + 222 + /* Wait for all G2H messages to be received */ 223 + while (atomic_read(&xe->g2g_test_count)) { 224 + if (++wait > WAIT_COUNT) 225 + break; 226 + 227 + msleep(WAIT_TIME_MS); 228 + } 229 + 230 + KUNIT_ASSERT_EQ_MSG(test, 0, atomic_read(&xe->g2g_test_count), 231 + "Timed out waiting for notifications\n"); 232 + kunit_info(test, "Got all notifications back\n"); 233 + } 234 + 235 + #undef WAIT_TIME_MS 236 + #undef WAIT_COUNT 237 + 238 + static void g2g_clean_array(void *_xe) 239 + { 240 + struct xe_device *xe = (struct xe_device *)_xe; 241 + 242 + xe->g2g_test_array = NULL; 243 + } 244 + 245 + #define NUM_LOOPS 16 246 + 247 + static void g2g_run_test(struct kunit *test, struct xe_device *xe) 248 + { 249 + u32 seqno, max_array; 250 + int ret, i, j; 251 + 252 + max_array = xe->info.gt_count * xe->info.gt_count; 253 + xe->g2g_test_array = kunit_kcalloc(test, max_array, sizeof(u32), GFP_KERNEL); 254 + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xe->g2g_test_array); 255 + 256 + ret = kunit_add_action_or_reset(test, g2g_clean_array, xe); 257 + KUNIT_ASSERT_EQ_MSG(test, 0, ret, "Failed to register clean up action\n"); 258 + 259 + /* 260 + * Send incrementing seqnos from all GuCs to all other GuCs in tile/GT order. 261 + * Tile/GT order doesn't really mean anything to the hardware but it is going 262 + * to be a fixed sequence every time. 263 + * 264 + * Verify that each one comes back having taken the correct route. 265 + */ 266 + ret = kunit_add_action(test, g2g_wait_for_complete, xe); 267 + KUNIT_ASSERT_EQ_MSG(test, 0, ret, "Failed to register clean up action\n"); 268 + for (seqno = 1; seqno < NUM_LOOPS; seqno++) 269 + g2g_test_in_order(test, xe, seqno); 270 + seqno--; 271 + 272 + kunit_release_action(test, &g2g_wait_for_complete, xe); 273 + 274 + /* Check for the final seqno in each slot */ 275 + for (i = 0; i < xe->info.gt_count; i++) { 276 + for (j = 0; j < xe->info.gt_count; j++) { 277 + u32 idx = (j * xe->info.gt_count) + i; 278 + 279 + if (i == j) 280 + KUNIT_ASSERT_EQ_MSG(test, 0, xe->g2g_test_array[idx], 281 + "identity seqno modified: %d for %dx%d!\n", 282 + xe->g2g_test_array[idx], i, j); 283 + else 284 + KUNIT_ASSERT_EQ_MSG(test, seqno, xe->g2g_test_array[idx], 285 + "invalid seqno: %d vs %d for %dx%d!\n", 286 + xe->g2g_test_array[idx], seqno, i, j); 287 + } 288 + } 289 + 290 + kunit_kfree(test, xe->g2g_test_array); 291 + kunit_release_action(test, &g2g_clean_array, xe); 292 + 293 + kunit_info(test, "Test passed\n"); 294 + } 295 + 296 + #undef NUM_LOOPS 297 + 298 + static void g2g_ct_stop(struct xe_guc *guc) 299 + { 300 + struct xe_gt *remote_gt, *gt = guc_to_gt(guc); 301 + struct xe_device *xe = gt_to_xe(gt); 302 + int i, t; 303 + 304 + for_each_gt(remote_gt, xe, i) { 305 + u32 tile, dev; 306 + 307 + if (remote_gt->info.id == gt->info.id) 308 + continue; 309 + 310 + tile = gt_to_tile(remote_gt)->id; 311 + dev = G2G_DEV(remote_gt); 312 + 313 + for (t = 0; t < XE_G2G_TYPE_LIMIT; t++) 314 + guc_g2g_deregister(guc, tile, dev, t); 315 + } 316 + } 317 + 318 + /* Size of a single allocation that contains all G2G CTBs across all GTs */ 319 + static u32 g2g_ctb_size(struct kunit *test, struct xe_device *xe) 320 + { 321 + unsigned int count = xe->info.gt_count; 322 + u32 num_channels = (count * (count - 1)) / 2; 323 + 324 + kunit_info(test, "Size: (%d * %d / 2) * %d * 0x%08X + 0x%08X => 0x%08X [%d]\n", 325 + count, count - 1, XE_G2G_TYPE_LIMIT, G2G_BUFFER_SIZE, G2G_DESC_AREA_SIZE, 326 + num_channels * XE_G2G_TYPE_LIMIT * G2G_BUFFER_SIZE + G2G_DESC_AREA_SIZE, 327 + num_channels * XE_G2G_TYPE_LIMIT); 328 + 329 + return num_channels * XE_G2G_TYPE_LIMIT * G2G_BUFFER_SIZE + G2G_DESC_AREA_SIZE; 330 + } 331 + 332 + /* 333 + * Use the driver's regular CTB allocation scheme. 334 + */ 335 + static void g2g_alloc_default(struct kunit *test, struct xe_device *xe) 336 + { 337 + struct xe_gt *gt; 338 + int i; 339 + 340 + kunit_info(test, "Default [tiles = %d, GTs = %d]\n", 341 + xe->info.tile_count, xe->info.gt_count); 342 + 343 + for_each_gt(gt, xe, i) { 344 + struct xe_guc *guc = &gt->uc.guc; 345 + int ret; 346 + 347 + ret = guc_g2g_alloc(guc); 348 + KUNIT_ASSERT_EQ_MSG(test, 0, ret, "G2G alloc failed: %pe", ERR_PTR(ret)); 349 + continue; 350 + } 351 + } 352 + 353 + static void g2g_distribute(struct kunit *test, struct xe_device *xe, struct xe_bo *bo) 354 + { 355 + struct xe_gt *root_gt, *gt; 356 + int i; 357 + 358 + root_gt = xe_device_get_gt(xe, 0); 359 + root_gt->uc.guc.g2g.bo = bo; 360 + root_gt->uc.guc.g2g.owned = true; 361 + kunit_info(test, "[%d.%d] Assigned 0x%p\n", gt_to_tile(root_gt)->id, root_gt->info.id, bo); 362 + 363 + for_each_gt(gt, xe, i) { 364 + if (gt->info.id != 0) { 365 + gt->uc.guc.g2g.owned = false; 366 + gt->uc.guc.g2g.bo = xe_bo_get(bo); 367 + kunit_info(test, "[%d.%d] Pinned 0x%p\n", 368 + gt_to_tile(gt)->id, gt->info.id, gt->uc.guc.g2g.bo); 369 + } 370 + 371 + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, gt->uc.guc.g2g.bo); 372 + } 373 + } 374 + 375 + /* 376 + * Allocate a single blob on the host and split between all G2G CTBs. 377 + */ 378 + static void g2g_alloc_host(struct kunit *test, struct xe_device *xe) 379 + { 380 + struct xe_bo *bo; 381 + u32 g2g_size; 382 + 383 + kunit_info(test, "Host [tiles = %d, GTs = %d]\n", xe->info.tile_count, xe->info.gt_count); 384 + 385 + g2g_size = g2g_ctb_size(test, xe); 386 + bo = xe_managed_bo_create_pin_map(xe, xe_device_get_root_tile(xe), g2g_size, 387 + XE_BO_FLAG_SYSTEM | 388 + XE_BO_FLAG_GGTT | 389 + XE_BO_FLAG_GGTT_ALL | 390 + XE_BO_FLAG_GGTT_INVALIDATE); 391 + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bo); 392 + kunit_info(test, "[HST] G2G buffer create: 0x%p\n", bo); 393 + 394 + xe_map_memset(xe, &bo->vmap, 0, 0, g2g_size); 395 + 396 + g2g_distribute(test, xe, bo); 397 + } 398 + 399 + /* 400 + * Allocate a single blob on the given tile and split between all G2G CTBs. 401 + */ 402 + static void g2g_alloc_tile(struct kunit *test, struct xe_device *xe, struct xe_tile *tile) 403 + { 404 + struct xe_bo *bo; 405 + u32 g2g_size; 406 + 407 + KUNIT_ASSERT_TRUE(test, IS_DGFX(xe)); 408 + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, tile); 409 + 410 + kunit_info(test, "Tile %d [tiles = %d, GTs = %d]\n", 411 + tile->id, xe->info.tile_count, xe->info.gt_count); 412 + 413 + g2g_size = g2g_ctb_size(test, xe); 414 + bo = xe_managed_bo_create_pin_map(xe, tile, g2g_size, 415 + XE_BO_FLAG_VRAM_IF_DGFX(tile) | 416 + XE_BO_FLAG_GGTT | 417 + XE_BO_FLAG_GGTT_ALL | 418 + XE_BO_FLAG_GGTT_INVALIDATE); 419 + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bo); 420 + kunit_info(test, "[%d.*] G2G buffer create: 0x%p\n", tile->id, bo); 421 + 422 + xe_map_memset(xe, &bo->vmap, 0, 0, g2g_size); 423 + 424 + g2g_distribute(test, xe, bo); 425 + } 426 + 427 + static void g2g_free(struct kunit *test, struct xe_device *xe) 428 + { 429 + struct xe_gt *gt; 430 + struct xe_bo *bo; 431 + int i; 432 + 433 + for_each_gt(gt, xe, i) { 434 + bo = gt->uc.guc.g2g.bo; 435 + if (!bo) 436 + continue; 437 + 438 + if (gt->uc.guc.g2g.owned) { 439 + xe_managed_bo_unpin_map_no_vm(bo); 440 + kunit_info(test, "[%d.%d] Unmapped 0x%p\n", 441 + gt_to_tile(gt)->id, gt->info.id, bo); 442 + } else { 443 + xe_bo_put(bo); 444 + kunit_info(test, "[%d.%d] Unpinned 0x%p\n", 445 + gt_to_tile(gt)->id, gt->info.id, bo); 446 + } 447 + 448 + gt->uc.guc.g2g.bo = NULL; 449 + } 450 + } 451 + 452 + static void g2g_stop(struct kunit *test, struct xe_device *xe) 453 + { 454 + struct xe_gt *gt; 455 + int i; 456 + 457 + for_each_gt(gt, xe, i) { 458 + struct xe_guc *guc = &gt->uc.guc; 459 + 460 + if (!guc->g2g.bo) 461 + continue; 462 + 463 + g2g_ct_stop(guc); 464 + } 465 + 466 + g2g_free(test, xe); 467 + } 468 + 469 + /* 470 + * Generate a unique id for each bi-directional CTB for each pair of 471 + * near and far tiles/devices. The id can then be used as an index into 472 + * a single allocation that is sub-divided into multiple CTBs. 473 + * 474 + * For example, with two devices per tile and two tiles, the table should 475 + * look like: 476 + * Far <tile>.<dev> 477 + * 0.0 0.1 1.0 1.1 478 + * N 0.0 --/-- 00/01 02/03 04/05 479 + * e 0.1 01/00 --/-- 06/07 08/09 480 + * a 1.0 03/02 07/06 --/-- 10/11 481 + * r 1.1 05/04 09/08 11/10 --/-- 482 + * 483 + * Where each entry is Rx/Tx channel id. 484 + * 485 + * So GuC #3 (tile 1, dev 1) talking to GuC #2 (tile 1, dev 0) would 486 + * be reading from channel #11 and writing to channel #10. Whereas, 487 + * GuC #2 talking to GuC #3 would be read on #10 and write to #11. 488 + */ 489 + static int g2g_slot_flat(u32 near_tile, u32 near_dev, u32 far_tile, u32 far_dev, 490 + u32 type, u32 max_inst, bool have_dev) 491 + { 492 + u32 near = near_tile, far = far_tile; 493 + u32 idx = 0, x, y, direction; 494 + int i; 495 + 496 + if (have_dev) { 497 + near = (near << 1) | near_dev; 498 + far = (far << 1) | far_dev; 499 + } 500 + 501 + /* No need to send to one's self */ 502 + if (far == near) 503 + return -1; 504 + 505 + if (far > near) { 506 + /* Top right table half */ 507 + x = far; 508 + y = near; 509 + 510 + /* T/R is 'forwards' direction */ 511 + direction = type; 512 + } else { 513 + /* Bottom left table half */ 514 + x = near; 515 + y = far; 516 + 517 + /* B/L is 'backwards' direction */ 518 + direction = (1 - type); 519 + } 520 + 521 + /* Count the rows prior to the target */ 522 + for (i = y; i > 0; i--) 523 + idx += max_inst - i; 524 + 525 + /* Count this row up to the target */ 526 + idx += (x - 1 - y); 527 + 528 + /* Slots are in Rx/Tx pairs */ 529 + idx *= 2; 530 + 531 + /* Pick Rx/Tx direction */ 532 + idx += direction; 533 + 534 + return idx; 535 + } 536 + 537 + static int g2g_register_flat(struct xe_guc *guc, u32 far_tile, u32 far_dev, u32 type, bool have_dev) 538 + { 539 + struct xe_gt *gt = guc_to_gt(guc); 540 + struct xe_device *xe = gt_to_xe(gt); 541 + u32 near_tile = gt_to_tile(gt)->id; 542 + u32 near_dev = G2G_DEV(gt); 543 + u32 max = xe->info.gt_count; 544 + int idx; 545 + u32 base, desc, buf; 546 + 547 + if (!guc->g2g.bo) 548 + return -ENODEV; 549 + 550 + idx = g2g_slot_flat(near_tile, near_dev, far_tile, far_dev, type, max, have_dev); 551 + xe_assert(xe, idx >= 0); 552 + 553 + base = guc_bo_ggtt_addr(guc, guc->g2g.bo); 554 + desc = base + idx * G2G_DESC_SIZE; 555 + buf = base + idx * G2G_BUFFER_SIZE + G2G_DESC_AREA_SIZE; 556 + 557 + xe_assert(xe, (desc - base + G2G_DESC_SIZE) <= G2G_DESC_AREA_SIZE); 558 + xe_assert(xe, (buf - base + G2G_BUFFER_SIZE) <= xe_bo_size(guc->g2g.bo)); 559 + 560 + return guc_action_register_g2g_buffer(guc, type, far_tile, far_dev, 561 + desc, buf, G2G_BUFFER_SIZE); 562 + } 563 + 564 + static void g2g_start(struct kunit *test, struct xe_guc *guc) 565 + { 566 + struct xe_gt *remote_gt, *gt = guc_to_gt(guc); 567 + struct xe_device *xe = gt_to_xe(gt); 568 + unsigned int i; 569 + int t, ret; 570 + bool have_dev; 571 + 572 + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, guc->g2g.bo); 573 + 574 + /* GuC interface will need extending if more GT device types are ever created. */ 575 + KUNIT_ASSERT_TRUE(test, 576 + (gt->info.type == XE_GT_TYPE_MAIN) || 577 + (gt->info.type == XE_GT_TYPE_MEDIA)); 578 + 579 + /* Channel numbering depends on whether there are multiple GTs per tile */ 580 + have_dev = xe->info.gt_count > xe->info.tile_count; 581 + 582 + for_each_gt(remote_gt, xe, i) { 583 + u32 tile, dev; 584 + 585 + if (remote_gt->info.id == gt->info.id) 586 + continue; 587 + 588 + tile = gt_to_tile(remote_gt)->id; 589 + dev = G2G_DEV(remote_gt); 590 + 591 + for (t = 0; t < XE_G2G_TYPE_LIMIT; t++) { 592 + ret = g2g_register_flat(guc, tile, dev, t, have_dev); 593 + KUNIT_ASSERT_EQ_MSG(test, 0, ret, "G2G register failed: %pe", ERR_PTR(ret)); 594 + } 595 + } 596 + } 597 + 598 + static void g2g_reinit(struct kunit *test, struct xe_device *xe, int ctb_type, struct xe_tile *tile) 599 + { 600 + struct xe_gt *gt; 601 + int i, found = 0; 602 + 603 + g2g_stop(test, xe); 604 + 605 + for_each_gt(gt, xe, i) { 606 + struct xe_guc *guc = &gt->uc.guc; 607 + 608 + KUNIT_ASSERT_NULL(test, guc->g2g.bo); 609 + } 610 + 611 + switch (ctb_type) { 612 + case G2G_CTB_TYPE_DEFAULT: 613 + g2g_alloc_default(test, xe); 614 + break; 615 + 616 + case G2G_CTB_TYPE_HOST: 617 + g2g_alloc_host(test, xe); 618 + break; 619 + 620 + case G2G_CTB_TYPE_TILE: 621 + g2g_alloc_tile(test, xe, tile); 622 + break; 623 + 624 + default: 625 + KUNIT_ASSERT_TRUE(test, false); 626 + } 627 + 628 + for_each_gt(gt, xe, i) { 629 + struct xe_guc *guc = &gt->uc.guc; 630 + 631 + if (!guc->g2g.bo) 632 + continue; 633 + 634 + if (ctb_type == G2G_CTB_TYPE_DEFAULT) 635 + guc_g2g_start(guc); 636 + else 637 + g2g_start(test, guc); 638 + found++; 639 + } 640 + 641 + KUNIT_ASSERT_GT_MSG(test, found, 1, "insufficient G2G channels running: %d", found); 642 + 643 + kunit_info(test, "Testing across %d GTs\n", found); 644 + } 645 + 646 + static void g2g_recreate_ctb(void *_xe) 647 + { 648 + struct xe_device *xe = (struct xe_device *)_xe; 649 + struct kunit *test = kunit_get_current_test(); 650 + 651 + g2g_stop(test, xe); 652 + 653 + if (xe_guc_g2g_wanted(xe)) 654 + g2g_reinit(test, xe, G2G_CTB_TYPE_DEFAULT, NULL); 655 + } 656 + 657 + static void g2g_pm_runtime_put(void *_xe) 658 + { 659 + struct xe_device *xe = (struct xe_device *)_xe; 660 + 661 + xe_pm_runtime_put(xe); 662 + } 663 + 664 + static void g2g_pm_runtime_get(struct kunit *test) 665 + { 666 + struct xe_device *xe = test->priv; 667 + int ret; 668 + 669 + xe_pm_runtime_get(xe); 670 + ret = kunit_add_action_or_reset(test, g2g_pm_runtime_put, xe); 671 + KUNIT_ASSERT_EQ_MSG(test, 0, ret, "Failed to register runtime PM action\n"); 672 + } 673 + 674 + static void g2g_check_skip(struct kunit *test) 675 + { 676 + struct xe_device *xe = test->priv; 677 + struct xe_gt *gt; 678 + int i; 679 + 680 + if (IS_SRIOV_VF(xe)) 681 + kunit_skip(test, "not supported from a VF"); 682 + 683 + if (xe->info.gt_count <= 1) 684 + kunit_skip(test, "not enough GTs"); 685 + 686 + for_each_gt(gt, xe, i) { 687 + struct xe_guc *guc = &gt->uc.guc; 688 + 689 + if (guc->fw.build_type == CSS_UKERNEL_INFO_BUILDTYPE_PROD) 690 + kunit_skip(test, 691 + "G2G test interface not available in production firmware builds\n"); 692 + } 693 + } 694 + 695 + /* 696 + * Simple test that does not try to recreate the CTBs. 697 + * Requires that the platform already enables G2G comms 698 + * but has no risk of leaving the system in a broken state 699 + * afterwards. 700 + */ 701 + static void xe_live_guc_g2g_kunit_default(struct kunit *test) 702 + { 703 + struct xe_device *xe = test->priv; 704 + 705 + if (!xe_guc_g2g_wanted(xe)) 706 + kunit_skip(test, "G2G not enabled"); 707 + 708 + g2g_check_skip(test); 709 + 710 + g2g_pm_runtime_get(test); 711 + 712 + kunit_info(test, "Testing default CTBs\n"); 713 + g2g_run_test(test, xe); 714 + 715 + kunit_release_action(test, &g2g_pm_runtime_put, xe); 716 + } 717 + 718 + /* 719 + * More complex test that re-creates the CTBs in various location to 720 + * test access to each location from each GuC. Can be run even on 721 + * systems that do not enable G2G by default. On the other hand, 722 + * because it recreates the CTBs, if something goes wrong it could 723 + * leave the system with broken G2G comms. 724 + */ 725 + static void xe_live_guc_g2g_kunit_allmem(struct kunit *test) 726 + { 727 + struct xe_device *xe = test->priv; 728 + int ret; 729 + 730 + g2g_check_skip(test); 731 + 732 + g2g_pm_runtime_get(test); 733 + 734 + /* Make sure to leave the system as we found it */ 735 + ret = kunit_add_action_or_reset(test, g2g_recreate_ctb, xe); 736 + KUNIT_ASSERT_EQ_MSG(test, 0, ret, "Failed to register CTB re-creation action\n"); 737 + 738 + kunit_info(test, "Testing CTB type 'default'...\n"); 739 + g2g_reinit(test, xe, G2G_CTB_TYPE_DEFAULT, NULL); 740 + g2g_run_test(test, xe); 741 + 742 + kunit_info(test, "Testing CTB type 'host'...\n"); 743 + g2g_reinit(test, xe, G2G_CTB_TYPE_HOST, NULL); 744 + g2g_run_test(test, xe); 745 + 746 + if (IS_DGFX(xe)) { 747 + struct xe_tile *tile; 748 + int id; 749 + 750 + for_each_tile(tile, xe, id) { 751 + kunit_info(test, "Testing CTB type 'tile: #%d'...\n", id); 752 + 753 + g2g_reinit(test, xe, G2G_CTB_TYPE_TILE, tile); 754 + g2g_run_test(test, xe); 755 + } 756 + } else { 757 + kunit_info(test, "Skipping local memory on integrated platform\n"); 758 + } 759 + 760 + kunit_release_action(test, g2g_recreate_ctb, xe); 761 + kunit_release_action(test, g2g_pm_runtime_put, xe); 762 + } 763 + 764 + static struct kunit_case xe_guc_g2g_tests[] = { 765 + KUNIT_CASE_PARAM(xe_live_guc_g2g_kunit_default, xe_pci_live_device_gen_param), 766 + KUNIT_CASE_PARAM(xe_live_guc_g2g_kunit_allmem, xe_pci_live_device_gen_param), 767 + {} 768 + }; 769 + 770 + VISIBLE_IF_KUNIT 771 + struct kunit_suite xe_guc_g2g_test_suite = { 772 + .name = "xe_guc_g2g", 773 + .test_cases = xe_guc_g2g_tests, 774 + .init = xe_kunit_helper_xe_device_live_test_init, 775 + }; 776 + EXPORT_SYMBOL_IF_KUNIT(xe_guc_g2g_test_suite);

+2

drivers/gpu/drm/xe/tests/xe_live_test_mod.c

··· 10 10 extern struct kunit_suite xe_dma_buf_test_suite; 11 11 extern struct kunit_suite xe_migrate_test_suite; 12 12 extern struct kunit_suite xe_mocs_test_suite; 13 + extern struct kunit_suite xe_guc_g2g_test_suite; 13 14 14 15 kunit_test_suite(xe_bo_test_suite); 15 16 kunit_test_suite(xe_bo_shrink_test_suite); 16 17 kunit_test_suite(xe_dma_buf_test_suite); 17 18 kunit_test_suite(xe_migrate_test_suite); 18 19 kunit_test_suite(xe_mocs_test_suite); 20 + kunit_test_suite(xe_guc_g2g_test_suite); 19 21 20 22 MODULE_AUTHOR("Intel Corporation"); 21 23 MODULE_LICENSE("GPL");

+7

drivers/gpu/drm/xe/xe_device_types.h

··· 595 595 u8 region_mask; 596 596 } psmi; 597 597 598 + #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) 599 + /** @g2g_test_array: for testing G2G communications */ 600 + u32 *g2g_test_array; 601 + /** @g2g_test_count: for testing G2G communications */ 602 + atomic_t g2g_test_count; 603 + #endif 604 + 598 605 /* private: */ 599 606 600 607 #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)

+4

drivers/gpu/drm/xe/xe_guc.c

··· 1684 1684 xe_guc_ct_stop(&guc->ct); 1685 1685 xe_guc_submit_wedge(guc); 1686 1686 } 1687 + 1688 + #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) 1689 + #include "tests/xe_guc_g2g_test.c" 1690 + #endif

+4

drivers/gpu/drm/xe/xe_guc.h

··· 53 53 int xe_guc_start(struct xe_guc *guc); 54 54 void xe_guc_declare_wedged(struct xe_guc *guc); 55 55 56 + #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) 57 + int xe_guc_g2g_test_notification(struct xe_guc *guc, u32 *payload, u32 len); 58 + #endif 59 + 56 60 static inline u16 xe_engine_class_to_guc_class(enum xe_engine_class class) 57 61 { 58 62 switch (class) {

+5

drivers/gpu/drm/xe/xe_guc_ct.c

··· 1486 1486 case XE_GUC_ACTION_NOTIFY_EXCEPTION: 1487 1487 ret = guc_crash_process_msg(ct, action); 1488 1488 break; 1489 + #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) 1490 + case XE_GUC_ACTION_TEST_G2G_RECV: 1491 + ret = xe_guc_g2g_test_notification(guc, payload, adj_len); 1492 + break; 1493 + #endif 1489 1494 default: 1490 1495 xe_gt_err(gt, "unexpected G2H action 0x%04x\n", action); 1491 1496 }

+1

drivers/gpu/drm/xe/xe_guc_fwif.h

··· 15 15 #define G2H_LEN_DW_SCHED_CONTEXT_MODE_SET 4 16 16 #define G2H_LEN_DW_DEREGISTER_CONTEXT 3 17 17 #define G2H_LEN_DW_TLB_INVALIDATE 3 18 + #define G2H_LEN_DW_G2G_NOTIFY_MIN 3 18 19 19 20 #define GUC_ID_MAX 65535 20 21 #define GUC_ID_UNKNOWN 0xffffffff

Configure Feed

Configure Feed