Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

drm/xe/guc: Add test for G2G communications

Add a test for sending messages from every GuC to every other GuC to
test G2G communications.

Note that, being a debug only feature, the test interface only exists
in pre-production builds of the GuC firmware.

v2: Fix 'default' case to actually use the driver's registration code
as well as allocation. Add comments explaining the different test
types. Fix (C) date and an assert. Review feedback from Daniele.

Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Link: https://lore.kernel.org/r/20250910210237.603576-5-John.C.Harrison@Intel.com

+801
+2
drivers/gpu/drm/xe/abi/guc_actions_abi.h
··· 155 155 XE_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE = 0x8003, 156 156 XE_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED = 0x8004, 157 157 XE_GUC_ACTION_NOTIFY_EXCEPTION = 0x8005, 158 + XE_GUC_ACTION_TEST_G2G_SEND = 0xF001, 159 + XE_GUC_ACTION_TEST_G2G_RECV = 0xF002, 158 160 XE_GUC_ACTION_LIMIT 159 161 }; 160 162
+776
drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 AND MIT 2 + /* 3 + * Copyright © 2025 Intel Corporation 4 + */ 5 + 6 + #include <linux/delay.h> 7 + 8 + #include <kunit/test.h> 9 + #include <kunit/visibility.h> 10 + 11 + #include "tests/xe_kunit_helpers.h" 12 + #include "tests/xe_pci_test.h" 13 + #include "tests/xe_test.h" 14 + 15 + #include "xe_bo.h" 16 + #include "xe_device.h" 17 + #include "xe_pm.h" 18 + 19 + /* 20 + * There are different ways to allocate the G2G buffers. The plan for this test 21 + * is to make sure that all the possible options work. The particular option 22 + * chosen by the driver may vary from one platform to another, it may also change 23 + * with time. So to ensure consistency of testing, the relevant driver code is 24 + * replicated here to guarantee it won't change without the test being updated 25 + * to keep testing the other options. 26 + * 27 + * In order to test the actual code being used by the driver, there is also the 28 + * 'default' scheme. That will use the official driver routines to test whatever 29 + * method the driver is using on the current platform at the current time. 30 + */ 31 + enum { 32 + /* Driver defined allocation scheme */ 33 + G2G_CTB_TYPE_DEFAULT, 34 + /* Single buffer in host memory */ 35 + G2G_CTB_TYPE_HOST, 36 + /* Single buffer in a specific tile, loops across all tiles */ 37 + G2G_CTB_TYPE_TILE, 38 + }; 39 + 40 + /* 41 + * Payload is opaque to GuC. So KMD can define any structure or size it wants. 42 + */ 43 + struct g2g_test_payload { 44 + u32 tx_dev; 45 + u32 tx_tile; 46 + u32 rx_dev; 47 + u32 rx_tile; 48 + u32 seqno; 49 + }; 50 + 51 + static void g2g_test_send(struct kunit *test, struct xe_guc *guc, 52 + u32 far_tile, u32 far_dev, 53 + struct g2g_test_payload *payload) 54 + { 55 + struct xe_device *xe = guc_to_xe(guc); 56 + struct xe_gt *gt = guc_to_gt(guc); 57 + u32 *action, total; 58 + size_t payload_len; 59 + int ret; 60 + 61 + static_assert(IS_ALIGNED(sizeof(*payload), sizeof(u32))); 62 + payload_len = sizeof(*payload) / sizeof(u32); 63 + 64 + total = 4 + payload_len; 65 + action = kunit_kmalloc_array(test, total, sizeof(*action), GFP_KERNEL); 66 + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, action); 67 + 68 + action[0] = XE_GUC_ACTION_TEST_G2G_SEND; 69 + action[1] = far_tile; 70 + action[2] = far_dev; 71 + action[3] = payload_len; 72 + memcpy(action + 4, payload, payload_len * sizeof(u32)); 73 + 74 + atomic_inc(&xe->g2g_test_count); 75 + 76 + /* 77 + * Should specify the expected response notification here. Problem is that 78 + * the response will be coming from a different GuC. By the end, it should 79 + * all add up as long as an equal number of messages are sent from each GuC 80 + * and to each GuC. However, in the middle negative reservation space errors 81 + * and such like can occur. Rather than add intrusive changes to the CT layer 82 + * it is simpler to just not bother counting it at all. The system should be 83 + * idle when running the selftest, and the selftest's notification total size 84 + * is well within the G2H allocation size. So there should be no issues with 85 + * needing to block for space, which is all the tracking code is really for. 86 + */ 87 + ret = xe_guc_ct_send(&guc->ct, action, total, 0, 0); 88 + kunit_kfree(test, action); 89 + KUNIT_ASSERT_EQ_MSG(test, 0, ret, "G2G send failed: %d [%d:%d -> %d:%d]\n", ret, 90 + gt_to_tile(gt)->id, G2G_DEV(gt), far_tile, far_dev); 91 + } 92 + 93 + /* 94 + * NB: Can't use KUNIT_ASSERT and friends in here as this is called asynchronously 95 + * from the G2H notification handler. Need that to actually complete rather than 96 + * thread-abort in order to keep the rest of the driver alive! 97 + */ 98 + int xe_guc_g2g_test_notification(struct xe_guc *guc, u32 *msg, u32 len) 99 + { 100 + struct xe_device *xe = guc_to_xe(guc); 101 + struct xe_gt *rx_gt = guc_to_gt(guc), *test_gt, *tx_gt = NULL; 102 + u32 tx_tile, tx_dev, rx_tile, rx_dev, idx, got_len; 103 + struct g2g_test_payload *payload; 104 + size_t payload_len; 105 + int ret = 0, i; 106 + 107 + payload_len = sizeof(*payload) / sizeof(u32); 108 + 109 + if (unlikely(len != (G2H_LEN_DW_G2G_NOTIFY_MIN + payload_len))) { 110 + xe_gt_err(rx_gt, "G2G test notification invalid length %u", len); 111 + ret = -EPROTO; 112 + goto done; 113 + } 114 + 115 + tx_tile = msg[0]; 116 + tx_dev = msg[1]; 117 + got_len = msg[2]; 118 + payload = (struct g2g_test_payload *)(msg + 3); 119 + 120 + rx_tile = gt_to_tile(rx_gt)->id; 121 + rx_dev = G2G_DEV(rx_gt); 122 + 123 + if (got_len != payload_len) { 124 + xe_gt_err(rx_gt, "G2G: Invalid payload length: %u vs %zu\n", got_len, payload_len); 125 + ret = -EPROTO; 126 + goto done; 127 + } 128 + 129 + if (payload->tx_dev != tx_dev || payload->tx_tile != tx_tile || 130 + payload->rx_dev != rx_dev || payload->rx_tile != rx_tile) { 131 + xe_gt_err(rx_gt, "G2G: Invalid payload: %d:%d -> %d:%d vs %d:%d -> %d:%d! [%d]\n", 132 + payload->tx_tile, payload->tx_dev, payload->rx_tile, payload->rx_dev, 133 + tx_tile, tx_dev, rx_tile, rx_dev, payload->seqno); 134 + ret = -EPROTO; 135 + goto done; 136 + } 137 + 138 + if (!xe->g2g_test_array) { 139 + xe_gt_err(rx_gt, "G2G: Missing test array!\n"); 140 + ret = -ENOMEM; 141 + goto done; 142 + } 143 + 144 + for_each_gt(test_gt, xe, i) { 145 + if (gt_to_tile(test_gt)->id != tx_tile) 146 + continue; 147 + 148 + if (G2G_DEV(test_gt) != tx_dev) 149 + continue; 150 + 151 + if (tx_gt) { 152 + xe_gt_err(rx_gt, "G2G: Got duplicate TX GTs: %d vs %d for %d:%d!\n", 153 + tx_gt->info.id, test_gt->info.id, tx_tile, tx_dev); 154 + ret = -EINVAL; 155 + goto done; 156 + } 157 + 158 + tx_gt = test_gt; 159 + } 160 + if (!tx_gt) { 161 + xe_gt_err(rx_gt, "G2G: Failed to find a TX GT for %d:%d!\n", tx_tile, tx_dev); 162 + ret = -EINVAL; 163 + goto done; 164 + } 165 + 166 + idx = (tx_gt->info.id * xe->info.gt_count) + rx_gt->info.id; 167 + 168 + if (xe->g2g_test_array[idx] != payload->seqno - 1) { 169 + xe_gt_err(rx_gt, "G2G: Seqno mismatch %d vs %d for %d:%d -> %d:%d!\n", 170 + xe->g2g_test_array[idx], payload->seqno - 1, 171 + tx_tile, tx_dev, rx_tile, rx_dev); 172 + ret = -EINVAL; 173 + goto done; 174 + } 175 + 176 + xe->g2g_test_array[idx] = payload->seqno; 177 + 178 + done: 179 + atomic_dec(&xe->g2g_test_count); 180 + return ret; 181 + } 182 + 183 + /* 184 + * Send the given seqno from all GuCs to all other GuCs in tile/GT order 185 + */ 186 + static void g2g_test_in_order(struct kunit *test, struct xe_device *xe, u32 seqno) 187 + { 188 + struct xe_gt *near_gt, *far_gt; 189 + int i, j; 190 + 191 + for_each_gt(near_gt, xe, i) { 192 + u32 near_tile = gt_to_tile(near_gt)->id; 193 + u32 near_dev = G2G_DEV(near_gt); 194 + 195 + for_each_gt(far_gt, xe, j) { 196 + u32 far_tile = gt_to_tile(far_gt)->id; 197 + u32 far_dev = G2G_DEV(far_gt); 198 + struct g2g_test_payload payload; 199 + 200 + if (far_gt->info.id == near_gt->info.id) 201 + continue; 202 + 203 + payload.tx_dev = near_dev; 204 + payload.tx_tile = near_tile; 205 + payload.rx_dev = far_dev; 206 + payload.rx_tile = far_tile; 207 + payload.seqno = seqno; 208 + g2g_test_send(test, &near_gt->uc.guc, far_tile, far_dev, &payload); 209 + } 210 + } 211 + } 212 + 213 + #define WAIT_TIME_MS 100 214 + #define WAIT_COUNT (1000 / WAIT_TIME_MS) 215 + 216 + static void g2g_wait_for_complete(void *_xe) 217 + { 218 + struct xe_device *xe = (struct xe_device *)_xe; 219 + struct kunit *test = kunit_get_current_test(); 220 + int wait = 0; 221 + 222 + /* Wait for all G2H messages to be received */ 223 + while (atomic_read(&xe->g2g_test_count)) { 224 + if (++wait > WAIT_COUNT) 225 + break; 226 + 227 + msleep(WAIT_TIME_MS); 228 + } 229 + 230 + KUNIT_ASSERT_EQ_MSG(test, 0, atomic_read(&xe->g2g_test_count), 231 + "Timed out waiting for notifications\n"); 232 + kunit_info(test, "Got all notifications back\n"); 233 + } 234 + 235 + #undef WAIT_TIME_MS 236 + #undef WAIT_COUNT 237 + 238 + static void g2g_clean_array(void *_xe) 239 + { 240 + struct xe_device *xe = (struct xe_device *)_xe; 241 + 242 + xe->g2g_test_array = NULL; 243 + } 244 + 245 + #define NUM_LOOPS 16 246 + 247 + static void g2g_run_test(struct kunit *test, struct xe_device *xe) 248 + { 249 + u32 seqno, max_array; 250 + int ret, i, j; 251 + 252 + max_array = xe->info.gt_count * xe->info.gt_count; 253 + xe->g2g_test_array = kunit_kcalloc(test, max_array, sizeof(u32), GFP_KERNEL); 254 + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xe->g2g_test_array); 255 + 256 + ret = kunit_add_action_or_reset(test, g2g_clean_array, xe); 257 + KUNIT_ASSERT_EQ_MSG(test, 0, ret, "Failed to register clean up action\n"); 258 + 259 + /* 260 + * Send incrementing seqnos from all GuCs to all other GuCs in tile/GT order. 261 + * Tile/GT order doesn't really mean anything to the hardware but it is going 262 + * to be a fixed sequence every time. 263 + * 264 + * Verify that each one comes back having taken the correct route. 265 + */ 266 + ret = kunit_add_action(test, g2g_wait_for_complete, xe); 267 + KUNIT_ASSERT_EQ_MSG(test, 0, ret, "Failed to register clean up action\n"); 268 + for (seqno = 1; seqno < NUM_LOOPS; seqno++) 269 + g2g_test_in_order(test, xe, seqno); 270 + seqno--; 271 + 272 + kunit_release_action(test, &g2g_wait_for_complete, xe); 273 + 274 + /* Check for the final seqno in each slot */ 275 + for (i = 0; i < xe->info.gt_count; i++) { 276 + for (j = 0; j < xe->info.gt_count; j++) { 277 + u32 idx = (j * xe->info.gt_count) + i; 278 + 279 + if (i == j) 280 + KUNIT_ASSERT_EQ_MSG(test, 0, xe->g2g_test_array[idx], 281 + "identity seqno modified: %d for %dx%d!\n", 282 + xe->g2g_test_array[idx], i, j); 283 + else 284 + KUNIT_ASSERT_EQ_MSG(test, seqno, xe->g2g_test_array[idx], 285 + "invalid seqno: %d vs %d for %dx%d!\n", 286 + xe->g2g_test_array[idx], seqno, i, j); 287 + } 288 + } 289 + 290 + kunit_kfree(test, xe->g2g_test_array); 291 + kunit_release_action(test, &g2g_clean_array, xe); 292 + 293 + kunit_info(test, "Test passed\n"); 294 + } 295 + 296 + #undef NUM_LOOPS 297 + 298 + static void g2g_ct_stop(struct xe_guc *guc) 299 + { 300 + struct xe_gt *remote_gt, *gt = guc_to_gt(guc); 301 + struct xe_device *xe = gt_to_xe(gt); 302 + int i, t; 303 + 304 + for_each_gt(remote_gt, xe, i) { 305 + u32 tile, dev; 306 + 307 + if (remote_gt->info.id == gt->info.id) 308 + continue; 309 + 310 + tile = gt_to_tile(remote_gt)->id; 311 + dev = G2G_DEV(remote_gt); 312 + 313 + for (t = 0; t < XE_G2G_TYPE_LIMIT; t++) 314 + guc_g2g_deregister(guc, tile, dev, t); 315 + } 316 + } 317 + 318 + /* Size of a single allocation that contains all G2G CTBs across all GTs */ 319 + static u32 g2g_ctb_size(struct kunit *test, struct xe_device *xe) 320 + { 321 + unsigned int count = xe->info.gt_count; 322 + u32 num_channels = (count * (count - 1)) / 2; 323 + 324 + kunit_info(test, "Size: (%d * %d / 2) * %d * 0x%08X + 0x%08X => 0x%08X [%d]\n", 325 + count, count - 1, XE_G2G_TYPE_LIMIT, G2G_BUFFER_SIZE, G2G_DESC_AREA_SIZE, 326 + num_channels * XE_G2G_TYPE_LIMIT * G2G_BUFFER_SIZE + G2G_DESC_AREA_SIZE, 327 + num_channels * XE_G2G_TYPE_LIMIT); 328 + 329 + return num_channels * XE_G2G_TYPE_LIMIT * G2G_BUFFER_SIZE + G2G_DESC_AREA_SIZE; 330 + } 331 + 332 + /* 333 + * Use the driver's regular CTB allocation scheme. 334 + */ 335 + static void g2g_alloc_default(struct kunit *test, struct xe_device *xe) 336 + { 337 + struct xe_gt *gt; 338 + int i; 339 + 340 + kunit_info(test, "Default [tiles = %d, GTs = %d]\n", 341 + xe->info.tile_count, xe->info.gt_count); 342 + 343 + for_each_gt(gt, xe, i) { 344 + struct xe_guc *guc = &gt->uc.guc; 345 + int ret; 346 + 347 + ret = guc_g2g_alloc(guc); 348 + KUNIT_ASSERT_EQ_MSG(test, 0, ret, "G2G alloc failed: %pe", ERR_PTR(ret)); 349 + continue; 350 + } 351 + } 352 + 353 + static void g2g_distribute(struct kunit *test, struct xe_device *xe, struct xe_bo *bo) 354 + { 355 + struct xe_gt *root_gt, *gt; 356 + int i; 357 + 358 + root_gt = xe_device_get_gt(xe, 0); 359 + root_gt->uc.guc.g2g.bo = bo; 360 + root_gt->uc.guc.g2g.owned = true; 361 + kunit_info(test, "[%d.%d] Assigned 0x%p\n", gt_to_tile(root_gt)->id, root_gt->info.id, bo); 362 + 363 + for_each_gt(gt, xe, i) { 364 + if (gt->info.id != 0) { 365 + gt->uc.guc.g2g.owned = false; 366 + gt->uc.guc.g2g.bo = xe_bo_get(bo); 367 + kunit_info(test, "[%d.%d] Pinned 0x%p\n", 368 + gt_to_tile(gt)->id, gt->info.id, gt->uc.guc.g2g.bo); 369 + } 370 + 371 + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, gt->uc.guc.g2g.bo); 372 + } 373 + } 374 + 375 + /* 376 + * Allocate a single blob on the host and split between all G2G CTBs. 377 + */ 378 + static void g2g_alloc_host(struct kunit *test, struct xe_device *xe) 379 + { 380 + struct xe_bo *bo; 381 + u32 g2g_size; 382 + 383 + kunit_info(test, "Host [tiles = %d, GTs = %d]\n", xe->info.tile_count, xe->info.gt_count); 384 + 385 + g2g_size = g2g_ctb_size(test, xe); 386 + bo = xe_managed_bo_create_pin_map(xe, xe_device_get_root_tile(xe), g2g_size, 387 + XE_BO_FLAG_SYSTEM | 388 + XE_BO_FLAG_GGTT | 389 + XE_BO_FLAG_GGTT_ALL | 390 + XE_BO_FLAG_GGTT_INVALIDATE); 391 + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bo); 392 + kunit_info(test, "[HST] G2G buffer create: 0x%p\n", bo); 393 + 394 + xe_map_memset(xe, &bo->vmap, 0, 0, g2g_size); 395 + 396 + g2g_distribute(test, xe, bo); 397 + } 398 + 399 + /* 400 + * Allocate a single blob on the given tile and split between all G2G CTBs. 401 + */ 402 + static void g2g_alloc_tile(struct kunit *test, struct xe_device *xe, struct xe_tile *tile) 403 + { 404 + struct xe_bo *bo; 405 + u32 g2g_size; 406 + 407 + KUNIT_ASSERT_TRUE(test, IS_DGFX(xe)); 408 + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, tile); 409 + 410 + kunit_info(test, "Tile %d [tiles = %d, GTs = %d]\n", 411 + tile->id, xe->info.tile_count, xe->info.gt_count); 412 + 413 + g2g_size = g2g_ctb_size(test, xe); 414 + bo = xe_managed_bo_create_pin_map(xe, tile, g2g_size, 415 + XE_BO_FLAG_VRAM_IF_DGFX(tile) | 416 + XE_BO_FLAG_GGTT | 417 + XE_BO_FLAG_GGTT_ALL | 418 + XE_BO_FLAG_GGTT_INVALIDATE); 419 + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bo); 420 + kunit_info(test, "[%d.*] G2G buffer create: 0x%p\n", tile->id, bo); 421 + 422 + xe_map_memset(xe, &bo->vmap, 0, 0, g2g_size); 423 + 424 + g2g_distribute(test, xe, bo); 425 + } 426 + 427 + static void g2g_free(struct kunit *test, struct xe_device *xe) 428 + { 429 + struct xe_gt *gt; 430 + struct xe_bo *bo; 431 + int i; 432 + 433 + for_each_gt(gt, xe, i) { 434 + bo = gt->uc.guc.g2g.bo; 435 + if (!bo) 436 + continue; 437 + 438 + if (gt->uc.guc.g2g.owned) { 439 + xe_managed_bo_unpin_map_no_vm(bo); 440 + kunit_info(test, "[%d.%d] Unmapped 0x%p\n", 441 + gt_to_tile(gt)->id, gt->info.id, bo); 442 + } else { 443 + xe_bo_put(bo); 444 + kunit_info(test, "[%d.%d] Unpinned 0x%p\n", 445 + gt_to_tile(gt)->id, gt->info.id, bo); 446 + } 447 + 448 + gt->uc.guc.g2g.bo = NULL; 449 + } 450 + } 451 + 452 + static void g2g_stop(struct kunit *test, struct xe_device *xe) 453 + { 454 + struct xe_gt *gt; 455 + int i; 456 + 457 + for_each_gt(gt, xe, i) { 458 + struct xe_guc *guc = &gt->uc.guc; 459 + 460 + if (!guc->g2g.bo) 461 + continue; 462 + 463 + g2g_ct_stop(guc); 464 + } 465 + 466 + g2g_free(test, xe); 467 + } 468 + 469 + /* 470 + * Generate a unique id for each bi-directional CTB for each pair of 471 + * near and far tiles/devices. The id can then be used as an index into 472 + * a single allocation that is sub-divided into multiple CTBs. 473 + * 474 + * For example, with two devices per tile and two tiles, the table should 475 + * look like: 476 + * Far <tile>.<dev> 477 + * 0.0 0.1 1.0 1.1 478 + * N 0.0 --/-- 00/01 02/03 04/05 479 + * e 0.1 01/00 --/-- 06/07 08/09 480 + * a 1.0 03/02 07/06 --/-- 10/11 481 + * r 1.1 05/04 09/08 11/10 --/-- 482 + * 483 + * Where each entry is Rx/Tx channel id. 484 + * 485 + * So GuC #3 (tile 1, dev 1) talking to GuC #2 (tile 1, dev 0) would 486 + * be reading from channel #11 and writing to channel #10. Whereas, 487 + * GuC #2 talking to GuC #3 would be read on #10 and write to #11. 488 + */ 489 + static int g2g_slot_flat(u32 near_tile, u32 near_dev, u32 far_tile, u32 far_dev, 490 + u32 type, u32 max_inst, bool have_dev) 491 + { 492 + u32 near = near_tile, far = far_tile; 493 + u32 idx = 0, x, y, direction; 494 + int i; 495 + 496 + if (have_dev) { 497 + near = (near << 1) | near_dev; 498 + far = (far << 1) | far_dev; 499 + } 500 + 501 + /* No need to send to one's self */ 502 + if (far == near) 503 + return -1; 504 + 505 + if (far > near) { 506 + /* Top right table half */ 507 + x = far; 508 + y = near; 509 + 510 + /* T/R is 'forwards' direction */ 511 + direction = type; 512 + } else { 513 + /* Bottom left table half */ 514 + x = near; 515 + y = far; 516 + 517 + /* B/L is 'backwards' direction */ 518 + direction = (1 - type); 519 + } 520 + 521 + /* Count the rows prior to the target */ 522 + for (i = y; i > 0; i--) 523 + idx += max_inst - i; 524 + 525 + /* Count this row up to the target */ 526 + idx += (x - 1 - y); 527 + 528 + /* Slots are in Rx/Tx pairs */ 529 + idx *= 2; 530 + 531 + /* Pick Rx/Tx direction */ 532 + idx += direction; 533 + 534 + return idx; 535 + } 536 + 537 + static int g2g_register_flat(struct xe_guc *guc, u32 far_tile, u32 far_dev, u32 type, bool have_dev) 538 + { 539 + struct xe_gt *gt = guc_to_gt(guc); 540 + struct xe_device *xe = gt_to_xe(gt); 541 + u32 near_tile = gt_to_tile(gt)->id; 542 + u32 near_dev = G2G_DEV(gt); 543 + u32 max = xe->info.gt_count; 544 + int idx; 545 + u32 base, desc, buf; 546 + 547 + if (!guc->g2g.bo) 548 + return -ENODEV; 549 + 550 + idx = g2g_slot_flat(near_tile, near_dev, far_tile, far_dev, type, max, have_dev); 551 + xe_assert(xe, idx >= 0); 552 + 553 + base = guc_bo_ggtt_addr(guc, guc->g2g.bo); 554 + desc = base + idx * G2G_DESC_SIZE; 555 + buf = base + idx * G2G_BUFFER_SIZE + G2G_DESC_AREA_SIZE; 556 + 557 + xe_assert(xe, (desc - base + G2G_DESC_SIZE) <= G2G_DESC_AREA_SIZE); 558 + xe_assert(xe, (buf - base + G2G_BUFFER_SIZE) <= xe_bo_size(guc->g2g.bo)); 559 + 560 + return guc_action_register_g2g_buffer(guc, type, far_tile, far_dev, 561 + desc, buf, G2G_BUFFER_SIZE); 562 + } 563 + 564 + static void g2g_start(struct kunit *test, struct xe_guc *guc) 565 + { 566 + struct xe_gt *remote_gt, *gt = guc_to_gt(guc); 567 + struct xe_device *xe = gt_to_xe(gt); 568 + unsigned int i; 569 + int t, ret; 570 + bool have_dev; 571 + 572 + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, guc->g2g.bo); 573 + 574 + /* GuC interface will need extending if more GT device types are ever created. */ 575 + KUNIT_ASSERT_TRUE(test, 576 + (gt->info.type == XE_GT_TYPE_MAIN) || 577 + (gt->info.type == XE_GT_TYPE_MEDIA)); 578 + 579 + /* Channel numbering depends on whether there are multiple GTs per tile */ 580 + have_dev = xe->info.gt_count > xe->info.tile_count; 581 + 582 + for_each_gt(remote_gt, xe, i) { 583 + u32 tile, dev; 584 + 585 + if (remote_gt->info.id == gt->info.id) 586 + continue; 587 + 588 + tile = gt_to_tile(remote_gt)->id; 589 + dev = G2G_DEV(remote_gt); 590 + 591 + for (t = 0; t < XE_G2G_TYPE_LIMIT; t++) { 592 + ret = g2g_register_flat(guc, tile, dev, t, have_dev); 593 + KUNIT_ASSERT_EQ_MSG(test, 0, ret, "G2G register failed: %pe", ERR_PTR(ret)); 594 + } 595 + } 596 + } 597 + 598 + static void g2g_reinit(struct kunit *test, struct xe_device *xe, int ctb_type, struct xe_tile *tile) 599 + { 600 + struct xe_gt *gt; 601 + int i, found = 0; 602 + 603 + g2g_stop(test, xe); 604 + 605 + for_each_gt(gt, xe, i) { 606 + struct xe_guc *guc = &gt->uc.guc; 607 + 608 + KUNIT_ASSERT_NULL(test, guc->g2g.bo); 609 + } 610 + 611 + switch (ctb_type) { 612 + case G2G_CTB_TYPE_DEFAULT: 613 + g2g_alloc_default(test, xe); 614 + break; 615 + 616 + case G2G_CTB_TYPE_HOST: 617 + g2g_alloc_host(test, xe); 618 + break; 619 + 620 + case G2G_CTB_TYPE_TILE: 621 + g2g_alloc_tile(test, xe, tile); 622 + break; 623 + 624 + default: 625 + KUNIT_ASSERT_TRUE(test, false); 626 + } 627 + 628 + for_each_gt(gt, xe, i) { 629 + struct xe_guc *guc = &gt->uc.guc; 630 + 631 + if (!guc->g2g.bo) 632 + continue; 633 + 634 + if (ctb_type == G2G_CTB_TYPE_DEFAULT) 635 + guc_g2g_start(guc); 636 + else 637 + g2g_start(test, guc); 638 + found++; 639 + } 640 + 641 + KUNIT_ASSERT_GT_MSG(test, found, 1, "insufficient G2G channels running: %d", found); 642 + 643 + kunit_info(test, "Testing across %d GTs\n", found); 644 + } 645 + 646 + static void g2g_recreate_ctb(void *_xe) 647 + { 648 + struct xe_device *xe = (struct xe_device *)_xe; 649 + struct kunit *test = kunit_get_current_test(); 650 + 651 + g2g_stop(test, xe); 652 + 653 + if (xe_guc_g2g_wanted(xe)) 654 + g2g_reinit(test, xe, G2G_CTB_TYPE_DEFAULT, NULL); 655 + } 656 + 657 + static void g2g_pm_runtime_put(void *_xe) 658 + { 659 + struct xe_device *xe = (struct xe_device *)_xe; 660 + 661 + xe_pm_runtime_put(xe); 662 + } 663 + 664 + static void g2g_pm_runtime_get(struct kunit *test) 665 + { 666 + struct xe_device *xe = test->priv; 667 + int ret; 668 + 669 + xe_pm_runtime_get(xe); 670 + ret = kunit_add_action_or_reset(test, g2g_pm_runtime_put, xe); 671 + KUNIT_ASSERT_EQ_MSG(test, 0, ret, "Failed to register runtime PM action\n"); 672 + } 673 + 674 + static void g2g_check_skip(struct kunit *test) 675 + { 676 + struct xe_device *xe = test->priv; 677 + struct xe_gt *gt; 678 + int i; 679 + 680 + if (IS_SRIOV_VF(xe)) 681 + kunit_skip(test, "not supported from a VF"); 682 + 683 + if (xe->info.gt_count <= 1) 684 + kunit_skip(test, "not enough GTs"); 685 + 686 + for_each_gt(gt, xe, i) { 687 + struct xe_guc *guc = &gt->uc.guc; 688 + 689 + if (guc->fw.build_type == CSS_UKERNEL_INFO_BUILDTYPE_PROD) 690 + kunit_skip(test, 691 + "G2G test interface not available in production firmware builds\n"); 692 + } 693 + } 694 + 695 + /* 696 + * Simple test that does not try to recreate the CTBs. 697 + * Requires that the platform already enables G2G comms 698 + * but has no risk of leaving the system in a broken state 699 + * afterwards. 700 + */ 701 + static void xe_live_guc_g2g_kunit_default(struct kunit *test) 702 + { 703 + struct xe_device *xe = test->priv; 704 + 705 + if (!xe_guc_g2g_wanted(xe)) 706 + kunit_skip(test, "G2G not enabled"); 707 + 708 + g2g_check_skip(test); 709 + 710 + g2g_pm_runtime_get(test); 711 + 712 + kunit_info(test, "Testing default CTBs\n"); 713 + g2g_run_test(test, xe); 714 + 715 + kunit_release_action(test, &g2g_pm_runtime_put, xe); 716 + } 717 + 718 + /* 719 + * More complex test that re-creates the CTBs in various location to 720 + * test access to each location from each GuC. Can be run even on 721 + * systems that do not enable G2G by default. On the other hand, 722 + * because it recreates the CTBs, if something goes wrong it could 723 + * leave the system with broken G2G comms. 724 + */ 725 + static void xe_live_guc_g2g_kunit_allmem(struct kunit *test) 726 + { 727 + struct xe_device *xe = test->priv; 728 + int ret; 729 + 730 + g2g_check_skip(test); 731 + 732 + g2g_pm_runtime_get(test); 733 + 734 + /* Make sure to leave the system as we found it */ 735 + ret = kunit_add_action_or_reset(test, g2g_recreate_ctb, xe); 736 + KUNIT_ASSERT_EQ_MSG(test, 0, ret, "Failed to register CTB re-creation action\n"); 737 + 738 + kunit_info(test, "Testing CTB type 'default'...\n"); 739 + g2g_reinit(test, xe, G2G_CTB_TYPE_DEFAULT, NULL); 740 + g2g_run_test(test, xe); 741 + 742 + kunit_info(test, "Testing CTB type 'host'...\n"); 743 + g2g_reinit(test, xe, G2G_CTB_TYPE_HOST, NULL); 744 + g2g_run_test(test, xe); 745 + 746 + if (IS_DGFX(xe)) { 747 + struct xe_tile *tile; 748 + int id; 749 + 750 + for_each_tile(tile, xe, id) { 751 + kunit_info(test, "Testing CTB type 'tile: #%d'...\n", id); 752 + 753 + g2g_reinit(test, xe, G2G_CTB_TYPE_TILE, tile); 754 + g2g_run_test(test, xe); 755 + } 756 + } else { 757 + kunit_info(test, "Skipping local memory on integrated platform\n"); 758 + } 759 + 760 + kunit_release_action(test, g2g_recreate_ctb, xe); 761 + kunit_release_action(test, g2g_pm_runtime_put, xe); 762 + } 763 + 764 + static struct kunit_case xe_guc_g2g_tests[] = { 765 + KUNIT_CASE_PARAM(xe_live_guc_g2g_kunit_default, xe_pci_live_device_gen_param), 766 + KUNIT_CASE_PARAM(xe_live_guc_g2g_kunit_allmem, xe_pci_live_device_gen_param), 767 + {} 768 + }; 769 + 770 + VISIBLE_IF_KUNIT 771 + struct kunit_suite xe_guc_g2g_test_suite = { 772 + .name = "xe_guc_g2g", 773 + .test_cases = xe_guc_g2g_tests, 774 + .init = xe_kunit_helper_xe_device_live_test_init, 775 + }; 776 + EXPORT_SYMBOL_IF_KUNIT(xe_guc_g2g_test_suite);
+2
drivers/gpu/drm/xe/tests/xe_live_test_mod.c
··· 10 10 extern struct kunit_suite xe_dma_buf_test_suite; 11 11 extern struct kunit_suite xe_migrate_test_suite; 12 12 extern struct kunit_suite xe_mocs_test_suite; 13 + extern struct kunit_suite xe_guc_g2g_test_suite; 13 14 14 15 kunit_test_suite(xe_bo_test_suite); 15 16 kunit_test_suite(xe_bo_shrink_test_suite); 16 17 kunit_test_suite(xe_dma_buf_test_suite); 17 18 kunit_test_suite(xe_migrate_test_suite); 18 19 kunit_test_suite(xe_mocs_test_suite); 20 + kunit_test_suite(xe_guc_g2g_test_suite); 19 21 20 22 MODULE_AUTHOR("Intel Corporation"); 21 23 MODULE_LICENSE("GPL");
+7
drivers/gpu/drm/xe/xe_device_types.h
··· 595 595 u8 region_mask; 596 596 } psmi; 597 597 598 + #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) 599 + /** @g2g_test_array: for testing G2G communications */ 600 + u32 *g2g_test_array; 601 + /** @g2g_test_count: for testing G2G communications */ 602 + atomic_t g2g_test_count; 603 + #endif 604 + 598 605 /* private: */ 599 606 600 607 #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
+4
drivers/gpu/drm/xe/xe_guc.c
··· 1684 1684 xe_guc_ct_stop(&guc->ct); 1685 1685 xe_guc_submit_wedge(guc); 1686 1686 } 1687 + 1688 + #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) 1689 + #include "tests/xe_guc_g2g_test.c" 1690 + #endif
+4
drivers/gpu/drm/xe/xe_guc.h
··· 53 53 int xe_guc_start(struct xe_guc *guc); 54 54 void xe_guc_declare_wedged(struct xe_guc *guc); 55 55 56 + #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) 57 + int xe_guc_g2g_test_notification(struct xe_guc *guc, u32 *payload, u32 len); 58 + #endif 59 + 56 60 static inline u16 xe_engine_class_to_guc_class(enum xe_engine_class class) 57 61 { 58 62 switch (class) {
+5
drivers/gpu/drm/xe/xe_guc_ct.c
··· 1486 1486 case XE_GUC_ACTION_NOTIFY_EXCEPTION: 1487 1487 ret = guc_crash_process_msg(ct, action); 1488 1488 break; 1489 + #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) 1490 + case XE_GUC_ACTION_TEST_G2G_RECV: 1491 + ret = xe_guc_g2g_test_notification(guc, payload, adj_len); 1492 + break; 1493 + #endif 1489 1494 default: 1490 1495 xe_gt_err(gt, "unexpected G2H action 0x%04x\n", action); 1491 1496 }
+1
drivers/gpu/drm/xe/xe_guc_fwif.h
··· 15 15 #define G2H_LEN_DW_SCHED_CONTEXT_MODE_SET 4 16 16 #define G2H_LEN_DW_DEREGISTER_CONTEXT 3 17 17 #define G2H_LEN_DW_TLB_INVALIDATE 3 18 + #define G2H_LEN_DW_G2G_NOTIFY_MIN 3 18 19 19 20 #define GUC_ID_MAX 65535 20 21 #define GUC_ID_UNKNOWN 0xffffffff