A modern GPGPU API & WIP Linux RDNA2+ driver
rdna driver linux gpu
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

examples: move into new folder

+120 -12
+48 -12
test/CMakeLists.txt
··· 1 + include(FetchContent) 2 + 3 + FetchContent_Declare( 4 + Catch2 5 + GIT_REPOSITORY https://github.com/catchorg/Catch2.git 6 + GIT_TAG v3.5.1 7 + ) 8 + FetchContent_MakeAvailable(Catch2) 1 9 2 - file(GLOB TEST_DIRS CONFIGURE_DEPENDS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}/test ${CMAKE_CURRENT_SOURCE_DIR}/test/*) 10 + add_library(kestrel_test_common STATIC 11 + common/test_utils.cpp 12 + ) 13 + target_link_libraries(kestrel_test_common PUBLIC kestrel Catch2::Catch2) 14 + target_include_directories(kestrel_test_common PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/common) 3 15 4 - foreach(test_dir ${TEST_DIRS}) 16 + file(GLOB UNIT_TEST_SOURCES CONFIGURE_DEPENDS "unit/*.cpp") 17 + 18 + if(UNIT_TEST_SOURCES) 19 + add_executable(kestrel_unit_tests ${UNIT_TEST_SOURCES}) 20 + target_link_libraries(kestrel_unit_tests PRIVATE kestrel_test_common Catch2::Catch2WithMain) 21 + add_sanitizers(kestrel_unit_tests) 22 + 23 + # Register with CTest 24 + add_test(NAME unit_tests COMMAND kestrel_unit_tests) 25 + 26 + # These tests need GPU access 27 + set_tests_properties(unit_tests PROPERTIES 28 + LABELS "gpu" 29 + TIMEOUT 300 30 + ) 31 + endif() 32 + 33 + include(CTest) 34 + include(Catch) 35 + 36 + if(TARGET kestrel_unit_tests) 37 + catch_discover_tests(kestrel_unit_tests) 38 + endif() 39 + 40 + file(GLOB EXAMPLE_DIRS CONFIGURE_DEPENDS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}/examples ${CMAKE_CURRENT_SOURCE_DIR}/examples/*) 41 + foreach(example_dir ${EXAMPLE_DIRS}) 5 42 # Ensure we are looking at a directory, not a random file 6 - if(IS_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/test/${test_dir}) 43 + if(IS_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/examples/${example_dir}) 7 44 8 45 # 2. Define the executable name based on the folder name 9 - set(test_name "test_${test_dir}") 46 + set(example_name "example_${example_dir}") 10 47 11 - # 3. Find all .cpp files within that specific test folder 12 - file(GLOB_RECURSE TEST_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/test/${test_dir}/*.cpp") 48 + # 3. 
Find all .cpp files within that specific folder 49 + file(GLOB_RECURSE EXAMPLE_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/examples/${example_dir}/*.cpp") 13 50 14 - if(TEST_SOURCES) 15 - add_executable(${test_name} ${TEST_SOURCES}) 51 + if(EXAMPLE_SOURCES) 52 + add_executable(${example_name} ${EXAMPLE_SOURCES}) 16 53 17 - # 4. Link your library (libvektor) 18 - target_link_libraries(${test_name} PRIVATE kestrel) 54 + target_link_libraries(${example_name} PRIVATE kestrel) 19 55 20 - add_sanitizers(${test_name}) 56 + add_sanitizers(${example_name}) 21 57 22 - message(STATUS "Added test: ${test_name}") 58 + message(STATUS "Added example: ${example_name}") 23 59 endif() 24 60 endif() 25 61 endforeach()
+2
test/common/test_utils.cpp
··· 1 + 2 + int x = 0;
+66
test/common/test_utils.h
··· 1 + #pragma once 2 + 3 + #include <kestrel/kestrel.h> 4 + #include <catch2/catch_test_macros.hpp> 5 + #include <catch2/generators/catch_generators.hpp> 6 + #include <vector> 7 + #include <chrono> 8 + #include <thread> 9 + 10 + struct DeviceGuard { 11 + KesDevice dev; 12 + 13 + DeviceGuard() : dev(kes_create()) { 14 + REQUIRE(dev != nullptr); 15 + } 16 + 17 + ~DeviceGuard() { 18 + if (dev) kes_destroy(dev); 19 + } 20 + 21 + DeviceGuard(const DeviceGuard&) = delete; 22 + DeviceGuard& operator=(const DeviceGuard&) = delete; 23 + 24 + KesAllocation alloc(size_t size, size_t align = 4, KesMemory type = KesMemoryDefault) { 25 + return kes_malloc(dev, size, align, type); 26 + } 27 + }; 28 + 29 + struct TimestampReader { 30 + KesAllocation alloc; 31 + 32 + TimestampReader(KesDevice dev, size_t count = 8) { 33 + alloc = kes_malloc(dev, sizeof(uint64_t) * count, 8, KesMemoryDefault); 34 + } 35 + 36 + uint64_t get(size_t index) const { 37 + return ((uint64_t *)alloc.cpu)[index]; 38 + } 39 + 40 + kes_gpuptr_t gpu_ptr(size_t index = 0) const { 41 + return alloc.gpu + index * sizeof(uint64_t); 42 + } 43 + 44 + // Check ordering: timestamps should be non-decreasing 45 + void check_ordering(std::vector<size_t> indices) const { 46 + for (size_t i = 1; i < indices.size(); i++) { 47 + uint64_t prev = get(indices[i-1]); 48 + uint64_t curr = get(indices[i]); 49 + INFO("Timestamp[" << indices[i-1] << "] = " << prev); 50 + INFO("Timestamp[" << indices[i] << "] = " << curr); 51 + REQUIRE(prev <= curr); 52 + } 53 + } 54 + }; 55 + 56 + // CPU-side wait for GPU completion (for testing) 57 + inline void busy_wait_for_value(void* ptr, uint64_t expected, 58 + std::chrono::milliseconds timeout = std::chrono::seconds(5)) { 59 + auto start = std::chrono::steady_clock::now(); 60 + while (*static_cast<volatile uint64_t*>(ptr) != expected) { 61 + if (std::chrono::steady_clock::now() - start > timeout) { 62 + FAIL("Timeout waiting for GPU value"); 63 + } 64 + 
std::this_thread::sleep_for(std::chrono::microseconds(100)); 65 + } 66 + }
test/test/01_hello_malloc/hello_malloc.cpp test/examples/01_hello_malloc/hello_malloc.cpp
test/test/02_hello_queue/hello_queue.cpp test/examples/02_hello_queue/hello_queue.cpp
test/test/03_hello_2queue/hello_2queue.cpp test/examples/03_hello_2queue/hello_2queue.cpp
test/test/04_hello_timestamp/hello_timestamp.cpp test/examples/04_hello_timestamp/hello_timestamp.cpp
test/test/05_hello_transfer_copy/transfer_copy.cpp test/examples/05_hello_transfer_copy/transfer_copy.cpp
+4
test/test/06_hello_sync/hello_sync.cpp test/examples/06_hello_sync/hello_sync.cpp
··· 11 11 auto x = kes_malloc(dev, size, 4, KesMemoryDefault); 12 12 auto y = kes_malloc(dev, 8, 4, KesMemoryDefault); 13 13 auto ts = kes_malloc(dev, 8 * 4, 4, KesMemoryDefault); 14 + sleep(1); 14 15 15 16 printf("x: %p (%p) (%llu bytes)\n", x.cpu, x.gpu, x.size); 16 17 printf("y: %p (%p) (%llu bytes)\n", y.cpu, y.gpu, y.size); 17 18 18 19 auto dma = kes_create_queue(dev, KesQueueTypeTransfer); 19 20 auto compute = kes_create_queue(dev, KesQueueTypeCompute); 21 + sleep(1); 20 22 21 23 auto l1 = kes_start_recording(dma); 22 24 { ··· 44 46 printf("x[0]: %u\n", ((uint32_t *)x.cpu)[0]); 45 47 sleep(1); 46 48 printf("x[0]: %u\n", ((uint32_t *)x.cpu)[0]); 49 + 50 + // in this case, synch should ensure that t0 <= t1, t1 <= t3, t2 <= t3 47 51 48 52 printf("\n"); 49 53 printf("ts0: %lu\n", ((uint64_t *)ts.cpu)[0]);