Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
// SPDX-License-Identifier: GPL-2.0-only OR MIT
/* Copyright 2024-2025 Tomeu Vizoso <tomeu@tomeuvizoso.net> */
/* Copyright 2025 Arm, Ltd. */

#include <linux/bitfield.h>
#include <linux/genalloc.h>
#include <linux/interrupt.h>
#include <linux/iopoll.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>

#include <drm/drm_file.h>
#include <drm/drm_gem.h>
#include <drm/drm_gem_dma_helper.h>
#include <drm/drm_print.h>
#include <drm/ethosu_accel.h>

#include "ethosu_device.h"
#include "ethosu_drv.h"
#include "ethosu_gem.h"
#include "ethosu_job.h"

#define JOB_TIMEOUT_MS 500

static struct ethosu_job *to_ethosu_job(struct drm_sched_job *sched_job)
{
	return container_of(sched_job, struct ethosu_job, base);
}

static const char *ethosu_fence_get_driver_name(struct dma_fence *fence)
{
	return "ethosu";
}

static const char *ethosu_fence_get_timeline_name(struct dma_fence *fence)
{
	return "ethosu-npu";
}

static const struct dma_fence_ops ethosu_fence_ops = {
	.get_driver_name = ethosu_fence_get_driver_name,
	.get_timeline_name = ethosu_fence_get_timeline_name,
};

static void ethosu_job_hw_submit(struct ethosu_device *dev, struct ethosu_job *job)
{
	struct drm_gem_dma_object *cmd_bo = to_drm_gem_dma_obj(job->cmd_bo);
	struct ethosu_validated_cmdstream_info *cmd_info = to_ethosu_bo(job->cmd_bo)->info;

	for (int i = 0; i < job->region_cnt; i++) {
		struct drm_gem_dma_object *bo;
		int region = job->region_bo_num[i];

		bo = to_drm_gem_dma_obj(job->region_bo[i]);
		writel_relaxed(lower_32_bits(bo->dma_addr), dev->regs + NPU_REG_BASEP(region));
		writel_relaxed(upper_32_bits(bo->dma_addr), dev->regs + NPU_REG_BASEP_HI(region));
		dev_dbg(dev->base.dev, "Region %d base addr = %pad\n", region, &bo->dma_addr);
	}

	if (job->sram_size) {
		writel_relaxed(lower_32_bits(dev->sramphys),
			       dev->regs + NPU_REG_BASEP(ETHOSU_SRAM_REGION));
		writel_relaxed(upper_32_bits(dev->sramphys),
			       dev->regs + NPU_REG_BASEP_HI(ETHOSU_SRAM_REGION));
		dev_dbg(dev->base.dev, "Region %d base addr = %pad (SRAM)\n",
			ETHOSU_SRAM_REGION, &dev->sramphys);
	}

	writel_relaxed(lower_32_bits(cmd_bo->dma_addr), dev->regs + NPU_REG_QBASE);
	writel_relaxed(upper_32_bits(cmd_bo->dma_addr), dev->regs + NPU_REG_QBASE_HI);
	writel_relaxed(cmd_info->cmd_size, dev->regs + NPU_REG_QSIZE);

	writel(CMD_TRANSITION_TO_RUN, dev->regs + NPU_REG_CMD);

	dev_dbg(dev->base.dev,
		"Submitted cmd at %pad to core\n", &cmd_bo->dma_addr);
}

static int ethosu_acquire_object_fences(struct ethosu_job *job)
{
	int i, ret;
	struct drm_gem_object **bos = job->region_bo;
	struct ethosu_validated_cmdstream_info *info = to_ethosu_bo(job->cmd_bo)->info;

	for (i = 0; i < job->region_cnt; i++) {
		bool is_write;

		if (!bos[i])
			break;

		ret = dma_resv_reserve_fences(bos[i]->resv, 1);
		if (ret)
			return ret;

		is_write = info->output_region[job->region_bo_num[i]];
		ret = drm_sched_job_add_implicit_dependencies(&job->base, bos[i],
							      is_write);
		if (ret)
			return ret;
	}

	return 0;
}

static void ethosu_attach_object_fences(struct ethosu_job *job)
{
	int i;
	struct dma_fence *fence = job->inference_done_fence;
	struct drm_gem_object **bos = job->region_bo;
	struct ethosu_validated_cmdstream_info *info = to_ethosu_bo(job->cmd_bo)->info;

	for (i = 0; i < job->region_cnt; i++)
		if (info->output_region[job->region_bo_num[i]])
			dma_resv_add_fence(bos[i]->resv, fence, DMA_RESV_USAGE_WRITE);
}

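/*
 * Job submission path: take the reservation locks on all region BOs,
 * record implicit dependencies, then arm and push the job to the DRM
 * scheduler. The kref taken before pushing is dropped again in
 * ethosu_job_free() once the scheduler has finished with the job.
 */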
static int ethosu_job_push(struct ethosu_job *job)
{
	struct ww_acquire_ctx acquire_ctx;
	int ret;

	ret = drm_gem_lock_reservations(job->region_bo, job->region_cnt, &acquire_ctx);
	if (ret)
		return ret;

	ret = ethosu_acquire_object_fences(job);
	if (ret)
		goto out;

	ret = pm_runtime_resume_and_get(job->dev->base.dev);
	if (!ret) {
		guard(mutex)(&job->dev->sched_lock);

		drm_sched_job_arm(&job->base);
		job->inference_done_fence = dma_fence_get(&job->base.s_fence->finished);
		kref_get(&job->refcount); /* put by scheduler job completion */
		drm_sched_entity_push_job(&job->base);
		ethosu_attach_object_fences(job);
	}

out:
	drm_gem_unlock_reservations(job->region_bo, job->region_cnt, &acquire_ctx);
	return ret;
}

static void ethosu_job_err_cleanup(struct ethosu_job *job)
{
	unsigned int i;

	for (i = 0; i < job->region_cnt; i++)
		drm_gem_object_put(job->region_bo[i]);

	drm_gem_object_put(job->cmd_bo);

	kfree(job);
}

static void ethosu_job_cleanup(struct kref *ref)
{
	struct ethosu_job *job = container_of(ref, struct ethosu_job,
					      refcount);

	pm_runtime_put_autosuspend(job->dev->base.dev);

	dma_fence_put(job->done_fence);
	dma_fence_put(job->inference_done_fence);

	ethosu_job_err_cleanup(job);
}

static void ethosu_job_put(struct ethosu_job *job)
{
	kref_put(&job->refcount, ethosu_job_cleanup);
}

static void ethosu_job_free(struct drm_sched_job *sched_job)
{
	struct ethosu_job *job = to_ethosu_job(sched_job);

	drm_sched_job_cleanup(sched_job);
	ethosu_job_put(job);
}

static struct dma_fence *ethosu_job_run(struct drm_sched_job *sched_job)
{
	struct ethosu_job *job = to_ethosu_job(sched_job);
	struct ethosu_device *dev = job->dev;
	struct dma_fence *fence = job->done_fence;

	if (unlikely(job->base.s_fence->finished.error))
		return NULL;

	dma_fence_init(fence, &ethosu_fence_ops, &dev->fence_lock,
		       dev->fence_context, ++dev->emit_seqno);
	dma_fence_get(fence);

	scoped_guard(mutex, &dev->job_lock) {
		dev->in_flight_job = job;
		ethosu_job_hw_submit(dev, job);
	}

	return fence;
}

static void ethosu_job_handle_irq(struct ethosu_device *dev)
{
	u32 status = readl_relaxed(dev->regs + NPU_REG_STATUS);

	if (status & (STATUS_BUS_STATUS | STATUS_CMD_PARSE_ERR)) {
		dev_err(dev->base.dev, "Error IRQ - %x\n", status);
		drm_sched_fault(&dev->sched);
		return;
	}

	scoped_guard(mutex, &dev->job_lock) {
		if (dev->in_flight_job) {
			dma_fence_signal(dev->in_flight_job->done_fence);
			dev->in_flight_job = NULL;
		}
	}
}

static irqreturn_t ethosu_job_irq_handler_thread(int irq, void *data)
{
	struct ethosu_device *dev = data;

	ethosu_job_handle_irq(dev);

	return IRQ_HANDLED;
}

static irqreturn_t ethosu_job_irq_handler(int irq, void *data)
{
	struct ethosu_device *dev = data;
	u32 status = readl_relaxed(dev->regs + NPU_REG_STATUS);

	if (!(status & STATUS_IRQ_RAISED))
		return IRQ_NONE;

	writel_relaxed(CMD_CLEAR_IRQ, dev->regs + NPU_REG_CMD);
	return IRQ_WAKE_THREAD;
}

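/*
 * Scheduler timeout handling: if the NPU is still in the RUNNING state
 * and QREAD advances within 100 ms, the job is treated as slow but
 * alive and the hang is ignored. Otherwise the device is reset through
 * a forced runtime suspend/resume cycle and the scheduler is restarted.
 */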
static enum drm_gpu_sched_stat ethosu_job_timedout(struct drm_sched_job *bad)
{
	struct ethosu_job *job = to_ethosu_job(bad);
	struct ethosu_device *dev = job->dev;
	bool running;
	u32 *bocmds = to_drm_gem_dma_obj(job->cmd_bo)->vaddr;
	u32 cmdaddr;

	cmdaddr = readl_relaxed(dev->regs + NPU_REG_QREAD);
	running = FIELD_GET(STATUS_STATE_RUNNING, readl_relaxed(dev->regs + NPU_REG_STATUS));

	if (running) {
		int ret;
		u32 reg;

		ret = readl_relaxed_poll_timeout(dev->regs + NPU_REG_QREAD,
						 reg,
						 reg != cmdaddr,
						 USEC_PER_MSEC, 100 * USEC_PER_MSEC);

		/* If still running and progress is being made, just return */
		if (!ret)
			return DRM_GPU_SCHED_STAT_NO_HANG;
	}

	dev_err(dev->base.dev, "NPU sched timed out: NPU %s, cmdstream offset 0x%x: 0x%x\n",
		running ? "running" : "stopped",
		cmdaddr, bocmds[cmdaddr / 4]);

	drm_sched_stop(&dev->sched, bad);

	scoped_guard(mutex, &dev->job_lock)
		dev->in_flight_job = NULL;

	/* Proceed with reset now. */
	pm_runtime_force_suspend(dev->base.dev);
	pm_runtime_force_resume(dev->base.dev);

	/* Restart the scheduler */
	drm_sched_start(&dev->sched, 0);

	return DRM_GPU_SCHED_STAT_RESET;
}

static const struct drm_sched_backend_ops ethosu_sched_ops = {
	.run_job = ethosu_job_run,
	.timedout_job = ethosu_job_timedout,
	.free_job = ethosu_job_free
};

int ethosu_job_init(struct ethosu_device *edev)
{
	struct device *dev = edev->base.dev;
	struct drm_sched_init_args args = {
		.ops = &ethosu_sched_ops,
		.num_rqs = DRM_SCHED_PRIORITY_COUNT,
		.credit_limit = 1,
		.timeout = msecs_to_jiffies(JOB_TIMEOUT_MS),
		.name = dev_name(dev),
		.dev = dev,
	};
	int ret;

	spin_lock_init(&edev->fence_lock);
	ret = devm_mutex_init(dev, &edev->job_lock);
	if (ret)
		return ret;
	ret = devm_mutex_init(dev, &edev->sched_lock);
	if (ret)
		return ret;

	edev->irq = platform_get_irq(to_platform_device(dev), 0);
	if (edev->irq < 0)
		return edev->irq;

	ret = devm_request_threaded_irq(dev, edev->irq,
					ethosu_job_irq_handler,
					ethosu_job_irq_handler_thread,
					IRQF_SHARED, KBUILD_MODNAME,
					edev);
	if (ret) {
		dev_err(dev, "failed to request irq\n");
		return ret;
	}

	edev->fence_context = dma_fence_context_alloc(1);

	ret = drm_sched_init(&edev->sched, &args);
	if (ret) {
		dev_err(dev, "Failed to create scheduler: %d\n", ret);
		goto err_sched;
	}

	return 0;

err_sched:
	drm_sched_fini(&edev->sched);
	return ret;
}

void ethosu_job_fini(struct ethosu_device *dev)
{
	drm_sched_fini(&dev->sched);
}

int ethosu_job_open(struct ethosu_file_priv *ethosu_priv)
{
	struct ethosu_device *dev = ethosu_priv->edev;
	struct drm_gpu_scheduler *sched = &dev->sched;
	int ret;

	ret = drm_sched_entity_init(&ethosu_priv->sched_entity,
				    DRM_SCHED_PRIORITY_NORMAL,
				    &sched, 1, NULL);
	if (WARN_ON(ret))
		return ret;

	return 0;
}

void ethosu_job_close(struct ethosu_file_priv *ethosu_priv)
{
	struct drm_sched_entity *entity = &ethosu_priv->sched_entity;

	drm_sched_entity_destroy(entity);
}

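/*
 * Submission ioctl helper: validates one drm_ethosu_job against the
 * command stream's validated region info. Every region the command
 * stream touches must be backed by a large enough BO, except the SRAM
 * region when driver-managed SRAM is requested, and a command-stream
 * BO cannot also be used as a data region.
 */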
static int ethosu_ioctl_submit_job(struct drm_device *dev, struct drm_file *file,
				   struct drm_ethosu_job *job)
{
	struct ethosu_device *edev = to_ethosu_device(dev);
	struct ethosu_file_priv *file_priv = file->driver_priv;
	struct ethosu_job *ejob = NULL;
	struct ethosu_validated_cmdstream_info *cmd_info;
	int ret = 0;

	/* BO region 2 is reserved if SRAM is used */
	if (job->region_bo_handles[ETHOSU_SRAM_REGION] && job->sram_size)
		return -EINVAL;

	if (edev->npu_info.sram_size < job->sram_size)
		return -EINVAL;

	ejob = kzalloc_obj(*ejob);
	if (!ejob)
		return -ENOMEM;

	kref_init(&ejob->refcount);

	ejob->dev = edev;
	ejob->sram_size = job->sram_size;

	ejob->done_fence = kzalloc_obj(*ejob->done_fence);
	if (!ejob->done_fence) {
		ret = -ENOMEM;
		/* Scheduler job not initialized yet, skip its cleanup */
		goto out_put_job;
	}

	ret = drm_sched_job_init(&ejob->base,
				 &file_priv->sched_entity,
				 1, NULL, file->client_id);
	if (ret)
		goto out_put_job;

	ejob->cmd_bo = drm_gem_object_lookup(file, job->cmd_bo);
	if (!ejob->cmd_bo) {
		ret = -ENOENT;
		goto out_cleanup_job;
	}
	cmd_info = to_ethosu_bo(ejob->cmd_bo)->info;
	if (!cmd_info) {
		ret = -EINVAL;
		goto out_cleanup_job;
	}

	for (int i = 0; i < NPU_BASEP_REGION_MAX; i++) {
		struct drm_gem_object *gem;

		/* Can only omit a BO handle if the region is not used or used for SRAM */
		if (!job->region_bo_handles[i] &&
		    (!cmd_info->region_size[i] || (i == ETHOSU_SRAM_REGION && job->sram_size)))
			continue;

		if (job->region_bo_handles[i] && !cmd_info->region_size[i]) {
			dev_err(dev->dev,
				"Cmdstream BO handle %d set for unused region %d\n",
				job->region_bo_handles[i], i);
			ret = -EINVAL;
			goto out_cleanup_job;
		}

		gem = drm_gem_object_lookup(file, job->region_bo_handles[i]);
		if (!gem) {
			dev_err(dev->dev,
				"Invalid BO handle %d for region %d\n",
				job->region_bo_handles[i], i);
			ret = -ENOENT;
			goto out_cleanup_job;
		}

		ejob->region_bo[ejob->region_cnt] = gem;
		ejob->region_bo_num[ejob->region_cnt] = i;
		ejob->region_cnt++;

		if (to_ethosu_bo(gem)->info) {
			dev_err(dev->dev,
				"Cmdstream BO handle %d used for region %d\n",
				job->region_bo_handles[i], i);
			ret = -EINVAL;
			goto out_cleanup_job;
		}

		/* Verify the command stream doesn't have accesses outside the BO */
		if (cmd_info->region_size[i] > gem->size) {
			dev_err(dev->dev,
				"cmd stream region %d size greater than BO size (%llu > %zu)\n",
				i, cmd_info->region_size[i], gem->size);
			ret = -EOVERFLOW;
			goto out_cleanup_job;
		}
	}

	ret = ethosu_job_push(ejob);
	if (!ret) {
		ethosu_job_put(ejob);
		return 0;
	}

out_cleanup_job:
	if (ret)
		drm_sched_job_cleanup(&ejob->base);
out_put_job:
	ethosu_job_err_cleanup(ejob);

	return ret;
}

int ethosu_ioctl_submit(struct drm_device *dev, void *data, struct drm_file *file)
{
	struct drm_ethosu_submit *args = data;
	int ret = 0;
	unsigned int i = 0;

	if (args->pad) {
		drm_dbg(dev, "Reserved field in drm_ethosu_submit struct should be 0.\n");
		return -EINVAL;
	}

	struct drm_ethosu_job __free(kvfree) *jobs =
		kvmalloc_objs(*jobs, args->job_count);
	if (!jobs)
		return -ENOMEM;

	if (copy_from_user(jobs,
			   (void __user *)(uintptr_t)args->jobs,
			   args->job_count * sizeof(*jobs))) {
		drm_dbg(dev, "Failed to copy incoming job array\n");
		return -EFAULT;
	}

	for (i = 0; i < args->job_count; i++) {
		ret = ethosu_ioctl_submit_job(dev, file, &jobs[i]);
		if (ret)
			return ret;
	}

	return 0;
}