Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

drm/panfrost: Add fdinfo support for GPU load metrics

The drm-stats fdinfo tags made available to user space are drm-engine,
drm-cycles, drm-maxfreq and drm-curfreq, one per job slot.

This deviates from standard practice in other DRM drivers, where a single
set of key:value pairs is provided for the whole render engine. However,
Panfrost has separate queues for fragment and vertex/tiler jobs, so a
decision was made to calculate bus cycles and workload times separately.

Maximum operating frequency is calculated at devfreq initialisation time.
Current frequency is made available to user space because nvtop uses it
when performing engine usage calculations.

It is important to bear in mind that both the GPU cycle and kernel time numbers
provided are at best rough estimates, and are always reported in excess of the
actual figure, for two reasons:
- Excess time because of the delay between the end of a job's processing,
the subsequent job IRQ and the actual time of the sample.
- Time spent in the engine queue waiting for the GPU to pick up the next
job.

To avoid race conditions during enabling/disabling, a reference-counting
mechanism was introduced, along with a job flag that tells us whether a given
job increased the refcount. This is necessary because user space can toggle
cycle counting through a debugfs file, and a given job might still have been
in flight when cycle counting was disabled.

The main goal of the debugfs cycle-counter knob is to let tools like nvtop
or IGT's gputop toggle it at any time, to avoid wasting power when no
engine usage measurement is needed.

Also add a documentation file explaining the possible values for fdinfo's
engine keystrings and Panfrost-specific drm-curfreq-<keystr> pairs.

Signed-off-by: Adrián Larumbe <adrian.larumbe@collabora.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230929181616.2769345-3-adrian.larumbe@collabora.com

Authored by Adrián Larumbe and committed by Boris Brezillon
f11b0417 51dcc15b

15 files changed, +236 -1
Documentation/gpu/drm-usage-stats.rst (+1)

···
 -------------------------------
 
 :ref:`i915-usage-stats`
+:ref:`panfrost-usage-stats`
Documentation/gpu/panfrost.rst (new file, +40)

+.. SPDX-License-Identifier: GPL-2.0+
+
+=========================
+ drm/Panfrost Mali Driver
+=========================
+
+.. _panfrost-usage-stats:
+
+Panfrost DRM client usage stats implementation
+==============================================
+
+The drm/Panfrost driver implements the DRM client usage stats specification as
+documented in :ref:`drm-client-usage-stats`.
+
+Example of the output showing the implemented key value pairs and entirety of
+the currently possible format options:
+
+::
+
+        pos:            0
+        flags:          02400002
+        mnt_id:         27
+        ino:            531
+        drm-driver:     panfrost
+        drm-client-id:  14
+        drm-engine-fragment:    1846584880 ns
+        drm-cycles-fragment:    1424359409
+        drm-maxfreq-fragment:   799999987 Hz
+        drm-curfreq-fragment:   799999987 Hz
+        drm-engine-vertex-tiler:        71932239 ns
+        drm-cycles-vertex-tiler:        52617357
+        drm-maxfreq-vertex-tiler:       799999987 Hz
+        drm-curfreq-vertex-tiler:       799999987 Hz
+        drm-total-memory:       290 MiB
+        drm-shared-memory:      0 MiB
+        drm-active-memory:      226 MiB
+        drm-resident-memory:    36496 KiB
+        drm-purgeable-memory:   128 KiB
+
+Possible `drm-engine-` key names are: `fragment`, and `vertex-tiler`.
+`drm-curfreq-` values convey the current operating frequency for that engine.
MAINTAINERS (+1)

···
 L:      dri-devel@lists.freedesktop.org
 S:      Supported
 T:      git git://anongit.freedesktop.org/drm/drm-misc
+F:      Documentation/gpu/panfrost.rst
 F:      drivers/gpu/drm/panfrost/
 F:      include/uapi/drm/panfrost_drm.h
drivers/gpu/drm/panfrost/Makefile (+2)

···
         panfrost_perfcnt.o \
         panfrost_dump.o
 
+panfrost-$(CONFIG_DEBUG_FS) += panfrost_debugfs.o
+
 obj-$(CONFIG_DRM_PANFROST) += panfrost.o
drivers/gpu/drm/panfrost/panfrost_debugfs.c (new file, +21)

+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2023 Collabora ltd. */
+/* Copyright 2023 Amazon.com, Inc. or its affiliates. */
+
+#include <linux/debugfs.h>
+#include <linux/platform_device.h>
+#include <drm/drm_debugfs.h>
+#include <drm/drm_file.h>
+#include <drm/panfrost_drm.h>
+
+#include "panfrost_device.h"
+#include "panfrost_gpu.h"
+#include "panfrost_debugfs.h"
+
+void panfrost_debugfs_init(struct drm_minor *minor)
+{
+        struct drm_device *dev = minor->dev;
+        struct panfrost_device *pfdev = platform_get_drvdata(to_platform_device(dev->dev));
+
+        debugfs_create_atomic_t("profile", 0600, minor->debugfs_root, &pfdev->profile_mode);
+}
drivers/gpu/drm/panfrost/panfrost_debugfs.h (new file, +14)

+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2023 Collabora ltd.
+ * Copyright 2023 Amazon.com, Inc. or its affiliates.
+ */
+
+#ifndef PANFROST_DEBUGFS_H
+#define PANFROST_DEBUGFS_H
+
+#ifdef CONFIG_DEBUG_FS
+void panfrost_debugfs_init(struct drm_minor *minor);
+#endif
+
+#endif /* PANFROST_DEBUGFS_H */
drivers/gpu/drm/panfrost/panfrost_devfreq.c (+8)

···
         spin_lock_irqsave(&pfdevfreq->lock, irqflags);
 
         panfrost_devfreq_update_utilization(pfdevfreq);
+        pfdevfreq->current_frequency = status->current_frequency;
 
         status->total_time = ktime_to_ns(ktime_add(pfdevfreq->busy_time,
                                                    pfdevfreq->idle_time));
···
         struct devfreq *devfreq;
         struct thermal_cooling_device *cooling;
         struct panfrost_devfreq *pfdevfreq = &pfdev->pfdevfreq;
+        unsigned long freq = ULONG_MAX;
 
         if (pfdev->comp->num_supplies > 1) {
                 /*
···
                 DRM_DEV_ERROR(dev, "Couldn't set recommended OPP\n");
                 return ret;
         }
+
+        /* Find the fastest defined rate */
+        opp = dev_pm_opp_find_freq_floor(dev, &freq);
+        if (IS_ERR(opp))
+                return PTR_ERR(opp);
+        pfdevfreq->fast_rate = freq;
 
         dev_pm_opp_put(opp);
drivers/gpu/drm/panfrost/panfrost_devfreq.h (+3)

···
         struct devfreq_simple_ondemand_data gov_data;
         bool opp_of_table_added;
 
+        unsigned long current_frequency;
+        unsigned long fast_rate;
+
         ktime_t busy_time;
         ktime_t idle_time;
         ktime_t time_last_update;
drivers/gpu/drm/panfrost/panfrost_device.c (+2)

···
 
         spin_lock_init(&pfdev->as_lock);
 
+        spin_lock_init(&pfdev->cycle_counter.lock);
+
         err = panfrost_clk_init(pfdev);
         if (err) {
                 dev_err(pfdev->dev, "clk init failed %d\n", err);
drivers/gpu/drm/panfrost/panfrost_device.h (+13)

···
         struct list_head scheduled_jobs;
 
         struct panfrost_perfcnt *perfcnt;
+        atomic_t profile_mode;
 
         struct mutex sched_lock;
···
         struct shrinker shrinker;
 
         struct panfrost_devfreq pfdevfreq;
+
+        struct {
+                atomic_t use_count;
+                spinlock_t lock;
+        } cycle_counter;
 };
 
 struct panfrost_mmu {
···
         struct list_head list;
 };
 
+struct panfrost_engine_usage {
+        unsigned long long elapsed_ns[NUM_JOB_SLOTS];
+        unsigned long long cycles[NUM_JOB_SLOTS];
+};
+
 struct panfrost_file_priv {
         struct panfrost_device *pfdev;
 
         struct drm_sched_entity sched_entity[NUM_JOB_SLOTS];
 
         struct panfrost_mmu *mmu;
+
+        struct panfrost_engine_usage engine_usage;
 };
 
 static inline struct panfrost_device *to_panfrost_device(struct drm_device *ddev)
drivers/gpu/drm/panfrost/panfrost_drv.c (+57 -1)

···
 #include "panfrost_job.h"
 #include "panfrost_gpu.h"
 #include "panfrost_perfcnt.h"
+#include "panfrost_debugfs.h"
 
 static bool unstable_ioctls;
 module_param_unsafe(unstable_ioctls, bool, 0600);
···
         job->requirements = args->requirements;
         job->flush_id = panfrost_gpu_get_latest_flush_id(pfdev);
         job->mmu = file_priv->mmu;
+        job->engine_usage = &file_priv->engine_usage;
 
         slot = panfrost_job_get_slot(job);
···
         PANFROST_IOCTL(MADVISE, madvise, DRM_RENDER_ALLOW),
 };
 
-DEFINE_DRM_GEM_FOPS(panfrost_drm_driver_fops);
+static void panfrost_gpu_show_fdinfo(struct panfrost_device *pfdev,
+                                     struct panfrost_file_priv *panfrost_priv,
+                                     struct drm_printer *p)
+{
+        int i;
+
+        /*
+         * IMPORTANT NOTE: drm-cycles and drm-engine measurements are not
+         * accurate, as they only provide a rough estimation of the number of
+         * GPU cycles and CPU time spent in a given context. This is due to two
+         * different factors:
+         * - Firstly, we must consider the time the CPU and then the kernel
+         * takes to process the GPU interrupt, which means additional time and
+         * GPU cycles will be added in excess to the real figure.
+         * - Secondly, the pipelining done by the Job Manager (2 job slots per
+         * engine) implies there is no way to know exactly how much time each
+         * job spent on the GPU.
+         */
+
+        static const char * const engine_names[] = {
+                "fragment", "vertex-tiler", "compute-only"
+        };
+
+        BUILD_BUG_ON(ARRAY_SIZE(engine_names) != NUM_JOB_SLOTS);
+
+        for (i = 0; i < NUM_JOB_SLOTS - 1; i++) {
+                drm_printf(p, "drm-engine-%s:\t%llu ns\n",
+                           engine_names[i], panfrost_priv->engine_usage.elapsed_ns[i]);
+                drm_printf(p, "drm-cycles-%s:\t%llu\n",
+                           engine_names[i], panfrost_priv->engine_usage.cycles[i]);
+                drm_printf(p, "drm-maxfreq-%s:\t%lu Hz\n",
+                           engine_names[i], pfdev->pfdevfreq.fast_rate);
+                drm_printf(p, "drm-curfreq-%s:\t%lu Hz\n",
+                           engine_names[i], pfdev->pfdevfreq.current_frequency);
+        }
+}
+
+static void panfrost_show_fdinfo(struct drm_printer *p, struct drm_file *file)
+{
+        struct drm_device *dev = file->minor->dev;
+        struct panfrost_device *pfdev = dev->dev_private;
+
+        panfrost_gpu_show_fdinfo(pfdev, file->driver_priv, p);
+}
+
+static const struct file_operations panfrost_drm_driver_fops = {
+        .owner = THIS_MODULE,
+        DRM_GEM_FOPS,
+        .show_fdinfo = drm_show_fdinfo,
+};
 
 /*
  * Panfrost driver version:
···
         .driver_features        = DRIVER_RENDER | DRIVER_GEM | DRIVER_SYNCOBJ,
         .open                   = panfrost_open,
         .postclose              = panfrost_postclose,
+        .show_fdinfo            = panfrost_show_fdinfo,
         .ioctls                 = panfrost_drm_driver_ioctls,
         .num_ioctls             = ARRAY_SIZE(panfrost_drm_driver_ioctls),
         .fops                   = &panfrost_drm_driver_fops,
···
 
         .gem_create_object      = panfrost_gem_create_object,
         .gem_prime_import_sg_table = panfrost_gem_prime_import_sg_table,
+
+#ifdef CONFIG_DEBUG_FS
+        .debugfs_init = panfrost_debugfs_init,
+#endif
 };
 
 static int panfrost_probe(struct platform_device *pdev)
drivers/gpu/drm/panfrost/panfrost_gpu.c (+41)

···
         gpu_write(pfdev, GPU_INT_CLEAR, GPU_IRQ_MASK_ALL);
         gpu_write(pfdev, GPU_INT_MASK, GPU_IRQ_MASK_ALL);
 
+        /*
+         * All in-flight jobs should have released their cycle
+         * counter references upon reset, but let us make sure
+         */
+        if (drm_WARN_ON(pfdev->ddev, atomic_read(&pfdev->cycle_counter.use_count) != 0))
+                atomic_set(&pfdev->cycle_counter.use_count, 0);
+
         return 0;
 }
···
 
         dev_info(pfdev->dev, "shader_present=0x%0llx l2_present=0x%0llx",
                  pfdev->features.shader_present, pfdev->features.l2_present);
+}
+
+void panfrost_cycle_counter_get(struct panfrost_device *pfdev)
+{
+        if (atomic_inc_not_zero(&pfdev->cycle_counter.use_count))
+                return;
+
+        spin_lock(&pfdev->cycle_counter.lock);
+        if (atomic_inc_return(&pfdev->cycle_counter.use_count) == 1)
+                gpu_write(pfdev, GPU_CMD, GPU_CMD_CYCLE_COUNT_START);
+        spin_unlock(&pfdev->cycle_counter.lock);
+}
+
+void panfrost_cycle_counter_put(struct panfrost_device *pfdev)
+{
+        if (atomic_add_unless(&pfdev->cycle_counter.use_count, -1, 1))
+                return;
+
+        spin_lock(&pfdev->cycle_counter.lock);
+        if (atomic_dec_return(&pfdev->cycle_counter.use_count) == 0)
+                gpu_write(pfdev, GPU_CMD, GPU_CMD_CYCLE_COUNT_STOP);
+        spin_unlock(&pfdev->cycle_counter.lock);
+}
+
+unsigned long long panfrost_cycle_counter_read(struct panfrost_device *pfdev)
+{
+        u32 hi, lo;
+
+        do {
+                hi = gpu_read(pfdev, GPU_CYCLE_COUNT_HI);
+                lo = gpu_read(pfdev, GPU_CYCLE_COUNT_LO);
+        } while (hi != gpu_read(pfdev, GPU_CYCLE_COUNT_HI));
+
+        return ((u64)hi << 32) | lo;
 }
 
 void panfrost_gpu_power_on(struct panfrost_device *pfdev)
drivers/gpu/drm/panfrost/panfrost_gpu.h (+4)

···
 void panfrost_gpu_power_on(struct panfrost_device *pfdev);
 void panfrost_gpu_power_off(struct panfrost_device *pfdev);
 
+void panfrost_cycle_counter_get(struct panfrost_device *pfdev);
+void panfrost_cycle_counter_put(struct panfrost_device *pfdev);
+unsigned long long panfrost_cycle_counter_read(struct panfrost_device *pfdev);
+
 void panfrost_gpu_amlogic_quirk(struct panfrost_device *pfdev);
 
 #endif
drivers/gpu/drm/panfrost/panfrost_job.c (+24)

···
         struct panfrost_job *job = pfdev->jobs[slot][0];
 
         WARN_ON(!job);
+        if (job->is_profiled) {
+                if (job->engine_usage) {
+                        job->engine_usage->elapsed_ns[slot] +=
+                                ktime_to_ns(ktime_sub(ktime_get(), job->start_time));
+                        job->engine_usage->cycles[slot] +=
+                                panfrost_cycle_counter_read(pfdev) - job->start_cycles;
+                }
+                panfrost_cycle_counter_put(job->pfdev);
+        }
+
         pfdev->jobs[slot][0] = pfdev->jobs[slot][1];
         pfdev->jobs[slot][1] = NULL;
···
         subslot = panfrost_enqueue_job(pfdev, js, job);
         /* Don't queue the job if a reset is in progress */
         if (!atomic_read(&pfdev->reset.pending)) {
+                if (atomic_read(&pfdev->profile_mode)) {
+                        panfrost_cycle_counter_get(pfdev);
+                        job->is_profiled = true;
+                        job->start_time = ktime_get();
+                        job->start_cycles = panfrost_cycle_counter_read(pfdev);
+                }
+
                 job_write(pfdev, JS_COMMAND_NEXT(js), JS_COMMAND_START);
                 dev_dbg(pfdev->dev,
                         "JS: Submitting atom %p to js[%d][%d] with head=0x%llx AS %d",
···
          * stuck jobs. Let's make sure the PM counters stay balanced by
          * manually calling pm_runtime_put_noidle() and
          * panfrost_devfreq_record_idle() for each stuck job.
+         * Let's also make sure the cycle counting register's refcnt is
+         * kept balanced to prevent it from running forever
          */
         spin_lock(&pfdev->js->job_lock);
         for (i = 0; i < NUM_JOB_SLOTS; i++) {
                 for (j = 0; j < ARRAY_SIZE(pfdev->jobs[0]) && pfdev->jobs[i][j]; j++) {
+                        if (pfdev->jobs[i][j]->is_profiled)
+                                panfrost_cycle_counter_put(pfdev->jobs[i][j]->pfdev);
                         pm_runtime_put_noidle(pfdev->dev);
                         panfrost_devfreq_record_idle(&pfdev->pfdevfreq);
                 }
···
                         }
 
                         job_write(pfdev, JS_COMMAND(i), cmd);
+
+                        /* Jobs can outlive their file context */
+                        job->engine_usage = NULL;
                 }
         }
         spin_unlock(&pfdev->js->job_lock);
drivers/gpu/drm/panfrost/panfrost_job.h (+5)

···
 
         /* Fence to be signaled by drm-sched once its done with the job */
         struct dma_fence *render_done_fence;
+
+        struct panfrost_engine_usage *engine_usage;
+        bool is_profiled;
+        ktime_t start_time;
+        u64 start_cycles;
 };
 
 int panfrost_job_init(struct panfrost_device *pfdev);