Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

drm/nouveau: Add DRM_IOCTL_NOUVEAU_GET_ZCULL_INFO

Add kernel-side support for using the zcull hardware in NVIDIA GPUs.
zcull aims to reduce memory bandwidth usage by using an early approximate
depth test, similar to hierarchical Z on an AMD card.

Add a new ioctl that exposes zcull information that has been read
from the hardware. Userspace uses each of these parameters either
in a heuristic for determining zcull region parameters or in the
calculation of a buffer size.

It appears the hardware hasn't changed its structure for these
values since FERMI_C (circa 2011), so the assumption is that it
won't change on us too quickly, and that these values are therefore
reasonable to include in UAPI.

This bypasses the nvif layer and instead accesses nvkm_gr directly,
which mirrors existing usage of nvkm_gr_units(). There is no nvif
object for nvkm_gr yet, and adding one is not trivial.

Signed-off-by: Mel Henning <mhenning@darkrefraction.com>
Link: https://patch.msgid.link/20260219-zcull3-v3-2-dbe6a716f104@darkrefraction.com
Signed-off-by: Danilo Krummrich <dakr@kernel.org>

authored by

Mel Henning and committed by
Danilo Krummrich
196b2b95 27e125e5

+97
+29
drivers/gpu/drm/nouveau/nouveau_abi16.c
··· 334 334 } 335 335 336 336 int 337 + nouveau_abi16_ioctl_get_zcull_info(ABI16_IOCTL_ARGS) 338 + { 339 + struct nouveau_drm *drm = nouveau_drm(dev); 340 + struct nvkm_gr *gr = nvxx_gr(drm); 341 + struct drm_nouveau_get_zcull_info *out = data; 342 + 343 + if (gr->has_zcull_info) { 344 + const struct nvkm_gr_zcull_info *i = &gr->zcull_info; 345 + 346 + out->width_align_pixels = i->width_align_pixels; 347 + out->height_align_pixels = i->height_align_pixels; 348 + out->pixel_squares_by_aliquots = i->pixel_squares_by_aliquots; 349 + out->aliquot_total = i->aliquot_total; 350 + out->zcull_region_byte_multiplier = i->zcull_region_byte_multiplier; 351 + out->zcull_region_header_size = i->zcull_region_header_size; 352 + out->zcull_subregion_header_size = i->zcull_subregion_header_size; 353 + out->subregion_count = i->subregion_count; 354 + out->subregion_width_align_pixels = i->subregion_width_align_pixels; 355 + out->subregion_height_align_pixels = i->subregion_height_align_pixels; 356 + out->ctxsw_size = i->ctxsw_size; 357 + out->ctxsw_align = i->ctxsw_align; 358 + 359 + return 0; 360 + } else { 361 + return -ENOTTY; 362 + } 363 + } 364 + 365 + int 337 366 nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS) 338 367 { 339 368 struct drm_nouveau_channel_alloc *init = data;
+1
drivers/gpu/drm/nouveau/nouveau_abi16.h
··· 6 6 struct drm_device *dev, void *data, struct drm_file *file_priv 7 7 8 8 int nouveau_abi16_ioctl_getparam(ABI16_IOCTL_ARGS); 9 + int nouveau_abi16_ioctl_get_zcull_info(ABI16_IOCTL_ARGS); 9 10 int nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS); 10 11 int nouveau_abi16_ioctl_channel_free(ABI16_IOCTL_ARGS); 11 12 int nouveau_abi16_ioctl_grobj_alloc(ABI16_IOCTL_ARGS);
+1
drivers/gpu/drm/nouveau/nouveau_drm.c
··· 1304 1304 DRM_IOCTL_DEF_DRV(NOUVEAU_GROBJ_ALLOC, nouveau_abi16_ioctl_grobj_alloc, DRM_RENDER_ALLOW), 1305 1305 DRM_IOCTL_DEF_DRV(NOUVEAU_NOTIFIEROBJ_ALLOC, nouveau_abi16_ioctl_notifierobj_alloc, DRM_RENDER_ALLOW), 1306 1306 DRM_IOCTL_DEF_DRV(NOUVEAU_GPUOBJ_FREE, nouveau_abi16_ioctl_gpuobj_free, DRM_RENDER_ALLOW), 1307 + DRM_IOCTL_DEF_DRV(NOUVEAU_GET_ZCULL_INFO, nouveau_abi16_ioctl_get_zcull_info, DRM_RENDER_ALLOW), 1307 1308 DRM_IOCTL_DEF_DRV(NOUVEAU_SVM_INIT, nouveau_svmm_init, DRM_RENDER_ALLOW), 1308 1309 DRM_IOCTL_DEF_DRV(NOUVEAU_SVM_BIND, nouveau_svmm_bind, DRM_RENDER_ALLOW), 1309 1310 DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_NEW, nouveau_gem_ioctl_new, DRM_RENDER_ALLOW),
+66
include/uapi/drm/nouveau_drm.h
··· 432 432 __u64 push_ptr; 433 433 }; 434 434 435 + struct drm_nouveau_get_zcull_info { 436 + /** 437 + * @width_align_pixels: required alignment for region widths, in pixels 438 + * (typically #TPC's * 16). 439 + */ 440 + __u32 width_align_pixels; 441 + /** 442 + * @height_align_pixels: required alignment for region heights, in 443 + * pixels (typically 32). 444 + */ 445 + __u32 height_align_pixels; 446 + /** 447 + * @pixel_squares_by_aliquots: the pixel area covered by an aliquot 448 + * (typically #Zcull_banks * 16 * 16). 449 + */ 450 + __u32 pixel_squares_by_aliquots; 451 + /** 452 + * @aliquot_total: the total aliquot pool available in hardware 453 + */ 454 + __u32 aliquot_total; 455 + /** 456 + * @zcull_region_byte_multiplier: the size of an aliquot in bytes, which 457 + * is used for save/restore operations on a region 458 + */ 459 + __u32 zcull_region_byte_multiplier; 460 + /** 461 + * @zcull_region_header_size: the region header size in bytes, which is 462 + * used for save/restore operations on a region 463 + */ 464 + __u32 zcull_region_header_size; 465 + /** 466 + * @zcull_subregion_header_size: the subregion header size in bytes, 467 + * which is used for save/restore operations on a region 468 + */ 469 + __u32 zcull_subregion_header_size; 470 + /** 471 + * @subregion_count: the total number of subregions the hardware 472 + * supports 473 + */ 474 + __u32 subregion_count; 475 + /** 476 + * @subregion_width_align_pixels: required alignment for subregion 477 + * widths, in pixels (typically #TPC's * 16). 478 + */ 479 + __u32 subregion_width_align_pixels; 480 + /** 481 + * @subregion_height_align_pixels: required alignment for subregion 482 + * heights, in pixels 483 + */ 484 + __u32 subregion_height_align_pixels; 485 + 486 + /** 487 + * @ctxsw_size: the size, in bytes, of a zcull context switching region. 488 + * Will be zero if the kernel does not support zcull context switching. 
489 + */ 490 + __u32 ctxsw_size; 491 + /** 492 + * @ctxsw_align: the alignment, in bytes, of a zcull context switching 493 + * region 494 + */ 495 + __u32 ctxsw_align; 496 + }; 497 + 435 498 #define DRM_NOUVEAU_GETPARAM 0x00 436 499 #define DRM_NOUVEAU_SETPARAM 0x01 /* deprecated */ 437 500 #define DRM_NOUVEAU_CHANNEL_ALLOC 0x02 ··· 508 445 #define DRM_NOUVEAU_VM_INIT 0x10 509 446 #define DRM_NOUVEAU_VM_BIND 0x11 510 447 #define DRM_NOUVEAU_EXEC 0x12 448 + #define DRM_NOUVEAU_GET_ZCULL_INFO 0x13 511 449 #define DRM_NOUVEAU_GEM_NEW 0x40 512 450 #define DRM_NOUVEAU_GEM_PUSHBUF 0x41 513 451 #define DRM_NOUVEAU_GEM_CPU_PREP 0x42 ··· 577 513 #define DRM_IOCTL_NOUVEAU_VM_INIT DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_VM_INIT, struct drm_nouveau_vm_init) 578 514 #define DRM_IOCTL_NOUVEAU_VM_BIND DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_VM_BIND, struct drm_nouveau_vm_bind) 579 515 #define DRM_IOCTL_NOUVEAU_EXEC DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_EXEC, struct drm_nouveau_exec) 516 + 517 + #define DRM_IOCTL_NOUVEAU_GET_ZCULL_INFO DRM_IOR (DRM_COMMAND_BASE + DRM_NOUVEAU_GET_ZCULL_INFO, struct drm_nouveau_get_zcull_info) 580 518 #if defined(__cplusplus) 581 519 } 582 520 #endif