Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

x86: move ZMM exclusion list into CPU feature flag

Lift zmm_exclusion_list out of aesni-intel_glue.c into the x86 CPU setup
code (arch/x86/kernel/cpu/intel.c), and add a new x86 CPU feature flag
X86_FEATURE_PREFER_YMM that is set when the CPU is on this list.

This allows other code in arch/x86/, such as the CRC library code, to
apply the same exclusion list when deciding whether to execute 256-bit
or 512-bit optimized functions.

Note that full AVX-512 support, including ZMM registers, is still exposed
to userspace and is still supported for in-kernel use. This flag just
indicates whether in-kernel code should prefer to use YMM registers.

Acked-by: Ard Biesheuvel <ardb@kernel.org>
Acked-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: "Martin K. Petersen" <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20250210174540.161705-2-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@google.com>

+24 -21
+1 -21
arch/x86/crypto/aesni-intel_glue.c
···
1536 1536         AES_GCM_KEY_AVX10_SIZE, 800);
1537 1537 #endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */
1538 1538
1539 - /*
1540 -  * This is a list of CPU models that are known to suffer from downclocking when
1541 -  * zmm registers (512-bit vectors) are used. On these CPUs, the AES mode
1542 -  * implementations with zmm registers won't be used by default. Implementations
1543 -  * with ymm registers (256-bit vectors) will be used by default instead.
1544 -  */
1545 - static const struct x86_cpu_id zmm_exclusion_list[] = {
1546 -     X86_MATCH_VFM(INTEL_SKYLAKE_X, 0),
1547 -     X86_MATCH_VFM(INTEL_ICELAKE_X, 0),
1548 -     X86_MATCH_VFM(INTEL_ICELAKE_D, 0),
1549 -     X86_MATCH_VFM(INTEL_ICELAKE, 0),
1550 -     X86_MATCH_VFM(INTEL_ICELAKE_L, 0),
1551 -     X86_MATCH_VFM(INTEL_ICELAKE_NNPI, 0),
1552 -     X86_MATCH_VFM(INTEL_TIGERLAKE_L, 0),
1553 -     X86_MATCH_VFM(INTEL_TIGERLAKE, 0),
1554 -     /* Allow Rocket Lake and later, and Sapphire Rapids and later. */
1555 -     /* Also allow AMD CPUs (starting with Zen 4, the first with AVX-512). */
1556 -     {},
1557 - };
1558 -
1559 1539 static int __init register_avx_algs(void)
1560 1540 {
1561 1541     int err;
···
1580 1600     if (err)
1581 1601         return err;
1582 1602
1583 -     if (x86_match_cpu(zmm_exclusion_list)) {
1603 +     if (boot_cpu_has(X86_FEATURE_PREFER_YMM)) {
1584 1604         int i;
1585 1605
1586 1606         aes_xts_alg_vaes_avx10_512.base.cra_priority = 1;
+1
arch/x86/include/asm/cpufeatures.h
···
483 483 #define X86_FEATURE_AMD_FAST_CPPC (21*32 + 5) /* Fast CPPC */
484 484 #define X86_FEATURE_AMD_HETEROGENEOUS_CORES (21*32 + 6) /* Heterogeneous Core Topology */
485 485 #define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32 + 7) /* Workload Classification */
486 + #define X86_FEATURE_PREFER_YMM (21*32 + 8) /* Avoid ZMM registers due to downclocking */
486 487
487 488 /*
488 489  * BUG word(s)
+22
arch/x86/kernel/cpu/intel.c
···
521 521     wrmsrl(MSR_MISC_FEATURES_ENABLES, msr);
522 522 }
523 523
524 + /*
525 +  * This is a list of Intel CPUs that are known to suffer from downclocking when
526 +  * ZMM registers (512-bit vectors) are used. On these CPUs, when the kernel
527 +  * executes SIMD-optimized code such as cryptography functions or CRCs, it
528 +  * should prefer 256-bit (YMM) code to 512-bit (ZMM) code.
529 +  */
530 + static const struct x86_cpu_id zmm_exclusion_list[] = {
531 +     X86_MATCH_VFM(INTEL_SKYLAKE_X, 0),
532 +     X86_MATCH_VFM(INTEL_ICELAKE_X, 0),
533 +     X86_MATCH_VFM(INTEL_ICELAKE_D, 0),
534 +     X86_MATCH_VFM(INTEL_ICELAKE, 0),
535 +     X86_MATCH_VFM(INTEL_ICELAKE_L, 0),
536 +     X86_MATCH_VFM(INTEL_ICELAKE_NNPI, 0),
537 +     X86_MATCH_VFM(INTEL_TIGERLAKE_L, 0),
538 +     X86_MATCH_VFM(INTEL_TIGERLAKE, 0),
539 +     /* Allow Rocket Lake and later, and Sapphire Rapids and later. */
540 +     {},
541 + };
542 +
524 543 static void init_intel(struct cpuinfo_x86 *c)
525 544 {
526 545     early_init_intel(c);
···
619 600         strcpy(c->x86_model_id, p);
620 601     }
621 602 #endif
603 +
604 +     if (x86_match_cpu(zmm_exclusion_list))
605 +         set_cpu_cap(c, X86_FEATURE_PREFER_YMM);
622 606
623 607     /* Work around errata */
624 608     srat_detect_node(c);