Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'x86_urgent_for_v6.12_rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fixes from Borislav Petkov:

- Explicitly disable the TSC deadline timer when going idle to address
some CPU errata in that area

- Apply the Zenbleed fix only on AMD Zen2 on the late microcode loading
path

- Clear CPU buffers later in the NMI exit path on 32-bit, after the
registers have been restored, so that the buffers are not cleared while
the registers still contain sensitive data, for the RFDS mitigation

- Do not clobber EFLAGS.ZF with VERW on the opportunistic SYSRET exit
path on 32-bit

- Fix parsing issues of memory bandwidth specification in sysfs for
resctrl's memory bandwidth allocation feature

- Other small cleanups and improvements

* tag 'x86_urgent_for_v6.12_rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/apic: Always explicitly disarm TSC-deadline timer
x86/CPU/AMD: Only apply Zenbleed fix for Zen2 during late microcode load
x86/bugs: Use code segment selector for VERW operand
x86/entry_32: Clear CPU buffers after register restore in NMI return
x86/entry_32: Do not clobber user EFLAGS.ZF
x86/resctrl: Annotate get_mem_config() functions as __init
x86/resctrl: Avoid overflow in MB settings in bw_validate()
x86/amd_nb: Add new PCI ID for AMD family 1Ah model 20h

+47 -16
+4 -2
arch/x86/entry/entry_32.S
··· 871 871 872 872 /* Now ready to switch the cr3 */ 873 873 SWITCH_TO_USER_CR3 scratch_reg=%eax 874 + /* Clobbers ZF */ 875 + CLEAR_CPU_BUFFERS 874 876 875 877 /* 876 878 * Restore all flags except IF. (We restore IF separately because ··· 883 881 BUG_IF_WRONG_CR3 no_user_check=1 884 882 popfl 885 883 popl %eax 886 - CLEAR_CPU_BUFFERS 887 884 888 885 /* 889 886 * Return back to the vDSO, which will pop ecx and edx. ··· 1145 1144 1146 1145 /* Not on SYSENTER stack. */ 1147 1146 call exc_nmi 1148 - CLEAR_CPU_BUFFERS 1149 1147 jmp .Lnmi_return 1150 1148 1151 1149 .Lnmi_from_sysenter_stack: ··· 1165 1165 1166 1166 CHECK_AND_APPLY_ESPFIX 1167 1167 RESTORE_ALL_NMI cr3_reg=%edi pop=4 1168 + CLEAR_CPU_BUFFERS 1168 1169 jmp .Lirq_return 1169 1170 1170 1171 #ifdef CONFIG_X86_ESPFIX32 ··· 1207 1206 * 1 - orig_ax 1208 1207 */ 1209 1208 lss (1+5+6)*4(%esp), %esp # back to espfix stack 1209 + CLEAR_CPU_BUFFERS 1210 1210 jmp .Lirq_return 1211 1211 #endif 1212 1212 SYM_CODE_END(asm_exc_nmi)
+10 -1
arch/x86/include/asm/nospec-branch.h
··· 323 323 * Note: Only the memory operand variant of VERW clears the CPU buffers. 324 324 */ 325 325 .macro CLEAR_CPU_BUFFERS 326 - ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF 326 + #ifdef CONFIG_X86_64 327 + ALTERNATIVE "", "verw mds_verw_sel(%rip)", X86_FEATURE_CLEAR_CPU_BUF 328 + #else 329 + /* 330 + * In 32bit mode, the memory operand must be a %cs reference. The data 331 + * segments may not be usable (vm86 mode), and the stack segment may not 332 + * be flat (ESPFIX32). 333 + */ 334 + ALTERNATIVE "", "verw %cs:mds_verw_sel", X86_FEATURE_CLEAR_CPU_BUF 335 + #endif 327 336 .endm 328 337 329 338 #ifdef CONFIG_X86_64
+2
arch/x86/kernel/amd_nb.c
··· 44 44 #define PCI_DEVICE_ID_AMD_19H_M70H_DF_F4 0x14f4 45 45 #define PCI_DEVICE_ID_AMD_19H_M78H_DF_F4 0x12fc 46 46 #define PCI_DEVICE_ID_AMD_1AH_M00H_DF_F4 0x12c4 47 + #define PCI_DEVICE_ID_AMD_1AH_M20H_DF_F4 0x16fc 47 48 #define PCI_DEVICE_ID_AMD_1AH_M60H_DF_F4 0x124c 48 49 #define PCI_DEVICE_ID_AMD_1AH_M70H_DF_F4 0x12bc 49 50 #define PCI_DEVICE_ID_AMD_MI200_DF_F4 0x14d4 ··· 128 127 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M78H_DF_F4) }, 129 128 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) }, 130 129 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M00H_DF_F4) }, 130 + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M20H_DF_F4) }, 131 131 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M60H_DF_F4) }, 132 132 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M70H_DF_F4) }, 133 133 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI200_DF_F4) },
+13 -1
arch/x86/kernel/apic/apic.c
··· 440 440 v = apic_read(APIC_LVTT); 441 441 v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); 442 442 apic_write(APIC_LVTT, v); 443 - apic_write(APIC_TMICT, 0); 443 + 444 + /* 445 + * Setting APIC_LVT_MASKED (above) should be enough to tell 446 + * the hardware that this timer will never fire. But AMD 447 + * erratum 411 and some Intel CPU behavior circa 2024 say 448 + * otherwise. Time for belt and suspenders programming: mask 449 + * the timer _and_ zero the counter registers: 450 + */ 451 + if (v & APIC_LVT_TIMER_TSCDEADLINE) 452 + wrmsrl(MSR_IA32_TSC_DEADLINE, 0); 453 + else 454 + apic_write(APIC_TMICT, 0); 455 + 444 456 return 0; 445 457 } 446 458
+2 -1
arch/x86/kernel/cpu/amd.c
··· 1202 1202 if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) 1203 1203 return; 1204 1204 1205 - on_each_cpu(zenbleed_check_cpu, NULL, 1); 1205 + if (cpu_feature_enabled(X86_FEATURE_ZEN2)) 1206 + on_each_cpu(zenbleed_check_cpu, NULL, 1); 1206 1207 }
+2 -2
arch/x86/kernel/cpu/resctrl/core.c
··· 207 207 return false; 208 208 } 209 209 210 - static bool __get_mem_config_intel(struct rdt_resource *r) 210 + static __init bool __get_mem_config_intel(struct rdt_resource *r) 211 211 { 212 212 struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); 213 213 union cpuid_0x10_3_eax eax; ··· 241 241 return true; 242 242 } 243 243 244 - static bool __rdt_get_mem_config_amd(struct rdt_resource *r) 244 + static __init bool __rdt_get_mem_config_amd(struct rdt_resource *r) 245 245 { 246 246 struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); 247 247 u32 eax, ebx, ecx, edx, subleaf;
+14 -9
arch/x86/kernel/cpu/resctrl/ctrlmondata.c
··· 29 29 * hardware. The allocated bandwidth percentage is rounded to the next 30 30 * control step available on the hardware. 31 31 */ 32 - static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r) 32 + static bool bw_validate(char *buf, u32 *data, struct rdt_resource *r) 33 33 { 34 - unsigned long bw; 35 34 int ret; 35 + u32 bw; 36 36 37 37 /* 38 38 * Only linear delay values is supported for current Intel SKUs. ··· 42 42 return false; 43 43 } 44 44 45 - ret = kstrtoul(buf, 10, &bw); 45 + ret = kstrtou32(buf, 10, &bw); 46 46 if (ret) { 47 - rdt_last_cmd_printf("Non-decimal digit in MB value %s\n", buf); 47 + rdt_last_cmd_printf("Invalid MB value %s\n", buf); 48 48 return false; 49 49 } 50 50 51 - if ((bw < r->membw.min_bw || bw > r->default_ctrl) && 52 - !is_mba_sc(r)) { 53 - rdt_last_cmd_printf("MB value %ld out of range [%d,%d]\n", bw, 54 - r->membw.min_bw, r->default_ctrl); 51 + /* Nothing else to do if software controller is enabled. */ 52 + if (is_mba_sc(r)) { 53 + *data = bw; 54 + return true; 55 + } 56 + 57 + if (bw < r->membw.min_bw || bw > r->default_ctrl) { 58 + rdt_last_cmd_printf("MB value %u out of range [%d,%d]\n", 59 + bw, r->membw.min_bw, r->default_ctrl); 55 60 return false; 56 61 } 57 62 ··· 70 65 struct resctrl_staged_config *cfg; 71 66 u32 closid = data->rdtgrp->closid; 72 67 struct rdt_resource *r = s->res; 73 - unsigned long bw_val; 68 + u32 bw_val; 74 69 75 70 cfg = &d->staged_config[s->conf_type]; 76 71 if (cfg->have_new_ctrl) {