Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'x86-urgent-2024-08-04' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fixes from Thomas Gleixner:

- Prevent a deadlock on cpu_hotplug_lock in the aperf/mperf driver.

A recent change in the ACPI code, which consolidated code paths, moved
the invocation of init_freq_invariance_cppc() into a CPU hotplug
handler. The first invocation on AMD CPUs ends up enabling a static
branch, which deadlocks because the static branch enable tries to
acquire cpu_hotplug_lock, but that lock is already held for write by
the hotplug machinery.

Use static_branch_enable_cpuslocked() instead, and take the hotplug
lock for read in the Intel code path, which is invoked from the
architecture code outside of the CPU hotplug operations. A userspace
model of the deadlock follows below.
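
A minimal userspace model of the deadlock, assuming nothing beyond
what the message above states: cpu_hotplug_lock (a percpu rwsem in the
kernel) is modeled with a pthread read-write lock, and the two *_model
functions are hypothetical stand-ins for static_branch_enable() and
its _cpuslocked variant.

#include <pthread.h>
#include <stdio.h>

/* Models cpu_hotplug_lock; the ordering problem is the same. */
static pthread_rwlock_t cpu_hotplug_lock_model = PTHREAD_RWLOCK_INITIALIZER;

/* Models static_branch_enable(): it takes the lock for read internally. */
static void static_branch_enable_model(void)
{
	/* Blocks (or fails with EDEADLK) when the calling thread
	 * already write-holds the lock. */
	pthread_rwlock_rdlock(&cpu_hotplug_lock_model);
	/* ... patch the jump sites ... */
	pthread_rwlock_unlock(&cpu_hotplug_lock_model);
}

/* Models static_branch_enable_cpuslocked(): the caller guarantees the
 * lock is held, so there is no nested acquisition. */
static void static_branch_enable_cpuslocked_model(void)
{
	/* ... patch the jump sites ... */
}

int main(void)
{
	/* The hotplug machinery write-holds the lock around callbacks. */
	pthread_rwlock_wrlock(&cpu_hotplug_lock_model);

	/* static_branch_enable_model();  <- would self-deadlock here */
	static_branch_enable_cpuslocked_model();	/* safe */

	pthread_rwlock_unlock(&cpu_hotplug_lock_model);
	puts("cpuslocked variant completed without deadlock");
	return 0;
}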

- Fix the number of reserved bits in the sev_config structure bitfield
so that the bitfield does not exceed 64 bits.
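
A sketch of the constraint: only use_cas and __reserved come from the
patch; the other member names are placeholders for the pre-existing
bits. The member widths must sum to exactly 64, so adding the one-bit
use_cas requires shrinking __reserved from 62 to 61.

#include <stdint.h>
#include <assert.h>

struct sev_config_sketch {
	uint64_t active			: 1,	/* placeholder member */
		 ghcbs_initialized	: 1,	/* placeholder member */
		 use_cas		: 1,	/* the newly added bit */
		 __reserved		: 61;	/* 1 + 1 + 1 + 61 == 64 */
};

int main(void)
{
	/* With __reserved : 62 the members would sum to 65 bits and the
	 * structure would spill into a second 64-bit unit. */
	static_assert(sizeof(struct sev_config_sketch) == 8,
		      "bitfield fits in 64 bits");
	return 0;
}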

- Add missing Zen5 model numbers

- Fix the alignment assumptions of pti_clone_pgtable() and
clone_entry_text() on 32-bit:

The code assumes PMD-aligned code sections, but on 32-bit the kernel
entry text is not PMD aligned. So depending on the code size and
location, which are configuration and compiler dependent, the entry
text can cross a PMD boundary. As the start is not PMD aligned, adding
PMD size to the start address produces an address beyond the end
address, which results in partially mapped entry code for user space.
That causes endless recursion on the first entry from user space
(usually a #PF).

Cure this by aligning the start address in the addition so that it
ends up at the next PMD start address, as the worked example below
illustrates.
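
A minimal userspace sketch of the arithmetic: the start address is
hypothetical, and round_up() is reimplemented here with the same
result as the kernel's helper for power-of-two alignments.

#include <stdio.h>
#include <stdint.h>

#define PMD_SIZE	0x200000UL	/* 2 MiB, the usual x86 PMD size */

/* Same result as the kernel's round_up() for power-of-two alignments. */
#define round_up(x, a)	(((x) + (a) - 1) & ~((uint64_t)(a) - 1))

int main(void)
{
	uint64_t addr = 0x1234000;	/* hypothetical unaligned start */

	/* Old behaviour: jump a full PMD from the unaligned start; this
	 * lands at another unaligned address and can overshoot 'end'. */
	printf("addr + PMD_SIZE         = %#llx\n",
	       (unsigned long long)(addr + PMD_SIZE));

	/* Fixed behaviour: advance to the next PMD boundary instead. */
	printf("round_up(addr + 1, PMD) = %#llx\n",
	       (unsigned long long)round_up(addr + 1, PMD_SIZE));
	return 0;
}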

clone_entry_text() enforces a PMD mapping, but on 32-bit the tail
might eventually be PTE mapped, which causes a mapping failure because
the PMD for the tail is not a large page mapping. Use
PTI_LEVEL_KERNEL_IMAGE for the clone invocation, which resolves to PTE
on 32-bit and PMD on 64-bit.
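
For reference, a sketch of how that selector resolves, mirroring the
description above (the exact definition lives in arch/x86/mm/pti.c;
this is a reconstruction, not quoted from the patch):

/* PMD-sized clones on 64-bit, PTE-sized clones on 32-bit. */
#ifdef CONFIG_X86_64
# define PTI_LEVEL_KERNEL_IMAGE	PTI_CLONE_PMD
#else
# define PTI_LEVEL_KERNEL_IMAGE	PTI_CLONE_PTE
#endif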

- Zero the 8-byte case for get_user() on range check failure on 32-bit

The recent consolidation of the 8-byte get_user() case broke the
zeroing in the failure case again. Reestablish it by clearing ECX
before the range check, not afterwards, as the clearing obviously
cannot be reached when the range check fails.
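
A C analogue of the control flow (a sketch; the real code is assembly
in arch/x86/lib/getuser.S, and the range-check constant below is a
hypothetical stand-in for the real limit):

#include <stdint.h>
#include <stdio.h>

#define EFAULT 14

/* The contract: the 64-bit output must be zero when the call fails.
 * Zeroing placed after the range check is dead code on the failure
 * path, because the check returns (in the real code: jumps) first. */
static int get_user_8_sketch(uint64_t uaddr, uint64_t *out)
{
	*out = 0;			/* the "xor %ecx,%ecx" before the check */

	if (uaddr > 0xbfffffffULL)	/* hypothetical user-range limit */
		return -EFAULT;		/* failure path: *out is already 0 */

	/* ... the actual user-space loads would happen here ... */
	return 0;
}

int main(void)
{
	uint64_t val = 0xdeadbeefdeadbeefULL;

	if (get_user_8_sketch(0xffffffffULL, &val) < 0)
		printf("failed, val zeroed: %#llx\n", (unsigned long long)val);
	return 0;
}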

* tag 'x86-urgent-2024-08-04' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/uaccess: Zero the 8-byte get_range case on failure on 32-bit
x86/mm: Fix pti_clone_entry_text() for i386
x86/mm: Fix pti_clone_pgtable() alignment assumption
x86/setup: Parse the builtin command line before merging
x86/CPU/AMD: Add models 0x60-0x6f to the Zen5 range
x86/sev: Fix __reserved field in sev_config
x86/aperfmperf: Fix deadlock on cpu_hotplug_lock

8 files changed, +36 -17
arch/x86/coco/sev/core.c (+1 -1)

@@ -163,7 +163,7 @@
 	 */
 	    use_cas		: 1,
 
-	    __reserved		: 62;
+	    __reserved		: 61;
 };
 
 static struct sev_config sev_cfg __read_mostly;
arch/x86/include/asm/cmdline.h (+4)

@@ -2,6 +2,10 @@
 #ifndef _ASM_X86_CMDLINE_H
 #define _ASM_X86_CMDLINE_H
 
+#include <asm/setup.h>
+
+extern char builtin_cmdline[COMMAND_LINE_SIZE];
+
 int cmdline_find_option_bool(const char *cmdline_ptr, const char *option);
 int cmdline_find_option(const char *cmdline_ptr, const char *option,
 			char *buffer, int bufsize);
arch/x86/kernel/cpu/amd.c (+1 -1)

@@ -462,7 +462,7 @@
 	switch (c->x86_model) {
 	case 0x00 ... 0x2f:
 	case 0x40 ... 0x4f:
-	case 0x70 ... 0x7f:
+	case 0x60 ... 0x7f:
 		setup_force_cpu_cap(X86_FEATURE_ZEN5);
 		break;
 	default:
arch/x86/kernel/cpu/aperfmperf.c (+4 -2)

@@ -306,7 +306,7 @@
 		WARN_ON_ONCE(1);
 		return;
 	}
-	static_branch_enable(&arch_scale_freq_key);
+	static_branch_enable_cpuslocked(&arch_scale_freq_key);
 	register_freq_invariance_syscore_ops();
 	pr_info("Estimated ratio of average max frequency by base frequency (times 1024): %llu\n", arch_max_freq_ratio);
 }
@@ -323,8 +323,10 @@
 	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
 		return;
 
-	if (intel_set_max_freq_ratio())
+	if (intel_set_max_freq_ratio()) {
+		guard(cpus_read_lock)();
 		freq_invariance_enable();
+	}
 }
 
 static void disable_freq_invariance_workfn(struct work_struct *work)
arch/x86/kernel/setup.c (+1 -1)

@@ -164,7 +164,7 @@
 
 static char __initdata command_line[COMMAND_LINE_SIZE];
 #ifdef CONFIG_CMDLINE_BOOL
-static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE;
+char builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE;
 bool builtin_cmdline_added __ro_after_init;
 #endif
 
arch/x86/lib/cmdline.c (+18 -7)

@@ -207,18 +207,29 @@
 
 int cmdline_find_option_bool(const char *cmdline, const char *option)
 {
-	if (IS_ENABLED(CONFIG_CMDLINE_BOOL))
-		WARN_ON_ONCE(!builtin_cmdline_added);
+	int ret;
 
-	return __cmdline_find_option_bool(cmdline, COMMAND_LINE_SIZE, option);
+	ret = __cmdline_find_option_bool(cmdline, COMMAND_LINE_SIZE, option);
+	if (ret > 0)
+		return ret;
+
+	if (IS_ENABLED(CONFIG_CMDLINE_BOOL) && !builtin_cmdline_added)
+		return __cmdline_find_option_bool(builtin_cmdline, COMMAND_LINE_SIZE, option);
+
+	return ret;
 }
 
 int cmdline_find_option(const char *cmdline, const char *option, char *buffer,
 			int bufsize)
 {
-	if (IS_ENABLED(CONFIG_CMDLINE_BOOL))
-		WARN_ON_ONCE(!builtin_cmdline_added);
+	int ret;
 
-	return __cmdline_find_option(cmdline, COMMAND_LINE_SIZE, option,
-				     buffer, bufsize);
+	ret = __cmdline_find_option(cmdline, COMMAND_LINE_SIZE, option, buffer, bufsize);
+	if (ret > 0)
+		return ret;
+
+	if (IS_ENABLED(CONFIG_CMDLINE_BOOL) && !builtin_cmdline_added)
+		return __cmdline_find_option(builtin_cmdline, COMMAND_LINE_SIZE, option, buffer, bufsize);
+
+	return ret;
 }
arch/x86/lib/getuser.S (+3 -1)

@@ -88,12 +88,14 @@
 EXPORT_SYMBOL(__get_user_4)
 
 SYM_FUNC_START(__get_user_8)
+#ifndef CONFIG_X86_64
+	xor %ecx,%ecx
+#endif
 	check_range size=8
 	ASM_STAC
 #ifdef CONFIG_X86_64
 	UACCESS movq (%_ASM_AX),%rdx
 #else
-	xor %ecx,%ecx
 	UACCESS movl (%_ASM_AX),%edx
 	UACCESS movl 4(%_ASM_AX),%ecx
 #endif
arch/x86/mm/pti.c (+4 -4)

@@ -374,13 +374,13 @@
 			 */
 			*target_pmd = *pmd;
 
-			addr += PMD_SIZE;
+			addr = round_up(addr + 1, PMD_SIZE);
 
 		} else if (level == PTI_CLONE_PTE) {
 
 			/* Walk the page-table down to the pte level */
 			pte = pte_offset_kernel(pmd, addr);
 			if (pte_none(*pte)) {
-				addr += PAGE_SIZE;
+				addr = round_up(addr + 1, PAGE_SIZE);
 				continue;
 			}
@@ -401,7 +401,7 @@
 			/* Clone the PTE */
 			*target_pte = *pte;
 
-			addr += PAGE_SIZE;
+			addr = round_up(addr + 1, PAGE_SIZE);
 
 		} else {
 			BUG();
@@ -496,7 +496,7 @@
 {
 	pti_clone_pgtable((unsigned long) __entry_text_start,
 			  (unsigned long) __entry_text_end,
-			  PTI_CLONE_PMD);
+			  PTI_LEVEL_KERNEL_IMAGE);
 }
 
 /*