Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'x86-urgent-2025-05-17' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull misc x86 fixes from Ingo Molnar:

- Fix SEV-SNP kdump bugs

- Update the email address of Alexey Makhalov in MAINTAINERS

- Add the CPU feature flag for the Zen6 microarchitecture

- Fix typo in system message

* tag 'x86-urgent-2025-05-17' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/mm: Remove duplicated word in warning message
x86/CPU/AMD: Add X86_FEATURE_ZEN6
x86/sev: Make sure pages are not skipped during kdump
x86/sev: Do not touch VMSA pages during SNP guest memory kdump
MAINTAINERS: Update Alexey Makhalov's email address
x86/sev: Fix operator precedence in GHCB_MSR_VMPL_REQ_LEVEL macro

+176 -96
+3 -3
MAINTAINERS
··· 18439 18439 PARAVIRT_OPS INTERFACE 18440 18440 M: Juergen Gross <jgross@suse.com> 18441 18441 R: Ajay Kaher <ajay.kaher@broadcom.com> 18442 - R: Alexey Makhalov <alexey.amakhalov@broadcom.com> 18442 + R: Alexey Makhalov <alexey.makhalov@broadcom.com> 18443 18443 R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com> 18444 18444 L: virtualization@lists.linux.dev 18445 18445 L: x86@kernel.org ··· 25924 25924 25925 25925 VMWARE HYPERVISOR INTERFACE 25926 25926 M: Ajay Kaher <ajay.kaher@broadcom.com> 25927 - M: Alexey Makhalov <alexey.amakhalov@broadcom.com> 25927 + M: Alexey Makhalov <alexey.makhalov@broadcom.com> 25928 25928 R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com> 25929 25929 L: virtualization@lists.linux.dev 25930 25930 L: x86@kernel.org ··· 25952 25952 VMWARE VIRTUAL PTP CLOCK DRIVER 25953 25953 M: Nick Shi <nick.shi@broadcom.com> 25954 25954 R: Ajay Kaher <ajay.kaher@broadcom.com> 25955 - R: Alexey Makhalov <alexey.amakhalov@broadcom.com> 25955 + R: Alexey Makhalov <alexey.makhalov@broadcom.com> 25956 25956 R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com> 25957 25957 L: netdev@vger.kernel.org 25958 25958 S: Supported
+165 -90
arch/x86/coco/sev/core.c
··· 959 959 set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE); 960 960 } 961 961 962 + static int vmgexit_ap_control(u64 event, struct sev_es_save_area *vmsa, u32 apic_id) 963 + { 964 + bool create = event != SVM_VMGEXIT_AP_DESTROY; 965 + struct ghcb_state state; 966 + unsigned long flags; 967 + struct ghcb *ghcb; 968 + int ret = 0; 969 + 970 + local_irq_save(flags); 971 + 972 + ghcb = __sev_get_ghcb(&state); 973 + 974 + vc_ghcb_invalidate(ghcb); 975 + 976 + if (create) 977 + ghcb_set_rax(ghcb, vmsa->sev_features); 978 + 979 + ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_CREATION); 980 + ghcb_set_sw_exit_info_1(ghcb, 981 + ((u64)apic_id << 32) | 982 + ((u64)snp_vmpl << 16) | 983 + event); 984 + ghcb_set_sw_exit_info_2(ghcb, __pa(vmsa)); 985 + 986 + sev_es_wr_ghcb_msr(__pa(ghcb)); 987 + VMGEXIT(); 988 + 989 + if (!ghcb_sw_exit_info_1_is_valid(ghcb) || 990 + lower_32_bits(ghcb->save.sw_exit_info_1)) { 991 + pr_err("SNP AP %s error\n", (create ? "CREATE" : "DESTROY")); 992 + ret = -EINVAL; 993 + } 994 + 995 + __sev_put_ghcb(&state); 996 + 997 + local_irq_restore(flags); 998 + 999 + return ret; 1000 + } 1001 + 1002 + static int snp_set_vmsa(void *va, void *caa, int apic_id, bool make_vmsa) 1003 + { 1004 + int ret; 1005 + 1006 + if (snp_vmpl) { 1007 + struct svsm_call call = {}; 1008 + unsigned long flags; 1009 + 1010 + local_irq_save(flags); 1011 + 1012 + call.caa = this_cpu_read(svsm_caa); 1013 + call.rcx = __pa(va); 1014 + 1015 + if (make_vmsa) { 1016 + /* Protocol 0, Call ID 2 */ 1017 + call.rax = SVSM_CORE_CALL(SVSM_CORE_CREATE_VCPU); 1018 + call.rdx = __pa(caa); 1019 + call.r8 = apic_id; 1020 + } else { 1021 + /* Protocol 0, Call ID 3 */ 1022 + call.rax = SVSM_CORE_CALL(SVSM_CORE_DELETE_VCPU); 1023 + } 1024 + 1025 + ret = svsm_perform_call_protocol(&call); 1026 + 1027 + local_irq_restore(flags); 1028 + } else { 1029 + /* 1030 + * If the kernel runs at VMPL0, it can change the VMSA 1031 + * bit for a page using the RMPADJUST instruction. 
1032 + * However, for the instruction to succeed it must 1033 + * target the permissions of a lesser privileged (higher 1034 + * numbered) VMPL level, so use VMPL1. 1035 + */ 1036 + u64 attrs = 1; 1037 + 1038 + if (make_vmsa) 1039 + attrs |= RMPADJUST_VMSA_PAGE_BIT; 1040 + 1041 + ret = rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs); 1042 + } 1043 + 1044 + return ret; 1045 + } 1046 + 1047 + static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa, int apic_id) 1048 + { 1049 + int err; 1050 + 1051 + err = snp_set_vmsa(vmsa, NULL, apic_id, false); 1052 + if (err) 1053 + pr_err("clear VMSA page failed (%u), leaking page\n", err); 1054 + else 1055 + free_page((unsigned long)vmsa); 1056 + } 1057 + 962 1058 static void set_pte_enc(pte_t *kpte, int level, void *va) 963 1059 { 964 1060 struct pte_enc_desc d = { ··· 1101 1005 data = per_cpu(runtime_data, cpu); 1102 1006 ghcb = (unsigned long)&data->ghcb_page; 1103 1007 1104 - if (addr <= ghcb && ghcb <= addr + size) { 1008 + /* Handle the case of a huge page containing the GHCB page */ 1009 + if (addr <= ghcb && ghcb < addr + size) { 1105 1010 skipped_addr = true; 1106 1011 break; 1107 1012 } ··· 1152 1055 pr_warn("Failed to stop shared<->private conversions\n"); 1153 1056 } 1154 1057 1058 + /* 1059 + * Shutdown all APs except the one handling kexec/kdump and clearing 1060 + * the VMSA tag on AP's VMSA pages as they are not being used as 1061 + * VMSA page anymore. 1062 + */ 1063 + static void shutdown_all_aps(void) 1064 + { 1065 + struct sev_es_save_area *vmsa; 1066 + int apic_id, this_cpu, cpu; 1067 + 1068 + this_cpu = get_cpu(); 1069 + 1070 + /* 1071 + * APs are already in HLT loop when enc_kexec_finish() callback 1072 + * is invoked. 1073 + */ 1074 + for_each_present_cpu(cpu) { 1075 + vmsa = per_cpu(sev_vmsa, cpu); 1076 + 1077 + /* 1078 + * The BSP or offlined APs do not have guest allocated VMSA 1079 + * and there is no need to clear the VMSA tag for this page. 
1080 + */ 1081 + if (!vmsa) 1082 + continue; 1083 + 1084 + /* 1085 + * Cannot clear the VMSA tag for the currently running vCPU. 1086 + */ 1087 + if (this_cpu == cpu) { 1088 + unsigned long pa; 1089 + struct page *p; 1090 + 1091 + pa = __pa(vmsa); 1092 + /* 1093 + * Mark the VMSA page of the running vCPU as offline 1094 + * so that it is excluded and not touched by makedumpfile 1095 + * while generating vmcore during kdump. 1096 + */ 1097 + p = pfn_to_online_page(pa >> PAGE_SHIFT); 1098 + if (p) 1099 + __SetPageOffline(p); 1100 + continue; 1101 + } 1102 + 1103 + apic_id = cpuid_to_apicid[cpu]; 1104 + 1105 + /* 1106 + * Issue AP destroy to ensure AP gets kicked out of guest mode 1107 + * to allow using RMPADJUST to remove the VMSA tag on its 1108 + * VMSA page. 1109 + */ 1110 + vmgexit_ap_control(SVM_VMGEXIT_AP_DESTROY, vmsa, apic_id); 1111 + snp_cleanup_vmsa(vmsa, apic_id); 1112 + } 1113 + 1114 + put_cpu(); 1115 + } 1116 + 1155 1117 void snp_kexec_finish(void) 1156 1118 { 1157 1119 struct sev_es_runtime_data *data; 1120 + unsigned long size, addr; 1158 1121 unsigned int level, cpu; 1159 - unsigned long size; 1160 1122 struct ghcb *ghcb; 1161 1123 pte_t *pte; 1162 1124 ··· 1224 1068 1225 1069 if (!IS_ENABLED(CONFIG_KEXEC_CORE)) 1226 1070 return; 1071 + 1072 + shutdown_all_aps(); 1227 1073 1228 1074 unshare_all_memory(); 1229 1075 ··· 1243 1085 ghcb = &data->ghcb_page; 1244 1086 pte = lookup_address((unsigned long)ghcb, &level); 1245 1087 size = page_level_size(level); 1246 - set_pte_enc(pte, level, (void *)ghcb); 1247 - snp_set_memory_private((unsigned long)ghcb, (size / PAGE_SIZE)); 1088 + /* Handle the case of a huge page containing the GHCB page */ 1089 + addr = (unsigned long)ghcb & page_level_mask(level); 1090 + set_pte_enc(pte, level, (void *)addr); 1091 + snp_set_memory_private(addr, (size / PAGE_SIZE)); 1248 1092 } 1249 - } 1250 - 1251 - static int snp_set_vmsa(void *va, void *caa, int apic_id, bool make_vmsa) 1252 - { 1253 - int ret; 1254 - 1255 - if
(snp_vmpl) { 1256 - struct svsm_call call = {}; 1257 - unsigned long flags; 1258 - 1259 - local_irq_save(flags); 1260 - 1261 - call.caa = this_cpu_read(svsm_caa); 1262 - call.rcx = __pa(va); 1263 - 1264 - if (make_vmsa) { 1265 - /* Protocol 0, Call ID 2 */ 1266 - call.rax = SVSM_CORE_CALL(SVSM_CORE_CREATE_VCPU); 1267 - call.rdx = __pa(caa); 1268 - call.r8 = apic_id; 1269 - } else { 1270 - /* Protocol 0, Call ID 3 */ 1271 - call.rax = SVSM_CORE_CALL(SVSM_CORE_DELETE_VCPU); 1272 - } 1273 - 1274 - ret = svsm_perform_call_protocol(&call); 1275 - 1276 - local_irq_restore(flags); 1277 - } else { 1278 - /* 1279 - * If the kernel runs at VMPL0, it can change the VMSA 1280 - * bit for a page using the RMPADJUST instruction. 1281 - * However, for the instruction to succeed it must 1282 - * target the permissions of a lesser privileged (higher 1283 - * numbered) VMPL level, so use VMPL1. 1284 - */ 1285 - u64 attrs = 1; 1286 - 1287 - if (make_vmsa) 1288 - attrs |= RMPADJUST_VMSA_PAGE_BIT; 1289 - 1290 - ret = rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs); 1291 - } 1292 - 1293 - return ret; 1294 1093 } 1295 1094 1296 1095 #define __ATTR_BASE (SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK) ··· 1281 1166 return page_address(p + 1); 1282 1167 } 1283 1168 1284 - static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa, int apic_id) 1285 - { 1286 - int err; 1287 - 1288 - err = snp_set_vmsa(vmsa, NULL, apic_id, false); 1289 - if (err) 1290 - pr_err("clear VMSA page failed (%u), leaking page\n", err); 1291 - else 1292 - free_page((unsigned long)vmsa); 1293 - } 1294 - 1295 1169 static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip) 1296 1170 { 1297 1171 struct sev_es_save_area *cur_vmsa, *vmsa; 1298 - struct ghcb_state state; 1299 1172 struct svsm_ca *caa; 1300 - unsigned long flags; 1301 - struct ghcb *ghcb; 1302 1173 u8 sipi_vector; 1303 1174 int cpu, ret; 1304 1175 u64 cr4; ··· 1398 1297 } 1399 1298 1400 1299 /* Issue VMGEXIT AP Creation NAE event */ 1401 - 
local_irq_save(flags); 1402 - 1403 - ghcb = __sev_get_ghcb(&state); 1404 - 1405 - vc_ghcb_invalidate(ghcb); 1406 - ghcb_set_rax(ghcb, vmsa->sev_features); 1407 - ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_CREATION); 1408 - ghcb_set_sw_exit_info_1(ghcb, 1409 - ((u64)apic_id << 32) | 1410 - ((u64)snp_vmpl << 16) | 1411 - SVM_VMGEXIT_AP_CREATE); 1412 - ghcb_set_sw_exit_info_2(ghcb, __pa(vmsa)); 1413 - 1414 - sev_es_wr_ghcb_msr(__pa(ghcb)); 1415 - VMGEXIT(); 1416 - 1417 - if (!ghcb_sw_exit_info_1_is_valid(ghcb) || 1418 - lower_32_bits(ghcb->save.sw_exit_info_1)) { 1419 - pr_err("SNP AP Creation error\n"); 1420 - ret = -EINVAL; 1421 - } 1422 - 1423 - __sev_put_ghcb(&state); 1424 - 1425 - local_irq_restore(flags); 1426 - 1427 - /* Perform cleanup if there was an error */ 1300 + ret = vmgexit_ap_control(SVM_VMGEXIT_AP_CREATE, vmsa, apic_id); 1428 1301 if (ret) { 1429 1302 snp_cleanup_vmsa(vmsa, apic_id); 1430 1303 vmsa = NULL;
+1 -1
arch/x86/include/asm/cpufeatures.h
··· 75 75 #define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* "centaur_mcr" Centaur MCRs (= MTRRs) */ 76 76 #define X86_FEATURE_K8 ( 3*32+ 4) /* Opteron, Athlon64 */ 77 77 #define X86_FEATURE_ZEN5 ( 3*32+ 5) /* CPU based on Zen5 microarchitecture */ 78 - /* Free ( 3*32+ 6) */ 78 + #define X86_FEATURE_ZEN6 ( 3*32+ 6) /* CPU based on Zen6 microarchitecture */ 79 79 /* Free ( 3*32+ 7) */ 80 80 #define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* "constant_tsc" TSC ticks at a constant rate */ 81 81 #define X86_FEATURE_UP ( 3*32+ 9) /* "up" SMP kernel running on UP */
+1 -1
arch/x86/include/asm/sev-common.h
··· 116 116 #define GHCB_MSR_VMPL_REQ 0x016 117 117 #define GHCB_MSR_VMPL_REQ_LEVEL(v) \ 118 118 /* GHCBData[39:32] */ \ 119 - (((u64)(v) & GENMASK_ULL(7, 0) << 32) | \ 119 + ((((u64)(v) & GENMASK_ULL(7, 0)) << 32) | \ 120 120 /* GHCBData[11:0] */ \ 121 121 GHCB_MSR_VMPL_REQ) 122 122
+5
arch/x86/kernel/cpu/amd.c
··· 472 472 case 0x60 ... 0x7f: 473 473 setup_force_cpu_cap(X86_FEATURE_ZEN5); 474 474 break; 475 + case 0x50 ... 0x5f: 476 + case 0x90 ... 0xaf: 477 + case 0xc0 ... 0xcf: 478 + setup_force_cpu_cap(X86_FEATURE_ZEN6); 479 + break; 475 480 default: 476 481 goto warn; 477 482 }
+1 -1
arch/x86/mm/init_32.c
··· 566 566 "only %luMB highmem pages available, ignoring highmem size of %luMB!\n" 567 567 568 568 #define MSG_HIGHMEM_TRIMMED \ 569 - "Warning: only 4GB will be used. Support for for CONFIG_HIGHMEM64G was removed!\n" 569 + "Warning: only 4GB will be used. Support for CONFIG_HIGHMEM64G was removed!\n" 570 570 /* 571 571 * We have more RAM than fits into lowmem - we try to put it into 572 572 * highmem, also taking the highmem=x boot parameter into account: