Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'x86/apic' into x86/sev, to resolve conflict

Conflicts:
arch/x86/include/asm/sev-internal.h

Signed-off-by: Ingo Molnar <mingo@kernel.org>

+679 -59
+13
arch/x86/Kconfig
··· 483 483 484 484 If in doubt, say Y. 485 485 486 + config AMD_SECURE_AVIC 487 + bool "AMD Secure AVIC" 488 + depends on AMD_MEM_ENCRYPT && X86_X2APIC 489 + help 490 + Enable this to get AMD Secure AVIC support on guests that have this feature. 491 + 492 + AMD Secure AVIC provides hardware acceleration for performance sensitive 493 + APIC accesses and support for managing guest owned APIC state for SEV-SNP 494 + guests. Secure AVIC does not support xAPIC mode. It has functional 495 + dependency on x2apic being enabled in the guest. 496 + 497 + If you don't know what to do here, say N. 498 + 486 499 config X86_POSTED_MSI 487 500 bool "Enable MSI and MSI-x delivery by posted interrupts" 488 501 depends on X86_64 && IRQ_REMAP
+9 -1
arch/x86/boot/compressed/sev.c
··· 186 186 MSR_AMD64_SNP_VMSA_REG_PROT | \ 187 187 MSR_AMD64_SNP_RESERVED_BIT13 | \ 188 188 MSR_AMD64_SNP_RESERVED_BIT15 | \ 189 + MSR_AMD64_SNP_SECURE_AVIC | \ 189 190 MSR_AMD64_SNP_RESERVED_MASK) 191 + 192 + #ifdef CONFIG_AMD_SECURE_AVIC 193 + #define SNP_FEATURE_SECURE_AVIC MSR_AMD64_SNP_SECURE_AVIC 194 + #else 195 + #define SNP_FEATURE_SECURE_AVIC 0 196 + #endif 190 197 191 198 /* 192 199 * SNP_FEATURES_PRESENT is the mask of SNP features that are implemented ··· 201 194 * guest kernel, a corresponding bit should be added to the mask. 202 195 */ 203 196 #define SNP_FEATURES_PRESENT (MSR_AMD64_SNP_DEBUG_SWAP | \ 204 - MSR_AMD64_SNP_SECURE_TSC) 197 + MSR_AMD64_SNP_SECURE_TSC | \ 198 + SNP_FEATURE_SECURE_AVIC) 205 199 206 200 u64 snp_get_unsupported_features(u64 status) 207 201 {
+3
arch/x86/coco/core.c
··· 104 104 case CC_ATTR_HOST_SEV_SNP: 105 105 return cc_flags.host_sev_snp; 106 106 107 + case CC_ATTR_SNP_SECURE_AVIC: 108 + return sev_status & MSR_AMD64_SNP_SECURE_AVIC; 109 + 107 110 default: 108 111 return false; 109 112 }
+103
arch/x86/coco/sev/core.c
··· 121 121 [MSR_AMD64_SNP_IBS_VIRT_BIT] = "IBSVirt", 122 122 [MSR_AMD64_SNP_VMSA_REG_PROT_BIT] = "VMSARegProt", 123 123 [MSR_AMD64_SNP_SMT_PROT_BIT] = "SMTProt", 124 + [MSR_AMD64_SNP_SECURE_AVIC_BIT] = "SecureAVIC", 124 125 }; 125 126 126 127 /* ··· 1106 1105 vmsa->x87_ftw = AP_INIT_X87_FTW_DEFAULT; 1107 1106 vmsa->x87_fcw = AP_INIT_X87_FCW_DEFAULT; 1108 1107 1108 + if (cc_platform_has(CC_ATTR_SNP_SECURE_AVIC)) 1109 + vmsa->vintr_ctrl |= V_GIF_MASK | V_NMI_ENABLE_MASK; 1110 + 1109 1111 /* SVME must be set. */ 1110 1112 vmsa->efer = EFER_SVME; 1111 1113 ··· 1241 1237 } 1242 1238 1243 1239 return 0; 1240 + } 1241 + 1242 + u64 savic_ghcb_msr_read(u32 reg) 1243 + { 1244 + u64 msr = APIC_BASE_MSR + (reg >> 4); 1245 + struct pt_regs regs = { .cx = msr }; 1246 + struct es_em_ctxt ctxt = { .regs = &regs }; 1247 + struct ghcb_state state; 1248 + enum es_result res; 1249 + struct ghcb *ghcb; 1250 + 1251 + guard(irqsave)(); 1252 + 1253 + ghcb = __sev_get_ghcb(&state); 1254 + vc_ghcb_invalidate(ghcb); 1255 + 1256 + res = sev_es_ghcb_handle_msr(ghcb, &ctxt, false); 1257 + if (res != ES_OK) { 1258 + pr_err("Secure AVIC MSR (0x%llx) read returned error (%d)\n", msr, res); 1259 + /* MSR read failures are treated as fatal errors */ 1260 + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SAVIC_FAIL); 1261 + } 1262 + 1263 + __sev_put_ghcb(&state); 1264 + 1265 + return regs.ax | regs.dx << 32; 1266 + } 1267 + 1268 + void savic_ghcb_msr_write(u32 reg, u64 value) 1269 + { 1270 + u64 msr = APIC_BASE_MSR + (reg >> 4); 1271 + struct pt_regs regs = { 1272 + .cx = msr, 1273 + .ax = lower_32_bits(value), 1274 + .dx = upper_32_bits(value) 1275 + }; 1276 + struct es_em_ctxt ctxt = { .regs = &regs }; 1277 + struct ghcb_state state; 1278 + enum es_result res; 1279 + struct ghcb *ghcb; 1280 + 1281 + guard(irqsave)(); 1282 + 1283 + ghcb = __sev_get_ghcb(&state); 1284 + vc_ghcb_invalidate(ghcb); 1285 + 1286 + res = sev_es_ghcb_handle_msr(ghcb, &ctxt, true); 1287 + if (res != ES_OK) { 1288 + 
pr_err("Secure AVIC MSR (0x%llx) write returned error (%d)\n", msr, res); 1289 + /* MSR writes should never fail. Any failure is fatal error for SNP guest */ 1290 + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SAVIC_FAIL); 1291 + } 1292 + 1293 + __sev_put_ghcb(&state); 1294 + } 1295 + 1296 + enum es_result savic_register_gpa(u64 gpa) 1297 + { 1298 + struct ghcb_state state; 1299 + struct es_em_ctxt ctxt; 1300 + enum es_result res; 1301 + struct ghcb *ghcb; 1302 + 1303 + guard(irqsave)(); 1304 + 1305 + ghcb = __sev_get_ghcb(&state); 1306 + vc_ghcb_invalidate(ghcb); 1307 + 1308 + ghcb_set_rax(ghcb, SVM_VMGEXIT_SAVIC_SELF_GPA); 1309 + ghcb_set_rbx(ghcb, gpa); 1310 + res = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_SAVIC, 1311 + SVM_VMGEXIT_SAVIC_REGISTER_GPA, 0); 1312 + 1313 + __sev_put_ghcb(&state); 1314 + 1315 + return res; 1316 + } 1317 + 1318 + enum es_result savic_unregister_gpa(u64 *gpa) 1319 + { 1320 + struct ghcb_state state; 1321 + struct es_em_ctxt ctxt; 1322 + enum es_result res; 1323 + struct ghcb *ghcb; 1324 + 1325 + guard(irqsave)(); 1326 + 1327 + ghcb = __sev_get_ghcb(&state); 1328 + vc_ghcb_invalidate(ghcb); 1329 + 1330 + ghcb_set_rax(ghcb, SVM_VMGEXIT_SAVIC_SELF_GPA); 1331 + res = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_SAVIC, 1332 + SVM_VMGEXIT_SAVIC_UNREGISTER_GPA, 0); 1333 + if (gpa && res == ES_OK) 1334 + *gpa = ghcb->save.rbx; 1335 + 1336 + __sev_put_ghcb(&state); 1337 + 1338 + return res; 1244 1339 } 1245 1340 1246 1341 static void snp_register_per_cpu_ghcb(void)
+15 -5
arch/x86/coco/sev/vc-handle.c
··· 404 404 return ES_OK; 405 405 } 406 406 407 - static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt) 407 + enum es_result sev_es_ghcb_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt, bool write) 408 408 { 409 409 struct pt_regs *regs = ctxt->regs; 410 410 enum es_result ret; 411 - bool write; 412 - 413 - /* Is it a WRMSR? */ 414 - write = ctxt->insn.opcode.bytes[1] == 0x30; 415 411 416 412 switch (regs->cx) { 417 413 case MSR_SVSM_CAA: ··· 416 420 case MSR_AMD64_GUEST_TSC_FREQ: 417 421 if (sev_status & MSR_AMD64_SNP_SECURE_TSC) 418 422 return __vc_handle_secure_tsc_msrs(ctxt, write); 423 + break; 424 + case MSR_AMD64_SAVIC_CONTROL: 425 + /* 426 + * AMD64_SAVIC_CONTROL should not be intercepted when 427 + * Secure AVIC is enabled. Terminate the Secure AVIC guest 428 + * if the interception is enabled. 429 + */ 430 + if (cc_platform_has(CC_ATTR_SNP_SECURE_AVIC)) 431 + return ES_VMM_ERROR; 419 432 break; 420 433 default: 421 434 break; ··· 444 439 } 445 440 446 441 return ret; 442 + } 443 + 444 + static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt) 445 + { 446 + return sev_es_ghcb_handle_msr(ghcb, ctxt, ctxt->insn.opcode.bytes[1] == 0x30); 447 447 } 448 448 449 449 static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt)
+11
arch/x86/include/asm/apic.h
··· 305 305 306 306 /* Probe, setup and smpboot functions */ 307 307 int (*probe)(void); 308 + void (*setup)(void); 309 + void (*teardown)(void); 308 310 int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); 309 311 310 312 void (*init_apic_ldr)(void); ··· 318 316 int (*wakeup_secondary_cpu)(u32 apicid, unsigned long start_eip, unsigned int cpu); 319 317 /* wakeup secondary CPU using 64-bit wakeup point */ 320 318 int (*wakeup_secondary_cpu_64)(u32 apicid, unsigned long start_eip, unsigned int cpu); 319 + 320 + void (*update_vector)(unsigned int cpu, unsigned int vector, bool set); 321 321 322 322 char *name; 323 323 }; ··· 474 470 return apic_id <= apic->max_apic_id; 475 471 } 476 472 473 + static __always_inline void apic_update_vector(unsigned int cpu, unsigned int vector, bool set) 474 + { 475 + if (apic->update_vector) 476 + apic->update_vector(cpu, vector, set); 477 + } 478 + 477 479 #else /* CONFIG_X86_LOCAL_APIC */ 478 480 479 481 static inline u32 apic_read(u32 reg) { return 0; } ··· 491 481 static inline u32 safe_apic_wait_icr_idle(void) { return 0; } 492 482 static inline void apic_native_eoi(void) { WARN_ON_ONCE(1); } 493 483 static inline void apic_setup_apic_calls(void) { } 484 + static inline void apic_update_vector(unsigned int cpu, unsigned int vector, bool set) { } 494 485 495 486 #define apic_update_callback(_callback, _fn) do { } while (0) 496 487
+2
arch/x86/include/asm/apicdef.h
··· 135 135 #define APIC_TDR_DIV_128 0xA 136 136 #define APIC_EFEAT 0x400 137 137 #define APIC_ECTRL 0x410 138 + #define APIC_SEOI 0x420 139 + #define APIC_IER 0x480 138 140 #define APIC_EILVTn(n) (0x500 + 0x10 * n) 139 141 #define APIC_EILVT_NR_AMD_K8 1 /* # of extended interrupts */ 140 142 #define APIC_EILVT_NR_AMD_10H 4
+8 -1
arch/x86/include/asm/msr-index.h
··· 699 699 #define MSR_AMD64_SNP_VMSA_REG_PROT BIT_ULL(MSR_AMD64_SNP_VMSA_REG_PROT_BIT) 700 700 #define MSR_AMD64_SNP_SMT_PROT_BIT 17 701 701 #define MSR_AMD64_SNP_SMT_PROT BIT_ULL(MSR_AMD64_SNP_SMT_PROT_BIT) 702 - #define MSR_AMD64_SNP_RESV_BIT 18 702 + #define MSR_AMD64_SNP_SECURE_AVIC_BIT 18 703 + #define MSR_AMD64_SNP_SECURE_AVIC BIT_ULL(MSR_AMD64_SNP_SECURE_AVIC_BIT) 704 + #define MSR_AMD64_SNP_RESV_BIT 19 703 705 #define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, MSR_AMD64_SNP_RESV_BIT) 706 + #define MSR_AMD64_SAVIC_CONTROL 0xc0010138 707 + #define MSR_AMD64_SAVIC_EN_BIT 0 708 + #define MSR_AMD64_SAVIC_EN BIT_ULL(MSR_AMD64_SAVIC_EN_BIT) 709 + #define MSR_AMD64_SAVIC_ALLOWEDNMI_BIT 1 710 + #define MSR_AMD64_SAVIC_ALLOWEDNMI BIT_ULL(MSR_AMD64_SAVIC_ALLOWEDNMI_BIT) 704 711 #define MSR_AMD64_RMP_BASE 0xc0010132 705 712 #define MSR_AMD64_RMP_END 0xc0010133 706 713 #define MSR_AMD64_RMP_CFG 0xc0010136
+1
arch/x86/include/asm/sev-common.h
··· 208 208 #define GHCB_TERM_SVSM_CAA 9 /* SVSM is present but CAA is not page aligned */ 209 209 #define GHCB_TERM_SECURE_TSC 10 /* Secure TSC initialization failed */ 210 210 #define GHCB_TERM_SVSM_CA_REMAP_FAIL 11 /* SVSM is present but CA could not be remapped */ 211 + #define GHCB_TERM_SAVIC_FAIL 12 /* Secure AVIC-specific failure */ 211 212 212 213 #define GHCB_RESP_CODE(v) ((v) & GHCB_MSR_INFO_MASK) 213 214
+2
arch/x86/include/asm/sev-internal.h
··· 80 80 native_wrmsr(MSR_AMD64_SEV_ES_GHCB, low, high); 81 81 } 82 82 83 + enum es_result sev_es_ghcb_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt, bool write); 84 + 83 85 u64 get_hv_features(void); 84 86 85 87 const struct snp_cpuid_table *snp_cpuid_get_table(void);
+8
arch/x86/include/asm/sev.h
··· 532 532 533 533 void __init snp_secure_tsc_prepare(void); 534 534 void __init snp_secure_tsc_init(void); 535 + enum es_result savic_register_gpa(u64 gpa); 536 + enum es_result savic_unregister_gpa(u64 *gpa); 537 + u64 savic_ghcb_msr_read(u32 reg); 538 + void savic_ghcb_msr_write(u32 reg, u64 value); 535 539 536 540 static __always_inline void vc_ghcb_invalidate(struct ghcb *ghcb) 537 541 { ··· 621 617 static inline int snp_svsm_vtpm_send_command(u8 *buffer) { return -ENODEV; } 622 618 static inline void __init snp_secure_tsc_prepare(void) { } 623 619 static inline void __init snp_secure_tsc_init(void) { } 620 + static inline enum es_result savic_register_gpa(u64 gpa) { return ES_UNSUPPORTED; } 621 + static inline enum es_result savic_unregister_gpa(u64 *gpa) { return ES_UNSUPPORTED; } 622 + static inline void savic_ghcb_msr_write(u32 reg, u64 value) { } 623 + static inline u64 savic_ghcb_msr_read(u32 reg) { return 0; } 624 624 625 625 #endif /* CONFIG_AMD_MEM_ENCRYPT */ 626 626
+4
arch/x86/include/uapi/asm/svm.h
··· 118 118 #define SVM_VMGEXIT_AP_CREATE 1 119 119 #define SVM_VMGEXIT_AP_DESTROY 2 120 120 #define SVM_VMGEXIT_SNP_RUN_VMPL 0x80000018 121 + #define SVM_VMGEXIT_SAVIC 0x8000001a 122 + #define SVM_VMGEXIT_SAVIC_REGISTER_GPA 0 123 + #define SVM_VMGEXIT_SAVIC_UNREGISTER_GPA 1 124 + #define SVM_VMGEXIT_SAVIC_SELF_GPA ~0ULL 121 125 #define SVM_VMGEXIT_HV_FEATURES 0x8000fffd 122 126 #define SVM_VMGEXIT_TERM_REQUEST 0x8000fffe 123 127 #define SVM_VMGEXIT_TERM_REASON(reason_set, reason_code) \
+1
arch/x86/kernel/apic/Makefile
··· 18 18 # APIC probe will depend on the listing order here 19 19 obj-$(CONFIG_X86_NUMACHIP) += apic_numachip.o 20 20 obj-$(CONFIG_X86_UV) += x2apic_uv_x.o 21 + obj-$(CONFIG_AMD_SECURE_AVIC) += x2apic_savic.o 21 22 obj-$(CONFIG_X86_X2APIC) += x2apic_phys.o 22 23 obj-$(CONFIG_X86_X2APIC) += x2apic_cluster.o 23 24 obj-y += apic_flat_64.o
+46 -41
arch/x86/kernel/apic/apic.c
··· 592 592 0xF, ~0UL); 593 593 } else 594 594 clockevents_register_device(levt); 595 + 596 + apic_update_vector(smp_processor_id(), LOCAL_TIMER_VECTOR, true); 595 597 } 596 598 597 599 /* ··· 1170 1168 if (!apic_accessible()) 1171 1169 return; 1172 1170 1171 + if (apic->teardown) 1172 + apic->teardown(); 1173 + 1173 1174 apic_soft_disable(); 1174 1175 1175 1176 #ifdef CONFIG_X86_32 ··· 1433 1428 u32 regs[APIC_IR_REGS]; 1434 1429 }; 1435 1430 1436 - static bool apic_check_and_ack(union apic_ir *irr, union apic_ir *isr) 1431 + static bool apic_check_and_eoi_isr(union apic_ir *isr) 1437 1432 { 1438 1433 int i, bit; 1439 - 1440 - /* Read the IRRs */ 1441 - for (i = 0; i < APIC_IR_REGS; i++) 1442 - irr->regs[i] = apic_read(APIC_IRR + i * 0x10); 1443 1434 1444 1435 /* Read the ISRs */ 1445 1436 for (i = 0; i < APIC_IR_REGS; i++) 1446 1437 isr->regs[i] = apic_read(APIC_ISR + i * 0x10); 1447 1438 1448 - /* 1449 - * If the ISR map is not empty. ACK the APIC and run another round 1450 - * to verify whether a pending IRR has been unblocked and turned 1451 - * into a ISR. 1452 - */ 1453 - if (!bitmap_empty(isr->map, APIC_IR_BITS)) { 1454 - /* 1455 - * There can be multiple ISR bits set when a high priority 1456 - * interrupt preempted a lower priority one. Issue an ACK 1457 - * per set bit. 1458 - */ 1459 - for_each_set_bit(bit, isr->map, APIC_IR_BITS) 1460 - apic_eoi(); 1439 + /* If the ISR map is empty, nothing to do here. */ 1440 + if (bitmap_empty(isr->map, APIC_IR_BITS)) 1461 1441 return true; 1462 - } 1463 1442 1464 - return !bitmap_empty(irr->map, APIC_IR_BITS); 1443 + /* 1444 + * There can be multiple ISR bits set when a high priority 1445 + * interrupt preempted a lower priority one. Issue an EOI for each 1446 + * set bit. The priority traversal order does not matter as there 1447 + * can't be new ISR bits raised at this point. What matters is that 1448 + * an EOI is issued for each ISR bit. 
1449 + */ 1450 + for_each_set_bit(bit, isr->map, APIC_IR_BITS) 1451 + apic_eoi(); 1452 + 1453 + /* Reread the ISRs, they should be empty now */ 1454 + for (i = 0; i < APIC_IR_REGS; i++) 1455 + isr->regs[i] = apic_read(APIC_ISR + i * 0x10); 1456 + 1457 + return bitmap_empty(isr->map, APIC_IR_BITS); 1465 1458 } 1466 1459 1467 1460 /* 1468 - * After a crash, we no longer service the interrupts and a pending 1469 - * interrupt from previous kernel might still have ISR bit set. 1461 + * If a CPU services an interrupt and crashes before issuing EOI to the 1462 + * local APIC, the corresponding ISR bit is still set when the crashing CPU 1463 + * jumps into a crash kernel. Read the ISR and issue an EOI for each set 1464 + * bit to acknowledge it as otherwise these slots would be locked forever 1465 + * waiting for an EOI. 1470 1466 * 1471 - * Most probably by now the CPU has serviced that pending interrupt and it 1472 - * might not have done the apic_eoi() because it thought, interrupt 1473 - * came from i8259 as ExtInt. LAPIC did not get EOI so it does not clear 1474 - * the ISR bit and cpu thinks it has already serviced the interrupt. Hence 1475 - * a vector might get locked. It was noticed for timer irq (vector 1476 - * 0x31). Issue an extra EOI to clear ISR. 1467 + * If there are pending bits in the IRR, then they won't be converted into 1468 + * ISR bits as the CPU has interrupts disabled. They will be delivered once 1469 + * the CPU enables interrupts and there is nothing which can prevent that. 1477 1470 * 1478 - * If there are pending IRR bits they turn into ISR bits after a higher 1479 - * priority ISR bit has been acked. 1471 + * In the worst case this results in spurious interrupt warnings. 
1480 1472 */ 1481 - static void apic_pending_intr_clear(void) 1473 + static void apic_clear_isr(void) 1482 1474 { 1483 - union apic_ir irr, isr; 1475 + union apic_ir ir; 1484 1476 unsigned int i; 1485 1477 1486 - /* 512 loops are way oversized and give the APIC a chance to obey. */ 1487 - for (i = 0; i < 512; i++) { 1488 - if (!apic_check_and_ack(&irr, &isr)) 1489 - return; 1490 - } 1491 - /* Dump the IRR/ISR content if that failed */ 1492 - pr_warn("APIC: Stale IRR: %256pb ISR: %256pb\n", irr.map, isr.map); 1478 + if (!apic_check_and_eoi_isr(&ir)) 1479 + pr_warn("APIC: Stale ISR: %256pb\n", ir.map); 1480 + 1481 + for (i = 0; i < APIC_IR_REGS; i++) 1482 + ir.regs[i] = apic_read(APIC_IRR + i * 0x10); 1483 + 1484 + if (!bitmap_empty(ir.map, APIC_IR_BITS)) 1485 + pr_warn("APIC: Stale IRR: %256pb\n", ir.map); 1493 1486 } 1494 1487 1495 1488 /** ··· 1505 1502 disable_ioapic_support(); 1506 1503 return; 1507 1504 } 1505 + 1506 + if (apic->setup) 1507 + apic->setup(); 1508 1508 1509 1509 /* 1510 1510 * If this comes from kexec/kcrash the APIC might be enabled in ··· 1547 1541 value |= 0x10; 1548 1542 apic_write(APIC_TASKPRI, value); 1549 1543 1550 - /* Clear eventually stale ISR/IRR bits */ 1551 - apic_pending_intr_clear(); 1544 + apic_clear_isr(); 1552 1545 1553 1546 /* 1554 1547 * Now that we are all set up, enable the APIC
+17 -11
arch/x86/kernel/apic/vector.c
··· 134 134 135 135 apicd->hw_irq_cfg.vector = vector; 136 136 apicd->hw_irq_cfg.dest_apicid = apic->calc_dest_apicid(cpu); 137 + 138 + apic_update_vector(cpu, vector, true); 139 + 137 140 irq_data_update_effective_affinity(irqd, cpumask_of(cpu)); 138 - trace_vector_config(irqd->irq, vector, cpu, 139 - apicd->hw_irq_cfg.dest_apicid); 141 + trace_vector_config(irqd->irq, vector, cpu, apicd->hw_irq_cfg.dest_apicid); 140 142 } 141 143 142 - static void apic_update_vector(struct irq_data *irqd, unsigned int newvec, 143 - unsigned int newcpu) 144 + static void apic_free_vector(unsigned int cpu, unsigned int vector, bool managed) 145 + { 146 + apic_update_vector(cpu, vector, false); 147 + irq_matrix_free(vector_matrix, cpu, vector, managed); 148 + } 149 + 150 + static void chip_data_update(struct irq_data *irqd, unsigned int newvec, unsigned int newcpu) 144 151 { 145 152 struct apic_chip_data *apicd = apic_chip_data(irqd); 146 153 struct irq_desc *desc = irq_data_to_desc(irqd); ··· 181 174 apicd->prev_cpu = apicd->cpu; 182 175 WARN_ON_ONCE(apicd->cpu == newcpu); 183 176 } else { 184 - irq_matrix_free(vector_matrix, apicd->cpu, apicd->vector, 185 - managed); 177 + apic_free_vector(apicd->cpu, apicd->vector, managed); 186 178 } 187 179 188 180 setnew: ··· 267 261 trace_vector_alloc(irqd->irq, vector, resvd, vector); 268 262 if (vector < 0) 269 263 return vector; 270 - apic_update_vector(irqd, vector, cpu); 264 + chip_data_update(irqd, vector, cpu); 271 265 272 266 return 0; 273 267 } ··· 343 337 trace_vector_alloc_managed(irqd->irq, vector, vector); 344 338 if (vector < 0) 345 339 return vector; 346 - apic_update_vector(irqd, vector, cpu); 340 + chip_data_update(irqd, vector, cpu); 347 341 348 342 return 0; 349 343 } ··· 363 357 apicd->prev_cpu); 364 358 365 359 per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_SHUTDOWN; 366 - irq_matrix_free(vector_matrix, apicd->cpu, vector, managed); 360 + apic_free_vector(apicd->cpu, vector, managed); 367 361 apicd->vector = 0; 368 362 
369 363 /* Clean up move in progress */ ··· 372 366 return; 373 367 374 368 per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_SHUTDOWN; 375 - irq_matrix_free(vector_matrix, apicd->prev_cpu, vector, managed); 369 + apic_free_vector(apicd->prev_cpu, vector, managed); 376 370 apicd->prev_vector = 0; 377 371 apicd->move_in_progress = 0; 378 372 hlist_del_init(&apicd->clist); ··· 911 905 * affinity mask comes online. 912 906 */ 913 907 trace_vector_free_moved(apicd->irq, cpu, vector, managed); 914 - irq_matrix_free(vector_matrix, cpu, vector, managed); 908 + apic_free_vector(cpu, vector, managed); 915 909 per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED; 916 910 hlist_del_init(&apicd->clist); 917 911 apicd->prev_vector = 0;
+428
arch/x86/kernel/apic/x2apic_savic.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * AMD Secure AVIC Support (SEV-SNP Guests) 4 + * 5 + * Copyright (C) 2024 Advanced Micro Devices, Inc. 6 + * 7 + * Author: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com> 8 + */ 9 + 10 + #include <linux/cc_platform.h> 11 + #include <linux/cpumask.h> 12 + #include <linux/percpu-defs.h> 13 + #include <linux/align.h> 14 + 15 + #include <asm/apic.h> 16 + #include <asm/sev.h> 17 + 18 + #include "local.h" 19 + 20 + struct secure_avic_page { 21 + u8 regs[PAGE_SIZE]; 22 + } __aligned(PAGE_SIZE); 23 + 24 + static struct secure_avic_page __percpu *savic_page __ro_after_init; 25 + 26 + static int savic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) 27 + { 28 + return x2apic_enabled() && cc_platform_has(CC_ATTR_SNP_SECURE_AVIC); 29 + } 30 + 31 + static inline void *get_reg_bitmap(unsigned int cpu, unsigned int offset) 32 + { 33 + return &per_cpu_ptr(savic_page, cpu)->regs[offset]; 34 + } 35 + 36 + static inline void update_vector(unsigned int cpu, unsigned int offset, 37 + unsigned int vector, bool set) 38 + { 39 + void *bitmap = get_reg_bitmap(cpu, offset); 40 + 41 + if (set) 42 + apic_set_vector(vector, bitmap); 43 + else 44 + apic_clear_vector(vector, bitmap); 45 + } 46 + 47 + #define SAVIC_ALLOWED_IRR 0x204 48 + 49 + /* 50 + * When Secure AVIC is enabled, RDMSR/WRMSR of the APIC registers 51 + * result in #VC exception (for non-accelerated register accesses) 52 + * with VMEXIT_AVIC_NOACCEL error code. The #VC exception handler 53 + * can read/write the x2APIC register in the guest APIC backing page. 54 + * 55 + * Since doing this would increase the latency of accessing x2APIC 56 + * registers, instead of doing RDMSR/WRMSR based accesses and 57 + * handling the APIC register reads/writes in the #VC exception handler, 58 + * the read() and write() callbacks directly read/write the APIC register 59 + * from/to the vCPU's APIC backing page. 
60 + */ 61 + static u32 savic_read(u32 reg) 62 + { 63 + void *ap = this_cpu_ptr(savic_page); 64 + 65 + switch (reg) { 66 + case APIC_LVTT: 67 + case APIC_TMICT: 68 + case APIC_TMCCT: 69 + case APIC_TDCR: 70 + case APIC_LVTTHMR: 71 + case APIC_LVTPC: 72 + case APIC_LVT0: 73 + case APIC_LVT1: 74 + case APIC_LVTERR: 75 + return savic_ghcb_msr_read(reg); 76 + case APIC_ID: 77 + case APIC_LVR: 78 + case APIC_TASKPRI: 79 + case APIC_ARBPRI: 80 + case APIC_PROCPRI: 81 + case APIC_LDR: 82 + case APIC_SPIV: 83 + case APIC_ESR: 84 + case APIC_EFEAT: 85 + case APIC_ECTRL: 86 + case APIC_SEOI: 87 + case APIC_IER: 88 + case APIC_EILVTn(0) ... APIC_EILVTn(3): 89 + return apic_get_reg(ap, reg); 90 + case APIC_ICR: 91 + return (u32)apic_get_reg64(ap, reg); 92 + case APIC_ISR ... APIC_ISR + 0x70: 93 + case APIC_TMR ... APIC_TMR + 0x70: 94 + if (WARN_ONCE(!IS_ALIGNED(reg, 16), 95 + "APIC register read offset 0x%x not aligned at 16 bytes", reg)) 96 + return 0; 97 + return apic_get_reg(ap, reg); 98 + /* IRR and ALLOWED_IRR offset range */ 99 + case APIC_IRR ... APIC_IRR + 0x74: 100 + /* 101 + * Valid APIC_IRR/SAVIC_ALLOWED_IRR registers are at 16 bytes strides from 102 + * their respective base offset. APIC_IRRs are in the range 103 + * 104 + * (0x200, 0x210, ..., 0x270) 105 + * 106 + * while the SAVIC_ALLOWED_IRR range starts 4 bytes later, in the range 107 + * 108 + * (0x204, 0x214, ..., 0x274). 109 + * 110 + * Filter out everything else. 111 + */ 112 + if (WARN_ONCE(!(IS_ALIGNED(reg, 16) || 113 + IS_ALIGNED(reg - 4, 16)), 114 + "Misaligned APIC_IRR/ALLOWED_IRR APIC register read offset 0x%x", reg)) 115 + return 0; 116 + return apic_get_reg(ap, reg); 117 + default: 118 + pr_err("Error reading unknown Secure AVIC reg offset 0x%x\n", reg); 119 + return 0; 120 + } 121 + } 122 + 123 + #define SAVIC_NMI_REQ 0x278 124 + 125 + /* 126 + * On WRMSR to APIC_SELF_IPI register by the guest, Secure AVIC hardware 127 + * updates the APIC_IRR in the APIC backing page of the vCPU. 
In addition, 128 + * hardware evaluates the new APIC_IRR update for interrupt injection to 129 + * the vCPU. So, self IPIs are hardware-accelerated. 130 + */ 131 + static inline void self_ipi_reg_write(unsigned int vector) 132 + { 133 + native_apic_msr_write(APIC_SELF_IPI, vector); 134 + } 135 + 136 + static void send_ipi_dest(unsigned int cpu, unsigned int vector, bool nmi) 137 + { 138 + if (nmi) 139 + apic_set_reg(per_cpu_ptr(savic_page, cpu), SAVIC_NMI_REQ, 1); 140 + else 141 + update_vector(cpu, APIC_IRR, vector, true); 142 + } 143 + 144 + static void send_ipi_allbut(unsigned int vector, bool nmi) 145 + { 146 + unsigned int cpu, src_cpu; 147 + 148 + guard(irqsave)(); 149 + 150 + src_cpu = raw_smp_processor_id(); 151 + 152 + for_each_cpu(cpu, cpu_online_mask) { 153 + if (cpu == src_cpu) 154 + continue; 155 + send_ipi_dest(cpu, vector, nmi); 156 + } 157 + } 158 + 159 + static inline void self_ipi(unsigned int vector, bool nmi) 160 + { 161 + u32 icr_low = APIC_SELF_IPI | vector; 162 + 163 + if (nmi) 164 + icr_low |= APIC_DM_NMI; 165 + 166 + native_x2apic_icr_write(icr_low, 0); 167 + } 168 + 169 + static void savic_icr_write(u32 icr_low, u32 icr_high) 170 + { 171 + unsigned int dsh, vector; 172 + u64 icr_data; 173 + bool nmi; 174 + 175 + dsh = icr_low & APIC_DEST_ALLBUT; 176 + vector = icr_low & APIC_VECTOR_MASK; 177 + nmi = ((icr_low & APIC_DM_FIXED_MASK) == APIC_DM_NMI); 178 + 179 + switch (dsh) { 180 + case APIC_DEST_SELF: 181 + self_ipi(vector, nmi); 182 + break; 183 + case APIC_DEST_ALLINC: 184 + self_ipi(vector, nmi); 185 + fallthrough; 186 + case APIC_DEST_ALLBUT: 187 + send_ipi_allbut(vector, nmi); 188 + break; 189 + default: 190 + send_ipi_dest(icr_high, vector, nmi); 191 + break; 192 + } 193 + 194 + icr_data = ((u64)icr_high) << 32 | icr_low; 195 + if (dsh != APIC_DEST_SELF) 196 + savic_ghcb_msr_write(APIC_ICR, icr_data); 197 + apic_set_reg64(this_cpu_ptr(savic_page), APIC_ICR, icr_data); 198 + } 199 + 200 + static void savic_write(u32 reg, u32 data) 201 
+ { 202 + void *ap = this_cpu_ptr(savic_page); 203 + 204 + switch (reg) { 205 + case APIC_LVTT: 206 + case APIC_TMICT: 207 + case APIC_TDCR: 208 + case APIC_LVT0: 209 + case APIC_LVT1: 210 + case APIC_LVTTHMR: 211 + case APIC_LVTPC: 212 + case APIC_LVTERR: 213 + savic_ghcb_msr_write(reg, data); 214 + break; 215 + case APIC_TASKPRI: 216 + case APIC_EOI: 217 + case APIC_SPIV: 218 + case SAVIC_NMI_REQ: 219 + case APIC_ESR: 220 + case APIC_ECTRL: 221 + case APIC_SEOI: 222 + case APIC_IER: 223 + case APIC_EILVTn(0) ... APIC_EILVTn(3): 224 + apic_set_reg(ap, reg, data); 225 + break; 226 + case APIC_ICR: 227 + savic_icr_write(data, 0); 228 + break; 229 + case APIC_SELF_IPI: 230 + self_ipi_reg_write(data); 231 + break; 232 + /* ALLOWED_IRR offsets are writable */ 233 + case SAVIC_ALLOWED_IRR ... SAVIC_ALLOWED_IRR + 0x70: 234 + if (IS_ALIGNED(reg - 4, 16)) { 235 + apic_set_reg(ap, reg, data); 236 + break; 237 + } 238 + fallthrough; 239 + default: 240 + pr_err("Error writing unknown Secure AVIC reg offset 0x%x\n", reg); 241 + } 242 + } 243 + 244 + static void send_ipi(u32 dest, unsigned int vector, unsigned int dsh) 245 + { 246 + unsigned int icr_low; 247 + 248 + icr_low = __prepare_ICR(dsh, vector, APIC_DEST_PHYSICAL); 249 + savic_icr_write(icr_low, dest); 250 + } 251 + 252 + static void savic_send_ipi(int cpu, int vector) 253 + { 254 + u32 dest = per_cpu(x86_cpu_to_apicid, cpu); 255 + 256 + send_ipi(dest, vector, 0); 257 + } 258 + 259 + static void send_ipi_mask(const struct cpumask *mask, unsigned int vector, bool excl_self) 260 + { 261 + unsigned int cpu, this_cpu; 262 + 263 + guard(irqsave)(); 264 + 265 + this_cpu = raw_smp_processor_id(); 266 + 267 + for_each_cpu(cpu, mask) { 268 + if (excl_self && cpu == this_cpu) 269 + continue; 270 + send_ipi(per_cpu(x86_cpu_to_apicid, cpu), vector, 0); 271 + } 272 + } 273 + 274 + static void savic_send_ipi_mask(const struct cpumask *mask, int vector) 275 + { 276 + send_ipi_mask(mask, vector, false); 277 + } 278 + 279 + static void 
savic_send_ipi_mask_allbutself(const struct cpumask *mask, int vector) 280 + { 281 + send_ipi_mask(mask, vector, true); 282 + } 283 + 284 + static void savic_send_ipi_allbutself(int vector) 285 + { 286 + send_ipi(0, vector, APIC_DEST_ALLBUT); 287 + } 288 + 289 + static void savic_send_ipi_all(int vector) 290 + { 291 + send_ipi(0, vector, APIC_DEST_ALLINC); 292 + } 293 + 294 + static void savic_send_ipi_self(int vector) 295 + { 296 + self_ipi_reg_write(vector); 297 + } 298 + 299 + static void savic_update_vector(unsigned int cpu, unsigned int vector, bool set) 300 + { 301 + update_vector(cpu, SAVIC_ALLOWED_IRR, vector, set); 302 + } 303 + 304 + static void savic_eoi(void) 305 + { 306 + unsigned int cpu; 307 + int vec; 308 + 309 + cpu = raw_smp_processor_id(); 310 + vec = apic_find_highest_vector(get_reg_bitmap(cpu, APIC_ISR)); 311 + if (WARN_ONCE(vec == -1, "EOI write while no active interrupt in APIC_ISR")) 312 + return; 313 + 314 + /* Is level-triggered interrupt? */ 315 + if (apic_test_vector(vec, get_reg_bitmap(cpu, APIC_TMR))) { 316 + update_vector(cpu, APIC_ISR, vec, false); 317 + /* 318 + * Propagate the EOI write to the hypervisor for level-triggered 319 + * interrupts. Return to the guest from GHCB protocol event takes 320 + * care of re-evaluating interrupt state. 321 + */ 322 + savic_ghcb_msr_write(APIC_EOI, 0); 323 + } else { 324 + /* 325 + * Hardware clears APIC_ISR and re-evaluates the interrupt state 326 + * to determine if there is any pending interrupt which can be 327 + * delivered to CPU. 328 + */ 329 + native_apic_msr_eoi(); 330 + } 331 + } 332 + 333 + static void savic_teardown(void) 334 + { 335 + /* Disable Secure AVIC */ 336 + native_wrmsrq(MSR_AMD64_SAVIC_CONTROL, 0); 337 + savic_unregister_gpa(NULL); 338 + } 339 + 340 + static void savic_setup(void) 341 + { 342 + void *ap = this_cpu_ptr(savic_page); 343 + enum es_result res; 344 + unsigned long gpa; 345 + 346 + /* 347 + * Before Secure AVIC is enabled, APIC MSR reads are intercepted. 
348 + * APIC_ID MSR read returns the value from the hypervisor. 349 + */ 350 + apic_set_reg(ap, APIC_ID, native_apic_msr_read(APIC_ID)); 351 + 352 + gpa = __pa(ap); 353 + 354 + /* 355 + * The NPT entry for a vCPU's APIC backing page must always be 356 + * present when the vCPU is running in order for Secure AVIC to 357 + * function. A VMEXIT_BUSY is returned on VMRUN and the vCPU cannot 358 + * be resumed if the NPT entry for the APIC backing page is not 359 + * present. Notify GPA of the vCPU's APIC backing page to the 360 + * hypervisor by calling savic_register_gpa(). Before executing 361 + * VMRUN, the hypervisor makes use of this information to make sure 362 + * the APIC backing page is mapped in NPT. 363 + */ 364 + res = savic_register_gpa(gpa); 365 + if (res != ES_OK) 366 + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SAVIC_FAIL); 367 + 368 + native_wrmsrq(MSR_AMD64_SAVIC_CONTROL, 369 + gpa | MSR_AMD64_SAVIC_EN | MSR_AMD64_SAVIC_ALLOWEDNMI); 370 + } 371 + 372 + static int savic_probe(void) 373 + { 374 + if (!cc_platform_has(CC_ATTR_SNP_SECURE_AVIC)) 375 + return 0; 376 + 377 + if (!x2apic_mode) { 378 + pr_err("Secure AVIC enabled in non x2APIC mode\n"); 379 + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SAVIC_FAIL); 380 + /* unreachable */ 381 + } 382 + 383 + savic_page = alloc_percpu(struct secure_avic_page); 384 + if (!savic_page) 385 + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SAVIC_FAIL); 386 + 387 + return 1; 388 + } 389 + 390 + static struct apic apic_x2apic_savic __ro_after_init = { 391 + 392 + .name = "secure avic x2apic", 393 + .probe = savic_probe, 394 + .acpi_madt_oem_check = savic_acpi_madt_oem_check, 395 + .setup = savic_setup, 396 + .teardown = savic_teardown, 397 + 398 + .dest_mode_logical = false, 399 + 400 + .disable_esr = 0, 401 + 402 + .cpu_present_to_apicid = default_cpu_present_to_apicid, 403 + 404 + .max_apic_id = UINT_MAX, 405 + .x2apic_set_max_apicid = true, 406 + .get_apic_id = x2apic_get_apic_id, 407 + 408 + 
.calc_dest_apicid = apic_default_calc_apicid, 409 + 410 + .send_IPI = savic_send_ipi, 411 + .send_IPI_mask = savic_send_ipi_mask, 412 + .send_IPI_mask_allbutself = savic_send_ipi_mask_allbutself, 413 + .send_IPI_allbutself = savic_send_ipi_allbutself, 414 + .send_IPI_all = savic_send_ipi_all, 415 + .send_IPI_self = savic_send_ipi_self, 416 + 417 + .nmi_to_offline_cpu = true, 418 + 419 + .read = savic_read, 420 + .write = savic_write, 421 + .eoi = savic_eoi, 422 + .icr_read = native_x2apic_icr_read, 423 + .icr_write = savic_icr_write, 424 + 425 + .update_vector = savic_update_vector, 426 + }; 427 + 428 + apic_driver(apic_x2apic_savic);
+8
include/linux/cc_platform.h
··· 96 96 * enabled to run SEV-SNP guests. 97 97 */ 98 98 CC_ATTR_HOST_SEV_SNP, 99 + 100 + /** 101 + * @CC_ATTR_SNP_SECURE_AVIC: Secure AVIC mode is active. 102 + * 103 + * The kernel is running as an SEV-SNP guest with the Secure AVIC 104 + * feature enabled (as reported in the guest's SEV status). 105 + */ 106 + CC_ATTR_SNP_SECURE_AVIC, 99 107 }; 100 108 101 109 #ifdef CONFIG_ARCH_HAS_CC_PLATFORM