Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'powerpc-5.14-3' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux

Pull powerpc fixes from Michael Ellerman:

- Fix guest to host memory corruption in H_RTAS due to missing nargs
check.

- Fix guest triggerable host crashes due to bad handling of nested
guest TM state.

- Fix possible crashes due to incorrect reference counting in
kvm_arch_vcpu_ioctl().

- Two commits fixing some regressions in KVM transactional memory
handling introduced by the recent rework of the KVM code.

Thanks to Nicholas Piggin, Alexey Kardashevskiy, and Michael Neuling.

* tag 'powerpc-5.14-3' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux:
KVM: PPC: Book3S HV Nested: Sanitise H_ENTER_NESTED TM state
KVM: PPC: Book3S: Fix H_RTAS rets buffer overflow
KVM: PPC: Fix kvm_arch_vcpu_ioctl vcpu_load leak
KVM: PPC: Book3S: Fix CONFIG_TRANSACTIONAL_MEM=n crash
KVM: PPC: Book3S HV P9: Fix guest TM support

5 changed files, +68 -8

arch/powerpc/kvm/book3s_hv.c (+2)
···
 			HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP | HFSCR_PREFIX;
 	if (cpu_has_feature(CPU_FTR_HVMODE)) {
 		vcpu->arch.hfscr &= mfspr(SPRN_HFSCR);
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 		if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
 			vcpu->arch.hfscr |= HFSCR_TM;
+#endif
 	}
 	if (cpu_has_feature(CPU_FTR_TM_COMP))
 		vcpu->arch.hfscr |= HFSCR_TM;
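The hunk above stops the TM-assist path from advertising HFSCR_TM when the kernel is built without CONFIG_PPC_TRANSACTIONAL_MEM. As a rough, self-contained illustration of the pattern (compile-time gating of a facility bit offered to a guest), here is a sketch; FACILITY_TM, HAVE_TM_SUPPORT and advertised_facilities() are invented names for this example, not kernel symbols:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical facility bit for illustration; not the real HFSCR_TM value. */
#define FACILITY_TM (1u << 0)

/* Stand-in for CONFIG_PPC_TRANSACTIONAL_MEM; flip to 0 to model a !TM build. */
#define HAVE_TM_SUPPORT 1

static uint32_t advertised_facilities(int hw_has_tm_assist)
{
	uint32_t fac = 0;

#if HAVE_TM_SUPPORT
	/* Only offer TM to the guest when this build can actually back it. */
	if (hw_has_tm_assist)
		fac |= FACILITY_TM;
#else
	(void)hw_has_tm_assist;
#endif
	return fac;
}

int main(void)
{
	printf("facilities: %#x\n", advertised_facilities(1));
	return 0;
}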
arch/powerpc/kvm/book3s_hv_nested.c (+20)
···
 	if (vcpu->kvm->arch.l1_ptcr == 0)
 		return H_NOT_AVAILABLE;
 
+	if (MSR_TM_TRANSACTIONAL(vcpu->arch.shregs.msr))
+		return H_BAD_MODE;
+
 	/* copy parameters in */
 	hv_ptr = kvmppc_get_gpr(vcpu, 4);
 	regs_ptr = kvmppc_get_gpr(vcpu, 5);
···
 	byteswap_pt_regs(&l2_regs);
 	if (l2_hv.vcpu_token >= NR_CPUS)
 		return H_PARAMETER;
+
+	/*
+	 * L1 must have set up a suspended state to enter the L2 in a
+	 * transactional state, and only in that case. These have to be
+	 * filtered out here to prevent causing a TM Bad Thing in the
+	 * host HRFID. We could synthesize a TM Bad Thing back to the L1
+	 * here but there doesn't seem like much point.
+	 */
+	if (MSR_TM_SUSPENDED(vcpu->arch.shregs.msr)) {
+		if (!MSR_TM_ACTIVE(l2_regs.msr))
+			return H_BAD_MODE;
+	} else {
+		if (l2_regs.msr & MSR_TS_MASK)
+			return H_BAD_MODE;
+		if (WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_TS_MASK))
+			return H_BAD_MODE;
+	}
 
 	/* translate lpid */
 	l2 = kvmhv_get_nested(vcpu->kvm, l2_hv.lpid, true);
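For background, the MSR carries a two-bit transactional-state (TS) field: clear, suspended, or transactional. The new checks reject H_ENTER_NESTED requests whose L1/L2 TS combinations cannot be entered safely. A standalone model of that rule follows; the TS_* values and nested_entry_tm_state_ok() are simplified stand-ins for this sketch, not the kernel's MSR definitions or bit positions:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified stand-ins for the MSR TS field; not the real bit layout. */
#define TS_SUSPENDED     0x1u
#define TS_TRANSACTIONAL 0x2u
#define TS_MASK          (TS_SUSPENDED | TS_TRANSACTIONAL)

/*
 * Model of the H_ENTER_NESTED state filter shown above: an L1 vCPU must
 * not make the hcall while transactional, a suspended L1 must hand over
 * an active (suspended or transactional) L2 state, and otherwise the L2
 * state must be clean.
 */
static bool nested_entry_tm_state_ok(uint64_t l1_msr, uint64_t l2_msr)
{
	if ((l1_msr & TS_MASK) == TS_TRANSACTIONAL)
		return false;
	if ((l1_msr & TS_MASK) == TS_SUSPENDED)
		return (l2_msr & TS_MASK) != 0;
	return (l2_msr & TS_MASK) == 0;
}

int main(void)
{
	printf("%d\n", nested_entry_tm_state_ok(0, 0));                      /* 1 */
	printf("%d\n", nested_entry_tm_state_ok(TS_SUSPENDED, TS_SUSPENDED)); /* 1 */
	printf("%d\n", nested_entry_tm_state_ok(0, TS_TRANSACTIONAL));        /* 0 */
	return 0;
}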
arch/powerpc/kvm/book3s_hv_p9_entry.c (+22 -3)
···
 	 */
 	mtspr(SPRN_HDEC, hdec);
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+tm_return_to_guest:
+#endif
 	mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
 	mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
 	mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0);
···
 	 * is in real suspend mode and is trying to transition to
 	 * transactional mode.
 	 */
-	if (local_paca->kvm_hstate.fake_suspend &&
+	if (!local_paca->kvm_hstate.fake_suspend &&
 			(vcpu->arch.shregs.msr & MSR_TS_S)) {
 		if (kvmhv_p9_tm_emulation_early(vcpu)) {
-			/* Prevent it being handled again. */
-			trap = 0;
+			/*
+			 * Go straight back into the guest with the
+			 * new NIP/MSR as set by TM emulation.
+			 */
+			mtspr(SPRN_HSRR0, vcpu->arch.regs.nip);
+			mtspr(SPRN_HSRR1, vcpu->arch.shregs.msr);
+
+			/*
+			 * tm_return_to_guest re-loads SRR0/1, DAR,
+			 * DSISR after RI is cleared, in case they had
+			 * been clobbered by a MCE.
+			 */
+			__mtmsrd(0, 1); /* clear RI */
+			goto tm_return_to_guest;
 		}
 	}
 #endif
···
 	 * If we are in real mode, only switch MMU on after the MMU is
 	 * switched to host, to avoid the P9_RADIX_PREFETCH_BUG.
 	 */
+	if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
+			vcpu->arch.shregs.msr & MSR_TS_MASK)
+		msr |= MSR_TS_S;
+
 	__mtmsrd(msr, 0);
 
 	end_timing(vcpu);
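The middle hunk reworks what happens when kvmhv_p9_tm_emulation_early() fully handles a softpatch interrupt: rather than clearing the trap and leaving through the normal exit path, the code now writes the emulated NIP/MSR into HSRR0/HSRR1 and jumps back to the new tm_return_to_guest label. A toy, self-contained model of that control flow; the toy_vcpu structure and helper names are invented for illustration, not the kernel's:

#include <stdint.h>
#include <stdio.h>

/* Toy register state for illustration; not the kernel's vcpu structures. */
struct toy_vcpu {
	uint64_t nip;
	uint64_t msr;
	int pending_tm_trap;	/* pretend the first exit is a softpatch trap */
};

/* Stand-in for kvmhv_p9_tm_emulation_early(): emulate and report handled. */
static int emulate_tm_early(struct toy_vcpu *vcpu)
{
	vcpu->nip += 4;		/* step past the emulated instruction */
	vcpu->pending_tm_trap = 0;
	return 1;
}

static int run_guest(struct toy_vcpu *vcpu)
{
	int trap;

return_to_guest:
	/* (Re)load entry state, as the real path reloads SRR0/1, DAR, DSISR. */
	printf("entering guest at nip=%#llx\n", (unsigned long long)vcpu->nip);

	/* Pretend the guest ran and exited with some trap number. */
	trap = vcpu->pending_tm_trap ? 1 : 0;

	if (trap && emulate_tm_early(vcpu))
		goto return_to_guest;	/* handled here: re-enter, don't exit */

	return trap;
}

int main(void)
{
	struct toy_vcpu vcpu = { .nip = 0x100, .msr = 0, .pending_tm_trap = 1 };

	run_guest(&vcpu);
	return 0;
}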
arch/powerpc/kvm/book3s_rtas.c (+22 -3)
···
 	 * value so we can restore it on the way out.
 	 */
 	orig_rets = args.rets;
+	if (be32_to_cpu(args.nargs) >= ARRAY_SIZE(args.args)) {
+		/*
+		 * Don't overflow our args array: ensure there is room for
+		 * at least rets[0] (even if the call specifies 0 nret).
+		 *
+		 * Each handler must then check for the correct nargs and nret
+		 * values, but they may always return failure in rets[0].
+		 */
+		rc = -EINVAL;
+		goto fail;
+	}
 	args.rets = &args.args[be32_to_cpu(args.nargs)];
 
 	mutex_lock(&vcpu->kvm->arch.rtas_token_lock);
···
 fail:
 	/*
 	 * We only get here if the guest has called RTAS with a bogus
-	 * args pointer. That means we can't get to the args, and so we
-	 * can't fail the RTAS call. So fail right out to userspace,
-	 * which should kill the guest.
+	 * args pointer or nargs/nret values that would overflow the
+	 * array. That means we can't get to the args, and so we can't
+	 * fail the RTAS call. So fail right out to userspace, which
+	 * should kill the guest.
+	 *
+	 * SLOF should actually pass the hcall return value from the
+	 * rtas handler call in r3, so enter_rtas could be modified to
+	 * return a failure indication in r3 and we could return such
+	 * errors to the guest rather than failing to host userspace.
+	 * However old guests that don't test for failure could then
+	 * continue silently after errors, so for now we won't do this.
 	 */
 	return rc;
 }
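The crux of this fix is that nargs comes straight from the guest and was used to compute args.rets before any bounds check, so a large value pushed rets past the end of the fixed-size args[] buffer and the later status write corrupted host memory. A simplified standalone sketch of the bug and the check; toy_rtas_args and handle_rtas() are made-up names for this example, and the real layout lives in the kernel's RTAS headers:

#include <stdint.h>
#include <stdio.h>

#define N_ARGS 16

/* Simplified model of an RTAS argument block copied in from the guest. */
struct toy_rtas_args {
	uint32_t nargs;		/* guest controlled */
	uint32_t nret;		/* guest controlled */
	uint32_t args[N_ARGS];	/* fixed-size buffer holding args then rets */
	uint32_t *rets;
};

static int handle_rtas(struct toy_rtas_args *a)
{
	/*
	 * Without this check, a guest-chosen nargs >= N_ARGS makes rets
	 * point past the end of args[], and the status write below then
	 * lands in whatever memory follows the structure.
	 */
	if (a->nargs >= N_ARGS)
		return -1;	/* reject, as the kernel now does with -EINVAL */

	a->rets = &a->args[a->nargs];
	a->rets[0] = 0;		/* handler reports success in rets[0] */
	return 0;
}

int main(void)
{
	struct toy_rtas_args ok  = { .nargs = 2 };
	struct toy_rtas_args bad = { .nargs = 1000 };

	printf("ok:  %d\n", handle_rtas(&ok));	/* 0  */
	printf("bad: %d\n", handle_rtas(&bad));	/* -1 */
	return 0;
}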
arch/powerpc/kvm/powerpc.c (+2 -2)
···
 	{
 		struct kvm_enable_cap cap;
 		r = -EFAULT;
-		vcpu_load(vcpu);
 		if (copy_from_user(&cap, argp, sizeof(cap)))
 			goto out;
+		vcpu_load(vcpu);
 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
 		vcpu_put(vcpu);
 		break;
···
 	case KVM_DIRTY_TLB: {
 		struct kvm_dirty_tlb dirty;
 		r = -EFAULT;
-		vcpu_load(vcpu);
 		if (copy_from_user(&dirty, argp, sizeof(dirty)))
 			goto out;
+		vcpu_load(vcpu);
 		r = kvm_vcpu_ioctl_dirty_tlb(vcpu, &dirty);
 		vcpu_put(vcpu);
 		break;
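Both hunks fix the same pattern: vcpu_load() was taken before copy_from_user(), so a fault jumped to the out label without the matching vcpu_put() and left the vCPU loaded. A minimal model of the corrected ordering; toy_vcpu_load()/toy_vcpu_put() and ioctl_enable_cap() are stand-ins for the real KVM helpers, not their actual signatures:

#include <stdbool.h>
#include <stdio.h>

static int load_count;	/* models whether the vCPU is still "loaded" */

static void toy_vcpu_load(void) { load_count++; }
static void toy_vcpu_put(void)  { load_count--; }

/* Stand-in for copy_from_user(): returns false when the user pointer is bad. */
static bool copy_in(bool user_ptr_ok)
{
	return user_ptr_ok;
}

static int ioctl_enable_cap(bool user_ptr_ok)
{
	int r = -14;	/* -EFAULT */

	/* Copy the argument first; only load the vCPU once that succeeded. */
	if (!copy_in(user_ptr_ok))
		goto out;

	toy_vcpu_load();
	r = 0;		/* ... the real ioctl work would go here ... */
	toy_vcpu_put();
out:
	return r;
}

int main(void)
{
	ioctl_enable_cap(true);
	ioctl_enable_cap(false);	/* early exit must not leak a load */
	printf("outstanding loads: %d\n", load_count);	/* expect 0 */
	return 0;
}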