Merge tag 'kvm-s390-next-6.5-1' of https://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux into HEAD

+4

arch/s390/boot/uv.c

··· 47 47 uv_info.conf_dump_finalize_len = uvcb.conf_dump_finalize_len; 48 48 uv_info.supp_att_req_hdr_ver = uvcb.supp_att_req_hdr_ver; 49 49 uv_info.supp_att_pflags = uvcb.supp_att_pflags; 50 + uv_info.supp_add_secret_req_ver = uvcb.supp_add_secret_req_ver; 51 + uv_info.supp_add_secret_pcf = uvcb.supp_add_secret_pcf; 52 + uv_info.supp_secret_types = uvcb.supp_secret_types; 53 + uv_info.max_secrets = uvcb.max_secrets; 50 54 } 51 55 52 56 #ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST

+30 -2

arch/s390/include/asm/uv.h

··· 58 58 #define UVC_CMD_SET_SHARED_ACCESS 0x1000 59 59 #define UVC_CMD_REMOVE_SHARED_ACCESS 0x1001 60 60 #define UVC_CMD_RETR_ATTEST 0x1020 61 + #define UVC_CMD_ADD_SECRET 0x1031 62 + #define UVC_CMD_LIST_SECRETS 0x1033 63 + #define UVC_CMD_LOCK_SECRETS 0x1034 61 64 62 65 /* Bits in installed uv calls */ 63 66 enum uv_cmds_inst { ··· 91 88 BIT_UVC_CMD_DUMP_CPU = 26, 92 89 BIT_UVC_CMD_DUMP_COMPLETE = 27, 93 90 BIT_UVC_CMD_RETR_ATTEST = 28, 91 + BIT_UVC_CMD_ADD_SECRET = 29, 92 + BIT_UVC_CMD_LIST_SECRETS = 30, 93 + BIT_UVC_CMD_LOCK_SECRETS = 31, 94 94 }; 95 95 96 96 enum uv_feat_ind { ··· 123 117 u32 reserved70[3]; /* 0x0070 */ 124 118 u32 max_num_sec_conf; /* 0x007c */ 125 119 u64 max_guest_stor_addr; /* 0x0080 */ 126 - u8 reserved88[158 - 136]; /* 0x0088 */ 120 + u8 reserved88[0x9e - 0x88]; /* 0x0088 */ 127 121 u16 max_guest_cpu_id; /* 0x009e */ 128 122 u64 uv_feature_indications; /* 0x00a0 */ 129 123 u64 reserveda8; /* 0x00a8 */ ··· 135 129 u64 reservedd8; /* 0x00d8 */ 136 130 u64 supp_att_req_hdr_ver; /* 0x00e0 */ 137 131 u64 supp_att_pflags; /* 0x00e8 */ 138 - u8 reservedf0[256 - 240]; /* 0x00f0 */ 132 + u64 reservedf0; /* 0x00f0 */ 133 + u64 supp_add_secret_req_ver; /* 0x00f8 */ 134 + u64 supp_add_secret_pcf; /* 0x0100 */ 135 + u64 supp_secret_types; /* 0x0108 */ 136 + u16 max_secrets; /* 0x0110 */ 137 + u8 reserved112[0x120 - 0x112]; /* 0x0112 */ 139 138 } __packed __aligned(8); 140 139 141 140 /* Initialize Ultravisor */ ··· 303 292 u64 reserved30[5]; 304 293 } __packed __aligned(8); 305 294 295 + /* 296 + * A common UV call struct for pv guests that contains a single address 297 + * Examples: 298 + * Add Secret 299 + * List Secrets 300 + */ 301 + struct uv_cb_guest_addr { 302 + struct uv_cb_header header; 303 + u64 reserved08[3]; 304 + u64 addr; 305 + u64 reserved28[4]; 306 + } __packed __aligned(8); 307 + 306 308 static inline int __uv_call(unsigned long r1, unsigned long r2) 307 309 { 308 310 int cc; ··· 389 365 unsigned long conf_dump_finalize_len; 390 
366 unsigned long supp_att_req_hdr_ver; 391 367 unsigned long supp_att_pflags; 368 + unsigned long supp_add_secret_req_ver; 369 + unsigned long supp_add_secret_pcf; 370 + unsigned long supp_secret_types; 371 + unsigned short max_secrets; 392 372 }; 393 373 394 374 extern struct uv_info uv_info;

+52 -1

arch/s390/include/uapi/asm/uvdevice.h

··· 32 32 __u16 reserved136; /* 0x0136 */ 33 33 }; 34 34 35 + /** 36 + * uvio_uvdev_info - Information of supported functions 37 + * @supp_uvio_cmds - supported IOCTLs by this device 38 + * @supp_uv_cmds - supported UVCs corresponding to the IOCTL 39 + * 40 + * UVIO request to get information about supported request types by this 41 + * uvdevice and the Ultravisor. Everything is output. Bits are in LSB0 42 + * ordering. If the bit is set in both, @supp_uvio_cmds and @supp_uv_cmds, the 43 + * uvdevice and the Ultravisor support that call. 44 + * 45 + * Note that bit 0 (UVIO_IOCTL_UVDEV_INFO_NR) is always zero for `supp_uv_cmds` 46 + * as there is no corresponding UV-call. 47 + */ 48 + struct uvio_uvdev_info { 49 + /* 50 + * If bit `n` is set, this device supports the IOCTL with nr `n`. 51 + */ 52 + __u64 supp_uvio_cmds; 53 + /* 54 + * If bit `n` is set, the Ultravisor(UV) supports the UV-call 55 + * corresponding to the IOCTL with nr `n` in the calling context (host 56 + * or guest). The value is only valid if the corresponding bit in 57 + * @supp_uvio_cmds is set as well. 58 + */ 59 + __u64 supp_uv_cmds; 60 + }; 61 + 35 62 /* 36 63 * The following max values define an upper length for the IOCTL in/out buffers. 
37 64 * However, they do not represent the maximum the Ultravisor allows which is ··· 69 42 #define UVIO_ATT_ARCB_MAX_LEN 0x100000 70 43 #define UVIO_ATT_MEASUREMENT_MAX_LEN 0x8000 71 44 #define UVIO_ATT_ADDITIONAL_MAX_LEN 0x8000 45 + #define UVIO_ADD_SECRET_MAX_LEN 0x100000 46 + #define UVIO_LIST_SECRETS_LEN 0x1000 72 47 73 48 #define UVIO_DEVICE_NAME "uv" 74 49 #define UVIO_TYPE_UVC 'u' 75 50 76 - #define UVIO_IOCTL_ATT _IOWR(UVIO_TYPE_UVC, 0x01, struct uvio_ioctl_cb) 51 + enum UVIO_IOCTL_NR { 52 + UVIO_IOCTL_UVDEV_INFO_NR = 0x00, 53 + UVIO_IOCTL_ATT_NR, 54 + UVIO_IOCTL_ADD_SECRET_NR, 55 + UVIO_IOCTL_LIST_SECRETS_NR, 56 + UVIO_IOCTL_LOCK_SECRETS_NR, 57 + /* must be the last entry */ 58 + UVIO_IOCTL_NUM_IOCTLS 59 + }; 60 + 61 + #define UVIO_IOCTL(nr) _IOWR(UVIO_TYPE_UVC, nr, struct uvio_ioctl_cb) 62 + #define UVIO_IOCTL_UVDEV_INFO UVIO_IOCTL(UVIO_IOCTL_UVDEV_INFO_NR) 63 + #define UVIO_IOCTL_ATT UVIO_IOCTL(UVIO_IOCTL_ATT_NR) 64 + #define UVIO_IOCTL_ADD_SECRET UVIO_IOCTL(UVIO_IOCTL_ADD_SECRET_NR) 65 + #define UVIO_IOCTL_LIST_SECRETS UVIO_IOCTL(UVIO_IOCTL_LIST_SECRETS_NR) 66 + #define UVIO_IOCTL_LOCK_SECRETS UVIO_IOCTL(UVIO_IOCTL_LOCK_SECRETS_NR) 67 + 68 + #define UVIO_SUPP_CALL(nr) (1ULL << (nr)) 69 + #define UVIO_SUPP_UDEV_INFO UVIO_SUPP_CALL(UVIO_IOCTL_UVDEV_INFO_NR) 70 + #define UVIO_SUPP_ATT UVIO_SUPP_CALL(UVIO_IOCTL_ATT_NR) 71 + #define UVIO_SUPP_ADD_SECRET UVIO_SUPP_CALL(UVIO_IOCTL_ADD_SECRET_NR) 72 + #define UVIO_SUPP_LIST_SECRETS UVIO_SUPP_CALL(UVIO_IOCTL_LIST_SECRETS_NR) 73 + #define UVIO_SUPP_LOCK_SECRETS UVIO_SUPP_CALL(UVIO_IOCTL_LOCK_SECRETS_NR) 77 74 78 75 #endif /* __S390_ASM_UVDEVICE_H */

+75 -33

arch/s390/kernel/uv.c

··· 23 23 int __bootdata_preserved(prot_virt_guest); 24 24 #endif 25 25 26 + /* 27 + * uv_info contains both host and guest information but it's currently only 28 + * expected to be used within modules if it's the KVM module or for 29 + * any PV guest module. 30 + * 31 + * The kernel itself will write these values once in uv_query_info() 32 + * and then make some of them readable via a sysfs interface. 33 + */ 26 34 struct uv_info __bootdata_preserved(uv_info); 35 + EXPORT_SYMBOL(uv_info); 27 36 28 37 #if IS_ENABLED(CONFIG_KVM) 29 38 int __bootdata_preserved(prot_virt_host); 30 39 EXPORT_SYMBOL(prot_virt_host); 31 - EXPORT_SYMBOL(uv_info); 32 40 33 41 static int __init uv_init(phys_addr_t stor_base, unsigned long stor_len) 34 42 { ··· 468 460 469 461 #if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM) 470 462 static ssize_t uv_query_facilities(struct kobject *kobj, 471 - struct kobj_attribute *attr, char *page) 463 + struct kobj_attribute *attr, char *buf) 472 464 { 473 - return scnprintf(page, PAGE_SIZE, "%lx\n%lx\n%lx\n%lx\n", 474 - uv_info.inst_calls_list[0], 475 - uv_info.inst_calls_list[1], 476 - uv_info.inst_calls_list[2], 477 - uv_info.inst_calls_list[3]); 465 + return sysfs_emit(buf, "%lx\n%lx\n%lx\n%lx\n", 466 + uv_info.inst_calls_list[0], 467 + uv_info.inst_calls_list[1], 468 + uv_info.inst_calls_list[2], 469 + uv_info.inst_calls_list[3]); 478 470 } 479 471 480 472 static struct kobj_attribute uv_query_facilities_attr = ··· 499 491 __ATTR(supp_se_hdr_pcf, 0444, uv_query_supp_se_hdr_pcf, NULL); 500 492 501 493 static ssize_t uv_query_dump_cpu_len(struct kobject *kobj, 502 - struct kobj_attribute *attr, char *page) 494 + struct kobj_attribute *attr, char *buf) 503 495 { 504 - return scnprintf(page, PAGE_SIZE, "%lx\n", 505 - uv_info.guest_cpu_stor_len); 496 + return sysfs_emit(buf, "%lx\n", uv_info.guest_cpu_stor_len); 506 497 } 507 498 508 499 static struct kobj_attribute uv_query_dump_cpu_len_attr = 509 500 
__ATTR(uv_query_dump_cpu_len, 0444, uv_query_dump_cpu_len, NULL); 510 501 511 502 static ssize_t uv_query_dump_storage_state_len(struct kobject *kobj, 512 - struct kobj_attribute *attr, char *page) 503 + struct kobj_attribute *attr, char *buf) 513 504 { 514 - return scnprintf(page, PAGE_SIZE, "%lx\n", 515 - uv_info.conf_dump_storage_state_len); 505 + return sysfs_emit(buf, "%lx\n", uv_info.conf_dump_storage_state_len); 516 506 } 517 507 518 508 static struct kobj_attribute uv_query_dump_storage_state_len_attr = 519 509 __ATTR(dump_storage_state_len, 0444, uv_query_dump_storage_state_len, NULL); 520 510 521 511 static ssize_t uv_query_dump_finalize_len(struct kobject *kobj, 522 - struct kobj_attribute *attr, char *page) 512 + struct kobj_attribute *attr, char *buf) 523 513 { 524 - return scnprintf(page, PAGE_SIZE, "%lx\n", 525 - uv_info.conf_dump_finalize_len); 514 + return sysfs_emit(buf, "%lx\n", uv_info.conf_dump_finalize_len); 526 515 } 527 516 528 517 static struct kobj_attribute uv_query_dump_finalize_len_attr = ··· 535 530 __ATTR(feature_indications, 0444, uv_query_feature_indications, NULL); 536 531 537 532 static ssize_t uv_query_max_guest_cpus(struct kobject *kobj, 538 - struct kobj_attribute *attr, char *page) 533 + struct kobj_attribute *attr, char *buf) 539 534 { 540 - return scnprintf(page, PAGE_SIZE, "%d\n", 541 - uv_info.max_guest_cpu_id + 1); 535 + return sysfs_emit(buf, "%d\n", uv_info.max_guest_cpu_id + 1); 542 536 } 543 537 544 538 static struct kobj_attribute uv_query_max_guest_cpus_attr = 545 539 __ATTR(max_cpus, 0444, uv_query_max_guest_cpus, NULL); 546 540 547 541 static ssize_t uv_query_max_guest_vms(struct kobject *kobj, 548 - struct kobj_attribute *attr, char *page) 542 + struct kobj_attribute *attr, char *buf) 549 543 { 550 - return scnprintf(page, PAGE_SIZE, "%d\n", 551 - uv_info.max_num_sec_conf); 544 + return sysfs_emit(buf, "%d\n", uv_info.max_num_sec_conf); 552 545 } 553 546 554 547 static struct kobj_attribute 
uv_query_max_guest_vms_attr = 555 548 __ATTR(max_guests, 0444, uv_query_max_guest_vms, NULL); 556 549 557 550 static ssize_t uv_query_max_guest_addr(struct kobject *kobj, 558 - struct kobj_attribute *attr, char *page) 551 + struct kobj_attribute *attr, char *buf) 559 552 { 560 - return scnprintf(page, PAGE_SIZE, "%lx\n", 561 - uv_info.max_sec_stor_addr); 553 + return sysfs_emit(buf, "%lx\n", uv_info.max_sec_stor_addr); 562 554 } 563 555 564 556 static struct kobj_attribute uv_query_max_guest_addr_attr = 565 557 __ATTR(max_address, 0444, uv_query_max_guest_addr, NULL); 566 558 567 559 static ssize_t uv_query_supp_att_req_hdr_ver(struct kobject *kobj, 568 - struct kobj_attribute *attr, char *page) 560 + struct kobj_attribute *attr, char *buf) 569 561 { 570 - return scnprintf(page, PAGE_SIZE, "%lx\n", uv_info.supp_att_req_hdr_ver); 562 + return sysfs_emit(buf, "%lx\n", uv_info.supp_att_req_hdr_ver); 571 563 } 572 564 573 565 static struct kobj_attribute uv_query_supp_att_req_hdr_ver_attr = 574 566 __ATTR(supp_att_req_hdr_ver, 0444, uv_query_supp_att_req_hdr_ver, NULL); 575 567 576 568 static ssize_t uv_query_supp_att_pflags(struct kobject *kobj, 577 - struct kobj_attribute *attr, char *page) 569 + struct kobj_attribute *attr, char *buf) 578 570 { 579 - return scnprintf(page, PAGE_SIZE, "%lx\n", uv_info.supp_att_pflags); 571 + return sysfs_emit(buf, "%lx\n", uv_info.supp_att_pflags); 580 572 } 581 573 582 574 static struct kobj_attribute uv_query_supp_att_pflags_attr = 583 575 __ATTR(supp_att_pflags, 0444, uv_query_supp_att_pflags, NULL); 576 + 577 + static ssize_t uv_query_supp_add_secret_req_ver(struct kobject *kobj, 578 + struct kobj_attribute *attr, char *buf) 579 + { 580 + return sysfs_emit(buf, "%lx\n", uv_info.supp_add_secret_req_ver); 581 + } 582 + 583 + static struct kobj_attribute uv_query_supp_add_secret_req_ver_attr = 584 + __ATTR(supp_add_secret_req_ver, 0444, uv_query_supp_add_secret_req_ver, NULL); 585 + 586 + static ssize_t 
uv_query_supp_add_secret_pcf(struct kobject *kobj, 587 + struct kobj_attribute *attr, char *buf) 588 + { 589 + return sysfs_emit(buf, "%lx\n", uv_info.supp_add_secret_pcf); 590 + } 591 + 592 + static struct kobj_attribute uv_query_supp_add_secret_pcf_attr = 593 + __ATTR(supp_add_secret_pcf, 0444, uv_query_supp_add_secret_pcf, NULL); 594 + 595 + static ssize_t uv_query_supp_secret_types(struct kobject *kobj, 596 + struct kobj_attribute *attr, char *buf) 597 + { 598 + return sysfs_emit(buf, "%lx\n", uv_info.supp_secret_types); 599 + } 600 + 601 + static struct kobj_attribute uv_query_supp_secret_types_attr = 602 + __ATTR(supp_secret_types, 0444, uv_query_supp_secret_types, NULL); 603 + 604 + static ssize_t uv_query_max_secrets(struct kobject *kobj, 605 + struct kobj_attribute *attr, char *buf) 606 + { 607 + return sysfs_emit(buf, "%d\n", uv_info.max_secrets); 608 + } 609 + 610 + static struct kobj_attribute uv_query_max_secrets_attr = 611 + __ATTR(max_secrets, 0444, uv_query_max_secrets, NULL); 584 612 585 613 static struct attribute *uv_query_attrs[] = { 586 614 &uv_query_facilities_attr.attr, ··· 628 590 &uv_query_dump_cpu_len_attr.attr, 629 591 &uv_query_supp_att_req_hdr_ver_attr.attr, 630 592 &uv_query_supp_att_pflags_attr.attr, 593 + &uv_query_supp_add_secret_req_ver_attr.attr, 594 + &uv_query_supp_add_secret_pcf_attr.attr, 595 + &uv_query_supp_secret_types_attr.attr, 596 + &uv_query_max_secrets_attr.attr, 631 597 NULL, 632 598 }; 633 599 ··· 640 598 }; 641 599 642 600 static ssize_t uv_is_prot_virt_guest(struct kobject *kobj, 643 - struct kobj_attribute *attr, char *page) 601 + struct kobj_attribute *attr, char *buf) 644 602 { 645 603 int val = 0; 646 604 647 605 #ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST 648 606 val = prot_virt_guest; 649 607 #endif 650 - return scnprintf(page, PAGE_SIZE, "%d\n", val); 608 + return sysfs_emit(buf, "%d\n", val); 651 609 } 652 610 653 611 static ssize_t uv_is_prot_virt_host(struct kobject *kobj, 654 - struct kobj_attribute 
*attr, char *page) 612 + struct kobj_attribute *attr, char *buf) 655 613 { 656 614 int val = 0; 657 615 ··· 659 617 val = prot_virt_host; 660 618 #endif 661 619 662 - return scnprintf(page, PAGE_SIZE, "%d\n", val); 620 + return sysfs_emit(buf, "%d\n", val); 663 621 } 664 622 665 623 static struct kobj_attribute uv_prot_virt_guest =

+5 -3

arch/s390/kvm/diag.c

··· 166 166 static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu) 167 167 { 168 168 struct kvm_vcpu *tcpu; 169 + int tcpu_cpu; 169 170 int tid; 170 171 171 172 tid = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; ··· 182 181 goto no_yield; 183 182 184 183 /* target guest VCPU already running */ 185 - if (READ_ONCE(tcpu->cpu) >= 0) { 184 + tcpu_cpu = READ_ONCE(tcpu->cpu); 185 + if (tcpu_cpu >= 0) { 186 186 if (!diag9c_forwarding_hz || diag9c_forwarding_overrun()) 187 187 goto no_yield; 188 188 189 189 /* target host CPU already running */ 190 - if (!vcpu_is_preempted(tcpu->cpu)) 190 + if (!vcpu_is_preempted(tcpu_cpu)) 191 191 goto no_yield; 192 - smp_yield_cpu(tcpu->cpu); 192 + smp_yield_cpu(tcpu_cpu); 193 193 VCPU_EVENT(vcpu, 5, 194 194 "diag time slice end directed to %d: yield forwarded", 195 195 tid);

+4

arch/s390/kvm/kvm-s390.c

··· 2156 2156 ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]); 2157 2157 ofs = 0; 2158 2158 } 2159 + 2160 + if (cur_gfn < ms->base_gfn) 2161 + ofs = 0; 2162 + 2159 2163 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs); 2160 2164 while (ofs >= ms->npages && (mnode = rb_next(mnode))) { 2161 2165 ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);

+4 -2

arch/s390/kvm/vsie.c

··· 177 177 sizeof(struct kvm_s390_apcb0))) 178 178 return -EFAULT; 179 179 180 - bitmap_and(apcb_s, apcb_s, apcb_h, sizeof(struct kvm_s390_apcb0)); 180 + bitmap_and(apcb_s, apcb_s, apcb_h, 181 + BITS_PER_BYTE * sizeof(struct kvm_s390_apcb0)); 181 182 182 183 return 0; 183 184 } ··· 204 203 sizeof(struct kvm_s390_apcb1))) 205 204 return -EFAULT; 206 205 207 - bitmap_and(apcb_s, apcb_s, apcb_h, sizeof(struct kvm_s390_apcb1)); 206 + bitmap_and(apcb_s, apcb_s, apcb_h, 207 + BITS_PER_BYTE * sizeof(struct kvm_s390_apcb1)); 208 208 209 209 return 0; 210 210 }

+1 -1

drivers/s390/char/Kconfig

··· 96 96 config S390_UV_UAPI 97 97 def_tristate m 98 98 prompt "Ultravisor userspace API" 99 - depends on S390 99 + depends on S390 && (KVM || PROTECTED_VIRTUALIZATION_GUEST) 100 100 help 101 101 Selecting exposes parts of the UV interface to userspace 102 102 by providing a misc character device at /dev/uv.

+224 -7

drivers/s390/char/uvdevice.c

··· 32 32 #include <asm/uvdevice.h> 33 33 #include <asm/uv.h> 34 34 35 + #define BIT_UVIO_INTERNAL U32_MAX 36 + /* Mapping from IOCTL-nr to UVC-bit */ 37 + static const u32 ioctl_nr_to_uvc_bit[] __initconst = { 38 + [UVIO_IOCTL_UVDEV_INFO_NR] = BIT_UVIO_INTERNAL, 39 + [UVIO_IOCTL_ATT_NR] = BIT_UVC_CMD_RETR_ATTEST, 40 + [UVIO_IOCTL_ADD_SECRET_NR] = BIT_UVC_CMD_ADD_SECRET, 41 + [UVIO_IOCTL_LIST_SECRETS_NR] = BIT_UVC_CMD_LIST_SECRETS, 42 + [UVIO_IOCTL_LOCK_SECRETS_NR] = BIT_UVC_CMD_LOCK_SECRETS, 43 + }; 44 + 45 + static_assert(ARRAY_SIZE(ioctl_nr_to_uvc_bit) == UVIO_IOCTL_NUM_IOCTLS); 46 + 47 + static struct uvio_uvdev_info uvdev_info = { 48 + .supp_uvio_cmds = GENMASK_ULL(UVIO_IOCTL_NUM_IOCTLS - 1, 0), 49 + }; 50 + 51 + static void __init set_supp_uv_cmds(unsigned long *supp_uv_cmds) 52 + { 53 + int i; 54 + 55 + for (i = 0; i < UVIO_IOCTL_NUM_IOCTLS; i++) { 56 + if (ioctl_nr_to_uvc_bit[i] == BIT_UVIO_INTERNAL) 57 + continue; 58 + if (!test_bit_inv(ioctl_nr_to_uvc_bit[i], uv_info.inst_calls_list)) 59 + continue; 60 + __set_bit(i, supp_uv_cmds); 61 + } 62 + } 63 + 64 + /** 65 + * uvio_uvdev_info() - get information about the uvdevice 66 + * 67 + * @uv_ioctl: ioctl control block 68 + * 69 + * Lists all IOCTLs that are supported by this uvdevice 70 + */ 71 + static int uvio_uvdev_info(struct uvio_ioctl_cb *uv_ioctl) 72 + { 73 + void __user *user_buf_arg = (void __user *)uv_ioctl->argument_addr; 74 + 75 + if (uv_ioctl->argument_len < sizeof(uvdev_info)) 76 + return -EINVAL; 77 + if (copy_to_user(user_buf_arg, &uvdev_info, sizeof(uvdev_info))) 78 + return -EFAULT; 79 + 80 + uv_ioctl->uv_rc = UVC_RC_EXECUTED; 81 + return 0; 82 + } 83 + 35 84 static int uvio_build_uvcb_attest(struct uv_cb_attest *uvcb_attest, u8 *arcb, 36 85 u8 *meas, u8 *add_data, struct uvio_attest *uvio_attest) 37 86 { ··· 234 185 return ret; 235 186 } 236 187 237 - static int uvio_copy_and_check_ioctl(struct uvio_ioctl_cb *ioctl, void __user *argp) 188 + /** uvio_add_secret() - perform an Add Secret UVC 
189 + * 190 + * @uv_ioctl: ioctl control block 191 + * 192 + * uvio_add_secret() performs the Add Secret Ultravisor Call. 193 + * 194 + * The given userspace argument address and size are verified to be 195 + * valid but every other check is made by the Ultravisor 196 + * (UV). Therefore UV errors won't result in a negative return 197 + * value. The request is then copied to kernelspace, the UV-call is 198 + * performed and the results are copied back to userspace. 199 + * 200 + * The argument has to point to an Add Secret Request Control Block 201 + * which is an encrypted and cryptographically verified request that 202 + * inserts a protected guest's secrets into the Ultravisor for later 203 + * use. 204 + * 205 + * If the Add Secret UV facility is not present, UV will return 206 + * invalid command rc. This won't be fenced in the driver and does not 207 + * result in a negative return value. 208 + * 209 + * Context: might sleep 210 + * 211 + * Return: 0 on success or a negative error code on error. 
212 + */ 213 + static int uvio_add_secret(struct uvio_ioctl_cb *uv_ioctl) 238 214 { 215 + void __user *user_buf_arg = (void __user *)uv_ioctl->argument_addr; 216 + struct uv_cb_guest_addr uvcb = { 217 + .header.len = sizeof(uvcb), 218 + .header.cmd = UVC_CMD_ADD_SECRET, 219 + }; 220 + void *asrcb = NULL; 221 + int ret; 222 + 223 + if (uv_ioctl->argument_len > UVIO_ADD_SECRET_MAX_LEN) 224 + return -EINVAL; 225 + if (uv_ioctl->argument_len == 0) 226 + return -EINVAL; 227 + 228 + asrcb = kvzalloc(uv_ioctl->argument_len, GFP_KERNEL); 229 + if (!asrcb) 230 + return -ENOMEM; 231 + 232 + ret = -EFAULT; 233 + if (copy_from_user(asrcb, user_buf_arg, uv_ioctl->argument_len)) 234 + goto out; 235 + 236 + ret = 0; 237 + uvcb.addr = (u64)asrcb; 238 + uv_call_sched(0, (u64)&uvcb); 239 + uv_ioctl->uv_rc = uvcb.header.rc; 240 + uv_ioctl->uv_rrc = uvcb.header.rrc; 241 + 242 + out: 243 + kvfree(asrcb); 244 + return ret; 245 + } 246 + 247 + /** uvio_list_secrets() - perform a List Secret UVC 248 + * @uv_ioctl: ioctl control block 249 + * 250 + * uvio_list_secrets() performs the List Secret Ultravisor Call. It verifies 251 + * that the given userspace argument address is valid and its size is sane. 252 + * Every other check is made by the Ultravisor (UV) and won't result in a 253 + * negative return value. It builds the request, performs the UV-call, and 254 + * copies the result to userspace. 255 + * 256 + * The argument specifies the location for the result of the UV-Call. 257 + * 258 + * If the List Secrets UV facility is not present, UV will return invalid 259 + * command rc. This won't be fenced in the driver and does not result in a 260 + * negative return value. 261 + * 262 + * Context: might sleep 263 + * 264 + * Return: 0 on success or a negative error code on error. 
265 + */ 266 + static int uvio_list_secrets(struct uvio_ioctl_cb *uv_ioctl) 267 + { 268 + void __user *user_buf_arg = (void __user *)uv_ioctl->argument_addr; 269 + struct uv_cb_guest_addr uvcb = { 270 + .header.len = sizeof(uvcb), 271 + .header.cmd = UVC_CMD_LIST_SECRETS, 272 + }; 273 + void *secrets = NULL; 274 + int ret = 0; 275 + 276 + if (uv_ioctl->argument_len != UVIO_LIST_SECRETS_LEN) 277 + return -EINVAL; 278 + 279 + secrets = kvzalloc(UVIO_LIST_SECRETS_LEN, GFP_KERNEL); 280 + if (!secrets) 281 + return -ENOMEM; 282 + 283 + uvcb.addr = (u64)secrets; 284 + uv_call_sched(0, (u64)&uvcb); 285 + uv_ioctl->uv_rc = uvcb.header.rc; 286 + uv_ioctl->uv_rrc = uvcb.header.rrc; 287 + 288 + if (copy_to_user(user_buf_arg, secrets, UVIO_LIST_SECRETS_LEN)) 289 + ret = -EFAULT; 290 + 291 + kvfree(secrets); 292 + return ret; 293 + } 294 + 295 + /** uvio_lock_secrets() - perform a Lock Secret Store UVC 296 + * @ioctl: ioctl control block 297 + * 298 + * uvio_lock_secrets() performs the Lock Secret Store Ultravisor Call. It 299 + * performs the UV-call and copies the return codes to the ioctl control block. 300 + * After this call was dispatched successfully every following Add Secret UVC 301 + * and Lock Secrets UVC will fail with return code 0x102. 302 + * 303 + * The argument address and size must be 0. 304 + * 305 + * If the Lock Secrets UV facility is not present, UV will return invalid 306 + * command rc. This won't be fenced in the driver and does not result in a 307 + * negative return value. 308 + * 309 + * Context: might sleep 310 + * 311 + * Return: 0 on success or a negative error code on error. 
312 + */ 313 + static int uvio_lock_secrets(struct uvio_ioctl_cb *ioctl) 314 + { 315 + struct uv_cb_nodata uvcb = { 316 + .header.len = sizeof(uvcb), 317 + .header.cmd = UVC_CMD_LOCK_SECRETS, 318 + }; 319 + 320 + if (ioctl->argument_addr || ioctl->argument_len) 321 + return -EINVAL; 322 + 323 + uv_call(0, (u64)&uvcb); 324 + ioctl->uv_rc = uvcb.header.rc; 325 + ioctl->uv_rrc = uvcb.header.rrc; 326 + 327 + return 0; 328 + } 329 + 330 + static int uvio_copy_and_check_ioctl(struct uvio_ioctl_cb *ioctl, void __user *argp, 331 + unsigned long cmd) 332 + { 333 + u8 nr = _IOC_NR(cmd); 334 + 335 + if (_IOC_DIR(cmd) != (_IOC_READ | _IOC_WRITE)) 336 + return -ENOIOCTLCMD; 337 + if (_IOC_TYPE(cmd) != UVIO_TYPE_UVC) 338 + return -ENOIOCTLCMD; 339 + if (nr >= UVIO_IOCTL_NUM_IOCTLS) 340 + return -ENOIOCTLCMD; 341 + if (_IOC_SIZE(cmd) != sizeof(*ioctl)) 342 + return -ENOIOCTLCMD; 239 343 if (copy_from_user(ioctl, argp, sizeof(*ioctl))) 240 344 return -EFAULT; 241 345 if (ioctl->flags != 0) ··· 396 194 if (memchr_inv(ioctl->reserved14, 0, sizeof(ioctl->reserved14))) 397 195 return -EINVAL; 398 196 399 - return 0; 197 + return nr; 400 198 } 401 199 402 200 /* ··· 407 205 void __user *argp = (void __user *)arg; 408 206 struct uvio_ioctl_cb uv_ioctl = { }; 409 207 long ret; 208 + int nr; 410 209 411 - switch (cmd) { 412 - case UVIO_IOCTL_ATT: 413 - ret = uvio_copy_and_check_ioctl(&uv_ioctl, argp); 414 - if (ret) 415 - return ret; 210 + nr = uvio_copy_and_check_ioctl(&uv_ioctl, argp, cmd); 211 + if (nr < 0) 212 + return nr; 213 + 214 + switch (nr) { 215 + case UVIO_IOCTL_UVDEV_INFO_NR: 216 + ret = uvio_uvdev_info(&uv_ioctl); 217 + break; 218 + case UVIO_IOCTL_ATT_NR: 416 219 ret = uvio_attestation(&uv_ioctl); 220 + break; 221 + case UVIO_IOCTL_ADD_SECRET_NR: 222 + ret = uvio_add_secret(&uv_ioctl); 223 + break; 224 + case UVIO_IOCTL_LIST_SECRETS_NR: 225 + ret = uvio_list_secrets(&uv_ioctl); 226 + break; 227 + case UVIO_IOCTL_LOCK_SECRETS_NR: 228 + ret = uvio_lock_secrets(&uv_ioctl); 417 
229 break; 418 230 default: 419 231 ret = -ENOIOCTLCMD; ··· 461 245 462 246 static int __init uvio_dev_init(void) 463 247 { 248 + set_supp_uv_cmds((unsigned long *)&uvdev_info.supp_uv_cmds); 464 249 return misc_register(&uvio_dev_miscdev); 465 250 } 466 251

+1

tools/testing/selftests/kvm/Makefile

··· 164 164 TEST_GEN_PROGS_s390x += s390x/resets 165 165 TEST_GEN_PROGS_s390x += s390x/sync_regs_test 166 166 TEST_GEN_PROGS_s390x += s390x/tprot 167 + TEST_GEN_PROGS_s390x += s390x/cmma_test 167 168 TEST_GEN_PROGS_s390x += demand_paging_test 168 169 TEST_GEN_PROGS_s390x += dirty_log_test 169 170 TEST_GEN_PROGS_s390x += kvm_create_max_vcpus

+700

tools/testing/selftests/kvm/s390x/cmma_test.c

··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * Test for s390x CMMA migration 4 + * 5 + * Copyright IBM Corp. 2023 6 + * 7 + * Authors: 8 + * Nico Boehr <nrb@linux.ibm.com> 9 + */ 10 + 11 + #define _GNU_SOURCE /* for program_invocation_short_name */ 12 + #include <fcntl.h> 13 + #include <stdio.h> 14 + #include <stdlib.h> 15 + #include <string.h> 16 + #include <sys/ioctl.h> 17 + 18 + #include "test_util.h" 19 + #include "kvm_util.h" 20 + #include "kselftest.h" 21 + 22 + #define MAIN_PAGE_COUNT 512 23 + 24 + #define TEST_DATA_PAGE_COUNT 512 25 + #define TEST_DATA_MEMSLOT 1 26 + #define TEST_DATA_START_GFN 4096 27 + 28 + #define TEST_DATA_TWO_PAGE_COUNT 256 29 + #define TEST_DATA_TWO_MEMSLOT 2 30 + #define TEST_DATA_TWO_START_GFN 8192 31 + 32 + static char cmma_value_buf[MAIN_PAGE_COUNT + TEST_DATA_PAGE_COUNT]; 33 + 34 + /** 35 + * Dirty CMMA attributes of exactly one page in the TEST_DATA memslot, 36 + * so use_cmma goes on and the CMMA related ioctls do something. 37 + */ 38 + static void guest_do_one_essa(void) 39 + { 40 + asm volatile( 41 + /* load TEST_DATA_START_GFN into r1 */ 42 + " llilf 1,%[start_gfn]\n" 43 + /* calculate the address from the gfn */ 44 + " sllg 1,1,12(0)\n" 45 + /* set the first page in TEST_DATA memslot to STABLE */ 46 + " .insn rrf,0xb9ab0000,2,1,1,0\n" 47 + /* hypercall */ 48 + " diag 0,0,0x501\n" 49 + "0: j 0b" 50 + : 51 + : [start_gfn] "L"(TEST_DATA_START_GFN) 52 + : "r1", "r2", "memory", "cc" 53 + ); 54 + } 55 + 56 + /** 57 + * Touch CMMA attributes of all pages in TEST_DATA memslot. Set them to stable 58 + * state. 
59 + */ 60 + static void guest_dirty_test_data(void) 61 + { 62 + asm volatile( 63 + /* r1 = TEST_DATA_START_GFN */ 64 + " xgr 1,1\n" 65 + " llilf 1,%[start_gfn]\n" 66 + /* r5 = TEST_DATA_PAGE_COUNT */ 67 + " lghi 5,%[page_count]\n" 68 + /* r5 += r1 */ 69 + "2: agfr 5,1\n" 70 + /* r2 = r1 << 12 */ 71 + "1: sllg 2,1,12(0)\n" 72 + /* essa(r4, r2, SET_STABLE) */ 73 + " .insn rrf,0xb9ab0000,4,2,1,0\n" 74 + /* i++ */ 75 + " agfi 1,1\n" 76 + /* if r1 < r5 goto 1 */ 77 + " cgrjl 1,5,1b\n" 78 + /* hypercall */ 79 + " diag 0,0,0x501\n" 80 + "0: j 0b" 81 + : 82 + : [start_gfn] "L"(TEST_DATA_START_GFN), 83 + [page_count] "L"(TEST_DATA_PAGE_COUNT) 84 + : 85 + /* the counter in our loop over the pages */ 86 + "r1", 87 + /* the calculated page physical address */ 88 + "r2", 89 + /* ESSA output register */ 90 + "r4", 91 + /* last page */ 92 + "r5", 93 + "cc", "memory" 94 + ); 95 + } 96 + 97 + static struct kvm_vm *create_vm(void) 98 + { 99 + return ____vm_create(VM_MODE_DEFAULT); 100 + } 101 + 102 + static void create_main_memslot(struct kvm_vm *vm) 103 + { 104 + int i; 105 + 106 + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, MAIN_PAGE_COUNT, 0); 107 + /* set the array of memslots to zero like __vm_create does */ 108 + for (i = 0; i < NR_MEM_REGIONS; i++) 109 + vm->memslots[i] = 0; 110 + } 111 + 112 + static void create_test_memslot(struct kvm_vm *vm) 113 + { 114 + vm_userspace_mem_region_add(vm, 115 + VM_MEM_SRC_ANONYMOUS, 116 + TEST_DATA_START_GFN << vm->page_shift, 117 + TEST_DATA_MEMSLOT, 118 + TEST_DATA_PAGE_COUNT, 119 + 0 120 + ); 121 + vm->memslots[MEM_REGION_TEST_DATA] = TEST_DATA_MEMSLOT; 122 + } 123 + 124 + static void create_memslots(struct kvm_vm *vm) 125 + { 126 + /* 127 + * Our VM has the following memory layout: 128 + * +------+---------------------------+ 129 + * | GFN | Memslot | 130 + * +------+---------------------------+ 131 + * | 0 | | 132 + * | ... | MAIN (Code, Stack, ...) 
| 133 + * | 511 | | 134 + * +------+---------------------------+ 135 + * | 4096 | | 136 + * | ... | TEST_DATA | 137 + * | 4607 | | 138 + * +------+---------------------------+ 139 + */ 140 + create_main_memslot(vm); 141 + create_test_memslot(vm); 142 + } 143 + 144 + static void finish_vm_setup(struct kvm_vm *vm) 145 + { 146 + struct userspace_mem_region *slot0; 147 + 148 + kvm_vm_elf_load(vm, program_invocation_name); 149 + 150 + slot0 = memslot2region(vm, 0); 151 + ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size); 152 + 153 + kvm_arch_vm_post_create(vm); 154 + } 155 + 156 + static struct kvm_vm *create_vm_two_memslots(void) 157 + { 158 + struct kvm_vm *vm; 159 + 160 + vm = create_vm(); 161 + 162 + create_memslots(vm); 163 + 164 + finish_vm_setup(vm); 165 + 166 + return vm; 167 + } 168 + 169 + static void enable_cmma(struct kvm_vm *vm) 170 + { 171 + int r; 172 + 173 + r = __kvm_device_attr_set(vm->fd, KVM_S390_VM_MEM_CTRL, KVM_S390_VM_MEM_ENABLE_CMMA, NULL); 174 + TEST_ASSERT(!r, "enabling cmma failed r=%d errno=%d", r, errno); 175 + } 176 + 177 + static void enable_dirty_tracking(struct kvm_vm *vm) 178 + { 179 + vm_mem_region_set_flags(vm, 0, KVM_MEM_LOG_DIRTY_PAGES); 180 + vm_mem_region_set_flags(vm, TEST_DATA_MEMSLOT, KVM_MEM_LOG_DIRTY_PAGES); 181 + } 182 + 183 + static int __enable_migration_mode(struct kvm_vm *vm) 184 + { 185 + return __kvm_device_attr_set(vm->fd, 186 + KVM_S390_VM_MIGRATION, 187 + KVM_S390_VM_MIGRATION_START, 188 + NULL 189 + ); 190 + } 191 + 192 + static void enable_migration_mode(struct kvm_vm *vm) 193 + { 194 + int r = __enable_migration_mode(vm); 195 + 196 + TEST_ASSERT(!r, "enabling migration mode failed r=%d errno=%d", r, errno); 197 + } 198 + 199 + static bool is_migration_mode_on(struct kvm_vm *vm) 200 + { 201 + u64 out; 202 + int r; 203 + 204 + r = __kvm_device_attr_get(vm->fd, 205 + KVM_S390_VM_MIGRATION, 206 + KVM_S390_VM_MIGRATION_STATUS, 207 + &out 208 + ); 209 + TEST_ASSERT(!r, "getting migration mode 
status failed r=%d errno=%d", r, errno); 210 + return out; 211 + } 212 + 213 + static int vm_get_cmma_bits(struct kvm_vm *vm, u64 flags, int *errno_out) 214 + { 215 + struct kvm_s390_cmma_log args; 216 + int rc; 217 + 218 + errno = 0; 219 + 220 + args = (struct kvm_s390_cmma_log){ 221 + .start_gfn = 0, 222 + .count = sizeof(cmma_value_buf), 223 + .flags = flags, 224 + .values = (__u64)&cmma_value_buf[0] 225 + }; 226 + rc = __vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args); 227 + 228 + *errno_out = errno; 229 + return rc; 230 + } 231 + 232 + static void test_get_cmma_basic(void) 233 + { 234 + struct kvm_vm *vm = create_vm_two_memslots(); 235 + struct kvm_vcpu *vcpu; 236 + int rc, errno_out; 237 + 238 + /* GET_CMMA_BITS without CMMA enabled should fail */ 239 + rc = vm_get_cmma_bits(vm, 0, &errno_out); 240 + ASSERT_EQ(rc, -1); 241 + ASSERT_EQ(errno_out, ENXIO); 242 + 243 + enable_cmma(vm); 244 + vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa); 245 + 246 + vcpu_run(vcpu); 247 + 248 + /* GET_CMMA_BITS without migration mode and without peeking should fail */ 249 + rc = vm_get_cmma_bits(vm, 0, &errno_out); 250 + ASSERT_EQ(rc, -1); 251 + ASSERT_EQ(errno_out, EINVAL); 252 + 253 + /* GET_CMMA_BITS without migration mode and with peeking should work */ 254 + rc = vm_get_cmma_bits(vm, KVM_S390_CMMA_PEEK, &errno_out); 255 + ASSERT_EQ(rc, 0); 256 + ASSERT_EQ(errno_out, 0); 257 + 258 + enable_dirty_tracking(vm); 259 + enable_migration_mode(vm); 260 + 261 + /* GET_CMMA_BITS with invalid flags */ 262 + rc = vm_get_cmma_bits(vm, 0xfeedc0fe, &errno_out); 263 + ASSERT_EQ(rc, -1); 264 + ASSERT_EQ(errno_out, EINVAL); 265 + 266 + kvm_vm_free(vm); 267 + } 268 + 269 + static void assert_exit_was_hypercall(struct kvm_vcpu *vcpu) 270 + { 271 + ASSERT_EQ(vcpu->run->exit_reason, 13); 272 + ASSERT_EQ(vcpu->run->s390_sieic.icptcode, 4); 273 + ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x8300); 274 + ASSERT_EQ(vcpu->run->s390_sieic.ipb, 0x5010000); 275 + } 276 + 277 + static void test_migration_mode(void) 
278 + { 279 + struct kvm_vm *vm = create_vm(); 280 + struct kvm_vcpu *vcpu; 281 + u64 orig_psw; 282 + int rc; 283 + 284 + /* enabling migration mode on a VM without memory should fail */ 285 + rc = __enable_migration_mode(vm); 286 + ASSERT_EQ(rc, -1); 287 + ASSERT_EQ(errno, EINVAL); 288 + TEST_ASSERT(!is_migration_mode_on(vm), "migration mode should still be off"); 289 + errno = 0; 290 + 291 + create_memslots(vm); 292 + finish_vm_setup(vm); 293 + 294 + enable_cmma(vm); 295 + vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa); 296 + orig_psw = vcpu->run->psw_addr; 297 + 298 + /* 299 + * Execute one essa instruction in the guest. Otherwise the guest will 300 + * not have use_cmm enabled and GET_CMMA_BITS will return no pages. 301 + */ 302 + vcpu_run(vcpu); 303 + assert_exit_was_hypercall(vcpu); 304 + 305 + /* migration mode when memslots have dirty tracking off should fail */ 306 + rc = __enable_migration_mode(vm); 307 + ASSERT_EQ(rc, -1); 308 + ASSERT_EQ(errno, EINVAL); 309 + TEST_ASSERT(!is_migration_mode_on(vm), "migration mode should still be off"); 310 + errno = 0; 311 + 312 + /* enable dirty tracking */ 313 + enable_dirty_tracking(vm); 314 + 315 + /* enabling migration mode should work now */ 316 + rc = __enable_migration_mode(vm); 317 + ASSERT_EQ(rc, 0); 318 + TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on"); 319 + errno = 0; 320 + 321 + /* execute another ESSA instruction to see this goes fine */ 322 + vcpu->run->psw_addr = orig_psw; 323 + vcpu_run(vcpu); 324 + assert_exit_was_hypercall(vcpu); 325 + 326 + /* 327 + * With migration mode on, create a new memslot with dirty tracking off. 328 + * This should turn off migration mode. 
329 + */ 330 + TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on"); 331 + vm_userspace_mem_region_add(vm, 332 + VM_MEM_SRC_ANONYMOUS, 333 + TEST_DATA_TWO_START_GFN << vm->page_shift, 334 + TEST_DATA_TWO_MEMSLOT, 335 + TEST_DATA_TWO_PAGE_COUNT, 336 + 0 337 + ); 338 + TEST_ASSERT(!is_migration_mode_on(vm), 339 + "creating memslot without dirty tracking turns off migration mode" 340 + ); 341 + 342 + /* ESSA instructions should still execute fine */ 343 + vcpu->run->psw_addr = orig_psw; 344 + vcpu_run(vcpu); 345 + assert_exit_was_hypercall(vcpu); 346 + 347 + /* 348 + * Turn on dirty tracking on the new memslot. 349 + * It should be possible to turn migration mode back on again. 350 + */ 351 + vm_mem_region_set_flags(vm, TEST_DATA_TWO_MEMSLOT, KVM_MEM_LOG_DIRTY_PAGES); 352 + rc = __enable_migration_mode(vm); 353 + ASSERT_EQ(rc, 0); 354 + TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on"); 355 + errno = 0; 356 + 357 + /* 358 + * Turn off dirty tracking again, this time with just a flag change. 359 + * Again, migration mode should turn off. 360 + */ 361 + TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on"); 362 + vm_mem_region_set_flags(vm, TEST_DATA_TWO_MEMSLOT, 0); 363 + TEST_ASSERT(!is_migration_mode_on(vm), 364 + "disabling dirty tracking should turn off migration mode" 365 + ); 366 + 367 + /* ESSA instructions should still execute fine */ 368 + vcpu->run->psw_addr = orig_psw; 369 + vcpu_run(vcpu); 370 + assert_exit_was_hypercall(vcpu); 371 + 372 + kvm_vm_free(vm); 373 + } 374 + 375 + /** 376 + * Given a VM with the MAIN and TEST_DATA memslot, assert that both slots have 377 + * CMMA attributes of all pages in both memslots and nothing more dirty. 378 + * This has the useful side effect of ensuring nothing is CMMA dirty after this 379 + * function. 
380 + */ 381 + static void assert_all_slots_cmma_dirty(struct kvm_vm *vm) 382 + { 383 + struct kvm_s390_cmma_log args; 384 + 385 + /* 386 + * First iteration - everything should be dirty. 387 + * Start at the main memslot... 388 + */ 389 + args = (struct kvm_s390_cmma_log){ 390 + .start_gfn = 0, 391 + .count = sizeof(cmma_value_buf), 392 + .flags = 0, 393 + .values = (__u64)&cmma_value_buf[0] 394 + }; 395 + memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf)); 396 + vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args); 397 + ASSERT_EQ(args.count, MAIN_PAGE_COUNT); 398 + ASSERT_EQ(args.remaining, TEST_DATA_PAGE_COUNT); 399 + ASSERT_EQ(args.start_gfn, 0); 400 + 401 + /* ...and then - after a hole - the TEST_DATA memslot should follow */ 402 + args = (struct kvm_s390_cmma_log){ 403 + .start_gfn = MAIN_PAGE_COUNT, 404 + .count = sizeof(cmma_value_buf), 405 + .flags = 0, 406 + .values = (__u64)&cmma_value_buf[0] 407 + }; 408 + memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf)); 409 + vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args); 410 + ASSERT_EQ(args.count, TEST_DATA_PAGE_COUNT); 411 + ASSERT_EQ(args.start_gfn, TEST_DATA_START_GFN); 412 + ASSERT_EQ(args.remaining, 0); 413 + 414 + /* ...and nothing else should be there */ 415 + args = (struct kvm_s390_cmma_log){ 416 + .start_gfn = TEST_DATA_START_GFN + TEST_DATA_PAGE_COUNT, 417 + .count = sizeof(cmma_value_buf), 418 + .flags = 0, 419 + .values = (__u64)&cmma_value_buf[0] 420 + }; 421 + memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf)); 422 + vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args); 423 + ASSERT_EQ(args.count, 0); 424 + ASSERT_EQ(args.start_gfn, 0); 425 + ASSERT_EQ(args.remaining, 0); 426 + } 427 + 428 + /** 429 + * Given a VM, assert no pages are CMMA dirty. 430 + */ 431 + static void assert_no_pages_cmma_dirty(struct kvm_vm *vm) 432 + { 433 + struct kvm_s390_cmma_log args; 434 + 435 + /* If we start from GFN 0 again, nothing should be dirty. 
*/ 436 + args = (struct kvm_s390_cmma_log){ 437 + .start_gfn = 0, 438 + .count = sizeof(cmma_value_buf), 439 + .flags = 0, 440 + .values = (__u64)&cmma_value_buf[0] 441 + }; 442 + memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf)); 443 + vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args); 444 + if (args.count || args.remaining || args.start_gfn) 445 + TEST_FAIL("pages are still dirty start_gfn=0x%llx count=%u remaining=%llu", 446 + args.start_gfn, 447 + args.count, 448 + args.remaining 449 + ); 450 + } 451 + 452 + static void test_get_inital_dirty(void) 453 + { 454 + struct kvm_vm *vm = create_vm_two_memslots(); 455 + struct kvm_vcpu *vcpu; 456 + 457 + enable_cmma(vm); 458 + vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa); 459 + 460 + /* 461 + * Execute one essa instruction in the guest. Otherwise the guest will 462 + * not have use_cmm enabled and GET_CMMA_BITS will return no pages. 463 + */ 464 + vcpu_run(vcpu); 465 + assert_exit_was_hypercall(vcpu); 466 + 467 + enable_dirty_tracking(vm); 468 + enable_migration_mode(vm); 469 + 470 + assert_all_slots_cmma_dirty(vm); 471 + 472 + /* Start from the beginning again and make sure nothing else is dirty */ 473 + assert_no_pages_cmma_dirty(vm); 474 + 475 + kvm_vm_free(vm); 476 + } 477 + 478 + static void query_cmma_range(struct kvm_vm *vm, 479 + u64 start_gfn, u64 gfn_count, 480 + struct kvm_s390_cmma_log *res_out) 481 + { 482 + *res_out = (struct kvm_s390_cmma_log){ 483 + .start_gfn = start_gfn, 484 + .count = gfn_count, 485 + .flags = 0, 486 + .values = (__u64)&cmma_value_buf[0] 487 + }; 488 + memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf)); 489 + vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, res_out); 490 + } 491 + 492 + /** 493 + * Assert the given cmma_log struct that was executed by query_cmma_range() 494 + * indicates the first dirty gfn is at first_dirty_gfn and contains exactly 495 + * dirty_gfn_count CMMA values. 
496 + */ 497 + static void assert_cmma_dirty(u64 first_dirty_gfn, 498 + u64 dirty_gfn_count, 499 + const struct kvm_s390_cmma_log *res) 500 + { 501 + ASSERT_EQ(res->start_gfn, first_dirty_gfn); 502 + ASSERT_EQ(res->count, dirty_gfn_count); 503 + for (size_t i = 0; i < dirty_gfn_count; i++) 504 + ASSERT_EQ(cmma_value_buf[0], 0x0); /* stable state */ 505 + ASSERT_EQ(cmma_value_buf[dirty_gfn_count], 0xff); /* not touched */ 506 + } 507 + 508 + static void test_get_skip_holes(void) 509 + { 510 + size_t gfn_offset; 511 + struct kvm_vm *vm = create_vm_two_memslots(); 512 + struct kvm_s390_cmma_log log; 513 + struct kvm_vcpu *vcpu; 514 + u64 orig_psw; 515 + 516 + enable_cmma(vm); 517 + vcpu = vm_vcpu_add(vm, 1, guest_dirty_test_data); 518 + 519 + orig_psw = vcpu->run->psw_addr; 520 + 521 + /* 522 + * Execute some essa instructions in the guest. Otherwise the guest will 523 + * not have use_cmm enabled and GET_CMMA_BITS will return no pages. 524 + */ 525 + vcpu_run(vcpu); 526 + assert_exit_was_hypercall(vcpu); 527 + 528 + enable_dirty_tracking(vm); 529 + enable_migration_mode(vm); 530 + 531 + /* un-dirty all pages */ 532 + assert_all_slots_cmma_dirty(vm); 533 + 534 + /* Then, dirty just the TEST_DATA memslot */ 535 + vcpu->run->psw_addr = orig_psw; 536 + vcpu_run(vcpu); 537 + 538 + gfn_offset = TEST_DATA_START_GFN; 539 + /** 540 + * Query CMMA attributes of one page, starting at page 0. Since the 541 + * main memslot was not touched by the VM, this should yield the first 542 + * page of the TEST_DATA memslot. 543 + * The dirty bitmap should now look like this: 544 + * 0: not dirty 545 + * [0x1, 0x200): dirty 546 + */ 547 + query_cmma_range(vm, 0, 1, &log); 548 + assert_cmma_dirty(gfn_offset, 1, &log); 549 + gfn_offset++; 550 + 551 + /** 552 + * Query CMMA attributes of 32 (0x20) pages past the end of the TEST_DATA 553 + * memslot. This should wrap back to the beginning of the TEST_DATA 554 + * memslot, page 1. 
555 + * The dirty bitmap should now look like this: 556 + * [0, 0x21): not dirty 557 + * [0x21, 0x200): dirty 558 + */ 559 + query_cmma_range(vm, TEST_DATA_START_GFN + TEST_DATA_PAGE_COUNT, 0x20, &log); 560 + assert_cmma_dirty(gfn_offset, 0x20, &log); 561 + gfn_offset += 0x20; 562 + 563 + /* Skip 32 pages */ 564 + gfn_offset += 0x20; 565 + 566 + /** 567 + * After skipping 32 pages, query the next 32 (0x20) pages. 568 + * The dirty bitmap should now look like this: 569 + * [0, 0x21): not dirty 570 + * [0x21, 0x41): dirty 571 + * [0x41, 0x61): not dirty 572 + * [0x61, 0x200): dirty 573 + */ 574 + query_cmma_range(vm, gfn_offset, 0x20, &log); 575 + assert_cmma_dirty(gfn_offset, 0x20, &log); 576 + gfn_offset += 0x20; 577 + 578 + /** 579 + * Query 1 page from the beginning of the TEST_DATA memslot. This should 580 + * yield page 0x21. 581 + * The dirty bitmap should now look like this: 582 + * [0, 0x22): not dirty 583 + * [0x22, 0x41): dirty 584 + * [0x41, 0x61): not dirty 585 + * [0x61, 0x200): dirty 586 + */ 587 + query_cmma_range(vm, TEST_DATA_START_GFN, 1, &log); 588 + assert_cmma_dirty(TEST_DATA_START_GFN + 0x21, 1, &log); 589 + gfn_offset++; 590 + 591 + /** 592 + * Query 15 (0xF) pages from page 0x23 in TEST_DATA memslot. 593 + * This should yield pages [0x23, 0x33). 594 + * The dirty bitmap should now look like this: 595 + * [0, 0x22): not dirty 596 + * 0x22: dirty 597 + * [0x23, 0x33): not dirty 598 + * [0x33, 0x41): dirty 599 + * [0x41, 0x61): not dirty 600 + * [0x61, 0x200): dirty 601 + */ 602 + gfn_offset = TEST_DATA_START_GFN + 0x23; 603 + query_cmma_range(vm, gfn_offset, 15, &log); 604 + assert_cmma_dirty(gfn_offset, 15, &log); 605 + 606 + /** 607 + * Query 17 (0x11) pages from page 0x22 in TEST_DATA memslot. 
608 + * This should yield page [0x22, 0x33) 609 + * The dirty bitmap should now look like this: 610 + * [0, 0x33): not dirty 611 + * [0x33, 0x41): dirty 612 + * [0x41, 0x61): not dirty 613 + * [0x61, 0x200): dirty 614 + */ 615 + gfn_offset = TEST_DATA_START_GFN + 0x22; 616 + query_cmma_range(vm, gfn_offset, 17, &log); 617 + assert_cmma_dirty(gfn_offset, 17, &log); 618 + 619 + /** 620 + * Query 25 (0x19) pages from page 0x40 in TEST_DATA memslot. 621 + * This should yield page 0x40 and nothing more, since there are more 622 + * than 16 non-dirty pages after page 0x40. 623 + * The dirty bitmap should now look like this: 624 + * [0, 0x33): not dirty 625 + * [0x33, 0x40): dirty 626 + * [0x40, 0x61): not dirty 627 + * [0x61, 0x200): dirty 628 + */ 629 + gfn_offset = TEST_DATA_START_GFN + 0x40; 630 + query_cmma_range(vm, gfn_offset, 25, &log); 631 + assert_cmma_dirty(gfn_offset, 1, &log); 632 + 633 + /** 634 + * Query pages [0x33, 0x40). 635 + * The dirty bitmap should now look like this: 636 + * [0, 0x61): not dirty 637 + * [0x61, 0x200): dirty 638 + */ 639 + gfn_offset = TEST_DATA_START_GFN + 0x33; 640 + query_cmma_range(vm, gfn_offset, 0x40 - 0x33, &log); 641 + assert_cmma_dirty(gfn_offset, 0x40 - 0x33, &log); 642 + 643 + /** 644 + * Query the remaining pages [0x61, 0x200). 
645 + */ 646 + gfn_offset = TEST_DATA_START_GFN; 647 + query_cmma_range(vm, gfn_offset, TEST_DATA_PAGE_COUNT - 0x61, &log); 648 + assert_cmma_dirty(TEST_DATA_START_GFN + 0x61, TEST_DATA_PAGE_COUNT - 0x61, &log); 649 + 650 + assert_no_pages_cmma_dirty(vm); 651 + } 652 + 653 + struct testdef { 654 + const char *name; 655 + void (*test)(void); 656 + } testlist[] = { 657 + { "migration mode and dirty tracking", test_migration_mode }, 658 + { "GET_CMMA_BITS: basic calls", test_get_cmma_basic }, 659 + { "GET_CMMA_BITS: all pages are dirty initally", test_get_inital_dirty }, 660 + { "GET_CMMA_BITS: holes are skipped", test_get_skip_holes }, 661 + }; 662 + 663 + /** 664 + * The kernel may support CMMA, but the machine may not (i.e. if running as 665 + * guest-3). 666 + * 667 + * In this case, the CMMA capabilities are all there, but the CMMA-related 668 + * ioctls fail. To find out whether the machine supports CMMA, create a 669 + * temporary VM and then query the CMMA feature of the VM. 670 + */ 671 + static int machine_has_cmma(void) 672 + { 673 + struct kvm_vm *vm = create_vm(); 674 + int r; 675 + 676 + r = !__kvm_has_device_attr(vm->fd, KVM_S390_VM_MEM_CTRL, KVM_S390_VM_MEM_ENABLE_CMMA); 677 + kvm_vm_free(vm); 678 + 679 + return r; 680 + } 681 + 682 + int main(int argc, char *argv[]) 683 + { 684 + int idx; 685 + 686 + TEST_REQUIRE(kvm_has_cap(KVM_CAP_SYNC_REGS)); 687 + TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_CMMA_MIGRATION)); 688 + TEST_REQUIRE(machine_has_cmma()); 689 + 690 + ksft_print_header(); 691 + 692 + ksft_set_plan(ARRAY_SIZE(testlist)); 693 + 694 + for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) { 695 + testlist[idx].test(); 696 + ksft_test_result_pass("%s\n", testlist[idx].name); 697 + } 698 + 699 + ksft_finished(); /* Print results and exit() accordingly */ 700 + }

Configure Feed

Configure Feed