Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

mshv: Fix deposit memory in MSHV_ROOT_HVCALL

When the MSHV_ROOT_HVCALL ioctl executes a hypercall and gets
HV_STATUS_INSUFFICIENT_MEMORY, it deposits memory and then returns
-EAGAIN to userspace. The expectation is that the VMM will retry.

However, some VMM code in the wild doesn't do this and simply fails.
Rather than force the VMM to retry, change the ioctl to deposit
memory on demand and immediately retry the hypercall as is done with
all the other hypercall helper functions.

In addition to making the ioctl easier to use, removing the need for
multiple syscalls improves performance.

There is a complication: unlike the other hypercall helper functions,
in MSHV_ROOT_HVCALL the input is opaque to the kernel. This is
problematic for rep hypercalls: because the kernel cannot interpret
the input, it cannot copy in the next part of the input list on each
loop iteration after depositing pages (this was the original reason
for returning -EAGAIN in this case).

Introduce hv_do_rep_hypercall_ex(), which adds a 'rep_start'
parameter. This solves the issue, allowing the deposit loop in
MSHV_ROOT_HVCALL to restart a rep hypercall after depositing pages
partway through.

Fixes: 621191d709b1 ("Drivers: hv: Introduce mshv_root module to expose /dev/mshv to VMMs")
Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
Reviewed-by: Michael Kelley <mhklinux@outlook.com>
Signed-off-by: Wei Liu <wei.liu@kernel.org>

authored by

Nuno Das Neves and committed by
Wei Liu
4cc1aa46 7563d021

+44 -31
+30 -28
drivers/hv/mshv_root_main.c
··· 159 159 unsigned int pages_order; 160 160 void *input_pg = NULL; 161 161 void *output_pg = NULL; 162 + u16 reps_completed; 162 163 163 164 if (copy_from_user(&args, user_args, sizeof(args))) 164 165 return -EFAULT; ··· 211 210 */ 212 211 *(u64 *)input_pg = partition->pt_id; 213 212 214 - if (args.reps) 215 - status = hv_do_rep_hypercall(args.code, args.reps, 0, 216 - input_pg, output_pg); 217 - else 218 - status = hv_do_hypercall(args.code, input_pg, output_pg); 219 - 220 - if (hv_result(status) == HV_STATUS_CALL_PENDING) { 221 - if (is_async) { 222 - mshv_async_hvcall_handler(partition, &status); 223 - } else { /* Paranoia check. This shouldn't happen! */ 224 - ret = -EBADFD; 225 - goto free_pages_out; 213 + reps_completed = 0; 214 + do { 215 + if (args.reps) { 216 + status = hv_do_rep_hypercall_ex(args.code, args.reps, 217 + 0, reps_completed, 218 + input_pg, output_pg); 219 + reps_completed = hv_repcomp(status); 220 + } else { 221 + status = hv_do_hypercall(args.code, input_pg, output_pg); 226 222 } 227 - } 228 223 229 - if (hv_result(status) == HV_STATUS_INSUFFICIENT_MEMORY) { 230 - ret = hv_call_deposit_pages(NUMA_NO_NODE, partition->pt_id, 1); 231 - if (!ret) 232 - ret = -EAGAIN; 233 - } else if (!hv_result_success(status)) { 234 - ret = hv_result_to_errno(status); 235 - } 224 + if (hv_result(status) == HV_STATUS_CALL_PENDING) { 225 + if (is_async) { 226 + mshv_async_hvcall_handler(partition, &status); 227 + } else { /* Paranoia check. This shouldn't happen! */ 228 + ret = -EBADFD; 229 + goto free_pages_out; 230 + } 231 + } 236 232 237 - /* 238 - * Always return the status and output data regardless of result. 239 - * The VMM may need it to determine how to proceed. E.g. the status may 240 - * contain the number of reps completed if a rep hypercall partially 241 - * succeeded. 
242 - */ 233 + if (hv_result_success(status)) 234 + break; 235 + 236 + if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) 237 + ret = hv_result_to_errno(status); 238 + else 239 + ret = hv_call_deposit_pages(NUMA_NO_NODE, 240 + partition->pt_id, 1); 241 + } while (!ret); 242 + 243 243 args.status = hv_result(status); 244 - args.reps = args.reps ? hv_repcomp(status) : 0; 244 + args.reps = reps_completed; 245 245 if (copy_to_user(user_args, &args, sizeof(args))) 246 246 ret = -EFAULT; 247 247 248 - if (output_pg && 248 + if (!ret && output_pg && 249 249 copy_to_user((void __user *)args.out_ptr, output_pg, args.out_sz)) 250 250 ret = -EFAULT; 251 251
+14 -3
include/asm-generic/mshyperv.h
··· 124 124 125 125 /* 126 126 * Rep hypercalls. Callers of this functions are supposed to ensure that 127 - * rep_count and varhead_size comply with Hyper-V hypercall definition. 127 + * rep_count, varhead_size, and rep_start comply with Hyper-V hypercall 128 + * definition. 128 129 */ 129 - static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size, 130 - void *input, void *output) 130 + static inline u64 hv_do_rep_hypercall_ex(u16 code, u16 rep_count, 131 + u16 varhead_size, u16 rep_start, 132 + void *input, void *output) 131 133 { 132 134 u64 control = code; 133 135 u64 status; ··· 137 135 138 136 control |= (u64)varhead_size << HV_HYPERCALL_VARHEAD_OFFSET; 139 137 control |= (u64)rep_count << HV_HYPERCALL_REP_COMP_OFFSET; 138 + control |= (u64)rep_start << HV_HYPERCALL_REP_START_OFFSET; 140 139 141 140 do { 142 141 status = hv_do_hypercall(control, input, output); ··· 153 150 } while (rep_comp < rep_count); 154 151 155 152 return status; 153 + } 154 + 155 + /* For the typical case where rep_start is 0 */ 156 + static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size, 157 + void *input, void *output) 158 + { 159 + return hv_do_rep_hypercall_ex(code, rep_count, varhead_size, 0, 160 + input, output); 156 161 } 157 162 158 163 /* Generate the guest OS identifier as described in the Hyper-V TLFS */