Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

mshv: Add support for integrated scheduler

Query the hypervisor for integrated scheduler support and use it if
configured.

Microsoft Hypervisor originally provided two schedulers: root and core. The
root scheduler allows the root partition to schedule guest vCPUs across
physical cores, supporting both time slicing and CPU affinity (e.g., via
cgroups). In contrast, the core scheduler delegates vCPU-to-physical-core
scheduling entirely to the hypervisor.

Direct virtualization introduces a new privileged guest partition type — L1
Virtual Host (L1VH) — which can create child partitions from its own
resources. These child partitions are effectively siblings, scheduled by
the hypervisor's core scheduler. This prevents the L1VH parent from setting
affinity or time slicing for its own processes or guest VPs. While cgroups,
CFS, and cpuset controllers can still be used, their effectiveness is
unpredictable, as the core scheduler swaps vCPUs according to its own logic
(typically round-robin across all allocated physical CPUs). As a result,
the system may appear to "steal" time from the L1VH and its children.

To address this, Microsoft Hypervisor introduces the integrated scheduler.
This allows an L1VH partition to schedule its own vCPUs and those of its
guests across its "physical" cores, effectively emulating root scheduler
behavior within the L1VH, while retaining core scheduler behavior for the
rest of the system.

The integrated scheduler is controlled by the root partition and gated by
the vmm_enable_integrated_scheduler capability bit. If this bit is set, the
hypervisor supports the integrated scheduler. The L1VH partition must then
check whether the integrated scheduler is actually enabled by querying the
corresponding extended partition property. If
this property is true, the L1VH partition must use the root scheduler
logic; otherwise, it must use the core scheduler. As a consequence, reading
the VMM capabilities becomes mandatory in an L1VH partition as well.

Signed-off-by: Andreea Pintilie <anpintil@microsoft.com>
Signed-off-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
Reviewed-by: Michael Kelley <mhklinux@outlook.com>
Signed-off-by: Wei Liu <wei.liu@kernel.org>

authored by

Stanislav Kinsburskii and committed by
Wei Liu
4bef6b28 05976960

+56 -33
+50 -32
drivers/hv/mshv_root_main.c
··· 2079 2079 }; 2080 2080 } 2081 2081 2082 + static int __init l1vh_retrieve_scheduler_type(enum hv_scheduler_type *out) 2083 + { 2084 + u64 integrated_sched_enabled; 2085 + int ret; 2086 + 2087 + *out = HV_SCHEDULER_TYPE_CORE_SMT; 2088 + 2089 + if (!mshv_root.vmm_caps.vmm_enable_integrated_scheduler) 2090 + return 0; 2091 + 2092 + ret = hv_call_get_partition_property_ex(HV_PARTITION_ID_SELF, 2093 + HV_PARTITION_PROPERTY_INTEGRATED_SCHEDULER_ENABLED, 2094 + 0, &integrated_sched_enabled, 2095 + sizeof(integrated_sched_enabled)); 2096 + if (ret) 2097 + return ret; 2098 + 2099 + if (integrated_sched_enabled) 2100 + *out = HV_SCHEDULER_TYPE_ROOT; 2101 + 2102 + return 0; 2103 + } 2104 + 2082 2105 /* TODO move this to hv_common.c when needed outside */ 2083 2106 static int __init hv_retrieve_scheduler_type(enum hv_scheduler_type *out) 2084 2107 { ··· 2134 2111 /* Retrieve and stash the supported scheduler type */ 2135 2112 static int __init mshv_retrieve_scheduler_type(struct device *dev) 2136 2113 { 2137 - int ret = 0; 2114 + int ret; 2138 2115 2139 2116 if (hv_l1vh_partition()) 2140 - hv_scheduler_type = HV_SCHEDULER_TYPE_CORE_SMT; 2117 + ret = l1vh_retrieve_scheduler_type(&hv_scheduler_type); 2141 2118 else 2142 2119 ret = hv_retrieve_scheduler_type(&hv_scheduler_type); 2143 - 2144 2120 if (ret) 2145 2121 return ret; 2146 2122 ··· 2259 2237 static void mshv_root_partition_exit(void) 2260 2238 { 2261 2239 unregister_reboot_notifier(&mshv_reboot_nb); 2262 - root_scheduler_deinit(); 2263 2240 } 2264 2241 2265 2242 static int __init mshv_root_partition_init(struct device *dev) 2266 2243 { 2267 - int err; 2268 - 2269 - err = root_scheduler_init(dev); 2270 - if (err) 2271 - return err; 2272 - 2273 - err = register_reboot_notifier(&mshv_reboot_nb); 2274 - if (err) 2275 - goto root_sched_deinit; 2276 - 2277 - return 0; 2278 - 2279 - root_sched_deinit: 2280 - root_scheduler_deinit(); 2281 - return err; 2244 + return register_reboot_notifier(&mshv_reboot_nb); 2282 2245 } 2283 
2246 2284 - static void mshv_init_vmm_caps(struct device *dev) 2247 + static int __init mshv_init_vmm_caps(struct device *dev) 2285 2248 { 2286 - /* 2287 - * This can only fail here if HVCALL_GET_PARTITION_PROPERTY_EX or 2288 - * HV_PARTITION_PROPERTY_VMM_CAPABILITIES are not supported. In that 2289 - * case it's valid to proceed as if all vmm_caps are disabled (zero). 2290 - */ 2291 - if (hv_call_get_partition_property_ex(HV_PARTITION_ID_SELF, 2292 - HV_PARTITION_PROPERTY_VMM_CAPABILITIES, 2293 - 0, &mshv_root.vmm_caps, 2294 - sizeof(mshv_root.vmm_caps))) 2295 - dev_warn(dev, "Unable to get VMM capabilities\n"); 2249 + int ret; 2250 + 2251 + ret = hv_call_get_partition_property_ex(HV_PARTITION_ID_SELF, 2252 + HV_PARTITION_PROPERTY_VMM_CAPABILITIES, 2253 + 0, &mshv_root.vmm_caps, 2254 + sizeof(mshv_root.vmm_caps)); 2255 + if (ret && hv_l1vh_partition()) { 2256 + dev_err(dev, "Failed to get VMM capabilities: %d\n", ret); 2257 + return ret; 2258 + } 2296 2259 2297 2260 dev_dbg(dev, "vmm_caps = %#llx\n", mshv_root.vmm_caps.as_uint64[0]); 2261 + 2262 + return 0; 2298 2263 } 2299 2264 2300 2265 static int __init mshv_parent_partition_init(void) ··· 2327 2318 2328 2319 mshv_cpuhp_online = ret; 2329 2320 2321 + ret = mshv_init_vmm_caps(dev); 2322 + if (ret) 2323 + goto remove_cpu_state; 2324 + 2330 2325 ret = mshv_retrieve_scheduler_type(dev); 2331 2326 if (ret) 2332 2327 goto remove_cpu_state; ··· 2340 2327 if (ret) 2341 2328 goto remove_cpu_state; 2342 2329 2343 - mshv_init_vmm_caps(dev); 2330 + ret = root_scheduler_init(dev); 2331 + if (ret) 2332 + goto exit_partition; 2344 2333 2345 2334 ret = mshv_debugfs_init(); 2346 2335 if (ret) 2347 - goto exit_partition; 2336 + goto deinit_root_scheduler; 2348 2337 2349 2338 ret = mshv_irqfd_wq_init(); 2350 2339 if (ret) ··· 2361 2346 2362 2347 exit_debugfs: 2363 2348 mshv_debugfs_exit(); 2349 + deinit_root_scheduler: 2350 + root_scheduler_deinit(); 2364 2351 exit_partition: 2365 2352 if (hv_root_partition()) 2366 2353 
mshv_root_partition_exit(); ··· 2382 2365 mshv_debugfs_exit(); 2383 2366 misc_deregister(&mshv_dev); 2384 2367 mshv_irqfd_wq_cleanup(); 2368 + root_scheduler_deinit(); 2385 2369 if (hv_root_partition()) 2386 2370 mshv_root_partition_exit(); 2387 2371 cpuhp_remove_state(mshv_cpuhp_online);
+6 -1
include/hyperv/hvhdk_mini.h
··· 87 87 HV_PARTITION_PROPERTY_PRIVILEGE_FLAGS = 0x00010000, 88 88 HV_PARTITION_PROPERTY_SYNTHETIC_PROC_FEATURES = 0x00010001, 89 89 90 + /* Integrated scheduling properties */ 91 + HV_PARTITION_PROPERTY_INTEGRATED_SCHEDULER_ENABLED = 0x00020005, 92 + 90 93 /* Resource properties */ 91 94 HV_PARTITION_PROPERTY_GPA_PAGE_ACCESS_TRACKING = 0x00050005, 92 95 HV_PARTITION_PROPERTY_UNIMPLEMENTED_MSR_ACTION = 0x00050017, ··· 105 102 }; 106 103 107 104 #define HV_PARTITION_VMM_CAPABILITIES_BANK_COUNT 1 108 - #define HV_PARTITION_VMM_CAPABILITIES_RESERVED_BITFIELD_COUNT 59 105 + #define HV_PARTITION_VMM_CAPABILITIES_RESERVED_BITFIELD_COUNT 57 109 106 110 107 struct hv_partition_property_vmm_capabilities { 111 108 u16 bank_count; ··· 122 119 u64 reservedbit3: 1; 123 120 #endif 124 121 u64 assignable_synthetic_proc_features: 1; 122 + u64 reservedbit5: 1; 123 + u64 vmm_enable_integrated_scheduler : 1; 125 124 u64 reserved0: HV_PARTITION_VMM_CAPABILITIES_RESERVED_BITFIELD_COUNT; 126 125 } __packed; 127 126 };