Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

dax/hmem, cxl: Defer and resolve Soft Reserved ownership

The current probe-time ownership check for Soft Reserved memory, based
solely on CXL window intersection, is insufficient. dax_hmem probing is not
guaranteed to run after CXL enumeration and region assembly, which
can lead to incorrect ownership decisions before the CXL stack has
finished publishing windows and assembling committed regions.

Introduce deferred ownership handling for Soft Reserved ranges that
intersect CXL windows. When such a range is encountered during the
initial dax_hmem probe, schedule deferred work to wait for the CXL stack
to complete enumeration and region assembly before deciding ownership.

Once the deferred work runs, evaluate each Soft Reserved range
individually: if a CXL region fully contains the range, skip it and let
dax_cxl bind. Otherwise, register it with dax_hmem. This per-range
ownership model avoids the need for CXL region teardown, and
alloc_dax_region() resource exclusion prevents double claiming.

Introduce a boolean flag, dax_hmem_initial_probe, that lives in device.c
so that it survives module reload. Ensure dax_cxl defers driver registration
until dax_hmem has completed ownership resolution. dax_cxl calls
dax_hmem_flush_work() before cxl_driver_register(), which both waits for
the deferred work to complete and creates a module symbol dependency that
forces dax_hmem.ko to load before dax_cxl.

Co-developed-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Link: https://patch.msgid.link/20260322195343.206900-9-Smita.KoralahalliChannabasappa@amd.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>

authored by

Smita Koralahalli and committed by
Dave Jiang
e4de6b91 8e65f99b

+85
+7
drivers/dax/bus.h
··· 49 49 void kill_dev_dax(struct dev_dax *dev_dax); 50 50 bool static_dev_dax(struct dev_dax *dev_dax); 51 51 52 + #if IS_ENABLED(CONFIG_DEV_DAX_HMEM) 53 + extern bool dax_hmem_initial_probe; 54 + void dax_hmem_flush_work(void); 55 + #else 56 + static inline void dax_hmem_flush_work(void) { } 57 + #endif 58 + 52 59 #define MODULE_ALIAS_DAX_DEVICE(type) \ 53 60 MODULE_ALIAS("dax:t" __stringify(type) "*") 54 61 #define DAX_DEVICE_MODALIAS_FMT "dax:t%d"
+1
drivers/dax/cxl.c
··· 44 44 45 45 static void cxl_dax_region_driver_register(struct work_struct *work) 46 46 { 47 + dax_hmem_flush_work(); 47 48 cxl_driver_register(&cxl_dax_region_driver); 48 49 } 49 50
+3
drivers/dax/hmem/device.c
··· 8 8 static bool nohmem; 9 9 module_param_named(disable, nohmem, bool, 0444); 10 10 11 + bool dax_hmem_initial_probe; 12 + EXPORT_SYMBOL_GPL(dax_hmem_initial_probe); 13 + 11 14 static bool platform_initialized; 12 15 static DEFINE_MUTEX(hmem_resource_lock); 13 16 static struct resource hmem_active = {
+74
drivers/dax/hmem/hmem.c
··· 3 3 #include <linux/memregion.h> 4 4 #include <linux/module.h> 5 5 #include <linux/dax.h> 6 + #include <cxl/cxl.h> 6 7 #include "../bus.h" 7 8 8 9 static bool region_idle; ··· 58 57 { 59 58 platform_device_unregister(pdev); 60 59 } 60 + 61 + struct dax_defer_work { 62 + struct platform_device *pdev; 63 + struct work_struct work; 64 + }; 65 + 66 + static void process_defer_work(struct work_struct *w); 67 + 68 + static struct dax_defer_work dax_hmem_work = { 69 + .work = __WORK_INITIALIZER(dax_hmem_work.work, process_defer_work), 70 + }; 71 + 72 + void dax_hmem_flush_work(void) 73 + { 74 + flush_work(&dax_hmem_work.work); 75 + } 76 + EXPORT_SYMBOL_GPL(dax_hmem_flush_work); 61 77 62 78 static int __hmem_register_device(struct device *host, int target_nid, 63 79 const struct resource *res) ··· 140 122 if (IS_ENABLED(CONFIG_DEV_DAX_CXL) && 141 123 region_intersects(res->start, resource_size(res), IORESOURCE_MEM, 142 124 IORES_DESC_CXL) != REGION_DISJOINT) { 125 + if (!dax_hmem_initial_probe) { 126 + dev_dbg(host, "await CXL initial probe: %pr\n", res); 127 + queue_work(system_long_wq, &dax_hmem_work.work); 128 + return 0; 129 + } 143 130 dev_dbg(host, "deferring range to CXL: %pr\n", res); 144 131 return 0; 145 132 } ··· 152 129 return __hmem_register_device(host, target_nid, res); 153 130 } 154 131 132 + static int hmem_register_cxl_device(struct device *host, int target_nid, 133 + const struct resource *res) 134 + { 135 + if (region_intersects(res->start, resource_size(res), IORESOURCE_MEM, 136 + IORES_DESC_CXL) == REGION_DISJOINT) 137 + return 0; 138 + 139 + if (cxl_region_contains_resource((struct resource *)res)) { 140 + dev_dbg(host, "CXL claims resource, dropping: %pr\n", res); 141 + return 0; 142 + } 143 + 144 + dev_dbg(host, "CXL did not claim resource, registering: %pr\n", res); 145 + return __hmem_register_device(host, target_nid, res); 146 + } 147 + 148 + static void process_defer_work(struct work_struct *w) 149 + { 150 + struct dax_defer_work *work = 
container_of(w, typeof(*work), work); 151 + struct platform_device *pdev; 152 + 153 + if (!work->pdev) 154 + return; 155 + 156 + pdev = work->pdev; 157 + 158 + /* Relies on cxl_acpi and cxl_pci having had a chance to load */ 159 + wait_for_device_probe(); 160 + 161 + guard(device)(&pdev->dev); 162 + if (!pdev->dev.driver) 163 + return; 164 + 165 + if (!dax_hmem_initial_probe) { 166 + dax_hmem_initial_probe = true; 167 + walk_hmem_resources(&pdev->dev, hmem_register_cxl_device); 168 + } 169 + } 170 + 155 171 static int dax_hmem_platform_probe(struct platform_device *pdev) 156 172 { 173 + if (work_pending(&dax_hmem_work.work)) 174 + return -EBUSY; 175 + 176 + if (!dax_hmem_work.pdev) 177 + dax_hmem_work.pdev = 178 + to_platform_device(get_device(&pdev->dev)); 179 + 157 180 return walk_hmem_resources(&pdev->dev, hmem_register_device); 158 181 } 159 182 ··· 237 168 238 169 static __exit void dax_hmem_exit(void) 239 170 { 171 + if (dax_hmem_work.pdev) { 172 + flush_work(&dax_hmem_work.work); 173 + put_device(&dax_hmem_work.pdev->dev); 174 + } 175 + 240 176 platform_driver_unregister(&dax_hmem_driver); 241 177 platform_driver_unregister(&dax_hmem_platform_driver); 242 178 }