Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

dax: add fsdev.c driver for fs-dax on character dax

The new fsdev driver provides pages/folios initialized compatibly with
fsdax - normal rather than devdax-style refcounting, and starting out
with order-0 folios.

When fsdev binds to a daxdev, it is usually (always?) switching from the
devdax mode (device.c), which pre-initializes compound folios according
to its alignment. Fsdev uses fsdev_clear_folio_state() to switch the
folios into a fsdax-compatible state.

A side effect of this is that raw mmap doesn't (can't?) work on an fsdev
dax instance. Accordingly, the fsdev driver does not provide raw mmap -
devices must be put in 'devdax' mode (drivers/dax/device.c) to get raw
mmap capability.

This commit contains just the framework, which remaps pages/folios
compatibly with fsdax.

Enabling dax changes:

- bus.h: add DAXDRV_FSDEV_TYPE driver type
- bus.c: allow DAXDRV_FSDEV_TYPE drivers to bind to daxdevs
- dax.h: prototype inode_dax(), which fsdev needs

Suggested-by: Dan Williams <dan.j.williams@intel.com>
Suggested-by: Gregory Price <gourry@gourry.net>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Signed-off-by: John Groves <john@groves.net>
Link: https://patch.msgid.link/0100019d311cf904-419e9526-bdaf-4daa-97f1-5060b31a5c9f-000000@email.amazonses.com
Signed-off-by: Ira Weiny <ira.weiny@intel.com>

authored by

John Groves and committed by
Ira Weiny
d5406bd4 59eb73b9

+262
+8
MAINTAINERS
··· 7298 7298 S: Supported 7299 7299 F: drivers/dax/ 7300 7300 7301 + DEVICE DIRECT ACCESS (DAX) [fsdev_dax] 7302 + M: John Groves <jgroves@micron.com> 7303 + M: John Groves <John@Groves.net> 7304 + L: nvdimm@lists.linux.dev 7305 + L: linux-cxl@vger.kernel.org 7306 + S: Supported 7307 + F: drivers/dax/fsdev.c 7308 + 7301 7309 DEVICE FREQUENCY (DEVFREQ) 7302 7310 M: MyungJoo Ham <myungjoo.ham@samsung.com> 7303 7311 M: Kyungmin Park <kyungmin.park@samsung.com>
+5
drivers/dax/Kconfig
··· 61 61 depends on DEV_DAX_HMEM && DAX 62 62 def_bool y 63 63 64 + config DEV_DAX_FSDEV 65 + tristate 66 + depends on DEV_DAX && FS_DAX 67 + default DEV_DAX 68 + 64 69 config DEV_DAX_KMEM 65 70 tristate "KMEM DAX: map dax-devices as System-RAM" 66 71 default DEV_DAX
+2
drivers/dax/Makefile
··· 4 4 obj-$(CONFIG_DEV_DAX_KMEM) += kmem.o 5 5 obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o 6 6 obj-$(CONFIG_DEV_DAX_CXL) += dax_cxl.o 7 + obj-$(CONFIG_DEV_DAX_FSDEV) += fsdev_dax.o 7 8 8 9 dax-y := super.o 9 10 dax-y += bus.o 10 11 device_dax-y := device.o 11 12 dax_pmem-y := pmem.o 12 13 dax_cxl-y := cxl.o 14 + fsdev_dax-y := fsdev.o 13 15 14 16 obj-y += hmem/
+1
drivers/dax/bus.h
··· 31 31 enum dax_driver_type { 32 32 DAXDRV_KMEM_TYPE, 33 33 DAXDRV_DEVICE_TYPE, 34 + DAXDRV_FSDEV_TYPE, 34 35 }; 35 36 36 37 struct dax_device_driver {
+245
drivers/dax/fsdev.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright(c) 2026 Micron Technology, Inc. */ 3 + #include <linux/memremap.h> 4 + #include <linux/pagemap.h> 5 + #include <linux/module.h> 6 + #include <linux/device.h> 7 + #include <linux/cdev.h> 8 + #include <linux/slab.h> 9 + #include <linux/dax.h> 10 + #include <linux/uio.h> 11 + #include <linux/fs.h> 12 + #include <linux/mm.h> 13 + #include "dax-private.h" 14 + #include "bus.h" 15 + 16 + /* 17 + * FS-DAX compatible devdax driver 18 + * 19 + * Unlike drivers/dax/device.c which pre-initializes compound folios based 20 + * on device alignment (via vmemmap_shift), this driver leaves folios 21 + * uninitialized similar to pmem. This allows fs-dax filesystems like famfs 22 + * to work without needing special handling for pre-initialized folios. 23 + * 24 + * Key differences from device.c: 25 + * - pgmap type is MEMORY_DEVICE_FS_DAX (not MEMORY_DEVICE_GENERIC) 26 + * - vmemmap_shift is NOT set (folios remain order-0) 27 + * - fs-dax can dynamically create compound folios as needed 28 + * - No mmap support - all access is through fs-dax/iomap 29 + */ 30 + 31 + static void fsdev_cdev_del(void *cdev) 32 + { 33 + cdev_del(cdev); 34 + } 35 + 36 + static void fsdev_kill(void *dev_dax) 37 + { 38 + kill_dev_dax(dev_dax); 39 + } 40 + 41 + /* 42 + * Page map operations for FS-DAX mode 43 + * Similar to fsdax_pagemap_ops in drivers/nvdimm/pmem.c 44 + * 45 + * Note: folio_free callback is not needed for MEMORY_DEVICE_FS_DAX. 46 + * The core mm code in free_zone_device_folio() handles the wake_up_var() 47 + * directly for this memory type. 
48 + */ 49 + static int fsdev_pagemap_memory_failure(struct dev_pagemap *pgmap, 50 + unsigned long pfn, unsigned long nr_pages, int mf_flags) 51 + { 52 + struct dev_dax *dev_dax = pgmap->owner; 53 + u64 offset = PFN_PHYS(pfn) - dev_dax->ranges[0].range.start; 54 + u64 len = nr_pages << PAGE_SHIFT; 55 + 56 + return dax_holder_notify_failure(dev_dax->dax_dev, offset, 57 + len, mf_flags); 58 + } 59 + 60 + static const struct dev_pagemap_ops fsdev_pagemap_ops = { 61 + .memory_failure = fsdev_pagemap_memory_failure, 62 + }; 63 + 64 + /* 65 + * Clear any stale folio state from pages in the given range. 66 + * This is necessary because device_dax pre-initializes compound folios 67 + * based on vmemmap_shift, and that state may persist after driver unbind. 68 + * Since fsdev_dax uses MEMORY_DEVICE_FS_DAX without vmemmap_shift, fs-dax 69 + * expects to find clean order-0 folios that it can build into compound 70 + * folios on demand. 71 + * 72 + * At probe time, no filesystem should be mounted yet, so all mappings 73 + * are stale and must be cleared along with compound state. 
74 + */ 75 + static void fsdev_clear_folio_state(struct dev_dax *dev_dax) 76 + { 77 + for (int i = 0; i < dev_dax->nr_range; i++) { 78 + struct range *range = &dev_dax->ranges[i].range; 79 + unsigned long pfn = PHYS_PFN(range->start); 80 + unsigned long end_pfn = PHYS_PFN(range->end) + 1; 81 + 82 + while (pfn < end_pfn) { 83 + struct folio *folio = pfn_folio(pfn); 84 + int order = dax_folio_reset_order(folio); 85 + 86 + pfn += 1UL << order; 87 + } 88 + } 89 + } 90 + 91 + static void fsdev_clear_folio_state_action(void *data) 92 + { 93 + fsdev_clear_folio_state(data); 94 + } 95 + 96 + static int fsdev_open(struct inode *inode, struct file *filp) 97 + { 98 + struct dax_device *dax_dev = inode_dax(inode); 99 + struct dev_dax *dev_dax = dax_get_private(dax_dev); 100 + 101 + filp->private_data = dev_dax; 102 + 103 + return 0; 104 + } 105 + 106 + static int fsdev_release(struct inode *inode, struct file *filp) 107 + { 108 + return 0; 109 + } 110 + 111 + static const struct file_operations fsdev_fops = { 112 + .llseek = noop_llseek, 113 + .owner = THIS_MODULE, 114 + .open = fsdev_open, 115 + .release = fsdev_release, 116 + }; 117 + 118 + static int fsdev_dax_probe(struct dev_dax *dev_dax) 119 + { 120 + struct dax_device *dax_dev = dev_dax->dax_dev; 121 + struct device *dev = &dev_dax->dev; 122 + struct dev_pagemap *pgmap; 123 + struct inode *inode; 124 + struct cdev *cdev; 125 + void *addr; 126 + int rc, i; 127 + 128 + if (static_dev_dax(dev_dax)) { 129 + if (dev_dax->nr_range > 1) { 130 + dev_warn(dev, "static pgmap / multi-range device conflict\n"); 131 + return -EINVAL; 132 + } 133 + 134 + pgmap = dev_dax->pgmap; 135 + } else { 136 + size_t pgmap_size; 137 + 138 + if (dev_dax->pgmap) { 139 + dev_warn(dev, "dynamic-dax with pre-populated page map\n"); 140 + return -EINVAL; 141 + } 142 + 143 + pgmap_size = struct_size(pgmap, ranges, dev_dax->nr_range - 1); 144 + pgmap = devm_kzalloc(dev, pgmap_size, GFP_KERNEL); 145 + if (!pgmap) 146 + return -ENOMEM; 147 + 148 + 
pgmap->nr_range = dev_dax->nr_range; 149 + dev_dax->pgmap = pgmap; 150 + 151 + for (i = 0; i < dev_dax->nr_range; i++) { 152 + struct range *range = &dev_dax->ranges[i].range; 153 + 154 + pgmap->ranges[i] = *range; 155 + } 156 + } 157 + 158 + for (i = 0; i < dev_dax->nr_range; i++) { 159 + struct range *range = &dev_dax->ranges[i].range; 160 + 161 + if (!devm_request_mem_region(dev, range->start, 162 + range_len(range), dev_name(dev))) { 163 + dev_warn(dev, "mapping%d: %#llx-%#llx could not reserve range\n", 164 + i, range->start, range->end); 165 + return -EBUSY; 166 + } 167 + } 168 + 169 + /* 170 + * Use MEMORY_DEVICE_FS_DAX without setting vmemmap_shift, leaving 171 + * folios at order-0. Unlike device.c (MEMORY_DEVICE_GENERIC), this 172 + * lets fs-dax dynamically build compound folios as needed, similar 173 + * to pmem behavior. 174 + */ 175 + pgmap->type = MEMORY_DEVICE_FS_DAX; 176 + pgmap->ops = &fsdev_pagemap_ops; 177 + pgmap->owner = dev_dax; 178 + 179 + addr = devm_memremap_pages(dev, pgmap); 180 + if (IS_ERR(addr)) 181 + return PTR_ERR(addr); 182 + 183 + /* 184 + * Clear any stale compound folio state left over from a previous 185 + * driver (e.g., device_dax with vmemmap_shift). Also register this 186 + * as a devm action so folio state is cleared on unbind, ensuring 187 + * clean pages for subsequent drivers (e.g., kmem for system-ram). 
188 + */ 189 + fsdev_clear_folio_state(dev_dax); 190 + rc = devm_add_action_or_reset(dev, fsdev_clear_folio_state_action, 191 + dev_dax); 192 + if (rc) 193 + return rc; 194 + 195 + /* Detect whether the data is at a non-zero offset into the memory */ 196 + if (pgmap->range.start != dev_dax->ranges[0].range.start) { 197 + u64 phys = dev_dax->ranges[0].range.start; 198 + u64 pgmap_phys = dev_dax->pgmap[0].range.start; 199 + u64 data_offset = 0; 200 + 201 + if (!WARN_ON(pgmap_phys > phys)) 202 + data_offset = phys - pgmap_phys; 203 + 204 + pr_debug("%s: offset detected phys=%llx pgmap_phys=%llx offset=%llx\n", 205 + __func__, phys, pgmap_phys, data_offset); 206 + } 207 + 208 + inode = dax_inode(dax_dev); 209 + cdev = inode->i_cdev; 210 + cdev_init(cdev, &fsdev_fops); 211 + cdev->owner = dev->driver->owner; 212 + cdev_set_parent(cdev, &dev->kobj); 213 + rc = cdev_add(cdev, dev->devt, 1); 214 + if (rc) 215 + return rc; 216 + 217 + rc = devm_add_action_or_reset(dev, fsdev_cdev_del, cdev); 218 + if (rc) 219 + return rc; 220 + 221 + run_dax(dax_dev); 222 + return devm_add_action_or_reset(dev, fsdev_kill, dev_dax); 223 + } 224 + 225 + static struct dax_device_driver fsdev_dax_driver = { 226 + .probe = fsdev_dax_probe, 227 + .type = DAXDRV_FSDEV_TYPE, 228 + }; 229 + 230 + static int __init dax_init(void) 231 + { 232 + return dax_driver_register(&fsdev_dax_driver); 233 + } 234 + 235 + static void __exit dax_exit(void) 236 + { 237 + dax_driver_unregister(&fsdev_dax_driver); 238 + } 239 + 240 + MODULE_AUTHOR("John Groves"); 241 + MODULE_DESCRIPTION("FS-DAX Device: fs-dax compatible devdax driver"); 242 + MODULE_LICENSE("GPL"); 243 + module_init(dax_init); 244 + module_exit(dax_exit); 245 + MODULE_ALIAS_DAX_DEVICE(0);
+1
fs/dax.c
··· 429 429 430 430 return order; 431 431 } 432 + EXPORT_SYMBOL_GPL(dax_folio_reset_order); 432 433 433 434 static inline unsigned long dax_folio_put(struct folio *folio) 434 435 {