Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

device-dax: add an allocation interface for device-dax instances

In preparation for a facility that enables dax regions to be sub-divided,
introduce infrastructure to track and allocate region capacity.

The new dax_region/available_size attribute is only enabled for volatile
hmem devices, not for pmem devices, which are defined by nvdimm namespace
boundaries. This follows Jeff's feedback from the last time dynamic
device-dax capacity allocation support was discussed.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Cc: Brice Goglin <Brice.Goglin@inria.fr>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Ira Weiny <ira.weiny@intel.com>
Cc: Jia He <justin.he@arm.com>
Cc: Joao Martins <joao.m.martins@oracle.com>
Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Ben Skeggs <bskeggs@redhat.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: David Airlie <airlied@linux.ie>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Hulk Robot <hulkci@huawei.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jason Gunthorpe <jgg@mellanox.com>
Cc: Jason Yan <yanaijie@huawei.com>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: "Jérôme Glisse" <jglisse@redhat.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: kernel test robot <lkp@intel.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Mike Rapoport <rppt@linux.ibm.com>
Cc: Paul Mackerras <paulus@ozlabs.org>
Cc: Pavel Tatashin <pasha.tatashin@soleen.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Stefano Stabellini <sstabellini@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Wei Yang <richard.weiyang@linux.alibaba.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/linux-nvdimm/x49shpp3zn8.fsf@segfault.boston.devel.redhat.com
Link: https://lkml.kernel.org/r/159643101035.4062302.6785857915652647857.stgit@dwillia2-desk3.amr.corp.intel.com
Link: https://lkml.kernel.org/r/160106112801.30709.14601438735305335071.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Dan Williams and committed by
Linus Torvalds
c2f3011e 0513bd5b

+121 -23
+110 -10
drivers/dax/bus.c
··· 130 130 131 131 static int dax_bus_match(struct device *dev, struct device_driver *drv); 132 132 133 + static bool is_static(struct dax_region *dax_region) 134 + { 135 + return (dax_region->res.flags & IORESOURCE_DAX_STATIC) != 0; 136 + } 137 + 133 138 static struct bus_type dax_bus_type = { 134 139 .name = "dax", 135 140 .uevent = dax_bus_uevent, ··· 190 185 } 191 186 static DEVICE_ATTR_RO(align); 192 187 188 + #define for_each_dax_region_resource(dax_region, res) \ 189 + for (res = (dax_region)->res.child; res; res = res->sibling) 190 + 191 + static unsigned long long dax_region_avail_size(struct dax_region *dax_region) 192 + { 193 + resource_size_t size = resource_size(&dax_region->res); 194 + struct resource *res; 195 + 196 + device_lock_assert(dax_region->dev); 197 + 198 + for_each_dax_region_resource(dax_region, res) 199 + size -= resource_size(res); 200 + return size; 201 + } 202 + 203 + static ssize_t available_size_show(struct device *dev, 204 + struct device_attribute *attr, char *buf) 205 + { 206 + struct dax_region *dax_region = dev_get_drvdata(dev); 207 + unsigned long long size; 208 + 209 + device_lock(dev); 210 + size = dax_region_avail_size(dax_region); 211 + device_unlock(dev); 212 + 213 + return sprintf(buf, "%llu\n", size); 214 + } 215 + static DEVICE_ATTR_RO(available_size); 216 + 217 + static umode_t dax_region_visible(struct kobject *kobj, struct attribute *a, 218 + int n) 219 + { 220 + struct device *dev = container_of(kobj, struct device, kobj); 221 + struct dax_region *dax_region = dev_get_drvdata(dev); 222 + 223 + if (is_static(dax_region) && a == &dev_attr_available_size.attr) 224 + return 0; 225 + return a->mode; 226 + } 227 + 193 228 static struct attribute *dax_region_attributes[] = { 229 + &dev_attr_available_size.attr, 194 230 &dev_attr_region_size.attr, 195 231 &dev_attr_align.attr, 196 232 &dev_attr_id.attr, ··· 241 195 static const struct attribute_group dax_region_attribute_group = { 242 196 .name = "dax_region", 243 197 
.attrs = dax_region_attributes, 198 + .is_visible = dax_region_visible, 244 199 }; 245 200 246 201 static const struct attribute_group *dax_region_attribute_groups[] = { ··· 273 226 } 274 227 275 228 struct dax_region *alloc_dax_region(struct device *parent, int region_id, 276 - struct resource *res, int target_node, unsigned int align) 229 + struct resource *res, int target_node, unsigned int align, 230 + unsigned long flags) 277 231 { 278 232 struct dax_region *dax_region; 279 233 ··· 297 249 return NULL; 298 250 299 251 dev_set_drvdata(parent, dax_region); 300 - memcpy(&dax_region->res, res, sizeof(*res)); 301 252 kref_init(&dax_region->kref); 302 253 dax_region->id = region_id; 303 254 dax_region->align = align; 304 255 dax_region->dev = parent; 305 256 dax_region->target_node = target_node; 257 + dax_region->res = (struct resource) { 258 + .start = res->start, 259 + .end = res->end, 260 + .flags = IORESOURCE_MEM | flags, 261 + }; 262 + 306 263 if (sysfs_create_groups(&parent->kobj, dax_region_attribute_groups)) { 307 264 kfree(dax_region); 308 265 return NULL; ··· 319 266 return dax_region; 320 267 } 321 268 EXPORT_SYMBOL_GPL(alloc_dax_region); 269 + 270 + static int alloc_dev_dax_range(struct dev_dax *dev_dax, resource_size_t size) 271 + { 272 + struct dax_region *dax_region = dev_dax->region; 273 + struct resource *res = &dax_region->res; 274 + struct device *dev = &dev_dax->dev; 275 + struct resource *alloc; 276 + 277 + device_lock_assert(dax_region->dev); 278 + 279 + /* TODO: handle multiple allocations per region */ 280 + if (res->child) 281 + return -ENOMEM; 282 + 283 + alloc = __request_region(res, res->start, size, dev_name(dev), 0); 284 + 285 + if (!alloc) 286 + return -ENOMEM; 287 + 288 + dev_dax->range = (struct range) { 289 + .start = alloc->start, 290 + .end = alloc->end, 291 + }; 292 + 293 + return 0; 294 + } 322 295 323 296 static ssize_t size_show(struct device *dev, 324 297 struct device_attribute *attr, char *buf) ··· 440 361 } 441 362 
EXPORT_SYMBOL_GPL(kill_dev_dax); 442 363 364 + static void free_dev_dax_range(struct dev_dax *dev_dax) 365 + { 366 + struct dax_region *dax_region = dev_dax->region; 367 + struct range *range = &dev_dax->range; 368 + 369 + device_lock_assert(dax_region->dev); 370 + __release_region(&dax_region->res, range->start, range_len(range)); 371 + } 372 + 443 373 static void dev_dax_release(struct device *dev) 444 374 { 445 375 struct dev_dax *dev_dax = to_dev_dax(dev); ··· 473 385 dev_dbg(dev, "%s\n", __func__); 474 386 475 387 kill_dev_dax(dev_dax); 388 + free_dev_dax_range(dev_dax); 476 389 device_del(dev); 477 390 put_device(dev); 478 391 } ··· 486 397 struct dev_dax *dev_dax; 487 398 struct inode *inode; 488 399 struct device *dev; 489 - int rc = -ENOMEM; 400 + int rc; 490 401 491 402 if (data->id < 0) 492 403 return ERR_PTR(-EINVAL); ··· 495 406 if (!dev_dax) 496 407 return ERR_PTR(-ENOMEM); 497 408 409 + dev_dax->region = dax_region; 410 + dev = &dev_dax->dev; 411 + device_initialize(dev); 412 + dev_set_name(dev, "dax%d.%d", dax_region->id, data->id); 413 + 414 + rc = alloc_dev_dax_range(dev_dax, data->size); 415 + if (rc) 416 + goto err_range; 417 + 498 418 if (data->pgmap) { 419 + dev_WARN_ONCE(parent, !is_static(dax_region), 420 + "custom dev_pagemap requires a static dax_region\n"); 421 + 499 422 dev_dax->pgmap = kmemdup(data->pgmap, 500 423 sizeof(struct dev_pagemap), GFP_KERNEL); 501 - if (!dev_dax->pgmap) 424 + if (!dev_dax->pgmap) { 425 + rc = -ENOMEM; 502 426 goto err_pgmap; 427 + } 503 428 } 504 429 505 430 /* ··· 530 427 kill_dax(dax_dev); 531 428 532 429 /* from here on we're committed to teardown via dev_dax_release() */ 533 - dev = &dev_dax->dev; 534 - device_initialize(dev); 535 - 536 430 dev_dax->dax_dev = dax_dev; 537 - dev_dax->region = dax_region; 538 - dev_dax->range = data->range; 539 431 dev_dax->target_node = dax_region->target_node; 540 432 kref_get(&dax_region->kref); 541 433 ··· 542 444 dev->class = dax_class; 543 445 dev->parent = parent; 
544 446 dev->type = &dev_dax_type; 545 - dev_set_name(dev, "dax%d.%d", dax_region->id, data->id); 546 447 547 448 rc = device_add(dev); 548 449 if (rc) { ··· 555 458 return ERR_PTR(rc); 556 459 557 460 return dev_dax; 461 + 558 462 err_alloc_dax: 559 463 kfree(dev_dax->pgmap); 560 464 err_pgmap: 465 + free_dev_dax_range(dev_dax); 466 + err_range: 561 467 kfree(dev_dax); 562 468 563 469 return ERR_PTR(rc);
+5 -2
drivers/dax/bus.h
··· 10 10 struct dax_device; 11 11 struct dax_region; 12 12 void dax_region_put(struct dax_region *dax_region); 13 + 14 + #define IORESOURCE_DAX_STATIC (1UL << 0) 13 15 struct dax_region *alloc_dax_region(struct device *parent, int region_id, 14 - struct resource *res, int target_node, unsigned int align); 16 + struct resource *res, int target_node, unsigned int align, 17 + unsigned long flags); 15 18 16 19 enum dev_dax_subsys { 17 20 DEV_DAX_BUS = 0, /* zeroed dev_dax_data picks this by default */ ··· 25 22 struct dax_region *dax_region; 26 23 struct dev_pagemap *pgmap; 27 24 enum dev_dax_subsys subsys; 28 - struct range range; 25 + resource_size_t size; 29 26 int id; 30 27 }; 31 28
+1 -1
drivers/dax/dax-private.h
··· 22 22 * @kref: to pin while other agents have a need to do lookups 23 23 * @dev: parent device backing this region 24 24 * @align: allocation and mapping alignment for child dax devices 25 - * @res: physical address range of the region 25 + * @res: resource tree to track instance allocations 26 26 */ 27 27 struct dax_region { 28 28 int id;
+2 -5
drivers/dax/hmem/hmem.c
··· 20 20 21 21 mri = dev->platform_data; 22 22 dax_region = alloc_dax_region(dev, pdev->id, res, mri->target_node, 23 - PMD_SIZE); 23 + PMD_SIZE, 0); 24 24 if (!dax_region) 25 25 return -ENOMEM; 26 26 27 27 data = (struct dev_dax_data) { 28 28 .dax_region = dax_region, 29 29 .id = 0, 30 - .range = { 31 - .start = res->start, 32 - .end = res->end, 33 - }, 30 + .size = resource_size(res), 34 31 }; 35 32 dev_dax = devm_create_dev_dax(&data); 36 33 if (IS_ERR(dev_dax))
+3 -5
drivers/dax/pmem/core.c
··· 54 54 memcpy(&res, &pgmap.res, sizeof(res)); 55 55 res.start += offset; 56 56 dax_region = alloc_dax_region(dev, region_id, &res, 57 - nd_region->target_node, le32_to_cpu(pfn_sb->align)); 57 + nd_region->target_node, le32_to_cpu(pfn_sb->align), 58 + IORESOURCE_DAX_STATIC); 58 59 if (!dax_region) 59 60 return ERR_PTR(-ENOMEM); 60 61 ··· 64 63 .id = id, 65 64 .pgmap = &pgmap, 66 65 .subsys = subsys, 67 - .range = { 68 - .start = res.start, 69 - .end = res.end, 70 - }, 66 + .size = resource_size(&res), 71 67 }; 72 68 dev_dax = devm_create_dev_dax(&data); 73 69