Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'libnvdimm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm

Pull libnvdimm fixes from Dan Williams:
"1/ Fixes to the libnvdimm 'pfn' device that establishes a reserved
area for storing a struct page array.

2/ Fixes for dax operations on a raw block device to prevent pagecache
collisions with dax mappings.

3/ A fix for pfn_t usage in vm_insert_mixed that led to a null
pointer dereference.

These have received build success notification from the kbuild robot
across 153 configs and pass the latest ndctl tests"

* 'libnvdimm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm:
phys_to_pfn_t: use phys_addr_t
mm: fix pfn_t to page conversion in vm_insert_mixed
block: use DAX for partition table reads
block: revert runtime dax control of the raw block device
fs, block: force direct-I/O for dax-enabled block devices
devm_memremap_pages: fix vmem_altmap lifetime + alignment handling
libnvdimm, pfn: fix restoring memmap location
libnvdimm: fix mode determination for e820 devices

+75 -93
-38
block/ioctl.c
··· 434 434 435 435 return true; 436 436 } 437 - 438 - static int blkdev_daxset(struct block_device *bdev, unsigned long argp) 439 - { 440 - unsigned long arg; 441 - int rc = 0; 442 - 443 - if (!capable(CAP_SYS_ADMIN)) 444 - return -EACCES; 445 - 446 - if (get_user(arg, (int __user *)(argp))) 447 - return -EFAULT; 448 - arg = !!arg; 449 - if (arg == !!(bdev->bd_inode->i_flags & S_DAX)) 450 - return 0; 451 - 452 - if (arg) 453 - arg = S_DAX; 454 - 455 - if (arg && !blkdev_dax_capable(bdev)) 456 - return -ENOTTY; 457 - 458 - inode_lock(bdev->bd_inode); 459 - if (bdev->bd_map_count == 0) 460 - inode_set_flags(bdev->bd_inode, arg, S_DAX); 461 - else 462 - rc = -EBUSY; 463 - inode_unlock(bdev->bd_inode); 464 - return rc; 465 - } 466 - #else 467 - static int blkdev_daxset(struct block_device *bdev, int arg) 468 - { 469 - if (arg) 470 - return -ENOTTY; 471 - return 0; 472 - } 473 437 #endif 474 438 475 439 static int blkdev_flushbuf(struct block_device *bdev, fmode_t mode, ··· 598 634 case BLKTRACESETUP: 599 635 case BLKTRACETEARDOWN: 600 636 return blk_trace_ioctl(bdev, cmd, argp); 601 - case BLKDAXSET: 602 - return blkdev_daxset(bdev, arg); 603 637 case BLKDAXGET: 604 638 return put_int(arg, !!(bdev->bd_inode->i_flags & S_DAX)); 605 639 break;
+15 -3
block/partition-generic.c
··· 16 16 #include <linux/kmod.h> 17 17 #include <linux/ctype.h> 18 18 #include <linux/genhd.h> 19 + #include <linux/dax.h> 19 20 #include <linux/blktrace_api.h> 20 21 21 22 #include "partitions/check.h" ··· 551 550 return 0; 552 551 } 553 552 554 - unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p) 553 + static struct page *read_pagecache_sector(struct block_device *bdev, sector_t n) 555 554 { 556 555 struct address_space *mapping = bdev->bd_inode->i_mapping; 556 + 557 + return read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9)), 558 + NULL); 559 + } 560 + 561 + unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p) 562 + { 557 563 struct page *page; 558 564 559 - page = read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9)), 560 - NULL); 565 + /* don't populate page cache for dax capable devices */ 566 + if (IS_DAX(bdev->bd_inode)) 567 + page = read_dax_sector(bdev, n); 568 + else 569 + page = read_pagecache_sector(bdev, n); 570 + 561 571 if (!IS_ERR(page)) { 562 572 if (PageError(page)) 563 573 goto fail;
+5 -3
drivers/nvdimm/namespace_devs.c
··· 1277 1277 1278 1278 device_lock(dev); 1279 1279 claim = ndns->claim; 1280 - if (pmem_should_map_pages(dev) || (claim && is_nd_pfn(claim))) 1281 - mode = "memory"; 1282 - else if (claim && is_nd_btt(claim)) 1280 + if (claim && is_nd_btt(claim)) 1283 1281 mode = "safe"; 1282 + else if (claim && is_nd_pfn(claim)) 1283 + mode = "memory"; 1284 + else if (!claim && pmem_should_map_pages(dev)) 1285 + mode = "memory"; 1284 1286 else 1285 1287 mode = "raw"; 1286 1288 rc = sprintf(buf, "%s\n", mode);
+1 -3
drivers/nvdimm/pfn_devs.c
··· 301 301 302 302 switch (le32_to_cpu(pfn_sb->mode)) { 303 303 case PFN_MODE_RAM: 304 - break; 305 304 case PFN_MODE_PMEM: 306 - /* TODO: allocate from PMEM support */ 307 - return -ENOTTY; 305 + break; 308 306 default: 309 307 return -ENXIO; 310 308 }
-28
fs/block_dev.c
··· 1736 1736 return __dax_pmd_fault(vma, addr, pmd, flags, blkdev_get_block, NULL); 1737 1737 } 1738 1738 1739 - static void blkdev_vm_open(struct vm_area_struct *vma) 1740 - { 1741 - struct inode *bd_inode = bdev_file_inode(vma->vm_file); 1742 - struct block_device *bdev = I_BDEV(bd_inode); 1743 - 1744 - inode_lock(bd_inode); 1745 - bdev->bd_map_count++; 1746 - inode_unlock(bd_inode); 1747 - } 1748 - 1749 - static void blkdev_vm_close(struct vm_area_struct *vma) 1750 - { 1751 - struct inode *bd_inode = bdev_file_inode(vma->vm_file); 1752 - struct block_device *bdev = I_BDEV(bd_inode); 1753 - 1754 - inode_lock(bd_inode); 1755 - bdev->bd_map_count--; 1756 - inode_unlock(bd_inode); 1757 - } 1758 - 1759 1739 static const struct vm_operations_struct blkdev_dax_vm_ops = { 1760 - .open = blkdev_vm_open, 1761 - .close = blkdev_vm_close, 1762 1740 .fault = blkdev_dax_fault, 1763 1741 .pmd_fault = blkdev_dax_pmd_fault, 1764 1742 .pfn_mkwrite = blkdev_dax_fault, 1765 1743 }; 1766 1744 1767 1745 static const struct vm_operations_struct blkdev_default_vm_ops = { 1768 - .open = blkdev_vm_open, 1769 - .close = blkdev_vm_close, 1770 1746 .fault = filemap_fault, 1771 1747 .map_pages = filemap_map_pages, 1772 1748 }; ··· 1750 1774 static int blkdev_mmap(struct file *file, struct vm_area_struct *vma) 1751 1775 { 1752 1776 struct inode *bd_inode = bdev_file_inode(file); 1753 - struct block_device *bdev = I_BDEV(bd_inode); 1754 1777 1755 1778 file_accessed(file); 1756 - inode_lock(bd_inode); 1757 - bdev->bd_map_count++; 1758 1779 if (IS_DAX(bd_inode)) { 1759 1780 vma->vm_ops = &blkdev_dax_vm_ops; 1760 1781 vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE; 1761 1782 } else { 1762 1783 vma->vm_ops = &blkdev_default_vm_ops; 1763 1784 } 1764 - inode_unlock(bd_inode); 1765 1785 1766 1786 return 0; 1767 1787 }
+20
fs/dax.c
··· 58 58 blk_queue_exit(bdev->bd_queue); 59 59 } 60 60 61 + struct page *read_dax_sector(struct block_device *bdev, sector_t n) 62 + { 63 + struct page *page = alloc_pages(GFP_KERNEL, 0); 64 + struct blk_dax_ctl dax = { 65 + .size = PAGE_SIZE, 66 + .sector = n & ~((((int) PAGE_SIZE) / 512) - 1), 67 + }; 68 + long rc; 69 + 70 + if (!page) 71 + return ERR_PTR(-ENOMEM); 72 + 73 + rc = dax_map_atomic(bdev, &dax); 74 + if (rc < 0) 75 + return ERR_PTR(rc); 76 + memcpy_from_pmem(page_address(page), dax.addr, PAGE_SIZE); 77 + dax_unmap_atomic(bdev, &dax); 78 + return page; 79 + } 80 + 61 81 /* 62 82 * dax_clear_blocks() is called from within transaction context from XFS, 63 83 * and hence this means the stack from this point must follow GFP_NOFS
+11
include/linux/dax.h
··· 14 14 dax_iodone_t); 15 15 int __dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t, 16 16 dax_iodone_t); 17 + 18 + #ifdef CONFIG_FS_DAX 19 + struct page *read_dax_sector(struct block_device *bdev, sector_t n); 20 + #else 21 + static inline struct page *read_dax_sector(struct block_device *bdev, 22 + sector_t n) 23 + { 24 + return ERR_PTR(-ENXIO); 25 + } 26 + #endif 27 + 17 28 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 18 29 int dax_pmd_fault(struct vm_area_struct *, unsigned long addr, pmd_t *, 19 30 unsigned int flags, get_block_t, dax_iodone_t);
+1 -4
include/linux/fs.h
··· 484 484 int bd_fsfreeze_count; 485 485 /* Mutex for freeze */ 486 486 struct mutex bd_fsfreeze_mutex; 487 - #ifdef CONFIG_FS_DAX 488 - int bd_map_count; 489 - #endif 490 487 }; 491 488 492 489 /* ··· 2904 2907 2905 2908 static inline bool io_is_direct(struct file *filp) 2906 2909 { 2907 - return (filp->f_flags & O_DIRECT) || IS_DAX(file_inode(filp)); 2910 + return (filp->f_flags & O_DIRECT) || IS_DAX(filp->f_mapping->host); 2908 2911 } 2909 2912 2910 2913 static inline int iocb_flags(struct file *file)
+2 -2
include/linux/pfn_t.h
··· 29 29 return __pfn_to_pfn_t(pfn, 0); 30 30 } 31 31 32 - extern pfn_t phys_to_pfn_t(dma_addr_t addr, unsigned long flags); 32 + extern pfn_t phys_to_pfn_t(phys_addr_t addr, unsigned long flags); 33 33 34 34 static inline bool pfn_t_has_page(pfn_t pfn) 35 35 { ··· 48 48 return NULL; 49 49 } 50 50 51 - static inline dma_addr_t pfn_t_to_phys(pfn_t pfn) 51 + static inline phys_addr_t pfn_t_to_phys(pfn_t pfn) 52 52 { 53 53 return PFN_PHYS(pfn_t_to_pfn(pfn)); 54 54 }
-1
include/uapi/linux/fs.h
··· 222 222 #define BLKSECDISCARD _IO(0x12,125) 223 223 #define BLKROTATIONAL _IO(0x12,126) 224 224 #define BLKZEROOUT _IO(0x12,127) 225 - #define BLKDAXSET _IO(0x12,128) 226 225 #define BLKDAXGET _IO(0x12,129) 227 226 228 227 #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */
+12 -8
kernel/memremap.c
··· 150 150 } 151 151 EXPORT_SYMBOL(devm_memunmap); 152 152 153 - pfn_t phys_to_pfn_t(dma_addr_t addr, unsigned long flags) 153 + pfn_t phys_to_pfn_t(phys_addr_t addr, unsigned long flags) 154 154 { 155 155 return __pfn_to_pfn_t(addr >> PAGE_SHIFT, flags); 156 156 } ··· 183 183 184 184 static void pgmap_radix_release(struct resource *res) 185 185 { 186 - resource_size_t key; 186 + resource_size_t key, align_start, align_size, align_end; 187 + 188 + align_start = res->start & ~(SECTION_SIZE - 1); 189 + align_size = ALIGN(resource_size(res), SECTION_SIZE); 190 + align_end = align_start + align_size - 1; 187 191 188 192 mutex_lock(&pgmap_lock); 189 193 for (key = res->start; key <= res->end; key += SECTION_SIZE) ··· 230 226 percpu_ref_put(pgmap->ref); 231 227 } 232 228 233 - pgmap_radix_release(res); 234 - 235 229 /* pages are dead and unused, undo the arch mapping */ 236 230 align_start = res->start & ~(SECTION_SIZE - 1); 237 231 align_size = ALIGN(resource_size(res), SECTION_SIZE); 238 232 arch_remove_memory(align_start, align_size); 233 + pgmap_radix_release(res); 239 234 dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc, 240 235 "%s: failed to free all reserved pages\n", __func__); 241 236 } ··· 270 267 { 271 268 int is_ram = region_intersects(res->start, resource_size(res), 272 269 "System RAM"); 273 - resource_size_t key, align_start, align_size; 270 + resource_size_t key, align_start, align_size, align_end; 274 271 struct dev_pagemap *pgmap; 275 272 struct page_map *page_map; 276 273 unsigned long pfn; ··· 312 309 313 310 mutex_lock(&pgmap_lock); 314 311 error = 0; 315 - for (key = res->start; key <= res->end; key += SECTION_SIZE) { 312 + align_start = res->start & ~(SECTION_SIZE - 1); 313 + align_size = ALIGN(resource_size(res), SECTION_SIZE); 314 + align_end = align_start + align_size - 1; 315 + for (key = align_start; key <= align_end; key += SECTION_SIZE) { 316 316 struct dev_pagemap *dup; 317 317 318 318 rcu_read_lock(); ··· 342 336 if (nid < 0) 343 
337 nid = numa_mem_id(); 344 338 345 - align_start = res->start & ~(SECTION_SIZE - 1); 346 - align_size = ALIGN(resource_size(res), SECTION_SIZE); 347 339 error = arch_add_memory(nid, align_start, align_size, true); 348 340 if (error) 349 341 goto err_add_memory;
+7 -2
mm/memory.c
··· 1591 1591 * than insert_pfn). If a zero_pfn were inserted into a VM_MIXEDMAP 1592 1592 * without pte special, it would there be refcounted as a normal page. 1593 1593 */ 1594 - if (!HAVE_PTE_SPECIAL && pfn_t_valid(pfn)) { 1594 + if (!HAVE_PTE_SPECIAL && !pfn_t_devmap(pfn) && pfn_t_valid(pfn)) { 1595 1595 struct page *page; 1596 1596 1597 - page = pfn_t_to_page(pfn); 1597 + /* 1598 + * At this point we are committed to insert_page() 1599 + * regardless of whether the caller specified flags that 1600 + * result in pfn_t_has_page() == false. 1601 + */ 1602 + page = pfn_to_page(pfn_t_to_pfn(pfn)); 1598 1603 return insert_page(vma, addr, page, vma->vm_page_prot); 1599 1604 } 1600 1605 return insert_pfn(vma, addr, pfn, vma->vm_page_prot);
+1 -1
tools/testing/nvdimm/test/iomap.c
··· 113 113 } 114 114 EXPORT_SYMBOL(__wrap_devm_memremap_pages); 115 115 116 - pfn_t __wrap_phys_to_pfn_t(dma_addr_t addr, unsigned long flags) 116 + pfn_t __wrap_phys_to_pfn_t(phys_addr_t addr, unsigned long flags) 117 117 { 118 118 struct nfit_test_resource *nfit_res = get_nfit_res(addr); 119 119