Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'akpm' (patches from Andrew)

Merge misc fixes from Andrew Morton:
"18 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
mm, swap: use page-cluster as max window of VMA based swap readahead
mm: page_vma_mapped: ensure pmd is loaded with READ_ONCE outside of lock
kmemleak: clear stale pointers from task stacks
fs/binfmt_misc.c: node could be NULL when evicting inode
fs/mpage.c: fix mpage_writepage() for pages with buffers
linux/kernel.h: add/correct kernel-doc notation
tty: fall back to N_NULL if switching to N_TTY fails during hangup
Revert "vmalloc: back off when the current task is killed"
mm/cma.c: take __GFP_NOWARN into account in cma_alloc()
scripts/kallsyms.c: ignore symbol type 'n'
userfaultfd: selftest: exercise -EEXIST only in background transfer
mm: only display online cpus of the numa node
mm: remove unnecessary WARN_ONCE in page_vma_mapped_walk().
mm/mempolicy: fix NUMA_INTERLEAVE_HIT counter
include/linux/of.h: provide of_n_{addr,size}_cells wrappers for !CONFIG_OF
mm/madvise.c: add description for MADV_WIPEONFORK and MADV_KEEPONFORK
lib/Kconfig.debug: kernel hacking menu: runtime testing: keep tests together
mm/migrate: fix indexing bug (off by one) and avoid out of bound access

+245 -181
-10
Documentation/ABI/testing/sysfs-kernel-mm-swap
··· 14 14 still used for tmpfs etc. other users. If set to 15 15 false, the global swap readahead algorithm will be 16 16 used for all swappable pages. 17 - 18 - What: /sys/kernel/mm/swap/vma_ra_max_order 19 - Date: August 2017 20 - Contact: Linux memory management mailing list <linux-mm@kvack.org> 21 - Description: The max readahead size in order for VMA based swap readahead 22 - 23 - VMA based swap readahead algorithm will readahead at 24 - most 1 << max_order pages for each readahead. The 25 - real readahead size for each readahead will be scaled 26 - according to the estimation algorithm.
+10 -2
drivers/base/node.c
··· 27 27 28 28 static ssize_t node_read_cpumap(struct device *dev, bool list, char *buf) 29 29 { 30 + ssize_t n; 31 + cpumask_var_t mask; 30 32 struct node *node_dev = to_node(dev); 31 - const struct cpumask *mask = cpumask_of_node(node_dev->dev.id); 32 33 33 34 /* 2008/04/07: buf currently PAGE_SIZE, need 9 chars per 32 bits. */ 34 35 BUILD_BUG_ON((NR_CPUS/32 * 9) > (PAGE_SIZE-1)); 35 36 36 - return cpumap_print_to_pagebuf(list, buf, mask); 37 + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) 38 + return 0; 39 + 40 + cpumask_and(mask, cpumask_of_node(node_dev->dev.id), cpu_online_mask); 41 + n = cpumap_print_to_pagebuf(list, buf, mask); 42 + free_cpumask_var(mask); 43 + 44 + return n; 37 45 } 38 46 39 47 static inline ssize_t node_read_cpumask(struct device *dev,
+5 -6
drivers/tty/tty_ldisc.c
··· 694 694 tty_set_termios_ldisc(tty, disc); 695 695 retval = tty_ldisc_open(tty, tty->ldisc); 696 696 if (retval) { 697 - if (!WARN_ON(disc == N_TTY)) { 698 - tty_ldisc_put(tty->ldisc); 699 - tty->ldisc = NULL; 700 - } 697 + tty_ldisc_put(tty->ldisc); 698 + tty->ldisc = NULL; 701 699 } 702 700 return retval; 703 701 } ··· 750 752 751 753 if (tty->ldisc) { 752 754 if (reinit) { 753 - if (tty_ldisc_reinit(tty, tty->termios.c_line) < 0) 754 - tty_ldisc_reinit(tty, N_TTY); 755 + if (tty_ldisc_reinit(tty, tty->termios.c_line) < 0 && 756 + tty_ldisc_reinit(tty, N_TTY) < 0) 757 + WARN_ON(tty_ldisc_reinit(tty, N_NULL) < 0); 755 758 } else 756 759 tty_ldisc_kill(tty); 757 760 }
+1 -1
fs/binfmt_misc.c
··· 596 596 { 597 597 Node *e = inode->i_private; 598 598 599 - if (e->flags & MISC_FMT_OPEN_FILE) 599 + if (e && e->flags & MISC_FMT_OPEN_FILE) 600 600 filp_close(e->interp_file, NULL); 601 601 602 602 clear_inode(inode);
+4 -2
fs/block_dev.c
··· 716 716 717 717 set_page_writeback(page); 718 718 result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, true); 719 - if (result) 719 + if (result) { 720 720 end_page_writeback(page); 721 - else 721 + } else { 722 + clean_page_buffers(page); 722 723 unlock_page(page); 724 + } 723 725 blk_queue_exit(bdev->bd_queue); 724 726 return result; 725 727 }
+11 -3
fs/mpage.c
··· 468 468 try_to_free_buffers(page); 469 469 } 470 470 471 + /* 472 + * For situations where we want to clean all buffers attached to a page. 473 + * We don't need to calculate how many buffers are attached to the page, 474 + * we just need to specify a number larger than the maximum number of buffers. 475 + */ 476 + void clean_page_buffers(struct page *page) 477 + { 478 + clean_buffers(page, ~0U); 479 + } 480 + 471 481 static int __mpage_writepage(struct page *page, struct writeback_control *wbc, 472 482 void *data) 473 483 { ··· 615 605 if (bio == NULL) { 616 606 if (first_unmapped == blocks_per_page) { 617 607 if (!bdev_write_page(bdev, blocks[0] << (blkbits - 9), 618 - page, wbc)) { 619 - clean_buffers(page, first_unmapped); 608 + page, wbc)) 620 609 goto out; 621 - } 622 610 } 623 611 bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9), 624 612 BIO_MAX_PAGES, GFP_NOFS|__GFP_HIGH);
+1
include/linux/buffer_head.h
··· 232 232 loff_t, unsigned, unsigned, 233 233 struct page *, void *); 234 234 void page_zero_new_buffers(struct page *page, unsigned from, unsigned to); 235 + void clean_page_buffers(struct page *page); 235 236 int cont_write_begin(struct file *, struct address_space *, loff_t, 236 237 unsigned, unsigned, struct page **, void **, 237 238 get_block_t *, loff_t *);
+74 -16
include/linux/kernel.h
··· 44 44 45 45 #define STACK_MAGIC 0xdeadbeef 46 46 47 + /** 48 + * REPEAT_BYTE - repeat the value @x multiple times as an unsigned long value 49 + * @x: value to repeat 50 + * 51 + * NOTE: @x is not checked for > 0xff; larger values produce odd results. 52 + */ 47 53 #define REPEAT_BYTE(x) ((~0ul / 0xff) * (x)) 48 54 49 55 /* @a is a power of 2 value */ ··· 63 57 #define READ 0 64 58 #define WRITE 1 65 59 60 + /** 61 + * ARRAY_SIZE - get the number of elements in array @arr 62 + * @arr: array to be sized 63 + */ 66 64 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) 67 65 68 66 #define u64_to_user_ptr(x) ( \ ··· 86 76 #define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1) 87 77 #define round_down(x, y) ((x) & ~__round_mask(x, y)) 88 78 79 + /** 80 + * FIELD_SIZEOF - get the size of a struct's field 81 + * @t: the target struct 82 + * @f: the target struct's field 83 + * Return: the size of @f in the struct definition without having a 84 + * declared instance of @t. 85 + */ 89 86 #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) 87 + 90 88 #define DIV_ROUND_UP __KERNEL_DIV_ROUND_UP 91 89 92 90 #define DIV_ROUND_DOWN_ULL(ll, d) \ ··· 125 107 /* 126 108 * Divide positive or negative dividend by positive or negative divisor 127 109 * and round to closest integer. Result is undefined for negative 128 - * divisors if he dividend variable type is unsigned and for negative 110 + * divisors if the dividend variable type is unsigned and for negative 129 111 * dividends if the divisor variable type is unsigned. 130 112 */ 131 113 #define DIV_ROUND_CLOSEST(x, divisor)( \ ··· 265 247 * @ep_ro: right open interval endpoint 266 248 * 267 249 * Perform a "reciprocal multiplication" in order to "scale" a value into 268 - * range [0, ep_ro), where the upper interval endpoint is right-open. 250 + * range [0, @ep_ro), where the upper interval endpoint is right-open. 269 251 * This is useful, e.g. for accessing a index of an array containing 270 - * ep_ro elements, for example. Think of it as sort of modulus, only that 252 + * @ep_ro elements, for example. Think of it as sort of modulus, only that 271 253 * the result isn't that of modulo. ;) Note that if initial input is a 272 254 * small value, then result will return 0. 273 255 * 274 - * Return: a result based on val in interval [0, ep_ro). 256 + * Return: a result based on @val in interval [0, @ep_ro). 275 257 */ 276 258 static inline u32 reciprocal_scale(u32 val, u32 ep_ro) 277 259 { ··· 636 618 * trace_printk - printf formatting in the ftrace buffer 637 619 * @fmt: the printf format for printing 638 620 * 639 - * Note: __trace_printk is an internal function for trace_printk and 640 - * the @ip is passed in via the trace_printk macro. 621 + * Note: __trace_printk is an internal function for trace_printk() and 622 + * the @ip is passed in via the trace_printk() macro. 641 623 * 642 624 * This function allows a kernel developer to debug fast path sections 643 625 * that printk is not appropriate for. By scattering in various ··· 647 629 * This is intended as a debugging tool for the developer only. 648 630 * Please refrain from leaving trace_printks scattered around in 649 631 * your code. (Extra memory is used for special buffers that are 650 - * allocated when trace_printk() is used) 632 + * allocated when trace_printk() is used.) 651 633 * 652 634 * A little optization trick is done here. If there's only one 653 635 * argument, there's no need to scan the string for printf formats. ··· 699 681 * the @ip is passed in via the trace_puts macro. 700 682 * 701 683 * This is similar to trace_printk() but is made for those really fast 702 - * paths that a developer wants the least amount of "Heisenbug" affects, 684 + * paths that a developer wants the least amount of "Heisenbug" effects, 703 685 * where the processing of the print format is still too much. 704 686 * 705 687 * This function allows a kernel developer to debug fast path sections ··· 710 692 * This is intended as a debugging tool for the developer only. 711 693 * Please refrain from leaving trace_puts scattered around in 712 694 * your code. (Extra memory is used for special buffers that are 713 - * allocated when trace_puts() is used) 695 + * allocated when trace_puts() is used.) 714 696 * 715 697 * Returns: 0 if nothing was written, positive # if string was. 716 698 * (1 when __trace_bputs is used, strlen(str) when __trace_puts is used) ··· 789 771 t2 min2 = (y); \ 790 772 (void) (&min1 == &min2); \ 791 773 min1 < min2 ? min1 : min2; }) 774 + 775 + /** 776 + * min - return minimum of two values of the same or compatible types 777 + * @x: first value 778 + * @y: second value 779 + */ 792 780 #define min(x, y) \ 793 781 __min(typeof(x), typeof(y), \ 794 782 __UNIQUE_ID(min1_), __UNIQUE_ID(min2_), \ ··· 805 781 t2 max2 = (y); \ 806 782 (void) (&max1 == &max2); \ 807 783 max1 > max2 ? max1 : max2; }) 784 + 785 + /** 786 + * max - return maximum of two values of the same or compatible types 787 + * @x: first value 788 + * @y: second value 789 + */ 808 790 #define max(x, y) \ 809 791 __max(typeof(x), typeof(y), \ 810 792 __UNIQUE_ID(max1_), __UNIQUE_ID(max2_), \ 811 793 x, y) 812 794 795 + /** 796 + * min3 - return minimum of three values 797 + * @x: first value 798 + * @y: second value 799 + * @z: third value 800 + */ 813 801 #define min3(x, y, z) min((typeof(x))min(x, y), z) 802 + 803 + /** 804 + * max3 - return maximum of three values 805 + * @x: first value 806 + * @y: second value 807 + * @z: third value 808 + */ 814 809 #define max3(x, y, z) max((typeof(x))max(x, y), z) 815 810 816 811 /** ··· 848 805 * @lo: lowest allowable value 849 806 * @hi: highest allowable value 850 807 * 851 - * This macro does strict typechecking of lo/hi to make sure they are of the 852 - * same type as val. See the unnecessary pointer comparisons. 808 + * This macro does strict typechecking of @lo/@hi to make sure they are of the 809 + * same type as @val. See the unnecessary pointer comparisons. 853 810 */ 854 811 #define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi) 855 812 ··· 859 816 * 860 817 * Or not use min/max/clamp at all, of course. 861 818 */ 819 + 820 + /** 821 + * min_t - return minimum of two values, using the specified type 822 + * @type: data type to use 823 + * @x: first value 824 + * @y: second value 825 + */ 862 826 #define min_t(type, x, y) \ 863 827 __min(type, type, \ 864 828 __UNIQUE_ID(min1_), __UNIQUE_ID(min2_), \ 865 829 x, y) 866 830 831 + /** 832 + * max_t - return maximum of two values, using the specified type 833 + * @type: data type to use 834 + * @x: first value 835 + * @y: second value 836 + */ 867 837 #define max_t(type, x, y) \ 868 838 __max(type, type, \ 869 839 __UNIQUE_ID(min1_), __UNIQUE_ID(min2_), \ ··· 890 834 * @hi: maximum allowable value 891 835 * 892 836 * This macro does no typechecking and uses temporary variables of type 893 - * 'type' to make all the comparisons. 837 + * @type to make all the comparisons. 894 838 */ 895 839 #define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi) 896 840 ··· 901 845 * @hi: maximum allowable value 902 846 * 903 847 * This macro does no typechecking and uses temporary variables of whatever 904 - * type the input argument 'val' is. This is useful when val is an unsigned 905 - * type and min and max are literals that will otherwise be assigned a signed 848 + * type the input argument @val is. This is useful when @val is an unsigned 849 + * type and @lo and @hi are literals that will otherwise be assigned a signed 906 850 * integer type. 907 851 */ 908 852 #define clamp_val(val, lo, hi) clamp_t(typeof(val), val, lo, hi) 909 853 910 854 911 - /* 912 - * swap - swap value of @a and @b 855 + /** 856 + * swap - swap values of @a and @b 857 + * @a: first value 858 + * @b: second value 913 859 */ 914 860 #define swap(a, b) \ 915 861 do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
+10
include/linux/of.h
··· 734 734 return NULL; 735 735 } 736 736 737 + static inline int of_n_addr_cells(struct device_node *np) 738 + { 739 + return 0; 740 + 741 + } 742 + static inline int of_n_size_cells(struct device_node *np) 743 + { 744 + return 0; 745 + } 746 + 737 747 static inline int of_property_read_u64(const struct device_node *np, 738 748 const char *propname, u64 *out_value) 739 749 {
+1 -1
include/linux/thread_info.h
··· 42 42 #define THREAD_ALIGN THREAD_SIZE 43 43 #endif 44 44 45 - #ifdef CONFIG_DEBUG_STACK_USAGE 45 + #if IS_ENABLED(CONFIG_DEBUG_STACK_USAGE) || IS_ENABLED(CONFIG_DEBUG_KMEMLEAK) 46 46 # define THREADINFO_GFP (GFP_KERNEL_ACCOUNT | __GFP_NOTRACK | \ 47 47 __GFP_ZERO) 48 48 #else
+4
kernel/fork.c
··· 215 215 if (!s) 216 216 continue; 217 217 218 + #ifdef CONFIG_DEBUG_KMEMLEAK 219 + /* Clear stale pointers from reused stack. */ 220 + memset(s->addr, 0, THREAD_SIZE); 221 + #endif 218 222 tsk->stack_vm_area = s; 219 223 return s->addr; 220 224 }
+71 -72
lib/Kconfig.debug
··· 1590 1590 1591 1591 source kernel/trace/Kconfig 1592 1592 1593 + config PROVIDE_OHCI1394_DMA_INIT 1594 + bool "Remote debugging over FireWire early on boot" 1595 + depends on PCI && X86 1596 + help 1597 + If you want to debug problems which hang or crash the kernel early 1598 + on boot and the crashing machine has a FireWire port, you can use 1599 + this feature to remotely access the memory of the crashed machine 1600 + over FireWire. This employs remote DMA as part of the OHCI1394 1601 + specification which is now the standard for FireWire controllers. 1602 + 1603 + With remote DMA, you can monitor the printk buffer remotely using 1604 + firescope and access all memory below 4GB using fireproxy from gdb. 1605 + Even controlling a kernel debugger is possible using remote DMA. 1606 + 1607 + Usage: 1608 + 1609 + If ohci1394_dma=early is used as boot parameter, it will initialize 1610 + all OHCI1394 controllers which are found in the PCI config space. 1611 + 1612 + As all changes to the FireWire bus such as enabling and disabling 1613 + devices cause a bus reset and thereby disable remote DMA for all 1614 + devices, be sure to have the cable plugged and FireWire enabled on 1615 + the debugging host before booting the debug target for debugging. 1616 + 1617 + This code (~1k) is freed after boot. By then, the firewire stack 1618 + in charge of the OHCI-1394 controllers should be used instead. 1619 + 1620 + See Documentation/debugging-via-ohci1394.txt for more information. 1621 + 1622 + config DMA_API_DEBUG 1623 + bool "Enable debugging of DMA-API usage" 1624 + depends on HAVE_DMA_API_DEBUG 1625 + help 1626 + Enable this option to debug the use of the DMA API by device drivers. 1627 + With this option you will be able to detect common bugs in device 1628 + drivers like double-freeing of DMA mappings or freeing mappings that 1629 + were never allocated. 1630 + 1631 + This also attempts to catch cases where a page owned by DMA is 1632 + accessed by the cpu in a way that could cause data corruption. For 1633 + example, this enables cow_user_page() to check that the source page is 1634 + not undergoing DMA. 1635 + 1636 + This option causes a performance degradation. Use only if you want to 1637 + debug device drivers and dma interactions. 1638 + 1639 + If unsure, say N. 1640 + 1593 1641 menu "Runtime Testing" 1594 1642 1595 1643 config LKDTM ··· 1797 1749 1798 1750 If unsure, say N. 1799 1751 1800 - endmenu # runtime tests 1801 - 1802 - config PROVIDE_OHCI1394_DMA_INIT 1803 - bool "Remote debugging over FireWire early on boot" 1804 - depends on PCI && X86 1805 - help 1806 - If you want to debug problems which hang or crash the kernel early 1807 - on boot and the crashing machine has a FireWire port, you can use 1808 - this feature to remotely access the memory of the crashed machine 1809 - over FireWire. This employs remote DMA as part of the OHCI1394 1810 - specification which is now the standard for FireWire controllers. 1811 - 1812 - With remote DMA, you can monitor the printk buffer remotely using 1813 - firescope and access all memory below 4GB using fireproxy from gdb. 1814 - Even controlling a kernel debugger is possible using remote DMA. 1815 - 1816 - Usage: 1817 - 1818 - If ohci1394_dma=early is used as boot parameter, it will initialize 1819 - all OHCI1394 controllers which are found in the PCI config space. 1820 - 1821 - As all changes to the FireWire bus such as enabling and disabling 1822 - devices cause a bus reset and thereby disable remote DMA for all 1823 - devices, be sure to have the cable plugged and FireWire enabled on 1824 - the debugging host before booting the debug target for debugging. 1825 - 1826 - This code (~1k) is freed after boot. By then, the firewire stack 1827 - in charge of the OHCI-1394 controllers should be used instead. 1828 - 1829 - See Documentation/debugging-via-ohci1394.txt for more information. 1830 - 1831 - config DMA_API_DEBUG 1832 - bool "Enable debugging of DMA-API usage" 1833 - depends on HAVE_DMA_API_DEBUG 1834 - help 1835 - Enable this option to debug the use of the DMA API by device drivers. 1836 - With this option you will be able to detect common bugs in device 1837 - drivers like double-freeing of DMA mappings or freeing mappings that 1838 - were never allocated. 1839 - 1840 - This also attempts to catch cases where a page owned by DMA is 1841 - accessed by the cpu in a way that could cause data corruption. For 1842 - example, this enables cow_user_page() to check that the source page is 1843 - not undergoing DMA. 1844 - 1845 - This option causes a performance degradation. Use only if you want to 1846 - debug device drivers and dma interactions. 1847 - 1848 - If unsure, say N. 1849 - 1850 1752 config TEST_LKM 1851 1753 tristate "Test module loading with 'hello world' module" 1852 1754 default n ··· 1871 1873 1872 1874 If unsure, say N. 1873 1875 1874 - config MEMTEST 1875 - bool "Memtest" 1876 - depends on HAVE_MEMBLOCK 1877 - ---help--- 1878 - This option adds a kernel parameter 'memtest', which allows memtest 1879 - to be set. 1880 - memtest=0, mean disabled; -- default 1881 - memtest=1, mean do 1 test pattern; 1882 - ... 1883 - memtest=17, mean do 17 test patterns. 1884 - If you are unsure how to answer this question, answer N. 1885 - 1886 1876 config TEST_STATIC_KEYS 1887 1877 tristate "Test static keys" 1888 1878 default n 1889 1879 depends on m 1890 1880 help 1891 1881 Test the static key interfaces. 1892 - 1893 - If unsure, say N. 1894 - 1895 - config BUG_ON_DATA_CORRUPTION 1896 - bool "Trigger a BUG when data corruption is detected" 1897 - select DEBUG_LIST 1898 - help 1899 - Select this option if the kernel should BUG when it encounters 1900 - data corruption in kernel memory structures when they get checked 1901 - for validity. 1902 1882 1903 1883 If unsure, say N. 1904 1884 ··· 1917 1941 1918 1942 If unsure, say N. 1919 1943 1944 + endmenu # runtime tests 1945 + 1946 + config MEMTEST 1947 + bool "Memtest" 1948 + depends on HAVE_MEMBLOCK 1949 + ---help--- 1950 + This option adds a kernel parameter 'memtest', which allows memtest 1951 + to be set. 1952 + memtest=0, mean disabled; -- default 1953 + memtest=1, mean do 1 test pattern; 1954 + ... 1955 + memtest=17, mean do 17 test patterns. 1956 + If you are unsure how to answer this question, answer N. 1957 + 1958 + config BUG_ON_DATA_CORRUPTION 1959 + bool "Trigger a BUG when data corruption is detected" 1960 + select DEBUG_LIST 1961 + help 1962 + Select this option if the kernel should BUG when it encounters 1963 + data corruption in kernel memory structures when they get checked 1964 + for validity. 1965 + 1966 + If unsure, say N. 1920 1967 1921 1968 source "samples/Kconfig" 1922 1969
+1 -1
mm/cma.c
··· 460 460 461 461 trace_cma_alloc(pfn, page, count, align); 462 462 463 - if (ret) { 463 + if (ret && !(gfp_mask & __GFP_NOWARN)) { 464 464 pr_info("%s: alloc failed, req-size: %zu pages, ret: %d\n", 465 465 __func__, count, ret); 466 466 cma_debug_show_areas(cma);
+6 -1
mm/madvise.c
··· 757 757 * MADV_DONTFORK - omit this area from child's address space when forking: 758 758 * typically, to avoid COWing pages pinned by get_user_pages(). 759 759 * MADV_DOFORK - cancel MADV_DONTFORK: no longer omit this area when forking. 760 + * MADV_WIPEONFORK - present the child process with zero-filled memory in this 761 + * range after a fork. 762 + * MADV_KEEPONFORK - undo the effect of MADV_WIPEONFORK 760 763 * MADV_HWPOISON - trigger memory error handler as if the given memory range 761 764 * were corrupted by unrecoverable hardware memory failure. 762 765 * MADV_SOFT_OFFLINE - try to soft-offline the given range of memory. ··· 780 777 * zero - success 781 778 * -EINVAL - start + len < 0, start is not page-aligned, 782 779 * "behavior" is not a valid value, or application 783 - * is attempting to release locked or shared pages. 780 + * is attempting to release locked or shared pages, 781 + * or the specified address range includes file, Huge TLB, 782 + * MAP_SHARED or VMPFNMAP range. 784 783 * -ENOMEM - addresses in the specified range are not currently 785 784 * mapped, or are outside the AS of the process. 786 785 * -EIO - an I/O error occurred while paging in data.
+5 -2
mm/mempolicy.c
··· 1920 1920 struct page *page; 1921 1921 1922 1922 page = __alloc_pages(gfp, order, nid); 1923 - if (page && page_to_nid(page) == nid) 1924 - inc_zone_page_state(page, NUMA_INTERLEAVE_HIT); 1923 + if (page && page_to_nid(page) == nid) { 1924 + preempt_disable(); 1925 + __inc_numa_state(page_zone(page), NUMA_INTERLEAVE_HIT); 1926 + preempt_enable(); 1927 + } 1925 1928 return page; 1926 1929 } 1927 1930
+2 -1
mm/migrate.c
··· 2146 2146 unsigned long addr; 2147 2147 2148 2148 for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) { 2149 - migrate->src[migrate->npages++] = MIGRATE_PFN_MIGRATE; 2149 + migrate->src[migrate->npages] = MIGRATE_PFN_MIGRATE; 2150 2150 migrate->dst[migrate->npages] = 0; 2151 + migrate->npages++; 2151 2152 migrate->cpages++; 2152 2153 } 2153 2154
+11 -17
mm/page_vma_mapped.c
··· 6 6 7 7 #include "internal.h" 8 8 9 - static inline bool check_pmd(struct page_vma_mapped_walk *pvmw) 10 - { 11 - pmd_t pmde; 12 - /* 13 - * Make sure we don't re-load pmd between present and !trans_huge check. 14 - * We need a consistent view. 15 - */ 16 - pmde = READ_ONCE(*pvmw->pmd); 17 - return pmd_present(pmde) && !pmd_trans_huge(pmde); 18 - } 19 - 20 9 static inline bool not_found(struct page_vma_mapped_walk *pvmw) 21 10 { 22 11 page_vma_mapped_walk_done(pvmw); ··· 105 116 pgd_t *pgd; 106 117 p4d_t *p4d; 107 118 pud_t *pud; 119 + pmd_t pmde; 108 120 109 121 /* The only possible pmd mapping has been handled on last iteration */ 110 122 if (pvmw->pmd && !pvmw->pte) ··· 138 148 if (!pud_present(*pud)) 139 149 return false; 140 150 pvmw->pmd = pmd_offset(pud, pvmw->address); 141 - if (pmd_trans_huge(*pvmw->pmd) || is_pmd_migration_entry(*pvmw->pmd)) { 151 + /* 152 + * Make sure the pmd value isn't cached in a register by the 153 + * compiler and used as a stale value after we've observed a 154 + * subsequent update. 155 + */ 156 + pmde = READ_ONCE(*pvmw->pmd); 157 + if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) { 142 158 pvmw->ptl = pmd_lock(mm, pvmw->pmd); 143 159 if (likely(pmd_trans_huge(*pvmw->pmd))) { 144 160 if (pvmw->flags & PVMW_MIGRATION) ··· 163 167 return not_found(pvmw); 164 168 return true; 165 169 } 166 - } else 167 - WARN_ONCE(1, "Non present huge pmd without pmd migration enabled!"); 170 + } 168 171 return not_found(pvmw); 169 172 } else { 170 173 /* THP pmd was split under us: handle on pte level */ 171 174 spin_unlock(pvmw->ptl); 172 175 pvmw->ptl = NULL; 173 176 } 174 - } else { 175 - if (!check_pmd(pvmw)) 176 - return false; 177 + } else if (!pmd_present(pmde)) { 178 + return false; 177 179 } 178 180 if (!map_pte(pvmw)) 179 181 goto next_pte;
+7 -34
mm/swap_state.c
··· 39 39 static unsigned int nr_swapper_spaces[MAX_SWAPFILES]; 40 40 bool swap_vma_readahead = true; 41 41 42 - #define SWAP_RA_MAX_ORDER_DEFAULT 3 43 - 44 - static int swap_ra_max_order = SWAP_RA_MAX_ORDER_DEFAULT; 45 - 46 42 #define SWAP_RA_WIN_SHIFT (PAGE_SHIFT / 2) 47 43 #define SWAP_RA_HITS_MASK ((1UL << SWAP_RA_WIN_SHIFT) - 1) 48 44 #define SWAP_RA_HITS_MAX SWAP_RA_HITS_MASK ··· 660 664 pte_t *tpte; 661 665 #endif 662 666 667 + max_win = 1 << min_t(unsigned int, READ_ONCE(page_cluster), 668 + SWAP_RA_ORDER_CEILING); 669 + if (max_win == 1) { 670 + swap_ra->win = 1; 671 + return NULL; 672 + } 673 + 663 674 faddr = vmf->address; 664 675 entry = pte_to_swp_entry(vmf->orig_pte); 665 676 if ((unlikely(non_swap_entry(entry)))) ··· 674 671 page = lookup_swap_cache(entry, vma, faddr); 675 672 if (page) 676 673 return page; 677 - 678 - max_win = 1 << READ_ONCE(swap_ra_max_order); 679 - if (max_win == 1) { 680 - swap_ra->win = 1; 681 - return NULL; 682 - } 683 674 684 675 fpfn = PFN_DOWN(faddr); 685 676 swap_ra_info = GET_SWAP_RA_VAL(vma); ··· 783 786 __ATTR(vma_ra_enabled, 0644, vma_ra_enabled_show, 784 787 vma_ra_enabled_store); 785 788 786 - static ssize_t vma_ra_max_order_show(struct kobject *kobj, 787 - struct kobj_attribute *attr, char *buf) 788 - { 789 - return sprintf(buf, "%d\n", swap_ra_max_order); 790 - } 791 - static ssize_t vma_ra_max_order_store(struct kobject *kobj, 792 - struct kobj_attribute *attr, 793 - const char *buf, size_t count) 794 - { 795 - int err, v; 796 - 797 - err = kstrtoint(buf, 10, &v); 798 - if (err || v > SWAP_RA_ORDER_CEILING || v <= 0) 799 - return -EINVAL; 800 - 801 - swap_ra_max_order = v; 802 - 803 - return count; 804 - } 805 - static struct kobj_attribute vma_ra_max_order_attr = 806 - __ATTR(vma_ra_max_order, 0644, vma_ra_max_order_show, 807 - vma_ra_max_order_store); 808 - 809 789 static struct attribute *swap_attrs[] = { 810 790 &vma_ra_enabled_attr.attr, 811 - &vma_ra_max_order_attr.attr, 812 791 NULL, 813 792 }; 814 793
-6
mm/vmalloc.c
··· 1695 1695 for (i = 0; i < area->nr_pages; i++) { 1696 1696 struct page *page; 1697 1697 1698 - if (fatal_signal_pending(current)) { 1699 - area->nr_pages = i; 1700 - goto fail_no_warn; 1701 - } 1702 - 1703 1698 if (node == NUMA_NO_NODE) 1704 1699 page = alloc_page(alloc_mask|highmem_mask); 1705 1700 else ··· 1718 1723 warn_alloc(gfp_mask, NULL, 1719 1724 "vmalloc: allocation failure, allocated %ld of %ld bytes", 1720 1725 (area->nr_pages*PAGE_SIZE), area->size); 1721 - fail_no_warn: 1722 1726 vfree(area->addr); 1723 1727 return NULL; 1724 1728 }
+1 -1
scripts/kallsyms.c
··· 158 158 else if (str[0] == '$') 159 159 return -1; 160 160 /* exclude debugging symbols */ 161 - else if (stype == 'N') 161 + else if (stype == 'N' || stype == 'n') 162 162 return -1; 163 163 164 164 /* include the type field in the symbol name, so that it gets
+20 -5
tools/testing/selftests/vm/userfaultfd.c
··· 397 397 } 398 398 } 399 399 400 - static int copy_page(int ufd, unsigned long offset) 400 + static int __copy_page(int ufd, unsigned long offset, bool retry) 401 401 { 402 402 struct uffdio_copy uffdio_copy; 403 403 ··· 418 418 fprintf(stderr, "UFFDIO_COPY unexpected copy %Ld\n", 419 419 uffdio_copy.copy), exit(1); 420 420 } else { 421 - if (test_uffdio_copy_eexist) { 421 + if (test_uffdio_copy_eexist && retry) { 422 422 test_uffdio_copy_eexist = false; 423 423 retry_copy_page(ufd, &uffdio_copy, offset); 424 424 } 425 425 return 1; 426 426 } 427 427 return 0; 428 + } 429 + 430 + static int copy_page_retry(int ufd, unsigned long offset) 431 + { 432 + return __copy_page(ufd, offset, true); 433 + } 434 + 435 + static int copy_page(int ufd, unsigned long offset) 436 + { 437 + return __copy_page(ufd, offset, false); 428 438 } 429 439 430 440 static void *uffd_poll_thread(void *arg) ··· 554 544 for (page_nr = cpu * nr_pages_per_cpu; 555 545 page_nr < (cpu+1) * nr_pages_per_cpu; 556 546 page_nr++) 557 - copy_page(uffd, page_nr * page_size); 547 + copy_page_retry(uffd, page_nr * page_size); 558 548 559 549 return NULL; 560 550 } ··· 789 779 } 790 780 } 791 781 792 - static int uffdio_zeropage(int ufd, unsigned long offset) 782 + static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry) 793 783 { 794 784 struct uffdio_zeropage uffdio_zeropage; 795 785 int ret; ··· 824 814 fprintf(stderr, "UFFDIO_ZEROPAGE unexpected %Ld\n", 825 815 uffdio_zeropage.zeropage), exit(1); 826 816 } else { 827 - if (test_uffdio_zeropage_eexist) { 817 + if (test_uffdio_zeropage_eexist && retry) { 828 818 test_uffdio_zeropage_eexist = false; 829 819 retry_uffdio_zeropage(ufd, &uffdio_zeropage, 830 820 offset); ··· 838 828 } 839 829 840 830 return 0; 831 + } 832 + 833 + static int uffdio_zeropage(int ufd, unsigned long offset) 834 + { 835 + return __uffdio_zeropage(ufd, offset, false); 841 836 } 842 837 843 838 /* exercise UFFDIO_ZEROPAGE */