Merge branch 'akpm' (Andrew's patch-bomb)

tjh.dev / kernel

fork

Configure Feed

Issues Pull Requests Commits Tags

Feed URL

Select the types of activity you want to include in your feed.

Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

kernel os linux

fork

Configure Feed

Issues Pull Requests Commits Tags

Feed URL

Select the types of activity you want to include in your feed.

Merge branch 'akpm' (Andrew's patch-bomb)

Merge fixes from Andrew Morton.

Random drivers and some VM fixes.

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (17 commits)
mm: compaction: Abort async compaction if locks are contended or taking too long
mm: have order > 0 compaction start near a pageblock with free pages
rapidio/tsi721: fix unused variable compiler warning
rapidio/tsi721: fix inbound doorbell interrupt handling
drivers/rtc/rtc-rs5c348.c: fix hour decoding in 12-hour mode
mm: correct page->pfmemalloc to fix deactivate_slab regression
drivers/rtc/rtc-pcf2123.c: initialize dynamic sysfs attributes
mm/compaction.c: fix deferring compaction mistake
drivers/misc/sgi-xp/xpc_uv.c: SGI XPC fails to load when cpu 0 is out of IRQ resources
string: do not export memweight() to userspace
hugetlb: update hugetlbpage.txt
checkpatch: add control statement test to SINGLE_STATEMENT_DO_WHILE_MACRO
mm: hugetlbfs: correctly populate shared pmd
cciss: fix incorrect scsi status reporting
Documentation: update mount option in filesystem/vfat.txt
mm: change nr_ptes BUG_ON to WARN_ON
cs5535-clockevt: typo, it's MFGPT, not MFPGT

Linus Torvalds 14 years ago 23dcfa61 a484147a

+258 -110

16 changed files

expand all collapse all

Documentation

filesystems

vfat.txt

hugetlbpage.txt

arch

x86

hugetlbpage.c

drivers

block

cciss_scsi.c

clocksource

cs5535-clockevt.c

misc

sgi-xp

xpc_uv.c

rapidio

devices

tsi721.c

rtc

rtc-pcf2123.c

rtc-rs5c348.c

include

linux

compaction.h

string.h

compaction.c

internal.h

mmap.c

page_alloc.c

scripts

checkpatch.pl

+11

Documentation/filesystems/vfat.txt

reviewed

··· 137 137 without doing anything or remount the partition in 138 138 read-only mode (default behavior). 139 139 140 140 + discard -- If set, issues discard/TRIM commands to the block 141 141 + device when blocks are freed. This is useful for SSD devices 142 142 + and sparse/thinly-provisioned LUNs. 143 143 + 144 144 + nfs -- This option maintains an index (cache) of directory 145 145 + inodes by i_logstart which is used by the nfs-related code to 146 146 + improve look-ups. 147 147 + 148 148 + Enable this only if you want to export the FAT filesystem 149 149 + over NFS 150 150 + 140 151 <bool>: 0,1,yes,no,true,false 141 152 142 153 TODO

+8 -2

Documentation/vm/hugetlbpage.txt

reviewed

··· 299 299 ******************************************************************* 300 300 301 301 /* 302 302 - * hugepage-shm: see Documentation/vm/hugepage-shm.c 302 302 + * map_hugetlb: see tools/testing/selftests/vm/map_hugetlb.c 303 303 */ 304 304 305 305 ******************************************************************* 306 306 307 307 /* 308 308 - * hugepage-mmap: see Documentation/vm/hugepage-mmap.c 308 308 + * hugepage-shm: see tools/testing/selftests/vm/hugepage-shm.c 309 309 + */ 310 310 + 311 311 + ******************************************************************* 312 312 + 313 313 + /* 314 314 + * hugepage-mmap: see tools/testing/selftests/vm/hugepage-mmap.c 309 315 */

+16 -5

arch/x86/mm/hugetlbpage.c

reviewed

··· 56 56 } 57 57 58 58 /* 59 59 - * search for a shareable pmd page for hugetlb. 59 59 + * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc() 60 60 + * and returns the corresponding pte. While this is not necessary for the 61 61 + * !shared pmd case because we can allocate the pmd later as well, it makes the 62 62 + * code much cleaner. pmd allocation is essential for the shared case because 63 63 + * pud has to be populated inside the same i_mmap_mutex section - otherwise 64 64 + * racing tasks could either miss the sharing (see huge_pte_offset) or select a 65 65 + * bad pmd for sharing. 60 66 */ 61 61 - static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) 67 67 + static pte_t * 68 68 + huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) 62 69 { 63 70 struct vm_area_struct *vma = find_vma(mm, addr); 64 71 struct address_space *mapping = vma->vm_file->f_mapping; ··· 75 68 struct vm_area_struct *svma; 76 69 unsigned long saddr; 77 70 pte_t *spte = NULL; 71 71 + pte_t *pte; 78 72 79 73 if (!vma_shareable(vma, addr)) 80 80 - return; 74 74 + return (pte_t *)pmd_alloc(mm, pud, addr); 81 75 82 76 mutex_lock(&mapping->i_mmap_mutex); 83 77 vma_prio_tree_foreach(svma, &iter, &mapping->i_mmap, idx, idx) { ··· 105 97 put_page(virt_to_page(spte)); 106 98 spin_unlock(&mm->page_table_lock); 107 99 out: 100 100 + pte = (pte_t *)pmd_alloc(mm, pud, addr); 108 101 mutex_unlock(&mapping->i_mmap_mutex); 102 102 + return pte; 109 103 } 110 104 111 105 /* ··· 152 142 } else { 153 143 BUG_ON(sz != PMD_SIZE); 154 144 if (pud_none(*pud)) 155 155 - huge_pmd_share(mm, addr, pud); 156 156 - pte = (pte_t *) pmd_alloc(mm, pud, addr); 145 145 + pte = huge_pmd_share(mm, addr, pud); 146 146 + else 147 147 + pte = (pte_t *)pmd_alloc(mm, pud, addr); 157 148 } 158 149 } 159 150 BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte));

+1 -10

drivers/block/cciss_scsi.c

reviewed

··· 763 763 { 764 764 case CMD_TARGET_STATUS: 765 765 /* Pass it up to the upper layers... */ 766 766 - if( ei->ScsiStatus) 767 767 - { 768 768 - #if 0 769 769 - printk(KERN_WARNING "cciss: cmd %p " 770 770 - "has SCSI Status = %x\n", 771 771 - c, ei->ScsiStatus); 772 772 - #endif 773 773 - cmd->result |= (ei->ScsiStatus << 1); 774 774 - } 775 775 - else { /* scsi status is zero??? How??? */ 766 766 + if (!ei->ScsiStatus) { 776 767 777 768 /* Ordinarily, this case should never happen, but there is a bug 778 769 in some released firmware revisions that allows it to happen

+2 -2

drivers/clocksource/cs5535-clockevt.c

reviewed

··· 53 53 #define MFGPT_PERIODIC (MFGPT_HZ / HZ) 54 54 55 55 /* 56 56 - * The MFPGT timers on the CS5536 provide us with suitable timers to use 56 56 + * The MFGPT timers on the CS5536 provide us with suitable timers to use 57 57 * as clock event sources - not as good as a HPET or APIC, but certainly 58 58 * better than the PIT. This isn't a general purpose MFGPT driver, but 59 59 * a simplified one designed specifically to act as a clock event source. ··· 144 144 145 145 timer = cs5535_mfgpt_alloc_timer(MFGPT_TIMER_ANY, MFGPT_DOMAIN_WORKING); 146 146 if (!timer) { 147 147 - printk(KERN_ERR DRV_NAME ": Could not allocate MFPGT timer\n"); 147 147 + printk(KERN_ERR DRV_NAME ": Could not allocate MFGPT timer\n"); 148 148 return -ENODEV; 149 149 } 150 150 cs5535_event_clock = timer;

+65 -19

drivers/misc/sgi-xp/xpc_uv.c

reviewed

··· 18 18 #include <linux/interrupt.h> 19 19 #include <linux/delay.h> 20 20 #include <linux/device.h> 21 21 + #include <linux/cpu.h> 22 22 + #include <linux/module.h> 21 23 #include <linux/err.h> 22 24 #include <linux/slab.h> 23 25 #include <asm/uv/uv_hub.h> ··· 60 58 #define XPC_NOTIFY_MQ_SIZE_UV (4 * XP_MAX_NPARTITIONS_UV * \ 61 59 XPC_NOTIFY_MSG_SIZE_UV) 62 60 #define XPC_NOTIFY_IRQ_NAME "xpc_notify" 61 61 + 62 62 + static int xpc_mq_node = -1; 63 63 64 64 static struct xpc_gru_mq_uv *xpc_activate_mq_uv; 65 65 static struct xpc_gru_mq_uv *xpc_notify_mq_uv; ··· 113 109 #if defined CONFIG_X86_64 114 110 mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset, 115 111 UV_AFFINITY_CPU); 116 116 - if (mq->irq < 0) { 117 117 - dev_err(xpc_part, "uv_setup_irq() returned error=%d\n", 118 118 - -mq->irq); 112 112 + if (mq->irq < 0) 119 113 return mq->irq; 120 120 - } 121 114 122 115 mq->mmr_value = uv_read_global_mmr64(mmr_pnode, mq->mmr_offset); 123 116 ··· 239 238 mq->mmr_blade = uv_cpu_to_blade_id(cpu); 240 239 241 240 nid = cpu_to_node(cpu); 242 242 - page = alloc_pages_exact_node(nid, GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, 243 243 - pg_order); 241 241 + page = alloc_pages_exact_node(nid, 242 242 + GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, 243 243 + pg_order); 244 244 if (page == NULL) { 245 245 dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to alloc %d " 246 246 "bytes of memory on nid=%d for GRU mq\n", mq_size, nid); ··· 1733 1731 .notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_uv, 1734 1732 }; 1735 1733 1734 1734 + static int 1735 1735 + xpc_init_mq_node(int nid) 1736 1736 + { 1737 1737 + int cpu; 1738 1738 + 1739 1739 + get_online_cpus(); 1740 1740 + 1741 1741 + for_each_cpu(cpu, cpumask_of_node(nid)) { 1742 1742 + xpc_activate_mq_uv = 1743 1743 + xpc_create_gru_mq_uv(XPC_ACTIVATE_MQ_SIZE_UV, nid, 1744 1744 + XPC_ACTIVATE_IRQ_NAME, 1745 1745 + xpc_handle_activate_IRQ_uv); 1746 1746 + if (!IS_ERR(xpc_activate_mq_uv)) 1747 1747 + 
break; 1748 1748 + } 1749 1749 + if (IS_ERR(xpc_activate_mq_uv)) { 1750 1750 + put_online_cpus(); 1751 1751 + return PTR_ERR(xpc_activate_mq_uv); 1752 1752 + } 1753 1753 + 1754 1754 + for_each_cpu(cpu, cpumask_of_node(nid)) { 1755 1755 + xpc_notify_mq_uv = 1756 1756 + xpc_create_gru_mq_uv(XPC_NOTIFY_MQ_SIZE_UV, nid, 1757 1757 + XPC_NOTIFY_IRQ_NAME, 1758 1758 + xpc_handle_notify_IRQ_uv); 1759 1759 + if (!IS_ERR(xpc_notify_mq_uv)) 1760 1760 + break; 1761 1761 + } 1762 1762 + if (IS_ERR(xpc_notify_mq_uv)) { 1763 1763 + xpc_destroy_gru_mq_uv(xpc_activate_mq_uv); 1764 1764 + put_online_cpus(); 1765 1765 + return PTR_ERR(xpc_notify_mq_uv); 1766 1766 + } 1767 1767 + 1768 1768 + put_online_cpus(); 1769 1769 + return 0; 1770 1770 + } 1771 1771 + 1736 1772 int 1737 1773 xpc_init_uv(void) 1738 1774 { 1775 1775 + int nid; 1776 1776 + int ret = 0; 1777 1777 + 1739 1778 xpc_arch_ops = xpc_arch_ops_uv; 1740 1779 1741 1780 if (sizeof(struct xpc_notify_mq_msghdr_uv) > XPC_MSG_HDR_MAX_SIZE) { ··· 1785 1742 return -E2BIG; 1786 1743 } 1787 1744 1788 1788 - xpc_activate_mq_uv = xpc_create_gru_mq_uv(XPC_ACTIVATE_MQ_SIZE_UV, 0, 1789 1789 - XPC_ACTIVATE_IRQ_NAME, 1790 1790 - xpc_handle_activate_IRQ_uv); 1791 1791 - if (IS_ERR(xpc_activate_mq_uv)) 1792 1792 - return PTR_ERR(xpc_activate_mq_uv); 1745 1745 + if (xpc_mq_node < 0) 1746 1746 + for_each_online_node(nid) { 1747 1747 + ret = xpc_init_mq_node(nid); 1793 1748 1794 1794 - xpc_notify_mq_uv = xpc_create_gru_mq_uv(XPC_NOTIFY_MQ_SIZE_UV, 0, 1795 1795 - XPC_NOTIFY_IRQ_NAME, 1796 1796 - xpc_handle_notify_IRQ_uv); 1797 1797 - if (IS_ERR(xpc_notify_mq_uv)) { 1798 1798 - xpc_destroy_gru_mq_uv(xpc_activate_mq_uv); 1799 1799 - return PTR_ERR(xpc_notify_mq_uv); 1800 1800 - } 1749 1749 + if (!ret) 1750 1750 + break; 1751 1751 + } 1752 1752 + else 1753 1753 + ret = xpc_init_mq_node(xpc_mq_node); 1801 1754 1802 1802 - return 0; 1755 1755 + if (ret < 0) 1756 1756 + dev_err(xpc_part, "xpc_init_mq_node() returned error=%d\n", 1757 1757 + -ret); 1758 
1758 + 1759 1759 + return ret; 1803 1760 } 1804 1761 1805 1762 void ··· 1808 1765 xpc_destroy_gru_mq_uv(xpc_notify_mq_uv); 1809 1766 xpc_destroy_gru_mq_uv(xpc_activate_mq_uv); 1810 1767 } 1768 1768 + 1769 1769 + module_param(xpc_mq_node, int, 0); 1770 1770 + MODULE_PARM_DESC(xpc_mq_node, "Node number on which to allocate message queues.");

+11 -1

drivers/rapidio/devices/tsi721.c

reviewed

··· 435 435 " info %4.4x\n", DBELL_SID(idb.bytes), 436 436 DBELL_TID(idb.bytes), DBELL_INF(idb.bytes)); 437 437 } 438 438 + 439 439 + wr_ptr = ioread32(priv->regs + 440 440 + TSI721_IDQ_WP(IDB_QUEUE)) % IDB_QSIZE; 438 441 } 439 442 440 443 iowrite32(rd_ptr & (IDB_QSIZE - 1), ··· 448 445 regval |= TSI721_SR_CHINT_IDBQRCV; 449 446 iowrite32(regval, 450 447 priv->regs + TSI721_SR_CHINTE(IDB_QUEUE)); 448 448 + 449 449 + wr_ptr = ioread32(priv->regs + TSI721_IDQ_WP(IDB_QUEUE)) % IDB_QSIZE; 450 450 + if (wr_ptr != rd_ptr) 451 451 + schedule_work(&priv->idb_work); 451 452 } 452 453 453 454 /** ··· 2219 2212 const struct pci_device_id *id) 2220 2213 { 2221 2214 struct tsi721_device *priv; 2222 2222 - int i, cap; 2215 2215 + int cap; 2223 2216 int err; 2224 2217 u32 regval; 2225 2218 ··· 2239 2232 priv->pdev = pdev; 2240 2233 2241 2234 #ifdef DEBUG 2235 2235 + { 2236 2236 + int i; 2242 2237 for (i = 0; i <= PCI_STD_RESOURCE_END; i++) { 2243 2238 dev_dbg(&pdev->dev, "res[%d] @ 0x%llx (0x%lx, 0x%lx)\n", 2244 2239 i, (unsigned long long)pci_resource_start(pdev, i), 2245 2240 (unsigned long)pci_resource_len(pdev, i), 2246 2241 pci_resource_flags(pdev, i)); 2242 2242 + } 2247 2243 } 2248 2244 #endif 2249 2245 /*

drivers/rtc/rtc-pcf2123.c

reviewed

··· 43 43 #include <linux/rtc.h> 44 44 #include <linux/spi/spi.h> 45 45 #include <linux/module.h> 46 46 + #include <linux/sysfs.h> 46 47 47 48 #define DRV_VERSION "0.6" 48 49 ··· 293 292 pdata->rtc = rtc; 294 293 295 294 for (i = 0; i < 16; i++) { 295 295 + sysfs_attr_init(&pdata->regs[i].attr.attr); 296 296 sprintf(pdata->regs[i].name, "%1x", i); 297 297 pdata->regs[i].attr.attr.mode = S_IRUGO | S_IWUSR; 298 298 pdata->regs[i].attr.attr.name = pdata->regs[i].name;

+5 -2

drivers/rtc/rtc-rs5c348.c

reviewed

··· 122 122 tm->tm_min = bcd2bin(rxbuf[RS5C348_REG_MINS] & RS5C348_MINS_MASK); 123 123 tm->tm_hour = bcd2bin(rxbuf[RS5C348_REG_HOURS] & RS5C348_HOURS_MASK); 124 124 if (!pdata->rtc_24h) { 125 125 - tm->tm_hour %= 12; 126 126 - if (rxbuf[RS5C348_REG_HOURS] & RS5C348_BIT_PM) 125 125 + if (rxbuf[RS5C348_REG_HOURS] & RS5C348_BIT_PM) { 126 126 + tm->tm_hour -= 20; 127 127 + tm->tm_hour %= 12; 127 128 tm->tm_hour += 12; 129 129 + } else 130 130 + tm->tm_hour %= 12; 128 131 } 129 132 tm->tm_wday = bcd2bin(rxbuf[RS5C348_REG_WDAY] & RS5C348_WDAY_MASK); 130 133 tm->tm_mday = bcd2bin(rxbuf[RS5C348_REG_DAY] & RS5C348_DAY_MASK);

+2 -2

include/linux/compaction.h

reviewed

··· 22 22 extern int fragmentation_index(struct zone *zone, unsigned int order); 23 23 extern unsigned long try_to_compact_pages(struct zonelist *zonelist, 24 24 int order, gfp_t gfp_mask, nodemask_t *mask, 25 25 - bool sync); 25 25 + bool sync, bool *contended); 26 26 extern int compact_pgdat(pg_data_t *pgdat, int order); 27 27 extern unsigned long compaction_suitable(struct zone *zone, int order); 28 28 ··· 64 64 #else 65 65 static inline unsigned long try_to_compact_pages(struct zonelist *zonelist, 66 66 int order, gfp_t gfp_mask, nodemask_t *nodemask, 67 67 - bool sync) 67 67 + bool sync, bool *contended) 68 68 { 69 69 return COMPACT_CONTINUE; 70 70 }

+1 -1

include/linux/string.h

reviewed

··· 144 144 { 145 145 return strncmp(str, prefix, strlen(prefix)) == 0; 146 146 } 147 147 - #endif 148 147 149 148 extern size_t memweight(const void *ptr, size_t bytes); 150 149 150 150 + #endif /* __KERNEL__ */ 151 151 #endif /* _LINUX_STRING_H_ */

+108 -48

mm/compaction.c

reviewed

··· 51 51 } 52 52 53 53 /* 54 54 + * Compaction requires the taking of some coarse locks that are potentially 55 55 + * very heavily contended. Check if the process needs to be scheduled or 56 56 + * if the lock is contended. For async compaction, back out in the event 57 57 + * if contention is severe. For sync compaction, schedule. 58 58 + * 59 59 + * Returns true if the lock is held. 60 60 + * Returns false if the lock is released and compaction should abort 61 61 + */ 62 62 + static bool compact_checklock_irqsave(spinlock_t *lock, unsigned long *flags, 63 63 + bool locked, struct compact_control *cc) 64 64 + { 65 65 + if (need_resched() || spin_is_contended(lock)) { 66 66 + if (locked) { 67 67 + spin_unlock_irqrestore(lock, *flags); 68 68 + locked = false; 69 69 + } 70 70 + 71 71 + /* async aborts if taking too long or contended */ 72 72 + if (!cc->sync) { 73 73 + if (cc->contended) 74 74 + *cc->contended = true; 75 75 + return false; 76 76 + } 77 77 + 78 78 + cond_resched(); 79 79 + if (fatal_signal_pending(current)) 80 80 + return false; 81 81 + } 82 82 + 83 83 + if (!locked) 84 84 + spin_lock_irqsave(lock, *flags); 85 85 + return true; 86 86 + } 87 87 + 88 88 + static inline bool compact_trylock_irqsave(spinlock_t *lock, 89 89 + unsigned long *flags, struct compact_control *cc) 90 90 + { 91 91 + return compact_checklock_irqsave(lock, flags, false, cc); 92 92 + } 93 93 + 94 94 + /* 54 95 * Isolate free pages onto a private freelist. Caller must hold zone->lock. 
55 96 * If @strict is true, will abort returning 0 on any invalid PFNs or non-free 56 97 * pages inside of the pageblock (even though it may still end up isolating ··· 214 173 } 215 174 216 175 /* Update the number of anon and file isolated pages in the zone */ 217 217 - static void acct_isolated(struct zone *zone, struct compact_control *cc) 176 176 + static void acct_isolated(struct zone *zone, bool locked, struct compact_control *cc) 218 177 { 219 178 struct page *page; 220 179 unsigned int count[2] = { 0, }; ··· 222 181 list_for_each_entry(page, &cc->migratepages, lru) 223 182 count[!!page_is_file_cache(page)]++; 224 183 225 225 - __mod_zone_page_state(zone, NR_ISOLATED_ANON, count[0]); 226 226 - __mod_zone_page_state(zone, NR_ISOLATED_FILE, count[1]); 184 184 + /* If locked we can use the interrupt unsafe versions */ 185 185 + if (locked) { 186 186 + __mod_zone_page_state(zone, NR_ISOLATED_ANON, count[0]); 187 187 + __mod_zone_page_state(zone, NR_ISOLATED_FILE, count[1]); 188 188 + } else { 189 189 + mod_zone_page_state(zone, NR_ISOLATED_ANON, count[0]); 190 190 + mod_zone_page_state(zone, NR_ISOLATED_FILE, count[1]); 191 191 + } 227 192 } 228 193 229 194 /* Similar to reclaim, but different enough that they don't share logic */ ··· 275 228 struct list_head *migratelist = &cc->migratepages; 276 229 isolate_mode_t mode = 0; 277 230 struct lruvec *lruvec; 231 231 + unsigned long flags; 232 232 + bool locked; 278 233 279 234 /* 280 235 * Ensure that there are not too many pages isolated from the LRU ··· 296 247 297 248 /* Time to isolate some pages for migration */ 298 249 cond_resched(); 299 299 - spin_lock_irq(&zone->lru_lock); 250 250 + spin_lock_irqsave(&zone->lru_lock, flags); 251 251 + locked = true; 300 252 for (; low_pfn < end_pfn; low_pfn++) { 301 253 struct page *page; 302 302 - bool locked = true; 303 254 304 255 /* give a chance to irqs before checking need_resched() */ 305 256 if (!((low_pfn+1) % SWAP_CLUSTER_MAX)) { 306 306 - 
spin_unlock_irq(&zone->lru_lock); 257 257 + spin_unlock_irqrestore(&zone->lru_lock, flags); 307 258 locked = false; 308 259 } 309 309 - if (need_resched() || spin_is_contended(&zone->lru_lock)) { 310 310 - if (locked) 311 311 - spin_unlock_irq(&zone->lru_lock); 312 312 - cond_resched(); 313 313 - spin_lock_irq(&zone->lru_lock); 314 314 - if (fatal_signal_pending(current)) 315 315 - break; 316 316 - } else if (!locked) 317 317 - spin_lock_irq(&zone->lru_lock); 260 260 + 261 261 + /* Check if it is ok to still hold the lock */ 262 262 + locked = compact_checklock_irqsave(&zone->lru_lock, &flags, 263 263 + locked, cc); 264 264 + if (!locked) 265 265 + break; 318 266 319 267 /* 320 268 * migrate_pfn does not necessarily start aligned to a ··· 395 349 } 396 350 } 397 351 398 398 - acct_isolated(zone, cc); 352 352 + acct_isolated(zone, locked, cc); 399 353 400 400 - spin_unlock_irq(&zone->lru_lock); 354 354 + if (locked) 355 355 + spin_unlock_irqrestore(&zone->lru_lock, flags); 401 356 402 357 trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated); 403 358 ··· 428 381 429 382 /* Otherwise skip the block */ 430 383 return false; 384 384 + } 385 385 + 386 386 + /* 387 387 + * Returns the start pfn of the last page block in a zone. This is the starting 388 388 + * point for full compaction of a zone. Compaction searches for free pages from 389 389 + * the end of each zone, while isolate_freepages_block scans forward inside each 390 390 + * page block. 391 391 + */ 392 392 + static unsigned long start_free_pfn(struct zone *zone) 393 393 + { 394 394 + unsigned long free_pfn; 395 395 + free_pfn = zone->zone_start_pfn + zone->spanned_pages; 396 396 + free_pfn &= ~(pageblock_nr_pages-1); 397 397 + return free_pfn; 431 398 } 432 399 433 400 /* ··· 483 422 pfn -= pageblock_nr_pages) { 484 423 unsigned long isolated; 485 424 486 486 - /* 487 487 - * Skip ahead if another thread is compacting in the area 488 488 - * simultaneously. 
If we wrapped around, we can only skip 489 489 - * ahead if zone->compact_cached_free_pfn also wrapped to 490 490 - * above our starting point. 491 491 - */ 492 492 - if (cc->order > 0 && (!cc->wrapped || 493 493 - zone->compact_cached_free_pfn > 494 494 - cc->start_free_pfn)) 495 495 - pfn = min(pfn, zone->compact_cached_free_pfn); 496 496 - 497 425 if (!pfn_valid(pfn)) 498 426 continue; 499 427 ··· 508 458 * are disabled 509 459 */ 510 460 isolated = 0; 511 511 - spin_lock_irqsave(&zone->lock, flags); 461 461 + 462 462 + /* 463 463 + * The zone lock must be held to isolate freepages. This 464 464 + * unfortunately this is a very coarse lock and can be 465 465 + * heavily contended if there are parallel allocations 466 466 + * or parallel compactions. For async compaction do not 467 467 + * spin on the lock 468 468 + */ 469 469 + if (!compact_trylock_irqsave(&zone->lock, &flags, cc)) 470 470 + break; 512 471 if (suitable_migration_target(page)) { 513 472 end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn); 514 473 isolated = isolate_freepages_block(pfn, end_pfn, ··· 533 474 */ 534 475 if (isolated) { 535 476 high_pfn = max(high_pfn, pfn); 536 536 - if (cc->order > 0) 477 477 + 478 478 + /* 479 479 + * If the free scanner has wrapped, update 480 480 + * compact_cached_free_pfn to point to the highest 481 481 + * pageblock with free pages. 
This reduces excessive 482 482 + * scanning of full pageblocks near the end of the 483 483 + * zone 484 484 + */ 485 485 + if (cc->order > 0 && cc->wrapped) 537 486 zone->compact_cached_free_pfn = high_pfn; 538 487 } 539 488 } ··· 551 484 552 485 cc->free_pfn = high_pfn; 553 486 cc->nr_freepages = nr_freepages; 487 487 + 488 488 + /* If compact_cached_free_pfn is reset then set it now */ 489 489 + if (cc->order > 0 && !cc->wrapped && 490 490 + zone->compact_cached_free_pfn == start_free_pfn(zone)) 491 491 + zone->compact_cached_free_pfn = high_pfn; 554 492 } 555 493 556 494 /* ··· 640 568 cc->migrate_pfn = low_pfn; 641 569 642 570 return ISOLATE_SUCCESS; 643 643 - } 644 644 - 645 645 - /* 646 646 - * Returns the start pfn of the last page block in a zone. This is the starting 647 647 - * point for full compaction of a zone. Compaction searches for free pages from 648 648 - * the end of each zone, while isolate_freepages_block scans forward inside each 649 649 - * page block. 650 650 - */ 651 651 - static unsigned long start_free_pfn(struct zone *zone) 652 652 - { 653 653 - unsigned long free_pfn; 654 654 - free_pfn = zone->zone_start_pfn + zone->spanned_pages; 655 655 - free_pfn &= ~(pageblock_nr_pages-1); 656 656 - return free_pfn; 657 571 } 658 572 659 573 static int compact_finished(struct zone *zone, ··· 829 771 830 772 static unsigned long compact_zone_order(struct zone *zone, 831 773 int order, gfp_t gfp_mask, 832 832 - bool sync) 774 774 + bool sync, bool *contended) 833 775 { 834 776 struct compact_control cc = { 835 777 .nr_freepages = 0, ··· 838 780 .migratetype = allocflags_to_migratetype(gfp_mask), 839 781 .zone = zone, 840 782 .sync = sync, 783 783 + .contended = contended, 841 784 }; 842 785 INIT_LIST_HEAD(&cc.freepages); 843 786 INIT_LIST_HEAD(&cc.migratepages); ··· 860 801 */ 861 802 unsigned long try_to_compact_pages(struct zonelist *zonelist, 862 803 int order, gfp_t gfp_mask, nodemask_t *nodemask, 863 863 - bool sync) 804 804 + bool sync, bool 
*contended) 864 805 { 865 806 enum zone_type high_zoneidx = gfp_zone(gfp_mask); 866 807 int may_enter_fs = gfp_mask & __GFP_FS; ··· 884 825 nodemask) { 885 826 int status; 886 827 887 887 - status = compact_zone_order(zone, order, gfp_mask, sync); 828 828 + status = compact_zone_order(zone, order, gfp_mask, sync, 829 829 + contended); 888 830 rc = max(status, rc); 889 831 890 832 /* If a normal allocation would succeed, stop compacting */ ··· 921 861 if (cc->order > 0) { 922 862 int ok = zone_watermark_ok(zone, cc->order, 923 863 low_wmark_pages(zone), 0, 0); 924 924 - if (ok && cc->order > zone->compact_order_failed) 864 864 + if (ok && cc->order >= zone->compact_order_failed) 925 865 zone->compact_order_failed = cc->order + 1; 926 866 /* Currently async compaction is never deferred. */ 927 867 else if (!ok && cc->sync)

mm/internal.h

reviewed

··· 130 130 int order; /* order a direct compactor needs */ 131 131 int migratetype; /* MOVABLE, RECLAIMABLE etc */ 132 132 struct zone *zone; 133 133 + bool *contended; /* True if a lock was contended */ 133 134 }; 134 135 135 136 unsigned long

+1 -1

mm/mmap.c

reviewed

··· 2309 2309 } 2310 2310 vm_unacct_memory(nr_accounted); 2311 2311 2312 2312 - BUG_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT); 2312 2312 + WARN_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT); 2313 2313 } 2314 2314 2315 2315 /* Insert vm structure into process list sorted by address

+22 -16

mm/page_alloc.c

reviewed

··· 1928 1928 zlc_active = 0; 1929 1929 goto zonelist_scan; 1930 1930 } 1931 1931 + 1932 1932 + if (page) 1933 1933 + /* 1934 1934 + * page->pfmemalloc is set when ALLOC_NO_WATERMARKS was 1935 1935 + * necessary to allocate the page. The expectation is 1936 1936 + * that the caller is taking steps that will free more 1937 1937 + * memory. The caller should avoid the page being used 1938 1938 + * for !PFMEMALLOC purposes. 1939 1939 + */ 1940 1940 + page->pfmemalloc = !!(alloc_flags & ALLOC_NO_WATERMARKS); 1941 1941 + 1931 1942 return page; 1932 1943 } 1933 1944 ··· 2102 2091 struct zonelist *zonelist, enum zone_type high_zoneidx, 2103 2092 nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, 2104 2093 int migratetype, bool sync_migration, 2105 2105 - bool *deferred_compaction, 2094 2094 + bool *contended_compaction, bool *deferred_compaction, 2106 2095 unsigned long *did_some_progress) 2107 2096 { 2108 2097 struct page *page; ··· 2117 2106 2118 2107 current->flags |= PF_MEMALLOC; 2119 2108 *did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask, 2120 2120 - nodemask, sync_migration); 2109 2109 + nodemask, sync_migration, 2110 2110 + contended_compaction); 2121 2111 current->flags &= ~PF_MEMALLOC; 2122 2112 if (*did_some_progress != COMPACT_SKIPPED) { 2123 2113 ··· 2164 2152 struct zonelist *zonelist, enum zone_type high_zoneidx, 2165 2153 nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, 2166 2154 int migratetype, bool sync_migration, 2167 2167 - bool *deferred_compaction, 2155 2155 + bool *contended_compaction, bool *deferred_compaction, 2168 2156 unsigned long *did_some_progress) 2169 2157 { 2170 2158 return NULL; ··· 2337 2325 unsigned long did_some_progress; 2338 2326 bool sync_migration = false; 2339 2327 bool deferred_compaction = false; 2328 2328 + bool contended_compaction = false; 2340 2329 2341 2330 /* 2342 2331 * In the slowpath, we sanity check order to avoid ever trying to ··· 2402 2389 zonelist, 
high_zoneidx, nodemask, 2403 2390 preferred_zone, migratetype); 2404 2391 if (page) { 2405 2405 - /* 2406 2406 - * page->pfmemalloc is set when ALLOC_NO_WATERMARKS was 2407 2407 - * necessary to allocate the page. The expectation is 2408 2408 - * that the caller is taking steps that will free more 2409 2409 - * memory. The caller should avoid the page being used 2410 2410 - * for !PFMEMALLOC purposes. 2411 2411 - */ 2412 2412 - page->pfmemalloc = true; 2413 2392 goto got_pg; 2414 2393 } 2415 2394 } ··· 2427 2422 nodemask, 2428 2423 alloc_flags, preferred_zone, 2429 2424 migratetype, sync_migration, 2425 2425 + &contended_compaction, 2430 2426 &deferred_compaction, 2431 2427 &did_some_progress); 2432 2428 if (page) ··· 2437 2431 /* 2438 2432 * If compaction is deferred for high-order allocations, it is because 2439 2433 * sync compaction recently failed. In this is the case and the caller 2440 2440 - * has requested the system not be heavily disrupted, fail the 2441 2441 - * allocation now instead of entering direct reclaim 2434 2434 + * requested a movable allocation that does not heavily disrupt the 2435 2435 + * system then fail the allocation instead of entering direct reclaim. 2442 2436 */ 2443 2443 - if (deferred_compaction && (gfp_mask & __GFP_NO_KSWAPD)) 2437 2437 + if ((deferred_compaction || contended_compaction) && 2438 2438 + (gfp_mask & __GFP_NO_KSWAPD)) 2444 2439 goto nopage; 2445 2440 2446 2441 /* Try direct reclaim and then allocating */ ··· 2512 2505 nodemask, 2513 2506 alloc_flags, preferred_zone, 2514 2507 migratetype, sync_migration, 2508 2508 + &contended_compaction, 2515 2509 &deferred_compaction, 2516 2510 &did_some_progress); 2517 2511 if (page) ··· 2577 2569 page = __alloc_pages_slowpath(gfp_mask, order, 2578 2570 zonelist, high_zoneidx, nodemask, 2579 2571 preferred_zone, migratetype); 2580 2580 - else 2581 2581 - page->pfmemalloc = false; 2582 2572 2583 2573 trace_mm_page_alloc(page, order, gfp_mask, migratetype); 2584 2574

+2 -1

scripts/checkpatch.pl

reviewed

··· 3016 3016 $herectx .= raw_line($linenr, $n) . "\n"; 3017 3017 } 3018 3018 3019 3019 - if (($stmts =~ tr/;/;/) == 1) { 3019 3019 + if (($stmts =~ tr/;/;/) == 1 && 3020 3020 + $stmts !~ /^\s*(if|while|for|switch)\b/) { 3020 3021 WARN("SINGLE_STATEMENT_DO_WHILE_MACRO", 3021 3022 "Single statement macros should not use a do {} while (0) loop\n" . "$herectx"); 3022 3023 }