Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs

* 'for-linus' of git://oss.sgi.com/xfs/xfs:
xfs: use proper interfaces for on-stack plugging
xfs: fix xfs_debug warnings
xfs: fix variable set but not used warnings
xfs: convert log tail checking to a warning
xfs: catch bad block numbers freeing extents.
xfs: push the AIL from memory reclaim and periodic sync
xfs: clean up code layout in xfs_trans_ail.c
xfs: convert the xfsaild threads to a workqueue
xfs: introduce background inode reclaim work
xfs: convert ENOSPC inode flushing to use new syncd workqueue
xfs: introduce a xfssyncd workqueue
xfs: fix extent format buffer allocation size
xfs: fix unreferenced var error in xfs_buf.c

Also applied a patch from Tony Luck that fixes ia64:
  xfs_destroy_workqueues() should not be tagged with __exit
in the branch before merging.

+531 -507
+9 -15
fs/xfs/linux-2.6/xfs_buf.c
··· 293 293 size_t nbytes, offset; 294 294 gfp_t gfp_mask = xb_to_gfp(flags); 295 295 unsigned short page_count, i; 296 - pgoff_t first; 297 296 xfs_off_t end; 298 297 int error; 299 298 ··· 332 333 return error; 333 334 334 335 offset = bp->b_offset; 335 - first = bp->b_file_offset >> PAGE_SHIFT; 336 336 bp->b_flags |= _XBF_PAGES; 337 337 338 338 for (i = 0; i < bp->b_page_count; i++) { ··· 655 657 xfs_off_t ioff, 656 658 size_t isize) 657 659 { 658 - struct backing_dev_info *bdi; 659 - 660 660 if (bdi_read_congested(target->bt_bdi)) 661 661 return; 662 662 ··· 915 919 916 920 if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) 917 921 xfs_log_force(bp->b_target->bt_mount, 0); 918 - if (atomic_read(&bp->b_io_remaining)) 919 - blk_flush_plug(current); 920 922 down(&bp->b_sema); 921 923 XB_SET_OWNER(bp); 922 924 ··· 1303 1309 { 1304 1310 trace_xfs_buf_iowait(bp, _RET_IP_); 1305 1311 1306 - if (atomic_read(&bp->b_io_remaining)) 1307 - blk_flush_plug(current); 1308 1312 wait_for_completion(&bp->b_iowait); 1309 1313 1310 1314 trace_xfs_buf_iowait_done(bp, _RET_IP_); ··· 1739 1747 do { 1740 1748 long age = xfs_buf_age_centisecs * msecs_to_jiffies(10); 1741 1749 long tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10); 1742 - int count = 0; 1743 1750 struct list_head tmp; 1751 + struct blk_plug plug; 1744 1752 1745 1753 if (unlikely(freezing(current))) { 1746 1754 set_bit(XBT_FORCE_SLEEP, &target->bt_flags); ··· 1756 1764 1757 1765 xfs_buf_delwri_split(target, &tmp, age); 1758 1766 list_sort(NULL, &tmp, xfs_buf_cmp); 1767 + 1768 + blk_start_plug(&plug); 1759 1769 while (!list_empty(&tmp)) { 1760 1770 struct xfs_buf *bp; 1761 1771 bp = list_first_entry(&tmp, struct xfs_buf, b_list); 1762 1772 list_del_init(&bp->b_list); 1763 1773 xfs_bdstrat_cb(bp); 1764 - count++; 1765 1774 } 1766 - if (count) 1767 - blk_flush_plug(current); 1768 - 1775 + blk_finish_plug(&plug); 1769 1776 } while (!kthread_should_stop()); 1770 1777 1771 1778 return 0; ··· 1784 1793 int pincount = 0; 1785 1794 LIST_HEAD(tmp_list); 1786 1795 LIST_HEAD(wait_list); 1796 + struct blk_plug plug; 1787 1797 1788 1798 xfs_buf_runall_queues(xfsconvertd_workqueue); 1789 1799 xfs_buf_runall_queues(xfsdatad_workqueue); ··· 1799 1807 * we do that after issuing all the IO. 1800 1808 */ 1801 1809 list_sort(NULL, &tmp_list, xfs_buf_cmp); 1810 + 1811 + blk_start_plug(&plug); 1802 1812 while (!list_empty(&tmp_list)) { 1803 1813 bp = list_first_entry(&tmp_list, struct xfs_buf, b_list); 1804 1814 ASSERT(target == bp->b_target); ··· 1811 1817 } 1812 1818 xfs_bdstrat_cb(bp); 1813 1819 } 1820 + blk_finish_plug(&plug); 1814 1821 1815 1822 if (wait) { 1816 - /* Expedite and wait for IO to complete. */ 1817 - blk_flush_plug(current); 1823 + /* Wait for IO to complete. */ 1818 1824 while (!list_empty(&wait_list)) { 1819 1825 bp = list_first_entry(&wait_list, struct xfs_buf, b_list); 1820 1826
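The xfs_buf.c hunks above replace the explicit blk_flush_plug() calls with the block layer's on-stack plugging interface: I/O issued between blk_start_plug() and blk_finish_plug() is collected per task and handed to the block layer as a batch when the plug is finished. Below is a minimal sketch of that pattern; struct example_buf, example_submit_list() and submit_one() are hypothetical stand-ins for the delwri buffer list and xfs_bdstrat_cb(), and only the plug calls are the real API.

#include <linux/blkdev.h>   /* struct blk_plug, blk_start_plug(), blk_finish_plug() */
#include <linux/list.h>

/* Hypothetical buffer type standing in for struct xfs_buf. */
struct example_buf {
        struct list_head b_list;
};

/* Hypothetical submission helper standing in for xfs_bdstrat_cb(). */
static void submit_one(struct example_buf *bp)
{
        /* issue the buffer's I/O here; the requests stay queued in the plug */
}

static void example_submit_list(struct list_head *iolist)
{
        struct blk_plug plug;

        blk_start_plug(&plug);          /* start collecting this task's I/O */
        while (!list_empty(iolist)) {
                struct example_buf *bp;

                bp = list_first_entry(iolist, struct example_buf, b_list);
                list_del_init(&bp->b_list);
                submit_one(bp);
        }
        blk_finish_plug(&plug);         /* flush the whole batch to the block layer */
}

Because the plug lives on the caller's stack, there is no per-buffer state to track and nothing to flush by hand, which is why the old b_io_remaining/blk_flush_plug() checks disappear in the hunks above.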
+9 -18
fs/xfs/linux-2.6/xfs_message.c
··· 28 28 /* 29 29 * XFS logging functions 30 30 */ 31 - static int 31 + static void 32 32 __xfs_printk( 33 33 const char *level, 34 34 const struct xfs_mount *mp, 35 35 struct va_format *vaf) 36 36 { 37 37 if (mp && mp->m_fsname) 38 - return printk("%sXFS (%s): %pV\n", level, mp->m_fsname, vaf); 39 - return printk("%sXFS: %pV\n", level, vaf); 38 + printk("%sXFS (%s): %pV\n", level, mp->m_fsname, vaf); 39 + printk("%sXFS: %pV\n", level, vaf); 40 40 } 41 41 42 - int xfs_printk( 42 + void xfs_printk( 43 43 const char *level, 44 44 const struct xfs_mount *mp, 45 45 const char *fmt, ...) 46 46 { 47 47 struct va_format vaf; 48 48 va_list args; 49 - int r; 50 49 51 50 va_start(args, fmt); 52 51 53 52 vaf.fmt = fmt; 54 53 vaf.va = &args; 55 54 56 - r = __xfs_printk(level, mp, &vaf); 55 + __xfs_printk(level, mp, &vaf); 57 56 va_end(args); 58 - 59 - return r; 60 57 } 61 58 62 59 #define define_xfs_printk_level(func, kern_level) \ 63 - int func(const struct xfs_mount *mp, const char *fmt, ...) \ 60 + void func(const struct xfs_mount *mp, const char *fmt, ...) \ 64 61 { \ 65 62 struct va_format vaf; \ 66 63 va_list args; \ 67 - int r; \ 68 64 \ 69 65 va_start(args, fmt); \ 70 66 \ 71 67 vaf.fmt = fmt; \ 72 68 vaf.va = &args; \ 73 69 \ 74 - r = __xfs_printk(kern_level, mp, &vaf); \ 70 + __xfs_printk(kern_level, mp, &vaf); \ 75 71 va_end(args); \ 76 - \ 77 - return r; \ 78 72 } \ 79 73 80 74 define_xfs_printk_level(xfs_emerg, KERN_EMERG); ··· 82 88 define_xfs_printk_level(xfs_debug, KERN_DEBUG); 83 89 #endif 84 90 85 - int 91 + void 86 92 xfs_alert_tag( 87 93 const struct xfs_mount *mp, 88 94 int panic_tag, ··· 91 97 struct va_format vaf; 92 98 va_list args; 93 99 int do_panic = 0; 94 - int r; 95 100 96 101 if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) { 97 102 xfs_printk(KERN_ALERT, mp, ··· 103 110 vaf.fmt = fmt; 104 111 vaf.va = &args; 105 112 106 - r = __xfs_printk(KERN_ALERT, mp, &vaf); 113 + __xfs_printk(KERN_ALERT, mp, &vaf); 107 114 va_end(args); 108 115 109 116 BUG_ON(do_panic); 110 - 111 - return r; 112 117 } 113 118 114 119 void
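__xfs_printk() above forwards a pre-built struct va_format through printk()'s %pV format specifier instead of formatting into an intermediate buffer. A minimal sketch of that forwarding idiom follows; the example_warn() wrapper and its prefix are hypothetical, while struct va_format and %pV are the real kernel API.

#include <linux/kernel.h>   /* struct va_format, va_list */
#include <linux/printk.h>

/* Hypothetical wrapper: prefix a subsystem tag and forward the caller's args. */
static void example_warn(const char *fmt, ...)
        __attribute__ ((format (printf, 1, 2)));

static void example_warn(const char *fmt, ...)
{
        struct va_format vaf;
        va_list args;

        va_start(args, fmt);
        vaf.fmt = fmt;
        vaf.va = &args;
        /* %pV expands vaf.fmt against vaf.va inside a single printk call */
        printk(KERN_WARNING "example: %pV\n", &vaf);
        va_end(args);
}

This is what lets the level-specific helpers in the patch share one implementation without allocating a message buffer.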
+13 -11
fs/xfs/linux-2.6/xfs_message.h
··· 3 3 4 4 struct xfs_mount; 5 5 6 - extern int xfs_printk(const char *level, const struct xfs_mount *mp, 6 + extern void xfs_printk(const char *level, const struct xfs_mount *mp, 7 7 const char *fmt, ...) 8 8 __attribute__ ((format (printf, 3, 4))); 9 - extern int xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...) 9 + extern void xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...) 10 10 __attribute__ ((format (printf, 2, 3))); 11 - extern int xfs_alert(const struct xfs_mount *mp, const char *fmt, ...) 11 + extern void xfs_alert(const struct xfs_mount *mp, const char *fmt, ...) 12 12 __attribute__ ((format (printf, 2, 3))); 13 - extern int xfs_alert_tag(const struct xfs_mount *mp, int tag, 13 + extern void xfs_alert_tag(const struct xfs_mount *mp, int tag, 14 14 const char *fmt, ...) 15 15 __attribute__ ((format (printf, 3, 4))); 16 - extern int xfs_crit(const struct xfs_mount *mp, const char *fmt, ...) 16 + extern void xfs_crit(const struct xfs_mount *mp, const char *fmt, ...) 17 17 __attribute__ ((format (printf, 2, 3))); 18 - extern int xfs_err(const struct xfs_mount *mp, const char *fmt, ...) 18 + extern void xfs_err(const struct xfs_mount *mp, const char *fmt, ...) 19 19 __attribute__ ((format (printf, 2, 3))); 20 - extern int xfs_warn(const struct xfs_mount *mp, const char *fmt, ...) 20 + extern void xfs_warn(const struct xfs_mount *mp, const char *fmt, ...) 21 21 __attribute__ ((format (printf, 2, 3))); 22 - extern int xfs_notice(const struct xfs_mount *mp, const char *fmt, ...) 22 + extern void xfs_notice(const struct xfs_mount *mp, const char *fmt, ...) 23 23 __attribute__ ((format (printf, 2, 3))); 24 - extern int xfs_info(const struct xfs_mount *mp, const char *fmt, ...) 24 + extern void xfs_info(const struct xfs_mount *mp, const char *fmt, ...) 25 25 __attribute__ ((format (printf, 2, 3))); 26 26 27 27 #ifdef DEBUG 28 - extern int xfs_debug(const struct xfs_mount *mp, const char *fmt, ...) 28 + extern void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...) 29 29 __attribute__ ((format (printf, 2, 3))); 30 30 #else 31 - #define xfs_debug(mp, fmt, ...) (0) 31 + static inline void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...) 32 + { 33 + } 32 34 #endif 33 35 34 36 extern void assfail(char *expr, char *f, int l);
+45 -84
fs/xfs/linux-2.6/xfs_super.c
··· 816 816 return 0; 817 817 } 818 818 819 - /* 820 - * XFS AIL push thread support 821 - */ 822 - void 823 - xfsaild_wakeup( 824 - struct xfs_ail *ailp, 825 - xfs_lsn_t threshold_lsn) 826 - { 827 - /* only ever move the target forwards */ 828 - if (XFS_LSN_CMP(threshold_lsn, ailp->xa_target) > 0) { 829 - ailp->xa_target = threshold_lsn; 830 - wake_up_process(ailp->xa_task); 831 - } 832 - } 833 - 834 - STATIC int 835 - xfsaild( 836 - void *data) 837 - { 838 - struct xfs_ail *ailp = data; 839 - xfs_lsn_t last_pushed_lsn = 0; 840 - long tout = 0; /* milliseconds */ 841 - 842 - while (!kthread_should_stop()) { 843 - /* 844 - * for short sleeps indicating congestion, don't allow us to 845 - * get woken early. Otherwise all we do is bang on the AIL lock 846 - * without making progress. 847 - */ 848 - if (tout && tout <= 20) 849 - __set_current_state(TASK_KILLABLE); 850 - else 851 - __set_current_state(TASK_INTERRUPTIBLE); 852 - schedule_timeout(tout ? 853 - msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT); 854 - 855 - /* swsusp */ 856 - try_to_freeze(); 857 - 858 - ASSERT(ailp->xa_mount->m_log); 859 - if (XFS_FORCED_SHUTDOWN(ailp->xa_mount)) 860 - continue; 861 - 862 - tout = xfsaild_push(ailp, &last_pushed_lsn); 863 - } 864 - 865 - return 0; 866 - } /* xfsaild */ 867 - 868 - int 869 - xfsaild_start( 870 - struct xfs_ail *ailp) 871 - { 872 - ailp->xa_target = 0; 873 - ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild/%s", 874 - ailp->xa_mount->m_fsname); 875 - if (IS_ERR(ailp->xa_task)) 876 - return -PTR_ERR(ailp->xa_task); 877 - return 0; 878 - } 879 - 880 - void 881 - xfsaild_stop( 882 - struct xfs_ail *ailp) 883 - { 884 - kthread_stop(ailp->xa_task); 885 - } 886 - 887 - 888 819 /* Catch misguided souls that try to use this interface on XFS */ 889 820 STATIC struct inode * 890 821 xfs_fs_alloc_inode( ··· 1122 1191 return -error; 1123 1192 1124 1193 if (laptop_mode) { 1125 - int prev_sync_seq = mp->m_sync_seq; 1126 - 1127 1194 /* 1128 1195 * The disk must be active because we're syncing. 1129 1196 * We schedule xfssyncd now (now that the disk is 1130 1197 * active) instead of later (when it might not be). 1131 1198 */ 1132 - wake_up_process(mp->m_sync_task); 1133 - /* 1134 - * We have to wait for the sync iteration to complete. 1135 - * If we don't, the disk activity caused by the sync 1136 - * will come after the sync is completed, and that 1137 - * triggers another sync from laptop mode. 1138 - */ 1139 - wait_event(mp->m_wait_single_sync_task, 1140 - mp->m_sync_seq != prev_sync_seq); 1199 + flush_delayed_work_sync(&mp->m_sync_work); 1141 1200 } 1142 1201 1143 1202 return 0; ··· 1411 1490 spin_lock_init(&mp->m_sb_lock); 1412 1491 mutex_init(&mp->m_growlock); 1413 1492 atomic_set(&mp->m_active_trans, 0); 1414 - INIT_LIST_HEAD(&mp->m_sync_list); 1415 - spin_lock_init(&mp->m_sync_lock); 1416 - init_waitqueue_head(&mp->m_wait_single_sync_task); 1417 1493 1418 1494 mp->m_super = sb; 1419 1495 sb->s_fs_info = mp; ··· 1717 1799 } 1718 1800 1719 1801 STATIC int __init 1802 + xfs_init_workqueues(void) 1803 + { 1804 + /* 1805 + * max_active is set to 8 to give enough concurency to allow 1806 + * multiple work operations on each CPU to run. This allows multiple 1807 + * filesystems to be running sync work concurrently, and scales with 1808 + * the number of CPUs in the system. 
1809 + */ 1810 + xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8); 1811 + if (!xfs_syncd_wq) 1812 + goto out; 1813 + 1814 + xfs_ail_wq = alloc_workqueue("xfsail", WQ_CPU_INTENSIVE, 8); 1815 + if (!xfs_ail_wq) 1816 + goto out_destroy_syncd; 1817 + 1818 + return 0; 1819 + 1820 + out_destroy_syncd: 1821 + destroy_workqueue(xfs_syncd_wq); 1822 + out: 1823 + return -ENOMEM; 1824 + } 1825 + 1826 + STATIC void 1827 + xfs_destroy_workqueues(void) 1828 + { 1829 + destroy_workqueue(xfs_ail_wq); 1830 + destroy_workqueue(xfs_syncd_wq); 1831 + } 1832 + 1833 + STATIC int __init 1720 1834 init_xfs_fs(void) 1721 1835 { 1722 1836 int error; ··· 1763 1813 if (error) 1764 1814 goto out; 1765 1815 1766 - error = xfs_mru_cache_init(); 1816 + error = xfs_init_workqueues(); 1767 1817 if (error) 1768 1818 goto out_destroy_zones; 1819 + 1820 + error = xfs_mru_cache_init(); 1821 + if (error) 1822 + goto out_destroy_wq; 1769 1823 1770 1824 error = xfs_filestream_init(); 1771 1825 if (error) ··· 1786 1832 error = xfs_sysctl_register(); 1787 1833 if (error) 1788 1834 goto out_cleanup_procfs; 1835 + 1836 + error = xfs_init_workqueues(); 1837 + if (error) 1838 + goto out_sysctl_unregister; 1789 1839 1790 1840 vfs_initquota(); 1791 1841 ··· 1808 1850 xfs_filestream_uninit(); 1809 1851 out_mru_cache_uninit: 1810 1852 xfs_mru_cache_uninit(); 1853 + out_destroy_wq: 1854 + xfs_destroy_workqueues(); 1811 1855 out_destroy_zones: 1812 1856 xfs_destroy_zones(); 1813 1857 out: ··· 1826 1866 xfs_buf_terminate(); 1827 1867 xfs_filestream_uninit(); 1828 1868 xfs_mru_cache_uninit(); 1869 + xfs_destroy_workqueues(); 1829 1870 xfs_destroy_zones(); 1830 1871 } 1831 1872
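The flattened hunk above adds xfs_init_workqueues()/xfs_destroy_workqueues(); since it is hard to read in this form, here is the same initialise-then-unwind pattern as a small self-contained sketch. The example_* names are hypothetical; the alloc_workqueue() flags and the max_active value of 8 mirror the patch.

#include <linux/errno.h>
#include <linux/workqueue.h>

static struct workqueue_struct *example_syncd_wq;
static struct workqueue_struct *example_ail_wq;

static int example_init_workqueues(void)
{
        /* max_active = 8: allow several work items per CPU to run at once */
        example_syncd_wq = alloc_workqueue("example_syncd", WQ_CPU_INTENSIVE, 8);
        if (!example_syncd_wq)
                goto out;

        example_ail_wq = alloc_workqueue("example_ail", WQ_CPU_INTENSIVE, 8);
        if (!example_ail_wq)
                goto out_destroy_syncd;

        return 0;

out_destroy_syncd:
        destroy_workqueue(example_syncd_wq);    /* unwind already-created state */
out:
        return -ENOMEM;
}

static void example_destroy_workqueues(void)
{
        /* tear down in the reverse order of creation */
        destroy_workqueue(example_ail_wq);
        destroy_workqueue(example_syncd_wq);
}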
+116 -112
fs/xfs/linux-2.6/xfs_sync.c
··· 22 22 #include "xfs_log.h" 23 23 #include "xfs_inum.h" 24 24 #include "xfs_trans.h" 25 + #include "xfs_trans_priv.h" 25 26 #include "xfs_sb.h" 26 27 #include "xfs_ag.h" 27 28 #include "xfs_mount.h" ··· 39 38 40 39 #include <linux/kthread.h> 41 40 #include <linux/freezer.h> 41 + 42 + struct workqueue_struct *xfs_syncd_wq; /* sync workqueue */ 42 43 43 44 /* 44 45 * The inode lookup is done in batches to keep the amount of lock traffic and ··· 434 431 xfs_unmountfs_writesb(mp); 435 432 } 436 433 437 - /* 438 - * Enqueue a work item to be picked up by the vfs xfssyncd thread. 439 - * Doing this has two advantages: 440 - * - It saves on stack space, which is tight in certain situations 441 - * - It can be used (with care) as a mechanism to avoid deadlocks. 442 - * Flushing while allocating in a full filesystem requires both. 443 - */ 444 - STATIC void 445 - xfs_syncd_queue_work( 446 - struct xfs_mount *mp, 447 - void *data, 448 - void (*syncer)(struct xfs_mount *, void *), 449 - struct completion *completion) 434 + static void 435 + xfs_syncd_queue_sync( 436 + struct xfs_mount *mp) 450 437 { 451 - struct xfs_sync_work *work; 452 - 453 - work = kmem_alloc(sizeof(struct xfs_sync_work), KM_SLEEP); 454 - INIT_LIST_HEAD(&work->w_list); 455 - work->w_syncer = syncer; 456 - work->w_data = data; 457 - work->w_mount = mp; 458 - work->w_completion = completion; 459 - spin_lock(&mp->m_sync_lock); 460 - list_add_tail(&work->w_list, &mp->m_sync_list); 461 - spin_unlock(&mp->m_sync_lock); 462 - wake_up_process(mp->m_sync_task); 463 - } 464 - 465 - /* 466 - * Flush delayed allocate data, attempting to free up reserved space 467 - * from existing allocations. At this point a new allocation attempt 468 - * has failed with ENOSPC and we are in the process of scratching our 469 - * heads, looking about for more room... 470 - */ 471 - STATIC void 472 - xfs_flush_inodes_work( 473 - struct xfs_mount *mp, 474 - void *arg) 475 - { 476 - struct inode *inode = arg; 477 - xfs_sync_data(mp, SYNC_TRYLOCK); 478 - xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT); 479 - iput(inode); 480 - } 481 - 482 - void 483 - xfs_flush_inodes( 484 - xfs_inode_t *ip) 485 - { 486 - struct inode *inode = VFS_I(ip); 487 - DECLARE_COMPLETION_ONSTACK(completion); 488 - 489 - igrab(inode); 490 - xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inodes_work, &completion); 491 - wait_for_completion(&completion); 492 - xfs_log_force(ip->i_mount, XFS_LOG_SYNC); 438 + queue_delayed_work(xfs_syncd_wq, &mp->m_sync_work, 439 + msecs_to_jiffies(xfs_syncd_centisecs * 10)); 493 440 } 494 441 495 442 /* ··· 449 496 */ 450 497 STATIC void 451 498 xfs_sync_worker( 452 - struct xfs_mount *mp, 453 - void *unused) 499 + struct work_struct *work) 454 500 { 501 + struct xfs_mount *mp = container_of(to_delayed_work(work), 502 + struct xfs_mount, m_sync_work); 455 503 int error; 456 504 457 505 if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { ··· 462 508 error = xfs_fs_log_dummy(mp); 463 509 else 464 510 xfs_log_force(mp, 0); 465 - xfs_reclaim_inodes(mp, 0); 466 511 error = xfs_qm_sync(mp, SYNC_TRYLOCK); 512 + 513 + /* start pushing all the metadata that is currently dirty */ 514 + xfs_ail_push_all(mp->m_ail); 467 515 } 468 - mp->m_sync_seq++; 469 - wake_up(&mp->m_wait_single_sync_task); 516 + 517 + /* queue us up again */ 518 + xfs_syncd_queue_sync(mp); 470 519 } 471 520 472 - STATIC int 473 - xfssyncd( 474 - void *arg) 521 + /* 522 + * Queue a new inode reclaim pass if there are reclaimable inodes and there 523 + * isn't a reclaim pass already in progress. 
By default it runs every 5s based 524 + * on the xfs syncd work default of 30s. Perhaps this should have it's own 525 + * tunable, but that can be done if this method proves to be ineffective or too 526 + * aggressive. 527 + */ 528 + static void 529 + xfs_syncd_queue_reclaim( 530 + struct xfs_mount *mp) 475 531 { 476 - struct xfs_mount *mp = arg; 477 - long timeleft; 478 - xfs_sync_work_t *work, *n; 479 - LIST_HEAD (tmp); 480 532 481 - set_freezable(); 482 - timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10); 483 - for (;;) { 484 - if (list_empty(&mp->m_sync_list)) 485 - timeleft = schedule_timeout_interruptible(timeleft); 486 - /* swsusp */ 487 - try_to_freeze(); 488 - if (kthread_should_stop() && list_empty(&mp->m_sync_list)) 489 - break; 533 + /* 534 + * We can have inodes enter reclaim after we've shut down the syncd 535 + * workqueue during unmount, so don't allow reclaim work to be queued 536 + * during unmount. 537 + */ 538 + if (!(mp->m_super->s_flags & MS_ACTIVE)) 539 + return; 490 540 491 - spin_lock(&mp->m_sync_lock); 492 - /* 493 - * We can get woken by laptop mode, to do a sync - 494 - * that's the (only!) case where the list would be 495 - * empty with time remaining. 496 - */ 497 - if (!timeleft || list_empty(&mp->m_sync_list)) { 498 - if (!timeleft) 499 - timeleft = xfs_syncd_centisecs * 500 - msecs_to_jiffies(10); 501 - INIT_LIST_HEAD(&mp->m_sync_work.w_list); 502 - list_add_tail(&mp->m_sync_work.w_list, 503 - &mp->m_sync_list); 504 - } 505 - list_splice_init(&mp->m_sync_list, &tmp); 506 - spin_unlock(&mp->m_sync_lock); 507 - 508 - list_for_each_entry_safe(work, n, &tmp, w_list) { 509 - (*work->w_syncer)(mp, work->w_data); 510 - list_del(&work->w_list); 511 - if (work == &mp->m_sync_work) 512 - continue; 513 - if (work->w_completion) 514 - complete(work->w_completion); 515 - kmem_free(work); 516 - } 541 + rcu_read_lock(); 542 + if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) { 543 + queue_delayed_work(xfs_syncd_wq, &mp->m_reclaim_work, 544 + msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10)); 517 545 } 546 + rcu_read_unlock(); 547 + } 518 548 519 - return 0; 549 + /* 550 + * This is a fast pass over the inode cache to try to get reclaim moving on as 551 + * many inodes as possible in a short period of time. It kicks itself every few 552 + * seconds, as well as being kicked by the inode cache shrinker when memory 553 + * goes low. It scans as quickly as possible avoiding locked inodes or those 554 + * already being flushed, and once done schedules a future pass. 555 + */ 556 + STATIC void 557 + xfs_reclaim_worker( 558 + struct work_struct *work) 559 + { 560 + struct xfs_mount *mp = container_of(to_delayed_work(work), 561 + struct xfs_mount, m_reclaim_work); 562 + 563 + xfs_reclaim_inodes(mp, SYNC_TRYLOCK); 564 + xfs_syncd_queue_reclaim(mp); 565 + } 566 + 567 + /* 568 + * Flush delayed allocate data, attempting to free up reserved space 569 + * from existing allocations. At this point a new allocation attempt 570 + * has failed with ENOSPC and we are in the process of scratching our 571 + * heads, looking about for more room. 572 + * 573 + * Queue a new data flush if there isn't one already in progress and 574 + * wait for completion of the flush. This means that we only ever have one 575 + * inode flush in progress no matter how many ENOSPC events are occurring and 576 + * so will prevent the system from bogging down due to every concurrent 577 + * ENOSPC event scanning all the active inodes in the system for writeback. 
578 + */ 579 + void 580 + xfs_flush_inodes( 581 + struct xfs_inode *ip) 582 + { 583 + struct xfs_mount *mp = ip->i_mount; 584 + 585 + queue_work(xfs_syncd_wq, &mp->m_flush_work); 586 + flush_work_sync(&mp->m_flush_work); 587 + } 588 + 589 + STATIC void 590 + xfs_flush_worker( 591 + struct work_struct *work) 592 + { 593 + struct xfs_mount *mp = container_of(work, 594 + struct xfs_mount, m_flush_work); 595 + 596 + xfs_sync_data(mp, SYNC_TRYLOCK); 597 + xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT); 520 598 } 521 599 522 600 int 523 601 xfs_syncd_init( 524 602 struct xfs_mount *mp) 525 603 { 526 - mp->m_sync_work.w_syncer = xfs_sync_worker; 527 - mp->m_sync_work.w_mount = mp; 528 - mp->m_sync_work.w_completion = NULL; 529 - mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd/%s", mp->m_fsname); 530 - if (IS_ERR(mp->m_sync_task)) 531 - return -PTR_ERR(mp->m_sync_task); 604 + INIT_WORK(&mp->m_flush_work, xfs_flush_worker); 605 + INIT_DELAYED_WORK(&mp->m_sync_work, xfs_sync_worker); 606 + INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker); 607 + 608 + xfs_syncd_queue_sync(mp); 609 + xfs_syncd_queue_reclaim(mp); 610 + 532 611 return 0; 533 612 } 534 613 ··· 569 582 xfs_syncd_stop( 570 583 struct xfs_mount *mp) 571 584 { 572 - kthread_stop(mp->m_sync_task); 585 + cancel_delayed_work_sync(&mp->m_sync_work); 586 + cancel_delayed_work_sync(&mp->m_reclaim_work); 587 + cancel_work_sync(&mp->m_flush_work); 573 588 } 574 589 575 590 void ··· 590 601 XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino), 591 602 XFS_ICI_RECLAIM_TAG); 592 603 spin_unlock(&ip->i_mount->m_perag_lock); 604 + 605 + /* schedule periodic background inode reclaim */ 606 + xfs_syncd_queue_reclaim(ip->i_mount); 607 + 593 608 trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno, 594 609 -1, _RET_IP_); 595 610 } ··· 1010 1017 } 1011 1018 1012 1019 /* 1013 - * Shrinker infrastructure. 1020 + * Inode cache shrinker. 1021 + * 1022 + * When called we make sure that there is a background (fast) inode reclaim in 1023 + * progress, while we will throttle the speed of reclaim via doiing synchronous 1024 + * reclaim of inodes. That means if we come across dirty inodes, we wait for 1025 + * them to be cleaned, which we hope will not be very long due to the 1026 + * background walker having already kicked the IO off on those dirty inodes. 1014 1027 */ 1015 1028 static int 1016 1029 xfs_reclaim_inode_shrink( ··· 1031 1032 1032 1033 mp = container_of(shrink, struct xfs_mount, m_inode_shrink); 1033 1034 if (nr_to_scan) { 1035 + /* kick background reclaimer and push the AIL */ 1036 + xfs_syncd_queue_reclaim(mp); 1037 + xfs_ail_push_all(mp->m_ail); 1038 + 1034 1039 if (!(gfp_mask & __GFP_FS)) 1035 1040 return -1; 1036 1041 1037 - xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK, &nr_to_scan); 1042 + xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, 1043 + &nr_to_scan); 1038 1044 /* terminate if we don't exhaust the scan */ 1039 1045 if (nr_to_scan > 0) 1040 1046 return -1;
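In the xfs_sync.c rework above, the xfssyncd kthread becomes delayed work items that re-queue themselves after each pass and are stopped with cancel_delayed_work_sync() at unmount. A minimal sketch of that self-rearming pattern follows, assuming a hypothetical fixed period (the real interval is derived from xfs_syncd_centisecs) and the kernel's default system_wq rather than a dedicated queue.

#include <linux/jiffies.h>
#include <linux/workqueue.h>

#define EXAMPLE_PERIOD_MS 30000         /* hypothetical 30 second period */

static struct delayed_work example_sync_work;

static void example_sync_worker(struct work_struct *work)
{
        struct delayed_work *dwork = to_delayed_work(work);

        /* ... one pass of background sync goes here ... */

        /* re-arm ourselves; the cycle ends only when the work is cancelled */
        queue_delayed_work(system_wq, dwork, msecs_to_jiffies(EXAMPLE_PERIOD_MS));
}

static void example_sync_start(void)
{
        INIT_DELAYED_WORK(&example_sync_work, example_sync_worker);
        queue_delayed_work(system_wq, &example_sync_work,
                        msecs_to_jiffies(EXAMPLE_PERIOD_MS));
}

static void example_sync_stop(void)
{
        /* waits for a pass in flight and prevents it from re-queueing */
        cancel_delayed_work_sync(&example_sync_work);
}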
+2
fs/xfs/linux-2.6/xfs_sync.h
···
  #define SYNC_WAIT 0x0001 /* wait for i/o to complete */
  #define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */

+ extern struct workqueue_struct *xfs_syncd_wq; /* sync workqueue */
+
  int xfs_syncd_init(struct xfs_mount *mp);
  void xfs_syncd_stop(struct xfs_mount *mp);
-7
fs/xfs/quota/xfs_qm.c
··· 461 461 struct xfs_quotainfo *q = mp->m_quotainfo; 462 462 int recl; 463 463 struct xfs_dquot *dqp; 464 - int niters; 465 464 int error; 466 465 467 466 if (!q) 468 467 return 0; 469 - niters = 0; 470 468 again: 471 469 mutex_lock(&q->qi_dqlist_lock); 472 470 list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) { ··· 1312 1314 { 1313 1315 xfs_buf_t *bp; 1314 1316 int error; 1315 - int notcommitted; 1316 - int incr; 1317 1317 int type; 1318 1318 1319 1319 ASSERT(blkcnt > 0); 1320 - notcommitted = 0; 1321 - incr = (blkcnt > XFS_QM_MAX_DQCLUSTER_LOGSZ) ? 1322 - XFS_QM_MAX_DQCLUSTER_LOGSZ : blkcnt; 1323 1320 type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER : 1324 1321 (flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP); 1325 1322 error = 0;
-5
fs/xfs/quota/xfs_qm.h
···
   * block in the dquot/xqm code.
   */
  #define XFS_DQUOT_CLUSTER_SIZE_FSB (xfs_filblks_t)1
- /*
-  * When doing a quotacheck, we log dquot clusters of this many FSBs at most
-  * in a single transaction. We don't want to ask for too huge a log reservation.
-  */
- #define XFS_QM_MAX_DQCLUSTER_LOGSZ 3

  typedef xfs_dqhash_t xfs_dqlist_t;
-2
fs/xfs/quota/xfs_qm_syscalls.c
···
  {
  int error;
  uint qf;
- uint accflags;
  __int64_t sbflags;

  flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD);
  /*
   * Switching on quota accounting must be done at mount time.
   */
- accflags = flags & XFS_ALL_QUOTA_ACCT;
  flags &= ~(XFS_ALL_QUOTA_ACCT);

  sbflags = 0;
+23 -7
fs/xfs/xfs_alloc.c
···
  memset(&args, 0, sizeof(xfs_alloc_arg_t));
  args.tp = tp;
  args.mp = tp->t_mountp;
+
+ /*
+  * validate that the block number is legal - the enables us to detect
+  * and handle a silent filesystem corruption rather than crashing.
+  */
  args.agno = XFS_FSB_TO_AGNO(args.mp, bno);
- ASSERT(args.agno < args.mp->m_sb.sb_agcount);
+ if (args.agno >= args.mp->m_sb.sb_agcount)
+         return EFSCORRUPTED;
+
  args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno);
+ if (args.agbno >= args.mp->m_sb.sb_agblocks)
+         return EFSCORRUPTED;
+
  args.pag = xfs_perag_get(args.mp, args.agno);
- if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING)))
+ ASSERT(args.pag);
+
+ error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
+ if (error)
          goto error0;
- #ifdef DEBUG
- ASSERT(args.agbp != NULL);
- ASSERT((args.agbno + len) <=
-         be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length));
- #endif
+
+ /* validate the extent size is legal now we have the agf locked */
+ if (args.agbno + len >
+         be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)) {
+         error = EFSCORRUPTED;
+         goto error0;
+ }
+
  error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
  error0:
  xfs_perag_put(args.pag);
+40 -27
fs/xfs/xfs_inode_item.c
··· 198 198 } 199 199 200 200 /* 201 + * xfs_inode_item_format_extents - convert in-core extents to on-disk form 202 + * 203 + * For either the data or attr fork in extent format, we need to endian convert 204 + * the in-core extent as we place them into the on-disk inode. In this case, we 205 + * need to do this conversion before we write the extents into the log. Because 206 + * we don't have the disk inode to write into here, we allocate a buffer and 207 + * format the extents into it via xfs_iextents_copy(). We free the buffer in 208 + * the unlock routine after the copy for the log has been made. 209 + * 210 + * In the case of the data fork, the in-core and on-disk fork sizes can be 211 + * different due to delayed allocation extents. We only log on-disk extents 212 + * here, so always use the physical fork size to determine the size of the 213 + * buffer we need to allocate. 214 + */ 215 + STATIC void 216 + xfs_inode_item_format_extents( 217 + struct xfs_inode *ip, 218 + struct xfs_log_iovec *vecp, 219 + int whichfork, 220 + int type) 221 + { 222 + xfs_bmbt_rec_t *ext_buffer; 223 + 224 + ext_buffer = kmem_alloc(XFS_IFORK_SIZE(ip, whichfork), KM_SLEEP); 225 + if (whichfork == XFS_DATA_FORK) 226 + ip->i_itemp->ili_extents_buf = ext_buffer; 227 + else 228 + ip->i_itemp->ili_aextents_buf = ext_buffer; 229 + 230 + vecp->i_addr = ext_buffer; 231 + vecp->i_len = xfs_iextents_copy(ip, ext_buffer, whichfork); 232 + vecp->i_type = type; 233 + } 234 + 235 + /* 201 236 * This is called to fill in the vector of log iovecs for the 202 237 * given inode log item. It fills the first item with an inode 203 238 * log format structure, the second with the on-disk inode structure, ··· 248 213 struct xfs_inode *ip = iip->ili_inode; 249 214 uint nvecs; 250 215 size_t data_bytes; 251 - xfs_bmbt_rec_t *ext_buffer; 252 216 xfs_mount_t *mp; 253 217 254 218 vecp->i_addr = &iip->ili_format; ··· 354 320 } else 355 321 #endif 356 322 { 357 - /* 358 - * There are delayed allocation extents 359 - * in the inode, or we need to convert 360 - * the extents to on disk format. 361 - * Use xfs_iextents_copy() 362 - * to copy only the real extents into 363 - * a separate buffer. We'll free the 364 - * buffer in the unlock routine. 365 - */ 366 - ext_buffer = kmem_alloc(ip->i_df.if_bytes, 367 - KM_SLEEP); 368 - iip->ili_extents_buf = ext_buffer; 369 - vecp->i_addr = ext_buffer; 370 - vecp->i_len = xfs_iextents_copy(ip, ext_buffer, 371 - XFS_DATA_FORK); 372 - vecp->i_type = XLOG_REG_TYPE_IEXT; 323 + xfs_inode_item_format_extents(ip, vecp, 324 + XFS_DATA_FORK, XLOG_REG_TYPE_IEXT); 373 325 } 374 326 ASSERT(vecp->i_len <= ip->i_df.if_bytes); 375 327 iip->ili_format.ilf_dsize = vecp->i_len; ··· 465 445 */ 466 446 vecp->i_addr = ip->i_afp->if_u1.if_extents; 467 447 vecp->i_len = ip->i_afp->if_bytes; 448 + vecp->i_type = XLOG_REG_TYPE_IATTR_EXT; 468 449 #else 469 450 ASSERT(iip->ili_aextents_buf == NULL); 470 - /* 471 - * Need to endian flip before logging 472 - */ 473 - ext_buffer = kmem_alloc(ip->i_afp->if_bytes, 474 - KM_SLEEP); 475 - iip->ili_aextents_buf = ext_buffer; 476 - vecp->i_addr = ext_buffer; 477 - vecp->i_len = xfs_iextents_copy(ip, ext_buffer, 478 - XFS_ATTR_FORK); 451 + xfs_inode_item_format_extents(ip, vecp, 452 + XFS_ATTR_FORK, XLOG_REG_TYPE_IATTR_EXT); 479 453 #endif 480 - vecp->i_type = XLOG_REG_TYPE_IATTR_EXT; 481 454 iip->ili_format.ilf_asize = vecp->i_len; 482 455 vecp++; 483 456 nvecs++;
-2
fs/xfs/xfs_itable.c
···
  xfs_agi_t *agi; /* agi header data */
  xfs_agino_t agino; /* inode # in allocation group */
  xfs_agnumber_t agno; /* allocation group number */
- xfs_daddr_t bno; /* inode cluster start daddr */
  int chunkidx; /* current index into inode chunk */
  int clustidx; /* current index into inode cluster */
  xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */
···
  mp->m_sb.sb_inopblog);
  }
  ino = XFS_AGINO_TO_INO(mp, agno, agino);
- bno = XFS_AGB_TO_DADDR(mp, agno, agbno);
  /*
   * Skip if this inode is free.
   */
+27 -11
fs/xfs/xfs_log.c
··· 761 761 break; 762 762 case XLOG_STATE_COVER_NEED: 763 763 case XLOG_STATE_COVER_NEED2: 764 - if (!xfs_trans_ail_tail(log->l_ailp) && 764 + if (!xfs_ail_min_lsn(log->l_ailp) && 765 765 xlog_iclogs_empty(log)) { 766 766 if (log->l_covered_state == XLOG_STATE_COVER_NEED) 767 767 log->l_covered_state = XLOG_STATE_COVER_DONE; ··· 801 801 xfs_lsn_t tail_lsn; 802 802 struct log *log = mp->m_log; 803 803 804 - tail_lsn = xfs_trans_ail_tail(mp->m_ail); 804 + tail_lsn = xfs_ail_min_lsn(mp->m_ail); 805 805 if (!tail_lsn) 806 806 tail_lsn = atomic64_read(&log->l_last_sync_lsn); 807 807 ··· 1239 1239 * the filesystem is shutting down. 1240 1240 */ 1241 1241 if (!XLOG_FORCED_SHUTDOWN(log)) 1242 - xfs_trans_ail_push(log->l_ailp, threshold_lsn); 1242 + xfs_ail_push(log->l_ailp, threshold_lsn); 1243 1243 } 1244 1244 1245 1245 /* ··· 3407 3407 xfs_emerg(log->l_mp, "%s: invalid ptr", __func__); 3408 3408 } 3409 3409 3410 + /* 3411 + * Check to make sure the grant write head didn't just over lap the tail. If 3412 + * the cycles are the same, we can't be overlapping. Otherwise, make sure that 3413 + * the cycles differ by exactly one and check the byte count. 3414 + * 3415 + * This check is run unlocked, so can give false positives. Rather than assert 3416 + * on failures, use a warn-once flag and a panic tag to allow the admin to 3417 + * determine if they want to panic the machine when such an error occurs. For 3418 + * debug kernels this will have the same effect as using an assert but, unlinke 3419 + * an assert, it can be turned off at runtime. 3420 + */ 3410 3421 STATIC void 3411 3422 xlog_verify_grant_tail( 3412 3423 struct log *log) ··· 3425 3414 int tail_cycle, tail_blocks; 3426 3415 int cycle, space; 3427 3416 3428 - /* 3429 - * Check to make sure the grant write head didn't just over lap the 3430 - * tail. If the cycles are the same, we can't be overlapping. 3431 - * Otherwise, make sure that the cycles differ by exactly one and 3432 - * check the byte count. 3433 - */ 3434 3417 xlog_crack_grant_head(&log->l_grant_write_head, &cycle, &space); 3435 3418 xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_blocks); 3436 3419 if (tail_cycle != cycle) { 3437 - ASSERT(cycle - 1 == tail_cycle); 3438 - ASSERT(space <= BBTOB(tail_blocks)); 3420 + if (cycle - 1 != tail_cycle && 3421 + !(log->l_flags & XLOG_TAIL_WARN)) { 3422 + xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES, 3423 + "%s: cycle - 1 != tail_cycle", __func__); 3424 + log->l_flags |= XLOG_TAIL_WARN; 3425 + } 3426 + 3427 + if (space > BBTOB(tail_blocks) && 3428 + !(log->l_flags & XLOG_TAIL_WARN)) { 3429 + xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES, 3430 + "%s: space > BBTOB(tail_blocks)", __func__); 3431 + log->l_flags |= XLOG_TAIL_WARN; 3432 + } 3439 3433 } 3440 3434 } 3441 3435
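xlog_verify_grant_tail() above is changed from asserting to warning once: the check runs unlocked and can give false positives, so the first failure raises an alert and records that in a flag bit instead of bringing the machine down. Below is a sketch of that warn-once idiom with hypothetical names; the real code uses xfs_alert_tag() and the new XLOG_TAIL_WARN flag.

#include <linux/printk.h>

#define EXAMPLE_TAIL_WARNED 0x10        /* stands in for XLOG_TAIL_WARN */

struct example_log {
        unsigned int flags;
};

static void example_verify_tail(struct example_log *log,
                                int cycle, int tail_cycle)
{
        /* unlocked check: it may race, so never assert on a mismatch here */
        if (cycle - 1 != tail_cycle && !(log->flags & EXAMPLE_TAIL_WARNED)) {
                pr_alert("example: cycle - 1 != tail_cycle\n");
                log->flags |= EXAMPLE_TAIL_WARNED;      /* warn only once */
        }
}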
+1
fs/xfs/xfs_log_priv.h
···
  #define XLOG_RECOVERY_NEEDED 0x4 /* log was recovered */
  #define XLOG_IO_ERROR 0x8 /* log hit an I/O error, and being
  shutdown */
+ #define XLOG_TAIL_WARN 0x10 /* log tail verify warning issued */

  #ifdef __KERNEL__
  /*
+3 -6
fs/xfs/xfs_mount.h
···
  struct mutex m_icsb_mutex; /* balancer sync lock */
  #endif
  struct xfs_mru_cache *m_filestream; /* per-mount filestream data */
- struct task_struct *m_sync_task; /* generalised sync thread */
- xfs_sync_work_t m_sync_work; /* work item for VFS_SYNC */
- struct list_head m_sync_list; /* sync thread work item list */
- spinlock_t m_sync_lock; /* work item list lock */
- int m_sync_seq; /* sync thread generation no. */
- wait_queue_head_t m_wait_single_sync_task;
+ struct delayed_work m_sync_work; /* background sync work */
+ struct delayed_work m_reclaim_work; /* background inode reclaim */
+ struct work_struct m_flush_work; /* background inode flush */
  __int64_t m_update_flags; /* sb flags we need to update
  on the next remount,rw */
  struct shrinker m_inode_shrink; /* inode reclaim shrinker */
+231 -190
fs/xfs/xfs_trans_ail.c
··· 28 28 #include "xfs_trans_priv.h" 29 29 #include "xfs_error.h" 30 30 31 - STATIC void xfs_ail_splice(struct xfs_ail *, struct list_head *, xfs_lsn_t); 32 - STATIC void xfs_ail_delete(struct xfs_ail *, xfs_log_item_t *); 33 - STATIC xfs_log_item_t * xfs_ail_min(struct xfs_ail *); 34 - STATIC xfs_log_item_t * xfs_ail_next(struct xfs_ail *, xfs_log_item_t *); 31 + struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */ 35 32 36 33 #ifdef DEBUG 37 - STATIC void xfs_ail_check(struct xfs_ail *, xfs_log_item_t *); 38 - #else 34 + /* 35 + * Check that the list is sorted as it should be. 36 + */ 37 + STATIC void 38 + xfs_ail_check( 39 + struct xfs_ail *ailp, 40 + xfs_log_item_t *lip) 41 + { 42 + xfs_log_item_t *prev_lip; 43 + 44 + if (list_empty(&ailp->xa_ail)) 45 + return; 46 + 47 + /* 48 + * Check the next and previous entries are valid. 49 + */ 50 + ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0); 51 + prev_lip = list_entry(lip->li_ail.prev, xfs_log_item_t, li_ail); 52 + if (&prev_lip->li_ail != &ailp->xa_ail) 53 + ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0); 54 + 55 + prev_lip = list_entry(lip->li_ail.next, xfs_log_item_t, li_ail); 56 + if (&prev_lip->li_ail != &ailp->xa_ail) 57 + ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0); 58 + 59 + 60 + #ifdef XFS_TRANS_DEBUG 61 + /* 62 + * Walk the list checking lsn ordering, and that every entry has the 63 + * XFS_LI_IN_AIL flag set. This is really expensive, so only do it 64 + * when specifically debugging the transaction subsystem. 65 + */ 66 + prev_lip = list_entry(&ailp->xa_ail, xfs_log_item_t, li_ail); 67 + list_for_each_entry(lip, &ailp->xa_ail, li_ail) { 68 + if (&prev_lip->li_ail != &ailp->xa_ail) 69 + ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0); 70 + ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0); 71 + prev_lip = lip; 72 + } 73 + #endif /* XFS_TRANS_DEBUG */ 74 + } 75 + #else /* !DEBUG */ 39 76 #define xfs_ail_check(a,l) 40 77 #endif /* DEBUG */ 41 78 79 + /* 80 + * Return a pointer to the first item in the AIL. If the AIL is empty, then 81 + * return NULL. 82 + */ 83 + static xfs_log_item_t * 84 + xfs_ail_min( 85 + struct xfs_ail *ailp) 86 + { 87 + if (list_empty(&ailp->xa_ail)) 88 + return NULL; 89 + 90 + return list_first_entry(&ailp->xa_ail, xfs_log_item_t, li_ail); 91 + } 92 + 93 + /* 94 + * Return a pointer to the last item in the AIL. If the AIL is empty, then 95 + * return NULL. 96 + */ 97 + static xfs_log_item_t * 98 + xfs_ail_max( 99 + struct xfs_ail *ailp) 100 + { 101 + if (list_empty(&ailp->xa_ail)) 102 + return NULL; 103 + 104 + return list_entry(ailp->xa_ail.prev, xfs_log_item_t, li_ail); 105 + } 42 106 43 107 /* 44 - * This is called by the log manager code to determine the LSN 45 - * of the tail of the log. This is exactly the LSN of the first 46 - * item in the AIL. If the AIL is empty, then this function 47 - * returns 0. 108 + * Return a pointer to the item which follows the given item in the AIL. If 109 + * the given item is the last item in the list, then return NULL. 110 + */ 111 + static xfs_log_item_t * 112 + xfs_ail_next( 113 + struct xfs_ail *ailp, 114 + xfs_log_item_t *lip) 115 + { 116 + if (lip->li_ail.next == &ailp->xa_ail) 117 + return NULL; 118 + 119 + return list_first_entry(&lip->li_ail, xfs_log_item_t, li_ail); 120 + } 121 + 122 + /* 123 + * This is called by the log manager code to determine the LSN of the tail of 124 + * the log. This is exactly the LSN of the first item in the AIL. If the AIL 125 + * is empty, then this function returns 0. 
48 126 * 49 - * We need the AIL lock in order to get a coherent read of the 50 - * lsn of the last item in the AIL. 127 + * We need the AIL lock in order to get a coherent read of the lsn of the last 128 + * item in the AIL. 51 129 */ 52 130 xfs_lsn_t 53 - xfs_trans_ail_tail( 131 + xfs_ail_min_lsn( 54 132 struct xfs_ail *ailp) 55 133 { 56 - xfs_lsn_t lsn; 134 + xfs_lsn_t lsn = 0; 57 135 xfs_log_item_t *lip; 58 136 59 137 spin_lock(&ailp->xa_lock); 60 138 lip = xfs_ail_min(ailp); 61 - if (lip == NULL) { 62 - lsn = (xfs_lsn_t)0; 63 - } else { 139 + if (lip) 64 140 lsn = lip->li_lsn; 65 - } 66 141 spin_unlock(&ailp->xa_lock); 67 142 68 143 return lsn; 69 144 } 70 145 71 146 /* 72 - * xfs_trans_push_ail 73 - * 74 - * This routine is called to move the tail of the AIL forward. It does this by 75 - * trying to flush items in the AIL whose lsns are below the given 76 - * threshold_lsn. 77 - * 78 - * the push is run asynchronously in a separate thread, so we return the tail 79 - * of the log right now instead of the tail after the push. This means we will 80 - * either continue right away, or we will sleep waiting on the async thread to 81 - * do its work. 82 - * 83 - * We do this unlocked - we only need to know whether there is anything in the 84 - * AIL at the time we are called. We don't need to access the contents of 85 - * any of the objects, so the lock is not needed. 147 + * Return the maximum lsn held in the AIL, or zero if the AIL is empty. 86 148 */ 87 - void 88 - xfs_trans_ail_push( 89 - struct xfs_ail *ailp, 90 - xfs_lsn_t threshold_lsn) 149 + static xfs_lsn_t 150 + xfs_ail_max_lsn( 151 + struct xfs_ail *ailp) 91 152 { 92 - xfs_log_item_t *lip; 153 + xfs_lsn_t lsn = 0; 154 + xfs_log_item_t *lip; 93 155 94 - lip = xfs_ail_min(ailp); 95 - if (lip && !XFS_FORCED_SHUTDOWN(ailp->xa_mount)) { 96 - if (XFS_LSN_CMP(threshold_lsn, ailp->xa_target) > 0) 97 - xfsaild_wakeup(ailp, threshold_lsn); 98 - } 156 + spin_lock(&ailp->xa_lock); 157 + lip = xfs_ail_max(ailp); 158 + if (lip) 159 + lsn = lip->li_lsn; 160 + spin_unlock(&ailp->xa_lock); 161 + 162 + return lsn; 99 163 } 100 164 101 165 /* ··· 300 236 } 301 237 302 238 /* 303 - * xfsaild_push does the work of pushing on the AIL. Returning a timeout of 304 - * zero indicates that the caller should sleep until woken. 239 + * splice the log item list into the AIL at the given LSN. 305 240 */ 306 - long 307 - xfsaild_push( 308 - struct xfs_ail *ailp, 309 - xfs_lsn_t *last_lsn) 241 + static void 242 + xfs_ail_splice( 243 + struct xfs_ail *ailp, 244 + struct list_head *list, 245 + xfs_lsn_t lsn) 310 246 { 311 - long tout = 0; 312 - xfs_lsn_t last_pushed_lsn = *last_lsn; 247 + xfs_log_item_t *next_lip; 248 + 249 + /* If the list is empty, just insert the item. */ 250 + if (list_empty(&ailp->xa_ail)) { 251 + list_splice(list, &ailp->xa_ail); 252 + return; 253 + } 254 + 255 + list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) { 256 + if (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0) 257 + break; 258 + } 259 + 260 + ASSERT(&next_lip->li_ail == &ailp->xa_ail || 261 + XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0); 262 + 263 + list_splice_init(list, &next_lip->li_ail); 264 + } 265 + 266 + /* 267 + * Delete the given item from the AIL. Return a pointer to the item. 
268 + */ 269 + static void 270 + xfs_ail_delete( 271 + struct xfs_ail *ailp, 272 + xfs_log_item_t *lip) 273 + { 274 + xfs_ail_check(ailp, lip); 275 + list_del(&lip->li_ail); 276 + xfs_trans_ail_cursor_clear(ailp, lip); 277 + } 278 + 279 + /* 280 + * xfs_ail_worker does the work of pushing on the AIL. It will requeue itself 281 + * to run at a later time if there is more work to do to complete the push. 282 + */ 283 + STATIC void 284 + xfs_ail_worker( 285 + struct work_struct *work) 286 + { 287 + struct xfs_ail *ailp = container_of(to_delayed_work(work), 288 + struct xfs_ail, xa_work); 289 + long tout; 313 290 xfs_lsn_t target = ailp->xa_target; 314 291 xfs_lsn_t lsn; 315 292 xfs_log_item_t *lip; ··· 361 256 362 257 spin_lock(&ailp->xa_lock); 363 258 xfs_trans_ail_cursor_init(ailp, cur); 364 - lip = xfs_trans_ail_cursor_first(ailp, cur, *last_lsn); 259 + lip = xfs_trans_ail_cursor_first(ailp, cur, ailp->xa_last_pushed_lsn); 365 260 if (!lip || XFS_FORCED_SHUTDOWN(mp)) { 366 261 /* 367 262 * AIL is empty or our push has reached the end. 368 263 */ 369 264 xfs_trans_ail_cursor_done(ailp, cur); 370 265 spin_unlock(&ailp->xa_lock); 371 - *last_lsn = 0; 372 - return tout; 266 + ailp->xa_last_pushed_lsn = 0; 267 + return; 373 268 } 374 269 375 270 XFS_STATS_INC(xs_push_ail); ··· 406 301 case XFS_ITEM_SUCCESS: 407 302 XFS_STATS_INC(xs_push_ail_success); 408 303 IOP_PUSH(lip); 409 - last_pushed_lsn = lsn; 304 + ailp->xa_last_pushed_lsn = lsn; 410 305 break; 411 306 412 307 case XFS_ITEM_PUSHBUF: 413 308 XFS_STATS_INC(xs_push_ail_pushbuf); 414 309 IOP_PUSHBUF(lip); 415 - last_pushed_lsn = lsn; 310 + ailp->xa_last_pushed_lsn = lsn; 416 311 push_xfsbufd = 1; 417 312 break; 418 313 ··· 424 319 425 320 case XFS_ITEM_LOCKED: 426 321 XFS_STATS_INC(xs_push_ail_locked); 427 - last_pushed_lsn = lsn; 322 + ailp->xa_last_pushed_lsn = lsn; 428 323 stuck++; 429 324 break; 430 325 ··· 479 374 wake_up_process(mp->m_ddev_targp->bt_task); 480 375 } 481 376 377 + /* assume we have more work to do in a short while */ 378 + tout = 10; 482 379 if (!count) { 483 380 /* We're past our target or empty, so idle */ 484 - last_pushed_lsn = 0; 381 + ailp->xa_last_pushed_lsn = 0; 382 + 383 + /* 384 + * Check for an updated push target before clearing the 385 + * XFS_AIL_PUSHING_BIT. If the target changed, we've got more 386 + * work to do. Wait a bit longer before starting that work. 387 + */ 388 + smp_rmb(); 389 + if (ailp->xa_target == target) { 390 + clear_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags); 391 + return; 392 + } 393 + tout = 50; 485 394 } else if (XFS_LSN_CMP(lsn, target) >= 0) { 486 395 /* 487 396 * We reached the target so wait a bit longer for I/O to ··· 503 384 * start the next scan from the start of the AIL. 504 385 */ 505 386 tout = 50; 506 - last_pushed_lsn = 0; 387 + ailp->xa_last_pushed_lsn = 0; 507 388 } else if ((stuck * 100) / count > 90) { 508 389 /* 509 390 * Either there is a lot of contention on the AIL or we ··· 515 396 * continuing from where we were. 516 397 */ 517 398 tout = 20; 518 - } else { 519 - /* more to do, but wait a short while before continuing */ 520 - tout = 10; 521 399 } 522 - *last_lsn = last_pushed_lsn; 523 - return tout; 400 + 401 + /* There is more to do, requeue us. */ 402 + queue_delayed_work(xfs_syncd_wq, &ailp->xa_work, 403 + msecs_to_jiffies(tout)); 524 404 } 525 405 406 + /* 407 + * This routine is called to move the tail of the AIL forward. It does this by 408 + * trying to flush items in the AIL whose lsns are below the given 409 + * threshold_lsn. 
410 + * 411 + * The push is run asynchronously in a workqueue, which means the caller needs 412 + * to handle waiting on the async flush for space to become available. 413 + * We don't want to interrupt any push that is in progress, hence we only queue 414 + * work if we set the pushing bit approriately. 415 + * 416 + * We do this unlocked - we only need to know whether there is anything in the 417 + * AIL at the time we are called. We don't need to access the contents of 418 + * any of the objects, so the lock is not needed. 419 + */ 420 + void 421 + xfs_ail_push( 422 + struct xfs_ail *ailp, 423 + xfs_lsn_t threshold_lsn) 424 + { 425 + xfs_log_item_t *lip; 426 + 427 + lip = xfs_ail_min(ailp); 428 + if (!lip || XFS_FORCED_SHUTDOWN(ailp->xa_mount) || 429 + XFS_LSN_CMP(threshold_lsn, ailp->xa_target) <= 0) 430 + return; 431 + 432 + /* 433 + * Ensure that the new target is noticed in push code before it clears 434 + * the XFS_AIL_PUSHING_BIT. 435 + */ 436 + smp_wmb(); 437 + ailp->xa_target = threshold_lsn; 438 + if (!test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags)) 439 + queue_delayed_work(xfs_syncd_wq, &ailp->xa_work, 0); 440 + } 441 + 442 + /* 443 + * Push out all items in the AIL immediately 444 + */ 445 + void 446 + xfs_ail_push_all( 447 + struct xfs_ail *ailp) 448 + { 449 + xfs_lsn_t threshold_lsn = xfs_ail_max_lsn(ailp); 450 + 451 + if (threshold_lsn) 452 + xfs_ail_push(ailp, threshold_lsn); 453 + } 526 454 527 455 /* 528 456 * This is to be called when an item is unlocked that may have ··· 781 615 xfs_mount_t *mp) 782 616 { 783 617 struct xfs_ail *ailp; 784 - int error; 785 618 786 619 ailp = kmem_zalloc(sizeof(struct xfs_ail), KM_MAYFAIL); 787 620 if (!ailp) ··· 789 624 ailp->xa_mount = mp; 790 625 INIT_LIST_HEAD(&ailp->xa_ail); 791 626 spin_lock_init(&ailp->xa_lock); 792 - error = xfsaild_start(ailp); 793 - if (error) 794 - goto out_free_ailp; 627 + INIT_DELAYED_WORK(&ailp->xa_work, xfs_ail_worker); 795 628 mp->m_ail = ailp; 796 629 return 0; 797 - 798 - out_free_ailp: 799 - kmem_free(ailp); 800 - return error; 801 630 } 802 631 803 632 void ··· 800 641 { 801 642 struct xfs_ail *ailp = mp->m_ail; 802 643 803 - xfsaild_stop(ailp); 644 + cancel_delayed_work_sync(&ailp->xa_work); 804 645 kmem_free(ailp); 805 646 } 806 - 807 - /* 808 - * splice the log item list into the AIL at the given LSN. 809 - */ 810 - STATIC void 811 - xfs_ail_splice( 812 - struct xfs_ail *ailp, 813 - struct list_head *list, 814 - xfs_lsn_t lsn) 815 - { 816 - xfs_log_item_t *next_lip; 817 - 818 - /* 819 - * If the list is empty, just insert the item. 820 - */ 821 - if (list_empty(&ailp->xa_ail)) { 822 - list_splice(list, &ailp->xa_ail); 823 - return; 824 - } 825 - 826 - list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) { 827 - if (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0) 828 - break; 829 - } 830 - 831 - ASSERT((&next_lip->li_ail == &ailp->xa_ail) || 832 - (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0)); 833 - 834 - list_splice_init(list, &next_lip->li_ail); 835 - return; 836 - } 837 - 838 - /* 839 - * Delete the given item from the AIL. Return a pointer to the item. 840 - */ 841 - STATIC void 842 - xfs_ail_delete( 843 - struct xfs_ail *ailp, 844 - xfs_log_item_t *lip) 845 - { 846 - xfs_ail_check(ailp, lip); 847 - list_del(&lip->li_ail); 848 - xfs_trans_ail_cursor_clear(ailp, lip); 849 - } 850 - 851 - /* 852 - * Return a pointer to the first item in the AIL. 853 - * If the AIL is empty, then return NULL. 
854 - */ 855 - STATIC xfs_log_item_t * 856 - xfs_ail_min( 857 - struct xfs_ail *ailp) 858 - { 859 - if (list_empty(&ailp->xa_ail)) 860 - return NULL; 861 - 862 - return list_first_entry(&ailp->xa_ail, xfs_log_item_t, li_ail); 863 - } 864 - 865 - /* 866 - * Return a pointer to the item which follows 867 - * the given item in the AIL. If the given item 868 - * is the last item in the list, then return NULL. 869 - */ 870 - STATIC xfs_log_item_t * 871 - xfs_ail_next( 872 - struct xfs_ail *ailp, 873 - xfs_log_item_t *lip) 874 - { 875 - if (lip->li_ail.next == &ailp->xa_ail) 876 - return NULL; 877 - 878 - return list_first_entry(&lip->li_ail, xfs_log_item_t, li_ail); 879 - } 880 - 881 - #ifdef DEBUG 882 - /* 883 - * Check that the list is sorted as it should be. 884 - */ 885 - STATIC void 886 - xfs_ail_check( 887 - struct xfs_ail *ailp, 888 - xfs_log_item_t *lip) 889 - { 890 - xfs_log_item_t *prev_lip; 891 - 892 - if (list_empty(&ailp->xa_ail)) 893 - return; 894 - 895 - /* 896 - * Check the next and previous entries are valid. 897 - */ 898 - ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0); 899 - prev_lip = list_entry(lip->li_ail.prev, xfs_log_item_t, li_ail); 900 - if (&prev_lip->li_ail != &ailp->xa_ail) 901 - ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0); 902 - 903 - prev_lip = list_entry(lip->li_ail.next, xfs_log_item_t, li_ail); 904 - if (&prev_lip->li_ail != &ailp->xa_ail) 905 - ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0); 906 - 907 - 908 - #ifdef XFS_TRANS_DEBUG 909 - /* 910 - * Walk the list checking lsn ordering, and that every entry has the 911 - * XFS_LI_IN_AIL flag set. This is really expensive, so only do it 912 - * when specifically debugging the transaction subsystem. 913 - */ 914 - prev_lip = list_entry(&ailp->xa_ail, xfs_log_item_t, li_ail); 915 - list_for_each_entry(lip, &ailp->xa_ail, li_ail) { 916 - if (&prev_lip->li_ail != &ailp->xa_ail) 917 - ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0); 918 - ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0); 919 - prev_lip = lip; 920 - } 921 - #endif /* XFS_TRANS_DEBUG */ 922 - } 923 - #endif /* DEBUG */
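The reworked xfs_ail_push() above guarantees at most one queued push worker: it publishes the new target, issues a write barrier, and only queues work if it wins test_and_set_bit() on the pushing flag; the worker re-reads the target after a read barrier before clearing that flag, so a target update that races with worker completion is never lost. A condensed sketch of that handshake follows, with a hypothetical example_pusher structure queued on the default system_wq (the real code uses xa_target/xa_flags and xfs_syncd_wq).

#include <linux/bitops.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/workqueue.h>

#define EXAMPLE_PUSHING_BIT 0           /* stands in for XFS_AIL_PUSHING_BIT */

struct example_pusher {
        unsigned long flags;
        unsigned long target;           /* stands in for xa_target */
        struct delayed_work work;
};

/* Producer side: publish a new target and queue at most one worker. */
static void example_push(struct example_pusher *p, unsigned long target)
{
        p->target = target;
        smp_wmb();      /* make the target visible before testing the bit */
        if (!test_and_set_bit(EXAMPLE_PUSHING_BIT, &p->flags))
                queue_delayed_work(system_wq, &p->work, 0);
}

/* Worker side: only go idle if the target did not move while we ran. */
static void example_worker(struct work_struct *work)
{
        struct example_pusher *p = container_of(to_delayed_work(work),
                                                struct example_pusher, work);
        unsigned long target = p->target;

        /* ... push items up to 'target' here ... */

        smp_rmb();      /* pairs with the smp_wmb() in example_push() */
        if (p->target == target) {
                clear_bit(EXAMPLE_PUSHING_BIT, &p->flags);
                return;
        }
        /* the target moved while we ran: keep the bit held and run again shortly */
        queue_delayed_work(system_wq, &p->work, msecs_to_jiffies(50));
}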
+12 -10
fs/xfs/xfs_trans_priv.h
··· 65 65 struct xfs_ail { 66 66 struct xfs_mount *xa_mount; 67 67 struct list_head xa_ail; 68 - uint xa_gen; 69 - struct task_struct *xa_task; 70 68 xfs_lsn_t xa_target; 71 69 struct xfs_ail_cursor xa_cursors; 72 70 spinlock_t xa_lock; 71 + struct delayed_work xa_work; 72 + xfs_lsn_t xa_last_pushed_lsn; 73 + unsigned long xa_flags; 73 74 }; 75 + 76 + #define XFS_AIL_PUSHING_BIT 0 74 77 75 78 /* 76 79 * From xfs_trans_ail.c 77 80 */ 81 + 82 + extern struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */ 83 + 78 84 void xfs_trans_ail_update_bulk(struct xfs_ail *ailp, 79 85 struct xfs_log_item **log_items, int nr_items, 80 86 xfs_lsn_t lsn) __releases(ailp->xa_lock); ··· 104 98 xfs_trans_ail_delete_bulk(ailp, &lip, 1); 105 99 } 106 100 107 - void xfs_trans_ail_push(struct xfs_ail *, xfs_lsn_t); 101 + void xfs_ail_push(struct xfs_ail *, xfs_lsn_t); 102 + void xfs_ail_push_all(struct xfs_ail *); 103 + xfs_lsn_t xfs_ail_min_lsn(struct xfs_ail *ailp); 104 + 108 105 void xfs_trans_unlocked_item(struct xfs_ail *, 109 106 xfs_log_item_t *); 110 - 111 - xfs_lsn_t xfs_trans_ail_tail(struct xfs_ail *ailp); 112 107 113 108 struct xfs_log_item *xfs_trans_ail_cursor_first(struct xfs_ail *ailp, 114 109 struct xfs_ail_cursor *cur, ··· 118 111 struct xfs_ail_cursor *cur); 119 112 void xfs_trans_ail_cursor_done(struct xfs_ail *ailp, 120 113 struct xfs_ail_cursor *cur); 121 - 122 - long xfsaild_push(struct xfs_ail *, xfs_lsn_t *); 123 - void xfsaild_wakeup(struct xfs_ail *, xfs_lsn_t); 124 - int xfsaild_start(struct xfs_ail *); 125 - void xfsaild_stop(struct xfs_ail *); 126 114 127 115 #if BITS_PER_LONG != 64 128 116 static inline void