Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'for-linus' of git://oss.sgi.com:8090/xfs/xfs-2.6

* 'for-linus' of git://oss.sgi.com:8090/xfs/xfs-2.6:
[XFS] Avoid replaying inode buffer initialisation log items if on-disk version is newer.
[XFS] Ensure file size updates have been completed before writing inode to disk.
[XFS] On-demand reaping of the MRU cache

+101 -62
+1
fs/xfs/linux-2.6/xfs_aops.c
··· 181 181 ip->i_d.di_size = isize; 182 182 ip->i_update_core = 1; 183 183 ip->i_update_size = 1; 184 + mark_inode_dirty_sync(vn_to_inode(ioend->io_vnode)); 184 185 } 185 186 186 187 xfs_iunlock(ip, XFS_ILOCK_EXCL);
+3 -1
fs/xfs/linux-2.6/xfs_super.c
··· 415 415 416 416 if (vp) { 417 417 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); 418 - if (sync) 418 + if (sync) { 419 + filemap_fdatawait(inode->i_mapping); 419 420 flags |= FLUSH_SYNC; 421 + } 420 422 error = bhv_vop_iflush(vp, flags); 421 423 if (error == EAGAIN) 422 424 error = sync? bhv_vop_iflush(vp, flags | FLUSH_LOG) : 0;
+5
fs/xfs/xfs_buf_item.h
··· 52 52 #define XFS_BLI_UDQUOT_BUF 0x4 53 53 #define XFS_BLI_PDQUOT_BUF 0x8 54 54 #define XFS_BLI_GDQUOT_BUF 0x10 55 + /* 56 + * This flag indicates that the buffer contains newly allocated 57 + * inodes. 58 + */ 59 + #define XFS_BLI_INODE_NEW_BUF 0x20 55 60 56 61 #define XFS_BLI_CHUNK 128 57 62 #define XFS_BLI_SHIFT 7
+1 -2
fs/xfs/xfs_filestream.c
··· 467 467 xfs_filestream_flush( 468 468 xfs_mount_t *mp) 469 469 { 470 - /* point in time flush, so keep the reaper running */ 471 - xfs_mru_cache_flush(mp->m_filestream, 1); 470 + xfs_mru_cache_flush(mp->m_filestream); 472 471 } 473 472 474 473 /*
+48 -3
fs/xfs/xfs_log_recover.c
··· 1874 1874 /*ARGSUSED*/ 1875 1875 STATIC void 1876 1876 xlog_recover_do_reg_buffer( 1877 + xfs_mount_t *mp, 1877 1878 xlog_recover_item_t *item, 1878 1879 xfs_buf_t *bp, 1879 1880 xfs_buf_log_format_t *buf_f) ··· 1885 1884 unsigned int *data_map = NULL; 1886 1885 unsigned int map_size = 0; 1887 1886 int error; 1887 + int stale_buf = 1; 1888 + 1889 + /* 1890 + * Scan through the on-disk inode buffer and attempt to 1891 + * determine if it has been written to since it was logged. 1892 + * 1893 + * - If any of the magic numbers are incorrect then the buffer is stale 1894 + * - If any of the modes are non-zero then the buffer is not stale 1895 + * - If all of the modes are zero and at least one of the generation 1896 + * counts is non-zero then the buffer is stale 1897 + * 1898 + * If the end result is a stale buffer then the log buffer is replayed 1899 + * otherwise it is skipped. 1900 + * 1901 + * This heuristic is not perfect. It can be improved by scanning the 1902 + * entire inode chunk for evidence that any of the inode clusters have 1903 + * been updated. To fix this problem completely we will need a major 1904 + * architectural change to the logging system. 1905 + */ 1906 + if (buf_f->blf_flags & XFS_BLI_INODE_NEW_BUF) { 1907 + xfs_dinode_t *dip; 1908 + int inodes_per_buf; 1909 + int mode_count = 0; 1910 + int gen_count = 0; 1911 + 1912 + stale_buf = 0; 1913 + inodes_per_buf = XFS_BUF_COUNT(bp) >> mp->m_sb.sb_inodelog; 1914 + for (i = 0; i < inodes_per_buf; i++) { 1915 + dip = (xfs_dinode_t *)xfs_buf_offset(bp, 1916 + i * mp->m_sb.sb_inodesize); 1917 + if (be16_to_cpu(dip->di_core.di_magic) != 1918 + XFS_DINODE_MAGIC) { 1919 + stale_buf = 1; 1920 + break; 1921 + } 1922 + if (be16_to_cpu(dip->di_core.di_mode)) 1923 + mode_count++; 1924 + if (be16_to_cpu(dip->di_core.di_gen)) 1925 + gen_count++; 1926 + } 1927 + 1928 + if (!mode_count && gen_count) 1929 + stale_buf = 1; 1930 + } 1888 1931 1889 1932 switch (buf_f->blf_type) { 1890 1933 case XFS_LI_BUF: ··· 1962 1917 -1, 0, XFS_QMOPT_DOWARN, 1963 1918 "dquot_buf_recover"); 1964 1919 } 1965 - if (!error) 1920 + if (!error && stale_buf) 1966 1921 memcpy(xfs_buf_offset(bp, 1967 1922 (uint)bit << XFS_BLI_SHIFT), /* dest */ 1968 1923 item->ri_buf[i].i_addr, /* source */ ··· 2134 2089 if (log->l_quotaoffs_flag & type) 2135 2090 return; 2136 2091 2137 - xlog_recover_do_reg_buffer(item, bp, buf_f); 2092 + xlog_recover_do_reg_buffer(mp, item, bp, buf_f); 2138 2093 } 2139 2094 2140 2095 /* ··· 2235 2190 (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) { 2236 2191 xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f); 2237 2192 } else { 2238 - xlog_recover_do_reg_buffer(item, bp, buf_f); 2193 + xlog_recover_do_reg_buffer(mp, item, bp, buf_f); 2239 2194 } 2240 2195 if (error) 2241 2196 return XFS_ERROR(error);
+28 -44
fs/xfs/xfs_mru_cache.c
··· 206 206 */ 207 207 if (!_xfs_mru_cache_migrate(mru, now)) { 208 208 mru->time_zero = now; 209 - if (!mru->next_reap) 210 - mru->next_reap = mru->grp_count * mru->grp_time; 209 + if (!mru->queued) { 210 + mru->queued = 1; 211 + queue_delayed_work(xfs_mru_reap_wq, &mru->work, 212 + mru->grp_count * mru->grp_time); 213 + } 211 214 } else { 212 215 grp = (now - mru->time_zero) / mru->grp_time; 213 216 grp = (mru->lru_grp + grp) % mru->grp_count; ··· 274 271 struct work_struct *work) 275 272 { 276 273 xfs_mru_cache_t *mru = container_of(work, xfs_mru_cache_t, work.work); 277 - unsigned long now; 274 + unsigned long now, next; 278 275 279 276 ASSERT(mru && mru->lists); 280 277 if (!mru || !mru->lists) 281 278 return; 282 279 283 280 mutex_spinlock(&mru->lock); 284 - now = jiffies; 285 - if (mru->reap_all || 286 - (mru->next_reap && time_after(now, mru->next_reap))) { 287 - if (mru->reap_all) 288 - now += mru->grp_count * mru->grp_time * 2; 289 - mru->next_reap = _xfs_mru_cache_migrate(mru, now); 290 - _xfs_mru_cache_clear_reap_list(mru); 281 + next = _xfs_mru_cache_migrate(mru, jiffies); 282 + _xfs_mru_cache_clear_reap_list(mru); 283 + 284 + mru->queued = next; 285 + if ((mru->queued > 0)) { 286 + now = jiffies; 287 + if (next <= now) 288 + next = 0; 289 + else 290 + next -= now; 291 + queue_delayed_work(xfs_mru_reap_wq, &mru->work, next); 291 292 } 292 293 293 - /* 294 - * the process that triggered the reap_all is responsible 295 - * for restating the periodic reap if it is required. 296 - */ 297 - if (!mru->reap_all) 298 - queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time); 299 - mru->reap_all = 0; 300 294 mutex_spinunlock(&mru->lock, 0); 301 295 } 302 296 ··· 352 352 353 353 /* An extra list is needed to avoid reaping up to a grp_time early. */ 354 354 mru->grp_count = grp_count + 1; 355 - mru->lists = kmem_alloc(mru->grp_count * sizeof(*mru->lists), KM_SLEEP); 355 + mru->lists = kmem_zalloc(mru->grp_count * sizeof(*mru->lists), KM_SLEEP); 356 356 357 357 if (!mru->lists) { 358 358 err = ENOMEM; ··· 374 374 mru->grp_time = grp_time; 375 375 mru->free_func = free_func; 376 376 377 - /* start up the reaper event */ 378 - mru->next_reap = 0; 379 - mru->reap_all = 0; 380 - queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time); 381 - 382 377 *mrup = mru; 383 378 384 379 exit: ··· 389 394 * Call xfs_mru_cache_flush() to flush out all cached entries, calling their 390 395 * free functions as they're deleted. When this function returns, the caller is 391 396 * guaranteed that all the free functions for all the elements have finished 392 - * executing. 393 - * 394 - * While we are flushing, we stop the periodic reaper event from triggering. 395 - * Normally, we want to restart this periodic event, but if we are shutting 396 - * down the cache we do not want it restarted. hence the restart parameter 397 - * where 0 = do not restart reaper and 1 = restart reaper. 397 + * executing and the reaper is not running. 398 398 */ 399 399 void 400 400 xfs_mru_cache_flush( 401 - xfs_mru_cache_t *mru, 402 - int restart) 401 + xfs_mru_cache_t *mru) 403 402 { 404 403 if (!mru || !mru->lists) 405 404 return; 406 405 407 - cancel_rearming_delayed_workqueue(xfs_mru_reap_wq, &mru->work); 408 - 409 406 mutex_spinlock(&mru->lock); 410 - mru->reap_all = 1; 411 - mutex_spinunlock(&mru->lock, 0); 407 + if (mru->queued) { 408 + mutex_spinunlock(&mru->lock, 0); 409 + cancel_rearming_delayed_workqueue(xfs_mru_reap_wq, &mru->work); 410 + mutex_spinlock(&mru->lock); 411 + } 412 412 413 - queue_work(xfs_mru_reap_wq, &mru->work.work); 414 - flush_workqueue(xfs_mru_reap_wq); 413 + _xfs_mru_cache_migrate(mru, jiffies + mru->grp_count * mru->grp_time); 414 + _xfs_mru_cache_clear_reap_list(mru); 415 415 416 - mutex_spinlock(&mru->lock); 417 - WARN_ON_ONCE(mru->reap_all != 0); 418 - mru->reap_all = 0; 419 - if (restart) 420 - queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time); 421 416 mutex_spinunlock(&mru->lock, 0); 422 417 } 423 418 ··· 418 433 if (!mru || !mru->lists) 419 434 return; 420 435 421 - /* we don't want the reaper to restart here */ 422 - xfs_mru_cache_flush(mru, 0); 436 + xfs_mru_cache_flush(mru); 423 437 424 438 kmem_free(mru->lists, mru->grp_count * sizeof(*mru->lists)); 425 439 kmem_free(mru, sizeof(*mru));
+2 -4
fs/xfs/xfs_mru_cache.h
··· 32 32 unsigned int grp_time; /* Time period spanned by grps. */ 33 33 unsigned int lru_grp; /* Group containing time zero. */ 34 34 unsigned long time_zero; /* Time first element was added. */ 35 - unsigned long next_reap; /* Time that the reaper should 36 - next do something. */ 37 - unsigned int reap_all; /* if set, reap all lists */ 38 35 xfs_mru_cache_free_func_t free_func; /* Function pointer for freeing. */ 39 36 struct delayed_work work; /* Workqueue data for reaping. */ 37 + unsigned int queued; /* work has been queued */ 40 38 } xfs_mru_cache_t; 41 39 42 40 int xfs_mru_cache_init(void); ··· 42 44 int xfs_mru_cache_create(struct xfs_mru_cache **mrup, unsigned int lifetime_ms, 43 45 unsigned int grp_count, 44 46 xfs_mru_cache_free_func_t free_func); 45 - void xfs_mru_cache_flush(xfs_mru_cache_t *mru, int restart); 47 + void xfs_mru_cache_flush(xfs_mru_cache_t *mru); 46 48 void xfs_mru_cache_destroy(struct xfs_mru_cache *mru); 47 49 int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key, 48 50 void *value);
+1
fs/xfs/xfs_trans_buf.c
··· 966 966 ASSERT(atomic_read(&bip->bli_refcount) > 0); 967 967 968 968 bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF; 969 + bip->bli_format.blf_flags |= XFS_BLI_INODE_NEW_BUF; 969 970 } 970 971 971 972
+12 -8
fs/xfs/xfs_vnodeops.c
··· 1082 1082 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 1083 1083 return XFS_ERROR(EIO); 1084 1084 1085 + if (flag & FSYNC_DATA) 1086 + filemap_fdatawait(vn_to_inode(XFS_ITOV(ip))->i_mapping); 1087 + 1085 1088 /* 1086 1089 * We always need to make sure that the required inode state 1087 1090 * is safe on disk. The vnode might be clean but because ··· 3772 3769 sync_lsn = log->l_last_sync_lsn; 3773 3770 GRANT_UNLOCK(log, s); 3774 3771 3775 - if ((XFS_LSN_CMP(iip->ili_last_lsn, sync_lsn) <= 0)) 3776 - return 0; 3772 + if ((XFS_LSN_CMP(iip->ili_last_lsn, sync_lsn) > 0)) { 3773 + if (flags & FLUSH_SYNC) 3774 + log_flags |= XFS_LOG_SYNC; 3775 + error = xfs_log_force(mp, iip->ili_last_lsn, log_flags); 3776 + if (error) 3777 + return error; 3778 + } 3777 3779 3778 - if (flags & FLUSH_SYNC) 3779 - log_flags |= XFS_LOG_SYNC; 3780 - return xfs_log_force(mp, iip->ili_last_lsn, log_flags); 3780 + if (ip->i_update_core == 0) 3781 + return 0; 3781 3782 } 3782 3783 } 3783 3784 ··· 3794 3787 */ 3795 3788 if (flags & FLUSH_INODE) { 3796 3789 int flush_flags; 3797 - 3798 - if (xfs_ipincount(ip)) 3799 - return EAGAIN; 3800 3790 3801 3791 if (flags & FLUSH_SYNC) { 3802 3792 xfs_ilock(ip, XFS_ILOCK_SHARED);