Merge tag 'gfs2-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-3.0-fixes

Pull GFS2 fixes from Steven Whitehouse:
"Here is a set of small fixes for GFS2. There is a fix to drop
s_umount which is copied in from the core vfs. Two patches relate to a
hard-to-hit "use after free" and a memory leak. Two patches relate to
using DIO and buffered I/O on the same file, to ensure correct
operation in relation to glock state changes. The final patch adds an
RCU read lock to ensure correct locking on an error path."

* tag 'gfs2-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-3.0-fixes:
GFS2: Fix unsafe dereference in dump_holder()
GFS2: Wait for async DIO in glock state changes
GFS2: Fix incorrect invalidation for DIO/buffered I/O
GFS2: Fix slab memory leak in gfs2_bufdata
GFS2: Fix use-after-free race when calling gfs2_remove_from_ail
GFS2: don't hold s_umount over blkdev_put

Linus Torvalds 12 years ago 152b734a b4796679

+58 -5

6 changed files

expand all

gfs2

aops.c

glock.c

glops.c

log.c

meta_io.c

ops_fstype.c

+30

fs/gfs2/aops.c

··· 986 986 { 987 987 struct file *file = iocb->ki_filp; 988 988 struct inode *inode = file->f_mapping->host; 989 + struct address_space *mapping = inode->i_mapping; 989 990 struct gfs2_inode *ip = GFS2_I(inode); 990 991 struct gfs2_holder gh; 991 992 int rv; ··· 1006 1005 rv = gfs2_ok_for_dio(ip, rw, offset); 1007 1006 if (rv != 1) 1008 1007 goto out; /* dio not valid, fall back to buffered i/o */ 1008 + 1009 + /* 1010 + * Now since we are holding a deferred (CW) lock at this point, you 1011 + * might be wondering why this is ever needed. There is a case however 1012 + * where we've granted a deferred local lock against a cached exclusive 1013 + * glock. That is ok provided all granted local locks are deferred, but 1014 + * it also means that it is possible to encounter pages which are 1015 + * cached and possibly also mapped. So here we check for that and sort 1016 + * them out ahead of the dio. The glock state machine will take care of 1017 + * everything else. 1018 + * 1019 + * If in fact the cached glock state (gl->gl_state) is deferred (CW) in 1020 + * the first place, mapping->nr_pages will always be zero. 1021 + */ 1022 + if (mapping->nrpages) { 1023 + loff_t lstart = offset & (PAGE_CACHE_SIZE - 1); 1024 + loff_t len = iov_length(iov, nr_segs); 1025 + loff_t end = PAGE_ALIGN(offset + len) - 1; 1026 + 1027 + rv = 0; 1028 + if (len == 0) 1029 + goto out; 1030 + if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags)) 1031 + unmap_shared_mapping_range(ip->i_inode.i_mapping, offset, len); 1032 + rv = filemap_write_and_wait_range(mapping, lstart, end); 1033 + if (rv) 1034 + return rv; 1035 + truncate_inode_pages_range(mapping, lstart, end); 1036 + } 1009 1037 1010 1038 rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, 1011 1039 offset, nr_segs, gfs2_get_block_direct,

fs/gfs2/glock.c

··· 1655 1655 struct task_struct *gh_owner = NULL; 1656 1656 char flags_buf[32]; 1657 1657 1658 + rcu_read_lock(); 1658 1659 if (gh->gh_owner_pid) 1659 1660 gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID); 1660 1661 gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %pS\n", ··· 1665 1664 gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1, 1666 1665 gh_owner ? gh_owner->comm : "(ended)", 1667 1666 (void *)gh->gh_ip); 1667 + rcu_read_unlock(); 1668 1668 return 0; 1669 1669 } 1670 1670

+8 -2

fs/gfs2/glops.c

··· 192 192 193 193 if (ip && !S_ISREG(ip->i_inode.i_mode)) 194 194 ip = NULL; 195 - if (ip && test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags)) 196 - unmap_shared_mapping_range(ip->i_inode.i_mapping, 0, 0); 195 + if (ip) { 196 + if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags)) 197 + unmap_shared_mapping_range(ip->i_inode.i_mapping, 0, 0); 198 + inode_dio_wait(&ip->i_inode); 199 + } 197 200 if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) 198 201 return; 199 202 ··· 412 409 if (error) 413 410 return error; 414 411 } 412 + 413 + if (gh->gh_state != LM_ST_DEFERRED) 414 + inode_dio_wait(&ip->i_inode); 415 415 416 416 if ((ip->i_diskflags & GFS2_DIF_TRUNC_IN_PROG) && 417 417 (gl->gl_state == LM_ST_EXCLUSIVE) &&

+2 -2

fs/gfs2/log.c

··· 551 551 struct buffer_head *bh = bd->bd_bh; 552 552 struct gfs2_glock *gl = bd->bd_gl; 553 553 554 - gfs2_remove_from_ail(bd); 555 - bd->bd_bh = NULL; 556 554 bh->b_private = NULL; 557 555 bd->bd_blkno = bh->b_blocknr; 556 + gfs2_remove_from_ail(bd); /* drops ref on bh */ 557 + bd->bd_bh = NULL; 558 558 bd->bd_ops = &gfs2_revoke_lops; 559 559 sdp->sd_log_num_revoke++; 560 560 atomic_inc(&gl->gl_revokes);

fs/gfs2/meta_io.c

··· 258 258 struct address_space *mapping = bh->b_page->mapping; 259 259 struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping); 260 260 struct gfs2_bufdata *bd = bh->b_private; 261 + int was_pinned = 0; 261 262 262 263 if (test_clear_buffer_pinned(bh)) { 263 264 trace_gfs2_pin(bd, 0); ··· 274 273 tr->tr_num_databuf_rm++; 275 274 } 276 275 tr->tr_touched = 1; 276 + was_pinned = 1; 277 277 brelse(bh); 278 278 } 279 279 if (bd) { 280 280 spin_lock(&sdp->sd_ail_lock); 281 281 if (bd->bd_tr) { 282 282 gfs2_trans_add_revoke(sdp, bd); 283 + } else if (was_pinned) { 284 + bh->b_private = NULL; 285 + kmem_cache_free(gfs2_bufdata_cachep, bd); 283 286 } 284 287 spin_unlock(&sdp->sd_ail_lock); 285 288 }

+11 -1

fs/gfs2/ops_fstype.c

··· 1366 1366 if (IS_ERR(s)) 1367 1367 goto error_bdev; 1368 1368 1369 - if (s->s_root) 1369 + if (s->s_root) { 1370 + /* 1371 + * s_umount nests inside bd_mutex during 1372 + * __invalidate_device(). blkdev_put() acquires 1373 + * bd_mutex and can't be called under s_umount. Drop 1374 + * s_umount temporarily. This is safe as we're 1375 + * holding an active reference. 1376 + */ 1377 + up_write(&s->s_umount); 1370 1378 blkdev_put(bdev, mode); 1379 + down_write(&s->s_umount); 1380 + } 1371 1381 1372 1382 memset(&args, 0, sizeof(args)); 1373 1383 args.ar_quota = GFS2_QUOTA_DEFAULT;

Configure Feed

Configure Feed