Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'xfs-pnfs-for-linus-3.20-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs

Pull xfs pnfs block layout support from Dave Chinner:
"This contains the changes to XFS needed to support the PNFS block
layout server that you pulled in through Bruce's NFS server tree
merge.

I originally thought that I'd need to merge changes into the NFS
server side, but Bruce had already picked them up and so this is
purely changes to the fs/xfs/ codebase.

Summary:

This update contains the implementation of the PNFS server export
methods that enable use of XFS filesystems as a block layout target"

* tag 'xfs-pnfs-for-linus-3.20-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs:
xfs: recall pNFS layouts on conflicting access
xfs: implement pNFS export operations

+393 -8
+1
fs/xfs/Makefile
··· 121 121 xfs-$(CONFIG_PROC_FS) += xfs_stats.o 122 122 xfs-$(CONFIG_SYSCTL) += xfs_sysctl.o 123 123 xfs-$(CONFIG_COMPAT) += xfs_ioctl32.o 124 + xfs-$(CONFIG_NFSD_PNFS) += xfs_pnfs.o
+6
fs/xfs/xfs_export.c
··· 30 30 #include "xfs_trace.h" 31 31 #include "xfs_icache.h" 32 32 #include "xfs_log.h" 33 + #include "xfs_pnfs.h" 33 34 34 35 /* 35 36 * Note that we only accept fileids which are long enough rather than allow ··· 246 245 .fh_to_parent = xfs_fs_fh_to_parent, 247 246 .get_parent = xfs_fs_get_parent, 248 247 .commit_metadata = xfs_fs_nfs_commit_metadata, 248 + #ifdef CONFIG_NFSD_PNFS 249 + .get_uuid = xfs_fs_get_uuid, 250 + .map_blocks = xfs_fs_map_blocks, 251 + .commit_blocks = xfs_fs_commit_blocks, 252 + #endif 249 253 };
+12 -2
fs/xfs/xfs_file.c
··· 36 36 #include "xfs_trace.h" 37 37 #include "xfs_log.h" 38 38 #include "xfs_icache.h" 39 + #include "xfs_pnfs.h" 39 40 40 41 #include <linux/aio.h> 41 42 #include <linux/dcache.h> ··· 555 554 if (error) 556 555 return error; 557 556 557 + error = xfs_break_layouts(inode, iolock); 558 + if (error) 559 + return error; 560 + 558 561 /* 559 562 * If the offset is beyond the size of the file, we need to zero any 560 563 * blocks that fall between the existing EOF and the start of this ··· 827 822 struct xfs_inode *ip = XFS_I(inode); 828 823 long error; 829 824 enum xfs_prealloc_flags flags = 0; 825 + uint iolock = XFS_IOLOCK_EXCL; 830 826 loff_t new_size = 0; 831 827 832 828 if (!S_ISREG(inode->i_mode)) ··· 836 830 FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE)) 837 831 return -EOPNOTSUPP; 838 832 839 - xfs_ilock(ip, XFS_IOLOCK_EXCL); 833 + xfs_ilock(ip, iolock); 834 + error = xfs_break_layouts(inode, &iolock); 835 + if (error) 836 + goto out_unlock; 837 + 840 838 if (mode & FALLOC_FL_PUNCH_HOLE) { 841 839 error = xfs_free_file_space(ip, offset, len); 842 840 if (error) ··· 904 894 } 905 895 906 896 out_unlock: 907 - xfs_iunlock(ip, XFS_IOLOCK_EXCL); 897 + xfs_iunlock(ip, iolock); 908 898 return error; 909 899 } 910 900
+6
fs/xfs/xfs_fsops.c
··· 602 602 if (!mutex_trylock(&mp->m_growlock)) 603 603 return -EWOULDBLOCK; 604 604 error = xfs_growfs_data_private(mp, in); 605 + /* 606 + * Increment the generation unconditionally, the error could be from 607 + * updating the secondary superblocks, in which case the new size 608 + * is live already. 609 + */ 610 + mp->m_generation++; 605 611 mutex_unlock(&mp->m_growlock); 606 612 return error; 607 613 }
+7 -2
fs/xfs/xfs_ioctl.c
··· 39 39 #include "xfs_icache.h" 40 40 #include "xfs_symlink.h" 41 41 #include "xfs_trans.h" 42 + #include "xfs_pnfs.h" 42 43 43 44 #include <linux/capability.h> 44 45 #include <linux/dcache.h> ··· 609 608 { 610 609 struct iattr iattr; 611 610 enum xfs_prealloc_flags flags = 0; 611 + uint iolock = XFS_IOLOCK_EXCL; 612 612 int error; 613 613 614 614 /* ··· 638 636 if (error) 639 637 return error; 640 638 641 - xfs_ilock(ip, XFS_IOLOCK_EXCL); 639 + xfs_ilock(ip, iolock); 640 + error = xfs_break_layouts(inode, &iolock); 641 + if (error) 642 + goto out_unlock; 642 643 643 644 switch (bf->l_whence) { 644 645 case 0: /*SEEK_SET*/ ··· 730 725 error = xfs_update_prealloc_flags(ip, flags); 731 726 732 727 out_unlock: 733 - xfs_iunlock(ip, XFS_IOLOCK_EXCL); 728 + xfs_iunlock(ip, iolock); 734 729 mnt_drop_write_file(filp); 735 730 return error; 736 731 }
+9 -4
fs/xfs/xfs_iops.c
··· 37 37 #include "xfs_da_btree.h" 38 38 #include "xfs_dir2.h" 39 39 #include "xfs_trans_space.h" 40 + #include "xfs_pnfs.h" 40 41 41 42 #include <linux/capability.h> 42 43 #include <linux/xattr.h> ··· 506 505 inode->i_mode |= mode & ~S_IFMT; 507 506 } 508 507 509 - static void 508 + void 510 509 xfs_setattr_time( 511 510 struct xfs_inode *ip, 512 511 struct iattr *iattr) ··· 980 979 int error; 981 980 982 981 if (iattr->ia_valid & ATTR_SIZE) { 983 - xfs_ilock(ip, XFS_IOLOCK_EXCL); 984 - error = xfs_setattr_size(ip, iattr); 985 - xfs_iunlock(ip, XFS_IOLOCK_EXCL); 982 + uint iolock = XFS_IOLOCK_EXCL; 983 + 984 + xfs_ilock(ip, iolock); 985 + error = xfs_break_layouts(dentry->d_inode, &iolock); 986 + if (!error) 987 + error = xfs_setattr_size(ip, iattr); 988 + xfs_iunlock(ip, iolock); 986 989 } else { 987 990 error = xfs_setattr_nonsize(ip, iattr, 0); 988 991 }
+1
fs/xfs/xfs_iops.h
··· 32 32 */ 33 33 #define XFS_ATTR_NOACL 0x01 /* Don't call posix_acl_chmod */ 34 34 35 + extern void xfs_setattr_time(struct xfs_inode *ip, struct iattr *iattr); 35 36 extern int xfs_setattr_nonsize(struct xfs_inode *ip, struct iattr *vap, 36 37 int flags); 37 38 extern int xfs_setattr_size(struct xfs_inode *ip, struct iattr *vap);
+11
fs/xfs/xfs_mount.h
··· 174 174 struct workqueue_struct *m_reclaim_workqueue; 175 175 struct workqueue_struct *m_log_workqueue; 176 176 struct workqueue_struct *m_eofblocks_workqueue; 177 + 178 + /* 179 + * Generation of the filesystem layout. This is incremented by each 180 + * growfs, and used by the pNFS server to ensure the client updates 181 + * its view of the block device once it gets a layout that might 182 + * reference the newly added blocks. Does not need to be persistent 183 + * as long as we only allow file system size increments, but if we 184 + * ever support shrinks it would have to be persisted in addition 185 + * to various other kinds of pain inflicted on the pNFS server. 186 + */ 187 + __uint32_t m_generation; 177 188 } xfs_mount_t; 178 189 179 190 /*
+322
fs/xfs/xfs_pnfs.c
··· 1 + /* 2 + * Copyright (c) 2014 Christoph Hellwig. 3 + */ 4 + #include "xfs.h" 5 + #include "xfs_format.h" 6 + #include "xfs_log_format.h" 7 + #include "xfs_trans_resv.h" 8 + #include "xfs_sb.h" 9 + #include "xfs_mount.h" 10 + #include "xfs_inode.h" 11 + #include "xfs_trans.h" 12 + #include "xfs_log.h" 13 + #include "xfs_bmap.h" 14 + #include "xfs_bmap_util.h" 15 + #include "xfs_error.h" 16 + #include "xfs_iomap.h" 17 + #include "xfs_shared.h" 18 + #include "xfs_bit.h" 19 + #include "xfs_pnfs.h" 20 + 21 + /* 22 + * Ensure that we do not have any outstanding pNFS layouts that can be used by 23 + * clients to directly read from or write to this inode. This must be called 24 + * before every operation that can remove blocks from the extent map. 25 + * Additionally we call it during the write operation, where aren't concerned 26 + * about exposing unallocated blocks but just want to provide basic 27 + * synchronization between a local writer and pNFS clients. mmap writes would 28 + * also benefit from this sort of synchronization, but due to the tricky locking 29 + * rules in the page fault path we don't bother. 30 + */ 31 + int 32 + xfs_break_layouts( 33 + struct inode *inode, 34 + uint *iolock) 35 + { 36 + struct xfs_inode *ip = XFS_I(inode); 37 + int error; 38 + 39 + ASSERT(xfs_isilocked(ip, XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)); 40 + 41 + while ((error = break_layout(inode, false) == -EWOULDBLOCK)) { 42 + xfs_iunlock(ip, *iolock); 43 + error = break_layout(inode, true); 44 + *iolock = XFS_IOLOCK_EXCL; 45 + xfs_ilock(ip, *iolock); 46 + } 47 + 48 + return error; 49 + } 50 + 51 + /* 52 + * Get a unique ID including its location so that the client can identify 53 + * the exported device. 
54 + */ 55 + int 56 + xfs_fs_get_uuid( 57 + struct super_block *sb, 58 + u8 *buf, 59 + u32 *len, 60 + u64 *offset) 61 + { 62 + struct xfs_mount *mp = XFS_M(sb); 63 + 64 + printk_once(KERN_NOTICE 65 + "XFS (%s): using experimental pNFS feature, use at your own risk!\n", 66 + mp->m_fsname); 67 + 68 + if (*len < sizeof(uuid_t)) 69 + return -EINVAL; 70 + 71 + memcpy(buf, &mp->m_sb.sb_uuid, sizeof(uuid_t)); 72 + *len = sizeof(uuid_t); 73 + *offset = offsetof(struct xfs_dsb, sb_uuid); 74 + return 0; 75 + } 76 + 77 + static void 78 + xfs_bmbt_to_iomap( 79 + struct xfs_inode *ip, 80 + struct iomap *iomap, 81 + struct xfs_bmbt_irec *imap) 82 + { 83 + struct xfs_mount *mp = ip->i_mount; 84 + 85 + if (imap->br_startblock == HOLESTARTBLOCK) { 86 + iomap->blkno = IOMAP_NULL_BLOCK; 87 + iomap->type = IOMAP_HOLE; 88 + } else if (imap->br_startblock == DELAYSTARTBLOCK) { 89 + iomap->blkno = IOMAP_NULL_BLOCK; 90 + iomap->type = IOMAP_DELALLOC; 91 + } else { 92 + iomap->blkno = 93 + XFS_FSB_TO_DADDR(ip->i_mount, imap->br_startblock); 94 + if (imap->br_state == XFS_EXT_UNWRITTEN) 95 + iomap->type = IOMAP_UNWRITTEN; 96 + else 97 + iomap->type = IOMAP_MAPPED; 98 + } 99 + iomap->offset = XFS_FSB_TO_B(mp, imap->br_startoff); 100 + iomap->length = XFS_FSB_TO_B(mp, imap->br_blockcount); 101 + } 102 + 103 + /* 104 + * Get a layout for the pNFS client. 105 + */ 106 + int 107 + xfs_fs_map_blocks( 108 + struct inode *inode, 109 + loff_t offset, 110 + u64 length, 111 + struct iomap *iomap, 112 + bool write, 113 + u32 *device_generation) 114 + { 115 + struct xfs_inode *ip = XFS_I(inode); 116 + struct xfs_mount *mp = ip->i_mount; 117 + struct xfs_bmbt_irec imap; 118 + xfs_fileoff_t offset_fsb, end_fsb; 119 + loff_t limit; 120 + int bmapi_flags = XFS_BMAPI_ENTIRE; 121 + int nimaps = 1; 122 + uint lock_flags; 123 + int error = 0; 124 + 125 + if (XFS_FORCED_SHUTDOWN(mp)) 126 + return -EIO; 127 + 128 + /* 129 + * We can't export inodes residing on the realtime device. 
The realtime 130 + * device doesn't have a UUID to identify it, so the client has no way 131 + * to find it. 132 + */ 133 + if (XFS_IS_REALTIME_INODE(ip)) 134 + return -ENXIO; 135 + 136 + /* 137 + * Lock out any other I/O before we flush and invalidate the pagecache, 138 + * and then hand out a layout to the remote system. This is very 139 + * similar to direct I/O, except that the synchronization is much more 140 + * complicated. See the comment near xfs_break_layouts for a detailed 141 + * explanation. 142 + */ 143 + xfs_ilock(ip, XFS_IOLOCK_EXCL); 144 + 145 + error = -EINVAL; 146 + limit = mp->m_super->s_maxbytes; 147 + if (!write) 148 + limit = max(limit, round_up(i_size_read(inode), 149 + inode->i_sb->s_blocksize)); 150 + if (offset > limit) 151 + goto out_unlock; 152 + if (offset > limit - length) 153 + length = limit - offset; 154 + 155 + error = filemap_write_and_wait(inode->i_mapping); 156 + if (error) 157 + goto out_unlock; 158 + error = invalidate_inode_pages2(inode->i_mapping); 159 + if (WARN_ON_ONCE(error)) 160 + return error; 161 + 162 + end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + length); 163 + offset_fsb = XFS_B_TO_FSBT(mp, offset); 164 + 165 + lock_flags = xfs_ilock_data_map_shared(ip); 166 + error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, 167 + &imap, &nimaps, bmapi_flags); 168 + xfs_iunlock(ip, lock_flags); 169 + 170 + if (error) 171 + goto out_unlock; 172 + 173 + if (write) { 174 + enum xfs_prealloc_flags flags = 0; 175 + 176 + ASSERT(imap.br_startblock != DELAYSTARTBLOCK); 177 + 178 + if (!nimaps || imap.br_startblock == HOLESTARTBLOCK) { 179 + error = xfs_iomap_write_direct(ip, offset, length, 180 + &imap, nimaps); 181 + if (error) 182 + goto out_unlock; 183 + 184 + /* 185 + * Ensure the next transaction is committed 186 + * synchronously so that the blocks allocated and 187 + * handed out to the client are guaranteed to be 188 + * present even after a server crash. 
189 + */ 190 + flags |= XFS_PREALLOC_SET | XFS_PREALLOC_SYNC; 191 + } 192 + 193 + error = xfs_update_prealloc_flags(ip, flags); 194 + if (error) 195 + goto out_unlock; 196 + } 197 + xfs_iunlock(ip, XFS_IOLOCK_EXCL); 198 + 199 + xfs_bmbt_to_iomap(ip, iomap, &imap); 200 + *device_generation = mp->m_generation; 201 + return error; 202 + out_unlock: 203 + xfs_iunlock(ip, XFS_IOLOCK_EXCL); 204 + return error; 205 + } 206 + 207 + /* 208 + * Ensure the size update falls into a valid allocated block. 209 + */ 210 + static int 211 + xfs_pnfs_validate_isize( 212 + struct xfs_inode *ip, 213 + xfs_off_t isize) 214 + { 215 + struct xfs_bmbt_irec imap; 216 + int nimaps = 1; 217 + int error = 0; 218 + 219 + xfs_ilock(ip, XFS_ILOCK_SHARED); 220 + error = xfs_bmapi_read(ip, XFS_B_TO_FSBT(ip->i_mount, isize - 1), 1, 221 + &imap, &nimaps, 0); 222 + xfs_iunlock(ip, XFS_ILOCK_SHARED); 223 + if (error) 224 + return error; 225 + 226 + if (imap.br_startblock == HOLESTARTBLOCK || 227 + imap.br_startblock == DELAYSTARTBLOCK || 228 + imap.br_state == XFS_EXT_UNWRITTEN) 229 + return -EIO; 230 + return 0; 231 + } 232 + 233 + /* 234 + * Make sure the blocks described by maps are stable on disk. This includes 235 + * converting any unwritten extents, flushing the disk cache and updating the 236 + * time stamps. 237 + * 238 + * Note that we rely on the caller to always send us a timestamp update so that 239 + * we always commit a transaction here. If that stops being true we will have 240 + * to manually flush the cache here similar to what the fsync code path does 241 + * for datasyncs on files that have no dirty metadata. 
242 + */ 243 + int 244 + xfs_fs_commit_blocks( 245 + struct inode *inode, 246 + struct iomap *maps, 247 + int nr_maps, 248 + struct iattr *iattr) 249 + { 250 + struct xfs_inode *ip = XFS_I(inode); 251 + struct xfs_mount *mp = ip->i_mount; 252 + struct xfs_trans *tp; 253 + bool update_isize = false; 254 + int error, i; 255 + loff_t size; 256 + 257 + ASSERT(iattr->ia_valid & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME)); 258 + 259 + xfs_ilock(ip, XFS_IOLOCK_EXCL); 260 + 261 + size = i_size_read(inode); 262 + if ((iattr->ia_valid & ATTR_SIZE) && iattr->ia_size > size) { 263 + update_isize = true; 264 + size = iattr->ia_size; 265 + } 266 + 267 + for (i = 0; i < nr_maps; i++) { 268 + u64 start, length, end; 269 + 270 + start = maps[i].offset; 271 + if (start > size) 272 + continue; 273 + 274 + end = start + maps[i].length; 275 + if (end > size) 276 + end = size; 277 + 278 + length = end - start; 279 + if (!length) 280 + continue; 281 + 282 + /* 283 + * Make sure reads through the pagecache see the new data. 
284 + */ 285 + error = invalidate_inode_pages2_range(inode->i_mapping, 286 + start >> PAGE_CACHE_SHIFT, 287 + (end - 1) >> PAGE_CACHE_SHIFT); 288 + WARN_ON_ONCE(error); 289 + 290 + error = xfs_iomap_write_unwritten(ip, start, length); 291 + if (error) 292 + goto out_drop_iolock; 293 + } 294 + 295 + if (update_isize) { 296 + error = xfs_pnfs_validate_isize(ip, size); 297 + if (error) 298 + goto out_drop_iolock; 299 + } 300 + 301 + tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); 302 + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0); 303 + if (error) 304 + goto out_drop_iolock; 305 + 306 + xfs_ilock(ip, XFS_ILOCK_EXCL); 307 + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 308 + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 309 + 310 + xfs_setattr_time(ip, iattr); 311 + if (update_isize) { 312 + i_size_write(inode, iattr->ia_size); 313 + ip->i_d.di_size = iattr->ia_size; 314 + } 315 + 316 + xfs_trans_set_sync(tp); 317 + error = xfs_trans_commit(tp, 0); 318 + 319 + out_drop_iolock: 320 + xfs_iunlock(ip, XFS_IOLOCK_EXCL); 321 + return error; 322 + }
+18
fs/xfs/xfs_pnfs.h
··· 1 + #ifndef _XFS_PNFS_H 2 + #define _XFS_PNFS_H 1 3 + 4 + #ifdef CONFIG_NFSD_PNFS 5 + int xfs_fs_get_uuid(struct super_block *sb, u8 *buf, u32 *len, u64 *offset); 6 + int xfs_fs_map_blocks(struct inode *inode, loff_t offset, u64 length, 7 + struct iomap *iomap, bool write, u32 *device_generation); 8 + int xfs_fs_commit_blocks(struct inode *inode, struct iomap *maps, int nr_maps, 9 + struct iattr *iattr); 10 + 11 + int xfs_break_layouts(struct inode *inode, uint *iolock); 12 + #else 13 + static inline int xfs_break_layouts(struct inode *inode, uint *iolock) 14 + { 15 + return 0; 16 + } 17 + #endif /* CONFIG_NFSD_PNFS */ 18 + #endif /* _XFS_PNFS_H */