Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-2.6-fixes

* git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-2.6-fixes:
GFS2: Fix inode deallocation race

+103 -221
+4 -42
fs/gfs2/export.c
··· 138 138 struct gfs2_inum_host *inum) 139 139 { 140 140 struct gfs2_sbd *sdp = sb->s_fs_info; 141 - struct gfs2_holder i_gh; 142 141 struct inode *inode; 143 142 struct dentry *dentry; 144 - int error; 145 143 146 144 inode = gfs2_ilookup(sb, inum->no_addr); 147 145 if (inode) { ··· 150 152 goto out_inode; 151 153 } 152 154 153 - error = gfs2_glock_nq_num(sdp, inum->no_addr, &gfs2_inode_glops, 154 - LM_ST_SHARED, LM_FLAG_ANY, &i_gh); 155 - if (error) 156 - return ERR_PTR(error); 157 - 158 - error = gfs2_check_blk_type(sdp, inum->no_addr, GFS2_BLKST_DINODE); 159 - if (error) 160 - goto fail; 161 - 162 - inode = gfs2_inode_lookup(sb, DT_UNKNOWN, inum->no_addr, 0); 163 - if (IS_ERR(inode)) { 164 - error = PTR_ERR(inode); 165 - goto fail; 166 - } 167 - 168 - error = gfs2_inode_refresh(GFS2_I(inode)); 169 - if (error) { 170 - iput(inode); 171 - goto fail; 172 - } 173 - 174 - /* Pick up the works we bypass in gfs2_inode_lookup */ 175 - if (inode->i_state & I_NEW) 176 - gfs2_set_iop(inode); 177 - 178 - if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) { 179 - iput(inode); 180 - goto fail; 181 - } 182 - 183 - error = -EIO; 184 - if (GFS2_I(inode)->i_diskflags & GFS2_DIF_SYSTEM) { 185 - iput(inode); 186 - goto fail; 187 - } 188 - 189 - gfs2_glock_dq_uninit(&i_gh); 155 + inode = gfs2_lookup_by_inum(sdp, inum->no_addr, &inum->no_formal_ino, 156 + GFS2_BLKST_DINODE); 157 + if (IS_ERR(inode)) 158 + return ERR_CAST(inode); 190 159 191 160 out_inode: 192 161 dentry = d_obtain_alias(inode); 193 162 if (!IS_ERR(dentry)) 194 163 dentry->d_op = &gfs2_dops; 195 164 return dentry; 196 - fail: 197 - gfs2_glock_dq_uninit(&i_gh); 198 - return ERR_PTR(error); 199 165 } 200 166 201 167 static struct dentry *gfs2_fh_to_dentry(struct super_block *sb, struct fid *fid,
+10 -11
fs/gfs2/glock.c
··· 686 686 { 687 687 struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_delete); 688 688 struct gfs2_sbd *sdp = gl->gl_sbd; 689 - struct gfs2_inode *ip = NULL; 689 + struct gfs2_inode *ip; 690 690 struct inode *inode; 691 - u64 no_addr = 0; 691 + u64 no_addr = gl->gl_name.ln_number; 692 692 693 - spin_lock(&gl->gl_spin); 694 - ip = (struct gfs2_inode *)gl->gl_object; 693 + ip = gl->gl_object; 694 + /* Note: Unsafe to dereference ip as we don't hold right refs/locks */ 695 + 695 696 if (ip) 696 - no_addr = ip->i_no_addr; 697 - spin_unlock(&gl->gl_spin); 698 - if (ip) { 699 697 inode = gfs2_ilookup(sdp->sd_vfs, no_addr); 700 - if (inode) { 701 - d_prune_aliases(inode); 702 - iput(inode); 703 - } 698 + else 699 + inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED); 700 + if (inode && !IS_ERR(inode)) { 701 + d_prune_aliases(inode); 702 + iput(inode); 704 703 } 705 704 gfs2_glock_put(gl); 706 705 }
+40 -122
fs/gfs2/inode.c
··· 73 73 return iget5_locked(sb, hash, iget_test, iget_set, &no_addr); 74 74 } 75 75 76 - struct gfs2_skip_data { 77 - u64 no_addr; 78 - int skipped; 79 - }; 80 - 81 - static int iget_skip_test(struct inode *inode, void *opaque) 82 - { 83 - struct gfs2_inode *ip = GFS2_I(inode); 84 - struct gfs2_skip_data *data = opaque; 85 - 86 - if (ip->i_no_addr == data->no_addr) { 87 - if (inode->i_state & (I_FREEING|I_WILL_FREE)){ 88 - data->skipped = 1; 89 - return 0; 90 - } 91 - return 1; 92 - } 93 - return 0; 94 - } 95 - 96 - static int iget_skip_set(struct inode *inode, void *opaque) 97 - { 98 - struct gfs2_inode *ip = GFS2_I(inode); 99 - struct gfs2_skip_data *data = opaque; 100 - 101 - if (data->skipped) 102 - return 1; 103 - inode->i_ino = (unsigned long)(data->no_addr); 104 - ip->i_no_addr = data->no_addr; 105 - return 0; 106 - } 107 - 108 - static struct inode *gfs2_iget_skip(struct super_block *sb, 109 - u64 no_addr) 110 - { 111 - struct gfs2_skip_data data; 112 - unsigned long hash = (unsigned long)no_addr; 113 - 114 - data.no_addr = no_addr; 115 - data.skipped = 0; 116 - return iget5_locked(sb, hash, iget_skip_test, iget_skip_set, &data); 117 - } 118 - 119 76 /** 120 77 * GFS2 lookup code fills in vfs inode contents based on info obtained 121 78 * from directory entry inside gfs2_inode_lookup(). This has caused issues ··· 200 243 return ERR_PTR(error); 201 244 } 202 245 203 - /** 204 - * gfs2_process_unlinked_inode - Lookup an unlinked inode for reclamation 205 - * and try to reclaim it by doing iput. 206 - * 207 - * This function assumes no rgrp locks are currently held. 208 - * 209 - * @sb: The super block 210 - * no_addr: The inode number 211 - * 212 - */ 213 - 214 - void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr) 246 + struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, 247 + u64 *no_formal_ino, unsigned int blktype) 215 248 { 216 - struct gfs2_sbd *sdp; 217 - struct gfs2_inode *ip; 218 - struct gfs2_glock *io_gl = NULL; 219 - int error; 220 - struct gfs2_holder gh; 249 + struct super_block *sb = sdp->sd_vfs; 250 + struct gfs2_holder i_gh; 221 251 struct inode *inode; 252 + int error; 222 253 223 - inode = gfs2_iget_skip(sb, no_addr); 254 + error = gfs2_glock_nq_num(sdp, no_addr, &gfs2_inode_glops, 255 + LM_ST_SHARED, LM_FLAG_ANY, &i_gh); 256 + if (error) 257 + return ERR_PTR(error); 224 258 225 - if (!inode) 226 - return; 259 + error = gfs2_check_blk_type(sdp, no_addr, blktype); 260 + if (error) 261 + goto fail; 227 262 228 - /* If it's not a new inode, someone's using it, so leave it alone. */ 229 - if (!(inode->i_state & I_NEW)) { 230 - iput(inode); 231 - return; 263 + inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0); 264 + if (IS_ERR(inode)) 265 + goto fail; 266 + 267 + error = gfs2_inode_refresh(GFS2_I(inode)); 268 + if (error) 269 + goto fail_iput; 270 + 271 + /* Pick up the works we bypass in gfs2_inode_lookup */ 272 + if (inode->i_state & I_NEW) 273 + gfs2_set_iop(inode); 274 + 275 + /* Two extra checks for NFS only */ 276 + if (no_formal_ino) { 277 + error = -ESTALE; 278 + if (GFS2_I(inode)->i_no_formal_ino != *no_formal_ino) 279 + goto fail_iput; 280 + 281 + error = -EIO; 282 + if (GFS2_I(inode)->i_diskflags & GFS2_DIF_SYSTEM) 283 + goto fail_iput; 284 + 285 + error = 0; 232 286 } 233 287 234 - ip = GFS2_I(inode); 235 - sdp = GFS2_SB(inode); 236 - ip->i_no_formal_ino = -1; 237 - 238 - error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl); 239 - if (unlikely(error)) 240 - goto fail; 241 - ip->i_gl->gl_object = ip; 242 - 243 - error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl); 244 - if (unlikely(error)) 245 - goto fail_put; 246 - 247 - set_bit(GIF_INVALID, &ip->i_flags); 248 - error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, LM_FLAG_TRY | GL_EXACT, 249 - &ip->i_iopen_gh); 250 - if (unlikely(error)) 251 - goto fail_iopen; 252 - 253 - ip->i_iopen_gh.gh_gl->gl_object = ip; 254 - gfs2_glock_put(io_gl); 255 - io_gl = NULL; 256 - 257 - inode->i_mode = DT2IF(DT_UNKNOWN); 258 - 259 - /* 260 - * We must read the inode in order to work out its type in 261 - * this case. Note that this doesn't happen often as we normally 262 - * know the type beforehand. This code path only occurs during 263 - * unlinked inode recovery (where it is safe to do this glock, 264 - * which is not true in the general case). 265 - */ 266 - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, LM_FLAG_TRY, 267 - &gh); 268 - if (unlikely(error)) 269 - goto fail_glock; 270 - 271 - /* Inode is now uptodate */ 272 - gfs2_glock_dq_uninit(&gh); 273 - gfs2_set_iop(inode); 274 - 275 - /* The iput will cause it to be deleted. */ 276 - iput(inode); 277 - return; 278 - 279 - fail_glock: 280 - gfs2_glock_dq(&ip->i_iopen_gh); 281 - fail_iopen: 282 - if (io_gl) 283 - gfs2_glock_put(io_gl); 284 - fail_put: 285 - ip->i_gl->gl_object = NULL; 286 - gfs2_glock_put(ip->i_gl); 287 288 fail: 288 - iget_failed(inode); 289 - return; 289 + gfs2_glock_dq_uninit(&i_gh); 290 + return error ? ERR_PTR(error) : inode; 291 + fail_iput: 292 + iput(inode); 293 + goto fail; 290 294 } 291 295 292 296 static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
+3 -1
fs/gfs2/inode.h
··· 99 99 extern void gfs2_set_iop(struct inode *inode); 100 100 extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, 101 101 u64 no_addr, u64 no_formal_ino); 102 - extern void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr); 102 + extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, 103 + u64 *no_formal_ino, 104 + unsigned int blktype); 103 105 extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr); 104 106 105 107 extern int gfs2_inode_refresh(struct gfs2_inode *ip);
+46 -45
fs/gfs2/rgrp.c
··· 963 963 * The inode, if one has been found, in inode. 964 964 */ 965 965 966 - static u64 try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, 967 - u64 skip) 966 + static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip) 968 967 { 969 968 u32 goal = 0, block; 970 969 u64 no_addr; 971 970 struct gfs2_sbd *sdp = rgd->rd_sbd; 972 971 unsigned int n; 972 + struct gfs2_glock *gl; 973 + struct gfs2_inode *ip; 974 + int error; 975 + int found = 0; 973 976 974 - for(;;) { 975 - if (goal >= rgd->rd_data) 976 - break; 977 + while (goal < rgd->rd_data) { 977 978 down_write(&sdp->sd_log_flush_lock); 978 979 n = 1; 979 980 block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED, ··· 991 990 if (no_addr == skip) 992 991 continue; 993 992 *last_unlinked = no_addr; 994 - return no_addr; 993 + 994 + error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &gl); 995 + if (error) 996 + continue; 997 + 998 + /* If the inode is already in cache, we can ignore it here 999 + * because the existing inode disposal code will deal with 1000 + * it when all refs have gone away. Accessing gl_object like 1001 + * this is not safe in general. Here it is ok because we do 1002 + * not dereference the pointer, and we only need an approx 1003 + * answer to whether it is NULL or not. 1004 + */ 1005 + ip = gl->gl_object; 1006 + 1007 + if (ip || queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0) 1008 + gfs2_glock_put(gl); 1009 + else 1010 + found++; 1011 + 1012 + /* Limit reclaim to sensible number of tasks */ 1013 + if (found > 2*NR_CPUS) 1014 + return; 995 1015 } 996 1016 997 1017 rgd->rd_flags &= ~GFS2_RDF_CHECK; 998 - return 0; 1018 + return; 999 1019 } 1000 1020 1001 1021 /** ··· 1097 1075 * Try to acquire rgrp in way which avoids contending with others. 1098 1076 * 1099 1077 * Returns: errno 1100 - * unlinked: the block address of an unlinked block to be reclaimed 1101 1078 */ 1102 1079 1103 - static int get_local_rgrp(struct gfs2_inode *ip, u64 *unlinked, 1104 - u64 *last_unlinked) 1080 + static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) 1105 1081 { 1106 1082 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1107 1083 struct gfs2_rgrpd *rgd, *begin = NULL; ··· 1109 1089 int loops = 0; 1110 1090 int error, rg_locked; 1111 1091 1112 - *unlinked = 0; 1113 1092 rgd = gfs2_blk2rgrpd(sdp, ip->i_goal); 1114 1093 1115 1094 while (rgd) { ··· 1125 1106 case 0: 1126 1107 if (try_rgrp_fit(rgd, al)) 1127 1108 goto out; 1128 - /* If the rg came in already locked, there's no 1129 - way we can recover from a failed try_rgrp_unlink 1130 - because that would require an iput which can only 1131 - happen after the rgrp is unlocked. */ 1132 - if (!rg_locked && rgd->rd_flags & GFS2_RDF_CHECK) 1133 - *unlinked = try_rgrp_unlink(rgd, last_unlinked, 1134 - ip->i_no_addr); 1109 + if (rgd->rd_flags & GFS2_RDF_CHECK) 1110 + try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr); 1135 1111 if (!rg_locked) 1136 1112 gfs2_glock_dq_uninit(&al->al_rgd_gh); 1137 - if (*unlinked) 1138 - return -EAGAIN; 1139 1113 /* fall through */ 1140 1114 case GLR_TRYFAILED: 1141 1115 rgd = recent_rgrp_next(rgd); ··· 1157 1145 case 0: 1158 1146 if (try_rgrp_fit(rgd, al)) 1159 1147 goto out; 1160 - if (!rg_locked && rgd->rd_flags & GFS2_RDF_CHECK) 1161 - *unlinked = try_rgrp_unlink(rgd, last_unlinked, 1162 - ip->i_no_addr); 1148 + if (rgd->rd_flags & GFS2_RDF_CHECK) 1149 + try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr); 1163 1150 if (!rg_locked) 1164 1151 gfs2_glock_dq_uninit(&al->al_rgd_gh); 1165 - if (*unlinked) 1166 - return -EAGAIN; 1167 1152 break; 1168 1153 1169 1154 case GLR_TRYFAILED: ··· 1213 1204 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1214 1205 struct gfs2_alloc *al = ip->i_alloc; 1215 1206 int error = 0; 1216 - u64 last_unlinked = NO_BLOCK, unlinked; 1207 + u64 last_unlinked = NO_BLOCK; 1208 + int tries = 0; 1217 1209 1218 1210 if (gfs2_assert_warn(sdp, al->al_requested)) 1219 1211 return -EINVAL; 1220 1212 1221 - try_again: 1222 1213 if (hold_rindex) { 1223 1214 /* We need to hold the rindex unless the inode we're using is 1224 1215 the rindex itself, in which case it's already held. */ ··· 1227 1218 else if (!sdp->sd_rgrps) /* We may not have the rindex read 1228 1219 in, so: */ 1229 1220 error = gfs2_ri_update_special(ip); 1221 + if (error) 1222 + return error; 1230 1223 } 1231 1224 1232 - if (error) 1233 - return error; 1225 + do { 1226 + error = get_local_rgrp(ip, &last_unlinked); 1227 + /* If there is no space, flushing the log may release some */ 1228 + if (error) 1229 + gfs2_log_flush(sdp, NULL); 1230 + } while (error && tries++ < 3); 1234 1231 1235 - /* Find an rgrp suitable for allocation. If it encounters any unlinked 1236 - dinodes along the way, error will equal -EAGAIN and unlinked will 1237 - contains it block address. We then need to look up that inode and 1238 - try to free it, and try the allocation again. */ 1239 - error = get_local_rgrp(ip, &unlinked, &last_unlinked); 1240 1232 if (error) { 1241 1233 if (hold_rindex && ip != GFS2_I(sdp->sd_rindex)) 1242 1234 gfs2_glock_dq_uninit(&al->al_ri_gh); 1243 - if (error != -EAGAIN) 1244 - return error; 1245 - 1246 - gfs2_process_unlinked_inode(ip->i_inode.i_sb, unlinked); 1247 - /* regardless of whether or not gfs2_process_unlinked_inode 1248 - was successful, we don't want to repeat it again. */ 1249 - last_unlinked = unlinked; 1250 - gfs2_log_flush(sdp, NULL); 1251 - error = 0; 1252 - 1253 - goto try_again; 1235 + return error; 1254 1236 } 1237 + 1255 1238 /* no error, so we have the rgrp set in the inode's allocation. */ 1256 1239 al->al_file = file; 1257 1240 al->al_line = line;