Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'gfs2-for-6.19' of git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2

Pull gfs2 updates from Andreas Gruenbacher:

- Major withdraw / error handling overhaul based on dlm's new
DLM_RELEASE_RECOVER feature: this allows gfs2 to treat withdraws like
node failures. Make withdraws asynchronous

- Fix a bug in commit e4a8b5481c59a that caused 'df' to remain out of
sync. ('df' is still allowed to go slightly out of sync for short
periods of time)

- Prevent recursive memory reclaim in gfs2_unstuff_dinode()

- Clean up SDF_JOURNAL_LIVE flag handling

- Fix remote evict for read-only filesystems

- Fix a misuse of bio_chain()

- Various other minor cleanups

* tag 'gfs2-for-6.19' of git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2: (35 commits)
gfs2: Fix use of bio_chain
gfs2: Clean up SDF_JOURNAL_LIVE flag handling
gfs2: No longer thaw filesystems during a withdraw
gfs2: Withdraw immediately in gfs2_trans_add_meta
gfs2: New gfs2_withdraw_helper
gfs2: Clean up properly during a withdraw
gfs2: Rename gfs2_{gl_dq_holders => withdraw_glocks}
Revert "gfs2: fix infinite loop when checking ail item count before go_inval"
Revert "gfs2: Allow some glocks to be used during withdraw"
Revert "gfs2: Check for log write errors before telling dlm to unlock"
Revert "gfs2: fix a deadlock on withdraw-during-mount"
Revert "gfs2: Force withdraw to replay journals and wait for it to finish" (6/6)
Revert "gfs2: Force withdraw to replay journals and wait for it to finish" (5/6)
Revert "gfs2: Force withdraw to replay journals and wait for it to finish" (4/6)
Revert "gfs2: Force withdraw to replay journals and wait for it to finish" (3/6)
Revert "gfs2: Force withdraw to replay journals and wait for it to finish" (2/6)
Revert "gfs2: Force withdraw to replay journals and wait for it to finish" (1/6)
Revert "gfs2: don't stop reads while withdraw in progress"
gfs2: Rename LM_FLAG_{NOEXP -> RECOVER}
gfs2: Kill gfs2_io_error_bh_wd
...

+397 -769
Documentation/filesystems/gfs2-glocks.rst Documentation/filesystems/gfs2/glocks.rst
Documentation/filesystems/gfs2-uevents.rst Documentation/filesystems/gfs2/uevents.rst
+12
Documentation/filesystems/gfs2.rst Documentation/filesystems/gfs2/index.rst
··· 4 4 Global File System 2 5 5 ==================== 6 6 7 + Overview 8 + ======== 9 + 7 10 GFS2 is a cluster file system. It allows a cluster of computers to 8 11 simultaneously use a block device that is shared between them (with FC, 9 12 iSCSI, NBD, etc). GFS2 reads and writes to the block device like a local ··· 53 50 gfs2_convert to convert a gfs filesystem to GFS2 in-place 54 51 mkfs.gfs2 to make a filesystem 55 52 ============ ============================================= 53 + 54 + Implementation Notes 55 + ==================== 56 + 57 + .. toctree:: 58 + :maxdepth: 1 59 + 60 + glocks 61 + uevents
+1 -3
Documentation/filesystems/index.rst
··· 89 89 ext3 90 90 ext4/index 91 91 f2fs 92 - gfs2 93 - gfs2-uevents 94 - gfs2-glocks 92 + gfs2/index 95 93 hfs 96 94 hfsplus 97 95 hpfs
+1 -1
MAINTAINERS
··· 10535 10535 S: Supported 10536 10536 B: https://bugzilla.kernel.org/enter_bug.cgi?product=File%20System&component=gfs2 10537 10537 T: git git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2.git 10538 - F: Documentation/filesystems/gfs2* 10538 + F: Documentation/filesystems/gfs2/ 10539 10539 F: fs/gfs2/ 10540 10540 F: include/uapi/linux/gfs2_ondisk.h 10541 10541
+1 -1
fs/gfs2/aops.c
··· 431 431 error = mpage_read_folio(folio, gfs2_block_map); 432 432 } 433 433 434 - if (gfs2_withdrawing_or_withdrawn(sdp)) 434 + if (gfs2_withdrawn(sdp)) 435 435 return -EIO; 436 436 437 437 return error;
+1 -1
fs/gfs2/file.c
··· 1446 1446 1447 1447 if (!(fl->c.flc_flags & FL_POSIX)) 1448 1448 return -ENOLCK; 1449 - if (gfs2_withdrawing_or_withdrawn(sdp)) { 1449 + if (gfs2_withdrawn(sdp)) { 1450 1450 if (lock_is_unlock(fl)) 1451 1451 locks_lock_file_wait(file, fl); 1452 1452 return -EIO;
+74 -153
fs/gfs2/glock.c
··· 137 137 kmem_cache_free(gfs2_glock_cachep, gl); 138 138 } 139 139 140 - /** 141 - * glock_blocked_by_withdraw - determine if we can still use a glock 142 - * @gl: the glock 143 - * 144 - * We need to allow some glocks to be enqueued, dequeued, promoted, and demoted 145 - * when we're withdrawn. For example, to maintain metadata integrity, we should 146 - * disallow the use of inode and rgrp glocks when withdrawn. Other glocks like 147 - * the iopen or freeze glock may be safely used because none of their 148 - * metadata goes through the journal. So in general, we should disallow all 149 - * glocks that are journaled, and allow all the others. One exception is: 150 - * we need to allow our active journal to be promoted and demoted so others 151 - * may recover it and we can reacquire it when they're done. 152 - */ 153 - static bool glock_blocked_by_withdraw(struct gfs2_glock *gl) 154 - { 155 - struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; 156 - 157 - if (!gfs2_withdrawing_or_withdrawn(sdp)) 158 - return false; 159 - if (gl->gl_ops->go_flags & GLOF_NONDISK) 160 - return false; 161 - if (!sdp->sd_jdesc || 162 - gl->gl_name.ln_number == sdp->sd_jdesc->jd_no_addr) 163 - return false; 164 - return true; 165 - } 166 - 167 140 static void __gfs2_glock_free(struct gfs2_glock *gl) 168 141 { 169 142 rhashtable_remove_fast(&gl_hash_table, &gl->gl_node, ht_parms); ··· 243 270 GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); 244 271 if (mapping) { 245 272 truncate_inode_pages_final(mapping); 246 - if (!gfs2_withdrawing_or_withdrawn(sdp)) 273 + if (!gfs2_withdrawn(sdp)) 247 274 GLOCK_BUG_ON(gl, !mapping_empty(mapping)); 248 275 } 249 276 trace_gfs2_glock_put(gl); ··· 458 485 459 486 static void do_promote(struct gfs2_glock *gl) 460 487 { 488 + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; 461 489 struct gfs2_holder *gh, *current_gh; 490 + 491 + if (gfs2_withdrawn(sdp)) { 492 + do_error(gl, LM_OUT_ERROR); 493 + return; 494 + } 462 495 463 496 current_gh = find_first_holder(gl); 464 
497 list_for_each_entry(gh, &gl->gl_holders, gh_list) { ··· 571 592 state_change(gl, state); 572 593 } 573 594 574 - 575 595 /* Demote to UN request arrived during demote to SH or DF */ 576 596 if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) && 577 597 gl->gl_state != LM_ST_UNLOCKED && ··· 641 663 clear_bit(GLF_LOCK, &gl->gl_flags); 642 664 } 643 665 644 - static bool is_system_glock(struct gfs2_glock *gl) 645 - { 646 - struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; 647 - struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); 648 - 649 - if (gl == m_ip->i_gl) 650 - return true; 651 - return false; 652 - } 653 - 654 666 /** 655 667 * do_xmote - Calls the DLM to change the state of a lock 656 668 * @gl: The lock state ··· 659 691 struct lm_lockstruct *ls = &sdp->sd_lockstruct; 660 692 int ret; 661 693 662 - if (target != LM_ST_UNLOCKED && glock_blocked_by_withdraw(gl) && 663 - gh && !(gh->gh_flags & LM_FLAG_NOEXP)) 664 - goto skip_inval; 694 + /* 695 + * When a filesystem is withdrawing, the remaining cluster nodes will 696 + * take care of recovering the withdrawing node's journal. We only 697 + * need to make sure that once we trigger remote recovery, we won't 698 + * write to the shared block device anymore. This means that here, 699 + * 700 + * - no new writes to the filesystem must be triggered (->go_sync()). 701 + * 702 + * - any cached data should be discarded by calling ->go_inval(), dirty 703 + * or not and journaled or unjournaled. 704 + * 705 + * - no more dlm locking operations should be issued (->lm_lock()). 
706 + */ 665 707 666 708 GLOCK_BUG_ON(gl, gl->gl_state == target); 667 709 GLOCK_BUG_ON(gl, gl->gl_state == gl->gl_target); 710 + 668 711 if (!glops->go_inval || !glops->go_sync) 669 712 goto skip_inval; 670 713 671 714 spin_unlock(&gl->gl_lockref.lock); 672 - ret = glops->go_sync(gl); 673 - /* If we had a problem syncing (due to io errors or whatever, 674 - * we should not invalidate the metadata or tell dlm to 675 - * release the glock to other nodes. 676 - */ 677 - if (ret) { 678 - if (cmpxchg(&sdp->sd_log_error, 0, ret)) { 679 - fs_err(sdp, "Error %d syncing glock\n", ret); 680 - gfs2_dump_glock(NULL, gl, true); 715 + if (!gfs2_withdrawn(sdp)) { 716 + ret = glops->go_sync(gl); 717 + if (ret) { 718 + if (cmpxchg(&sdp->sd_log_error, 0, ret)) { 719 + fs_err(sdp, "Error %d syncing glock\n", ret); 720 + gfs2_dump_glock(NULL, gl, true); 721 + gfs2_withdraw(sdp); 722 + } 681 723 } 682 - spin_lock(&gl->gl_lockref.lock); 683 - goto skip_inval; 684 724 } 685 725 686 - if (target == LM_ST_UNLOCKED || target == LM_ST_DEFERRED) { 687 - /* 688 - * The call to go_sync should have cleared out the ail list. 689 - * If there are still items, we have a problem. We ought to 690 - * withdraw, but we can't because the withdraw code also uses 691 - * glocks. Warn about the error, dump the glock, then fall 692 - * through and wait for logd to do the withdraw for us. 693 - */ 694 - if ((atomic_read(&gl->gl_ail_count) != 0) && 695 - (!cmpxchg(&sdp->sd_log_error, 0, -EIO))) { 696 - gfs2_glock_assert_warn(gl, 697 - !atomic_read(&gl->gl_ail_count)); 698 - gfs2_dump_glock(NULL, gl, true); 699 - } 726 + if (target == LM_ST_UNLOCKED || target == LM_ST_DEFERRED) 700 727 glops->go_inval(gl, target == LM_ST_DEFERRED ? 0 : DIO_METADATA); 701 - } 702 728 spin_lock(&gl->gl_lockref.lock); 703 729 704 730 skip_inval: 705 - /* 706 - * Check for an error encountered since we called go_sync and go_inval. 
707 - * If so, we can't withdraw from the glock code because the withdraw 708 - * code itself uses glocks (see function signal_our_withdraw) to 709 - * change the mount to read-only. Most importantly, we must not call 710 - * dlm to unlock the glock until the journal is in a known good state 711 - * (after journal replay) otherwise other nodes may use the object 712 - * (rgrp or dinode) and then later, journal replay will corrupt the 713 - * file system. The best we can do here is wait for the logd daemon 714 - * to see sd_log_error and withdraw, and in the meantime, requeue the 715 - * work for later. 716 - * 717 - * We make a special exception for some system glocks, such as the 718 - * system statfs inode glock, which needs to be granted before the 719 - * gfs2_quotad daemon can exit, and that exit needs to finish before 720 - * we can unmount the withdrawn file system. 721 - * 722 - * However, if we're just unlocking the lock (say, for unmount, when 723 - * gfs2_gl_hash_clear calls clear_glock) and recovery is complete 724 - * then it's okay to tell dlm to unlock it. 725 - */ 726 - if (unlikely(sdp->sd_log_error) && !gfs2_withdrawing_or_withdrawn(sdp)) 727 - gfs2_withdraw_delayed(sdp); 728 - if (glock_blocked_by_withdraw(gl) && 729 - (target != LM_ST_UNLOCKED || 730 - test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags))) { 731 - if (!is_system_glock(gl)) { 732 - request_demote(gl, LM_ST_UNLOCKED, 0, false); 733 - /* 734 - * Ordinarily, we would call dlm and its callback would call 735 - * finish_xmote, which would call state_change() to the new state. 736 - * Since we withdrew, we won't call dlm, so call state_change 737 - * manually, but to the UNLOCKED state we desire. 738 - */ 739 - state_change(gl, LM_ST_UNLOCKED); 740 - /* 741 - * We skip telling dlm to do the locking, so we won't get a 742 - * reply that would otherwise clear GLF_LOCK. So we clear it here. 
743 - */ 744 - if (!test_bit(GLF_CANCELING, &gl->gl_flags)) 745 - clear_bit(GLF_LOCK, &gl->gl_flags); 746 - clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); 747 - gl->gl_lockref.count++; 748 - gfs2_glock_queue_work(gl, GL_GLOCK_DFT_HOLD); 749 - return; 750 - } 731 + if (gfs2_withdrawn(sdp)) { 732 + if (target != LM_ST_UNLOCKED) 733 + target = LM_OUT_ERROR; 734 + goto out; 751 735 } 752 736 753 737 if (ls->ls_ops->lm_lock) { ··· 715 795 } 716 796 clear_bit(GLF_PENDING_REPLY, &gl->gl_flags); 717 797 718 - if (ret == -ENODEV && gl->gl_target == LM_ST_UNLOCKED && 719 - target == LM_ST_UNLOCKED) { 798 + if (ret == -ENODEV) { 720 799 /* 721 800 * The lockspace has been released and the lock has 722 801 * been unlocked implicitly. 723 802 */ 803 + if (target != LM_ST_UNLOCKED) { 804 + target = LM_OUT_ERROR; 805 + goto out; 806 + } 724 807 } else { 725 808 fs_err(sdp, "lm_lock ret %d\n", ret); 726 - GLOCK_BUG_ON(gl, !gfs2_withdrawing_or_withdrawn(sdp)); 809 + GLOCK_BUG_ON(gl, !gfs2_withdrawn(sdp)); 727 810 return; 728 811 } 729 812 } 730 813 814 + out: 731 815 /* Complete the operation now. */ 732 816 finish_xmote(gl, target); 733 817 gl->gl_lockref.count++; ··· 890 966 return ip; 891 967 } 892 968 893 - static void gfs2_try_evict(struct gfs2_glock *gl) 969 + static void gfs2_try_to_evict(struct gfs2_glock *gl) 894 970 { 895 971 struct gfs2_inode *ip; 896 972 897 973 /* 898 974 * If there is contention on the iopen glock and we have an inode, try 899 975 * to grab and release the inode so that it can be evicted. The 900 - * GIF_DEFER_DELETE flag indicates to gfs2_evict_inode() that the inode 976 + * GLF_DEFER_DELETE flag indicates to gfs2_evict_inode() that the inode 901 977 * should not be deleted locally. This will allow the remote node to 902 978 * go ahead and delete the inode without us having to do it, which will 903 979 * avoid rgrp glock thrashing. 
··· 950 1026 struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; 951 1027 bool verify_delete = test_and_clear_bit(GLF_VERIFY_DELETE, &gl->gl_flags); 952 1028 1029 + /* 1030 + * Check for the GLF_VERIFY_DELETE above: this ensures that we won't 1031 + * immediately process GLF_VERIFY_DELETE work that the below call to 1032 + * gfs2_try_to_evict() queues. 1033 + */ 1034 + 953 1035 if (test_and_clear_bit(GLF_TRY_TO_EVICT, &gl->gl_flags)) 954 - gfs2_try_evict(gl); 1036 + gfs2_try_to_evict(gl); 955 1037 956 1038 if (verify_delete) { 957 1039 u64 no_addr = gl->gl_name.ln_number; ··· 1141 1211 1142 1212 mapping = gfs2_glock2aspace(gl); 1143 1213 if (mapping) { 1214 + gfp_t gfp_mask; 1215 + 1144 1216 mapping->a_ops = &gfs2_meta_aops; 1145 1217 mapping->host = sdp->sd_inode; 1146 1218 mapping->flags = 0; 1147 - mapping_set_gfp_mask(mapping, GFP_NOFS); 1219 + gfp_mask = mapping_gfp_mask(sdp->sd_inode->i_mapping); 1220 + mapping_set_gfp_mask(mapping, gfp_mask); 1148 1221 mapping->i_private_data = NULL; 1149 1222 mapping->writeback_index = 0; 1150 1223 } ··· 1174 1241 * @state: the state we're requesting 1175 1242 * @flags: the modifier flags 1176 1243 * @gh: the holder structure 1177 - * 1244 + * @ip: caller's return address for debugging 1178 1245 */ 1179 1246 1180 1247 void __gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, u16 flags, ··· 1472 1539 int gfs2_glock_nq(struct gfs2_holder *gh) 1473 1540 { 1474 1541 struct gfs2_glock *gl = gh->gh_gl; 1542 + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; 1475 1543 int error; 1476 1544 1477 - if (glock_blocked_by_withdraw(gl) && !(gh->gh_flags & LM_FLAG_NOEXP)) 1545 + if (gfs2_withdrawn(sdp)) 1478 1546 return -EIO; 1479 1547 1480 1548 if (gh->gh_flags & GL_NOBLOCK) { ··· 1500 1566 gh->gh_error = 0; 1501 1567 spin_lock(&gl->gl_lockref.lock); 1502 1568 add_to_queue(gh); 1503 - if (unlikely((LM_FLAG_NOEXP & gh->gh_flags) && 1569 + if (unlikely((LM_FLAG_RECOVER & gh->gh_flags) && 1504 1570 test_and_clear_bit(GLF_HAVE_FROZEN_REPLY, 
&gl->gl_flags))) { 1505 1571 set_bit(GLF_HAVE_REPLY, &gl->gl_flags); 1506 1572 gl->gl_lockref.count++; ··· 1573 1639 void gfs2_glock_dq(struct gfs2_holder *gh) 1574 1640 { 1575 1641 struct gfs2_glock *gl = gh->gh_gl; 1576 - struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; 1577 1642 1578 1643 spin_lock(&gl->gl_lockref.lock); 1579 1644 if (!gfs2_holder_queued(gh)) { ··· 1597 1664 clear_bit(GLF_LOCK, &gl->gl_flags); 1598 1665 if (!gfs2_holder_queued(gh)) 1599 1666 goto out; 1600 - } 1601 - 1602 - /* 1603 - * If we're in the process of file system withdraw, we cannot just 1604 - * dequeue any glocks until our journal is recovered, lest we introduce 1605 - * file system corruption. We need two exceptions to this rule: We need 1606 - * to allow unlocking of nondisk glocks and the glock for our own 1607 - * journal that needs recovery. 1608 - */ 1609 - if (test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags) && 1610 - glock_blocked_by_withdraw(gl) && 1611 - gh->gh_gl != sdp->sd_jinode_gl) { 1612 - sdp->sd_glock_dqs_held++; 1613 - spin_unlock(&gl->gl_lockref.lock); 1614 - might_sleep(); 1615 - wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY, 1616 - TASK_UNINTERRUPTIBLE); 1617 - spin_lock(&gl->gl_lockref.lock); 1618 1667 } 1619 1668 1620 1669 __gfs2_glock_dq(gh); ··· 1786 1871 * 1787 1872 * Glocks are not frozen if (a) the result of the dlm operation is 1788 1873 * an error, (b) the locking operation was an unlock operation or 1789 - * (c) if there is a "noexp" flagged request anywhere in the queue 1874 + * (c) if there is a "recover" flagged request anywhere in the queue 1790 1875 * 1791 1876 * Returns: 1 if freezing should occur, 0 otherwise 1792 1877 */ ··· 1803 1888 list_for_each_entry(gh, &gl->gl_holders, gh_list) { 1804 1889 if (test_bit(HIF_HOLDER, &gh->gh_iflags)) 1805 1890 continue; 1806 - if (LM_FLAG_NOEXP & gh->gh_flags) 1891 + if (LM_FLAG_RECOVER & gh->gh_flags) 1807 1892 return 0; 1808 1893 } 1809 1894 ··· 2080 2165 dump_glock(NULL, gl, true); 2081 2166 } 2082 2167 2083 
- static void withdraw_dq(struct gfs2_glock *gl) 2168 + static void withdraw_glock(struct gfs2_glock *gl) 2084 2169 { 2085 2170 spin_lock(&gl->gl_lockref.lock); 2086 - if (!__lockref_is_dead(&gl->gl_lockref) && 2087 - glock_blocked_by_withdraw(gl)) 2171 + if (!__lockref_is_dead(&gl->gl_lockref)) { 2172 + /* 2173 + * We don't want to write back any more dirty data. Unlock the 2174 + * remaining inode and resource group glocks; this will cause 2175 + * their ->go_inval() hooks to toss out all the remaining 2176 + * cached data, dirty or not. 2177 + */ 2178 + if (gl->gl_ops->go_inval && gl->gl_state != LM_ST_UNLOCKED) 2179 + request_demote(gl, LM_ST_UNLOCKED, 0, false); 2088 2180 do_error(gl, LM_OUT_ERROR); /* remove pending waiters */ 2181 + } 2089 2182 spin_unlock(&gl->gl_lockref.lock); 2090 2183 } 2091 2184 2092 - void gfs2_gl_dq_holders(struct gfs2_sbd *sdp) 2185 + void gfs2_withdraw_glocks(struct gfs2_sbd *sdp) 2093 2186 { 2094 - glock_hash_walk(withdraw_dq, sdp); 2187 + glock_hash_walk(withdraw_glock, sdp); 2095 2188 } 2096 2189 2097 2190 /** ··· 2160 2237 *p++ = 't'; 2161 2238 if (flags & LM_FLAG_TRY_1CB) 2162 2239 *p++ = 'T'; 2163 - if (flags & LM_FLAG_NOEXP) 2240 + if (flags & LM_FLAG_RECOVER) 2164 2241 *p++ = 'e'; 2165 2242 if (flags & LM_FLAG_ANY) 2166 2243 *p++ = 'A'; ··· 2247 2324 *p++ = 'o'; 2248 2325 if (test_bit(GLF_BLOCKING, gflags)) 2249 2326 *p++ = 'b'; 2250 - if (test_bit(GLF_UNLOCKED, gflags)) 2251 - *p++ = 'x'; 2252 2327 if (test_bit(GLF_INSTANTIATE_NEEDED, gflags)) 2253 2328 *p++ = 'n'; 2254 2329 if (test_bit(GLF_INSTANTIATE_IN_PROG, gflags))
+6 -6
fs/gfs2/glock.h
··· 58 58 * LM_FLAG_TRY_1CB 59 59 * Send one blocking callback if TRY is set and the lock is not granted. 60 60 * 61 - * LM_FLAG_NOEXP 61 + * LM_FLAG_RECOVER 62 62 * GFS sets this flag on lock requests it makes while doing journal recovery. 63 - * These special requests should not be blocked due to the recovery like 64 - * ordinary locks would be. 63 + * While ordinary requests are blocked until the end of recovery, requests 64 + * with this flag set do proceed. 65 65 * 66 66 * LM_FLAG_ANY 67 67 * A SHARED request may also be granted in DEFERRED, or a DEFERRED request may ··· 80 80 81 81 #define LM_FLAG_TRY 0x0001 82 82 #define LM_FLAG_TRY_1CB 0x0002 83 - #define LM_FLAG_NOEXP 0x0004 83 + #define LM_FLAG_RECOVER 0x0004 84 84 #define LM_FLAG_ANY 0x0008 85 85 #define LM_FLAG_NODE_SCOPE 0x0020 86 86 #define GL_ASYNC 0x0040 ··· 136 136 void (*lm_first_done) (struct gfs2_sbd *sdp); 137 137 void (*lm_recovery_result) (struct gfs2_sbd *sdp, unsigned int jid, 138 138 unsigned int result); 139 - void (*lm_unmount) (struct gfs2_sbd *sdp); 139 + void (*lm_unmount) (struct gfs2_sbd *sdp, bool clean); 140 140 void (*lm_withdraw) (struct gfs2_sbd *sdp); 141 141 void (*lm_put_lock) (struct gfs2_glock *gl); 142 142 int (*lm_lock) (struct gfs2_glock *gl, unsigned int req_state, ··· 263 263 void gfs2_cancel_delete_work(struct gfs2_glock *gl); 264 264 void gfs2_flush_delete_work(struct gfs2_sbd *sdp); 265 265 void gfs2_gl_hash_clear(struct gfs2_sbd *sdp); 266 - void gfs2_gl_dq_holders(struct gfs2_sbd *sdp); 266 + void gfs2_withdraw_glocks(struct gfs2_sbd *sdp); 267 267 void gfs2_glock_thaw(struct gfs2_sbd *sdp); 268 268 void gfs2_glock_free(struct gfs2_glock *gl); 269 269 void gfs2_glock_free_later(struct gfs2_glock *gl);
+10 -88
fs/gfs2/glops.c
··· 30 30 31 31 struct workqueue_struct *gfs2_freeze_wq; 32 32 33 - extern struct workqueue_struct *gfs2_control_wq; 34 - 35 33 static void gfs2_ail_error(struct gfs2_glock *gl, const struct buffer_head *bh) 36 34 { 37 35 struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; ··· 43 45 gl->gl_name.ln_type, gl->gl_name.ln_number, 44 46 gfs2_glock2aspace(gl)); 45 47 gfs2_lm(sdp, "AIL error\n"); 46 - gfs2_withdraw_delayed(sdp); 48 + gfs2_withdraw(sdp); 47 49 } 48 50 49 51 /** ··· 81 83 GLOCK_BUG_ON(gl, !fsync && atomic_read(&gl->gl_ail_count)); 82 84 spin_unlock(&sdp->sd_ail_lock); 83 85 gfs2_log_unlock(sdp); 84 - 85 - if (gfs2_withdrawing(sdp)) 86 - gfs2_withdraw(sdp); 87 86 } 88 87 89 88 ··· 173 178 174 179 filemap_fdatawrite_range(metamapping, start, end); 175 180 error = filemap_fdatawait_range(metamapping, start, end); 176 - WARN_ON_ONCE(error && !gfs2_withdrawing_or_withdrawn(sdp)); 181 + WARN_ON_ONCE(error && !gfs2_withdrawn(sdp)); 177 182 mapping_set_error(metamapping, error); 178 183 if (error) 179 184 gfs2_io_error(sdp); ··· 232 237 end = PAGE_ALIGN((rgd->rd_addr + rgd->rd_length) * bsize) - 1; 233 238 gfs2_rgrp_brelse(rgd); 234 239 WARN_ON_ONCE(!(flags & DIO_METADATA)); 240 + gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count)); 235 241 truncate_inode_pages_range(mapping, start, end); 236 242 } 237 243 ··· 358 362 static void inode_go_inval(struct gfs2_glock *gl, int flags) 359 363 { 360 364 struct gfs2_inode *ip = gfs2_glock2inode(gl); 365 + 366 + gfs2_assert_withdraw(gl->gl_name.ln_sbd, !atomic_read(&gl->gl_ail_count)); 361 367 362 368 if (flags & DIO_METADATA) { 363 369 struct address_space *mapping = gfs2_glock2aspace(gl); ··· 606 608 j_gl->gl_ops->go_inval(j_gl, DIO_METADATA); 607 609 608 610 error = gfs2_find_jhead(sdp->sd_jdesc, &head); 609 - if (gfs2_assert_withdraw_delayed(sdp, !error)) 611 + if (gfs2_assert_withdraw(sdp, !error)) 610 612 return error; 611 - if (gfs2_assert_withdraw_delayed(sdp, head.lh_flags & 612 - GFS2_LOG_HEAD_UNMOUNT)) 613 + if 
(gfs2_assert_withdraw(sdp, head.lh_flags & 614 + GFS2_LOG_HEAD_UNMOUNT)) 613 615 return -EIO; 614 616 gfs2_log_pointers_init(sdp, &head); 615 617 } ··· 628 630 struct gfs2_inode *ip = gl->gl_object; 629 631 struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; 630 632 631 - if (!remote || sb_rdonly(sdp->sd_vfs) || 632 - test_bit(SDF_KILL, &sdp->sd_flags)) 633 + if (!remote || test_bit(SDF_KILL, &sdp->sd_flags)) 633 634 return; 634 635 635 636 if (gl->gl_demote_state == LM_ST_UNLOCKED && ··· 639 642 } 640 643 } 641 644 642 - /** 643 - * inode_go_unlocked - wake up anyone waiting for dlm's unlock ast 644 - * @gl: glock being unlocked 645 - * 646 - * For now, this is only used for the journal inode glock. In withdraw 647 - * situations, we need to wait for the glock to be unlocked so that we know 648 - * other nodes may proceed with recovery / journal replay. 649 - */ 650 - static void inode_go_unlocked(struct gfs2_glock *gl) 651 - { 652 - /* Note that we cannot reference gl_object because it's already set 653 - * to NULL by this point in its lifecycle. */ 654 - if (!test_bit(GLF_UNLOCKED, &gl->gl_flags)) 655 - return; 656 - clear_bit_unlock(GLF_UNLOCKED, &gl->gl_flags); 657 - wake_up_bit(&gl->gl_flags, GLF_UNLOCKED); 658 - } 659 - 660 - /** 661 - * nondisk_go_callback - used to signal when a node did a withdraw 662 - * @gl: the nondisk glock 663 - * @remote: true if this came from a different cluster node 664 - * 665 - */ 666 - static void nondisk_go_callback(struct gfs2_glock *gl, bool remote) 667 - { 668 - struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; 669 - 670 - /* Ignore the callback unless it's from another node, and it's the 671 - live lock. */ 672 - if (!remote || gl->gl_name.ln_number != GFS2_LIVE_LOCK) 673 - return; 674 - 675 - /* First order of business is to cancel the demote request. We don't 676 - * really want to demote a nondisk glock. At best it's just to inform 677 - * us of another node's withdraw. We'll keep it in SH mode. 
*/ 678 - clear_bit(GLF_DEMOTE, &gl->gl_flags); 679 - clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags); 680 - 681 - /* Ignore the unlock if we're withdrawn, unmounting, or in recovery. */ 682 - if (test_bit(SDF_NORECOVERY, &sdp->sd_flags) || 683 - test_bit(SDF_WITHDRAWN, &sdp->sd_flags) || 684 - test_bit(SDF_REMOTE_WITHDRAW, &sdp->sd_flags)) 685 - return; 686 - 687 - /* We only care when a node wants us to unlock, because that means 688 - * they want a journal recovered. */ 689 - if (gl->gl_demote_state != LM_ST_UNLOCKED) 690 - return; 691 - 692 - if (sdp->sd_args.ar_spectator) { 693 - fs_warn(sdp, "Spectator node cannot recover journals.\n"); 694 - return; 695 - } 696 - 697 - fs_warn(sdp, "Some node has withdrawn; checking for recovery.\n"); 698 - set_bit(SDF_REMOTE_WITHDRAW, &sdp->sd_flags); 699 - /* 700 - * We can't call remote_withdraw directly here or gfs2_recover_journal 701 - * because this is called from the glock unlock function and the 702 - * remote_withdraw needs to enqueue and dequeue the same "live" glock 703 - * we were called from. So we queue it to the control work queue in 704 - * lock_dlm. 
705 - */ 706 - queue_delayed_work(gfs2_control_wq, &sdp->sd_control_work, 0); 707 - } 708 - 709 645 const struct gfs2_glock_operations gfs2_meta_glops = { 710 646 .go_type = LM_TYPE_META, 711 - .go_flags = GLOF_NONDISK, 712 647 }; 713 648 714 649 const struct gfs2_glock_operations gfs2_inode_glops = { ··· 651 722 .go_dump = inode_go_dump, 652 723 .go_type = LM_TYPE_INODE, 653 724 .go_flags = GLOF_ASPACE | GLOF_LVB, 654 - .go_unlocked = inode_go_unlocked, 655 725 }; 656 726 657 727 const struct gfs2_glock_operations gfs2_rgrp_glops = { ··· 666 738 .go_xmote_bh = freeze_go_xmote_bh, 667 739 .go_callback = freeze_go_callback, 668 740 .go_type = LM_TYPE_NONDISK, 669 - .go_flags = GLOF_NONDISK, 670 741 }; 671 742 672 743 const struct gfs2_glock_operations gfs2_iopen_glops = { 673 744 .go_type = LM_TYPE_IOPEN, 674 745 .go_callback = iopen_go_callback, 675 746 .go_dump = inode_go_dump, 676 - .go_flags = GLOF_NONDISK, 677 747 .go_subclass = 1, 678 748 }; 679 749 680 750 const struct gfs2_glock_operations gfs2_flock_glops = { 681 751 .go_type = LM_TYPE_FLOCK, 682 - .go_flags = GLOF_NONDISK, 683 752 }; 684 753 685 754 const struct gfs2_glock_operations gfs2_nondisk_glops = { 686 755 .go_type = LM_TYPE_NONDISK, 687 - .go_flags = GLOF_NONDISK, 688 - .go_callback = nondisk_go_callback, 689 756 }; 690 757 691 758 const struct gfs2_glock_operations gfs2_quota_glops = { 692 759 .go_type = LM_TYPE_QUOTA, 693 - .go_flags = GLOF_LVB | GLOF_NONDISK, 760 + .go_flags = GLOF_LVB, 694 761 }; 695 762 696 763 const struct gfs2_glock_operations gfs2_journal_glops = { 697 764 .go_type = LM_TYPE_JOURNAL, 698 - .go_flags = GLOF_NONDISK, 699 765 }; 700 766 701 767 const struct gfs2_glock_operations *gfs2_glops_list[] = {
+6 -16
fs/gfs2/incore.h
··· 223 223 void (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl, 224 224 const char *fs_id_buf); 225 225 void (*go_callback)(struct gfs2_glock *gl, bool remote); 226 - void (*go_unlocked)(struct gfs2_glock *gl); 227 226 const int go_subclass; 228 227 const int go_type; 229 228 const unsigned long go_flags; 230 229 #define GLOF_ASPACE 1 /* address space attached */ 231 230 #define GLOF_LVB 2 /* Lock Value Block attached */ 232 - #define GLOF_NONDISK 8 /* not I/O related */ 233 231 }; 234 232 235 233 enum { ··· 324 326 GLF_LRU = 13, 325 327 GLF_OBJECT = 14, /* Used only for tracing */ 326 328 GLF_BLOCKING = 15, 327 - GLF_UNLOCKED = 16, /* Wait for glock to be unlocked */ 328 329 GLF_TRY_TO_EVICT = 17, /* iopen glocks only */ 329 330 GLF_VERIFY_DELETE = 18, /* iopen glocks only */ 330 331 GLF_PENDING_REPLY = 19, ··· 517 520 518 521 struct list_head jd_revoke_list; 519 522 unsigned int jd_replay_tail; 520 - 521 - u64 jd_no_addr; 522 523 }; 523 524 524 525 struct gfs2_statfs_change_host { ··· 537 542 538 543 #define GFS2_ERRORS_DEFAULT GFS2_ERRORS_WITHDRAW 539 544 #define GFS2_ERRORS_WITHDRAW 0 540 - #define GFS2_ERRORS_CONTINUE 1 /* place holder for future feature */ 541 - #define GFS2_ERRORS_RO 2 /* place holder for future feature */ 545 + #define GFS2_ERRORS_DEACTIVATE 1 542 546 #define GFS2_ERRORS_PANIC 3 543 547 544 548 struct gfs2_args { ··· 553 559 unsigned int ar_data:2; /* ordered/writeback */ 554 560 unsigned int ar_meta:1; /* mount metafs */ 555 561 unsigned int ar_discard:1; /* discard requests */ 556 - unsigned int ar_errors:2; /* errors=withdraw | panic */ 562 + unsigned int ar_errors:2; /* errors=withdraw | deactivate | panic */ 557 563 unsigned int ar_nobarrier:1; /* do not send barriers */ 558 564 unsigned int ar_rgrplvb:1; /* use lvbs for rgrp info */ 559 565 unsigned int ar_got_rgrplvb:1; /* Was the rgrplvb opt given? 
*/ ··· 579 585 unsigned int gt_complain_secs; 580 586 unsigned int gt_statfs_quantum; 581 587 unsigned int gt_statfs_slow; 588 + unsigned int gt_withdraw_helper_timeout; 582 589 }; 583 590 584 591 enum { ··· 594 599 SDF_SKIP_DLM_UNLOCK = 8, 595 600 SDF_FORCE_AIL_FLUSH = 9, 596 601 SDF_FREEZE_INITIATOR = 10, 597 - SDF_WITHDRAWING = 11, /* Will withdraw eventually */ 598 - SDF_WITHDRAW_IN_PROG = 12, /* Withdraw is in progress */ 599 - SDF_REMOTE_WITHDRAW = 13, /* Performing remote recovery */ 600 - SDF_WITHDRAW_RECOVERY = 14, /* Wait for journal recovery when we are 601 - withdrawing */ 602 602 SDF_KILL = 15, 603 603 SDF_EVICTING = 16, 604 604 SDF_FROZEN = 17, ··· 706 716 struct gfs2_glock *sd_rename_gl; 707 717 struct gfs2_glock *sd_freeze_gl; 708 718 struct work_struct sd_freeze_work; 719 + struct work_struct sd_withdraw_work; 709 720 wait_queue_head_t sd_kill_wait; 710 721 wait_queue_head_t sd_async_glock_wait; 711 722 atomic_t sd_glock_disposal; 712 723 struct completion sd_locking_init; 713 - struct completion sd_wdack; 724 + struct completion sd_withdraw_helper; 725 + int sd_withdraw_helper_status; 714 726 struct delayed_work sd_control_work; 715 727 716 728 /* Inode Stuff */ ··· 753 761 struct gfs2_jdesc *sd_jdesc; 754 762 struct gfs2_holder sd_journal_gh; 755 763 struct gfs2_holder sd_jinode_gh; 756 - struct gfs2_glock *sd_jinode_gl; 757 764 758 765 struct gfs2_holder sd_sc_gh; 759 766 struct buffer_head *sd_sc_bh; ··· 837 846 838 847 unsigned long sd_last_warning; 839 848 struct dentry *debugfs_dir; /* debugfs directory */ 840 - unsigned long sd_glock_dqs_held; 841 849 }; 842 850 843 851 #define GFS2_BAD_INO 1
+15
fs/gfs2/inode.c
··· 89 89 return 0; 90 90 } 91 91 92 + void gfs2_setup_inode(struct inode *inode) 93 + { 94 + gfp_t gfp_mask; 95 + 96 + /* 97 + * Ensure all page cache allocations are done from GFP_NOFS context to 98 + * prevent direct reclaim recursion back into the filesystem and blowing 99 + * stacks or deadlocking. 100 + */ 101 + gfp_mask = mapping_gfp_mask(inode->i_mapping); 102 + mapping_set_gfp_mask(inode->i_mapping, gfp_mask & ~__GFP_FS); 103 + } 104 + 92 105 /** 93 106 * gfs2_inode_lookup - Lookup an inode 94 107 * @sb: The super block ··· 145 132 struct gfs2_glock *io_gl; 146 133 int extra_flags = 0; 147 134 135 + gfs2_setup_inode(inode); 148 136 error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, 149 137 &ip->i_gl); 150 138 if (unlikely(error)) ··· 766 752 error = -ENOMEM; 767 753 if (!inode) 768 754 goto fail_gunlock; 755 + gfs2_setup_inode(inode); 769 756 ip = GFS2_I(inode); 770 757 771 758 error = posix_acl_create(dir, &mode, &default_acl, &acl);
+1
fs/gfs2/inode.h
··· 86 86 return -EIO; 87 87 } 88 88 89 + void gfs2_setup_inode(struct inode *inode); 89 90 struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, 90 91 u64 no_addr, u64 no_formal_ino, 91 92 unsigned int blktype);
+16 -41
fs/gfs2/lock_dlm.c
··· 15 15 #include <linux/sched/signal.h> 16 16 17 17 #include "incore.h" 18 - #include "glock.h" 19 - #include "glops.h" 20 - #include "recovery.h" 21 18 #include "util.h" 22 19 #include "sys.h" 23 20 #include "trace_gfs2.h" ··· 136 139 137 140 switch (gl->gl_lksb.sb_status) { 138 141 case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */ 139 - if (gl->gl_ops->go_unlocked) 140 - gl->gl_ops->go_unlocked(gl); 141 142 gfs2_glock_free(gl); 142 143 return; 143 144 case -DLM_ECANCEL: /* Cancel while getting lock */ ··· 394 399 /* 395 400 * dlm/gfs2 recovery coordination using dlm_recover callbacks 396 401 * 397 - * 0. gfs2 checks for another cluster node withdraw, needing journal replay 398 402 * 1. dlm_controld sees lockspace members change 399 403 * 2. dlm_controld blocks dlm-kernel locking activity 400 404 * 3. dlm_controld within dlm-kernel notifies gfs2 (recover_prep) ··· 651 657 &ls->ls_control_lksb, "control_lock"); 652 658 } 653 659 654 - /** 655 - * remote_withdraw - react to a node withdrawing from the file system 656 - * @sdp: The superblock 657 - */ 658 - static void remote_withdraw(struct gfs2_sbd *sdp) 659 - { 660 - struct gfs2_jdesc *jd; 661 - int ret = 0, count = 0; 662 - 663 - list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { 664 - if (jd->jd_jid == sdp->sd_lockstruct.ls_jid) 665 - continue; 666 - ret = gfs2_recover_journal(jd, true); 667 - if (ret) 668 - break; 669 - count++; 670 - } 671 - 672 - /* Now drop the additional reference we acquired */ 673 - fs_err(sdp, "Journals checked: %d, ret = %d.\n", count, ret); 674 - } 675 - 676 660 static void gfs2_control_func(struct work_struct *work) 677 661 { 678 662 struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_control_work.work); ··· 660 688 int write_lvb = 0; 661 689 int recover_size; 662 690 int i, error; 663 - 664 - /* First check for other nodes that may have done a withdraw. 
*/ 665 - if (test_bit(SDF_REMOTE_WITHDRAW, &sdp->sd_flags)) { 666 - remote_withdraw(sdp); 667 - clear_bit(SDF_REMOTE_WITHDRAW, &sdp->sd_flags); 668 - return; 669 - } 670 691 671 692 spin_lock(&ls->ls_recover_spin); 672 693 /* ··· 1160 1195 struct gfs2_sbd *sdp = arg; 1161 1196 struct lm_lockstruct *ls = &sdp->sd_lockstruct; 1162 1197 1163 - if (gfs2_withdrawing_or_withdrawn(sdp)) { 1198 + if (gfs2_withdrawn(sdp)) { 1164 1199 fs_err(sdp, "recover_prep ignored due to withdraw.\n"); 1165 1200 return; 1166 1201 } ··· 1186 1221 struct lm_lockstruct *ls = &sdp->sd_lockstruct; 1187 1222 int jid = slot->slot - 1; 1188 1223 1189 - if (gfs2_withdrawing_or_withdrawn(sdp)) { 1224 + if (gfs2_withdrawn(sdp)) { 1190 1225 fs_err(sdp, "recover_slot jid %d ignored due to withdraw.\n", 1191 1226 jid); 1192 1227 return; ··· 1215 1250 struct gfs2_sbd *sdp = arg; 1216 1251 struct lm_lockstruct *ls = &sdp->sd_lockstruct; 1217 1252 1218 - if (gfs2_withdrawing_or_withdrawn(sdp)) { 1253 + if (gfs2_withdrawn(sdp)) { 1219 1254 fs_err(sdp, "recover_done ignored due to withdraw.\n"); 1220 1255 return; 1221 1256 } ··· 1246 1281 { 1247 1282 struct lm_lockstruct *ls = &sdp->sd_lockstruct; 1248 1283 1249 - if (gfs2_withdrawing_or_withdrawn(sdp)) { 1284 + if (gfs2_withdrawn(sdp)) { 1250 1285 fs_err(sdp, "recovery_result jid %d ignored due to withdraw.\n", 1251 1286 jid); 1252 1287 return; ··· 1403 1438 fs_err(sdp, "mount first_done error %d\n", error); 1404 1439 } 1405 1440 1406 - static void gdlm_unmount(struct gfs2_sbd *sdp) 1441 + /* 1442 + * gdlm_unmount - release our lockspace 1443 + * @sdp: the superblock 1444 + * @clean: Indicates whether or not the remaining nodes in the cluster should 1445 + * perform recovery. Recovery is necessary when a node withdraws and 1446 + * its journal remains dirty. Recovery isn't necessary when a node 1447 + * cleanly unmounts a filesystem. 
1448 + */ 1449 + static void gdlm_unmount(struct gfs2_sbd *sdp, bool clean) 1407 1450 { 1408 1451 struct lm_lockstruct *ls = &sdp->sd_lockstruct; 1409 1452 ··· 1429 1456 release: 1430 1457 down_write(&ls->ls_sem); 1431 1458 if (ls->ls_dlm) { 1432 - dlm_release_lockspace(ls->ls_dlm, DLM_RELEASE_NORMAL); 1459 + dlm_release_lockspace(ls->ls_dlm, 1460 + clean ? DLM_RELEASE_NORMAL : 1461 + DLM_RELEASE_RECOVER); 1433 1462 ls->ls_dlm = NULL; 1434 1463 } 1435 1464 up_write(&ls->ls_sem);
+16 -43
fs/gfs2/log.c
··· 112 112 &tr->tr_ail2_list); 113 113 continue; 114 114 } 115 - if (!cmpxchg(&sdp->sd_log_error, 0, -EIO)) { 115 + if (!cmpxchg(&sdp->sd_log_error, 0, -EIO)) 116 116 gfs2_io_error_bh(sdp, bh); 117 - gfs2_withdraw_delayed(sdp); 118 - } 119 117 } 120 118 121 - if (gfs2_withdrawing_or_withdrawn(sdp)) { 119 + if (gfs2_withdrawn(sdp)) { 122 120 gfs2_remove_from_ail(bd); 123 121 continue; 124 122 } ··· 322 324 continue; 323 325 } 324 326 if (!buffer_uptodate(bh) && 325 - !cmpxchg(&sdp->sd_log_error, 0, -EIO)) { 327 + !cmpxchg(&sdp->sd_log_error, 0, -EIO)) 326 328 gfs2_io_error_bh(sdp, bh); 327 - gfs2_withdraw_delayed(sdp); 328 - } 329 329 /* 330 330 * If we have space for revokes and the bd is no longer on any 331 331 * buf list, we can just add a revoke for it immediately and ··· 803 807 gfs2_log_lock(sdp); 804 808 gfs2_ail1_empty(sdp, max_revokes); 805 809 gfs2_log_unlock(sdp); 806 - 807 - if (gfs2_withdrawing(sdp)) 808 - gfs2_withdraw(sdp); 809 810 } 810 811 811 812 /** ··· 830 837 struct super_block *sb = sdp->sd_vfs; 831 838 u64 dblock; 832 839 833 - if (gfs2_withdrawing_or_withdrawn(sdp)) 840 + if (gfs2_withdrawn(sdp)) 834 841 return; 835 842 836 843 page = mempool_alloc(gfs2_page_pool, GFP_NOIO); ··· 977 984 gfs2_ail1_wait(sdp); 978 985 empty = gfs2_ail1_empty(sdp, 0); 979 986 980 - if (gfs2_withdrawing_or_withdrawn(sdp)) 987 + if (gfs2_withdrawn(sdp)) 981 988 break; 982 989 } 983 - 984 - if (gfs2_withdrawing(sdp)) 985 - gfs2_withdraw(sdp); 986 990 } 987 991 988 992 /** ··· 1040 1050 * Do this check while holding the log_flush_lock to prevent new 1041 1051 * buffers from being added to the ail via gfs2_pin() 1042 1052 */ 1043 - if (gfs2_withdrawing_or_withdrawn(sdp) || 1053 + if (gfs2_withdrawn(sdp) || 1044 1054 !test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) 1045 1055 goto out; 1046 1056 ··· 1061 1071 sdp->sd_log_tr = NULL; 1062 1072 tr->tr_first = first_log_head; 1063 1073 if (unlikely(frozen)) { 1064 - if (gfs2_assert_withdraw_delayed(sdp, 1074 + if 
(gfs2_assert_withdraw(sdp, 1065 1075 !tr->tr_num_buf_new && !tr->tr_num_databuf_new)) 1066 1076 goto out_withdraw; 1067 1077 } ··· 1086 1096 clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); 1087 1097 1088 1098 if (unlikely(frozen)) 1089 - if (gfs2_assert_withdraw_delayed(sdp, !reserved_revokes)) 1099 + if (gfs2_assert_withdraw(sdp, !reserved_revokes)) 1090 1100 goto out_withdraw; 1091 1101 1092 1102 gfs2_ordered_write(sdp); 1093 - if (gfs2_withdrawing_or_withdrawn(sdp)) 1103 + if (gfs2_withdrawn(sdp)) 1094 1104 goto out_withdraw; 1095 1105 lops_before_commit(sdp, tr); 1096 - if (gfs2_withdrawing_or_withdrawn(sdp)) 1106 + if (gfs2_withdrawn(sdp)) 1097 1107 goto out_withdraw; 1098 1108 if (sdp->sd_jdesc) 1099 1109 gfs2_log_submit_bio(&sdp->sd_jdesc->jd_log_bio, REQ_OP_WRITE); 1100 - if (gfs2_withdrawing_or_withdrawn(sdp)) 1110 + if (gfs2_withdrawn(sdp)) 1101 1111 goto out_withdraw; 1102 1112 1103 1113 if (sdp->sd_log_head != sdp->sd_log_flush_head) { ··· 1105 1115 } else if (sdp->sd_log_tail != sdp->sd_log_flush_tail && !sdp->sd_log_idle) { 1106 1116 log_write_header(sdp, flags); 1107 1117 } 1108 - if (gfs2_withdrawing_or_withdrawn(sdp)) 1118 + if (gfs2_withdrawn(sdp)) 1109 1119 goto out_withdraw; 1110 1120 lops_after_commit(sdp, tr); 1111 1121 ··· 1123 1133 if (!(flags & GFS2_LOG_HEAD_FLUSH_NORMAL)) { 1124 1134 if (!sdp->sd_log_idle) { 1125 1135 empty_ail1_list(sdp); 1126 - if (gfs2_withdrawing_or_withdrawn(sdp)) 1136 + if (gfs2_withdrawn(sdp)) 1127 1137 goto out_withdraw; 1128 1138 log_write_header(sdp, flags); 1129 1139 } ··· 1141 1151 reserved_blocks += (reserved_revokes - sdp->sd_ldptrs) / sdp->sd_inptrs; 1142 1152 out: 1143 1153 if (used_blocks != reserved_blocks) { 1144 - gfs2_assert_withdraw_delayed(sdp, used_blocks < reserved_blocks); 1154 + gfs2_assert_withdraw(sdp, used_blocks < reserved_blocks); 1145 1155 gfs2_log_release(sdp, reserved_blocks - used_blocks); 1146 1156 } 1147 1157 up_write(&sdp->sd_log_flush_lock); 1148 1158 gfs2_trans_free(sdp, tr); 1149 - if 
(gfs2_withdrawing(sdp)) 1150 - gfs2_withdraw(sdp); 1151 1159 trace_gfs2_log_flush(sdp, 0, flags); 1152 1160 return; 1153 1161 ··· 1292 1304 1293 1305 set_freezable(); 1294 1306 while (!kthread_should_stop()) { 1295 - if (gfs2_withdrawing_or_withdrawn(sdp)) 1307 + if (gfs2_withdrawn(sdp)) 1296 1308 break; 1297 - 1298 - /* Check for errors writing to the journal */ 1299 - if (sdp->sd_log_error) { 1300 - gfs2_lm(sdp, 1301 - "GFS2: fsid=%s: error %d: " 1302 - "withdrawing the file system to " 1303 - "prevent further damage.\n", 1304 - sdp->sd_fsname, sdp->sd_log_error); 1305 - gfs2_withdraw(sdp); 1306 - break; 1307 - } 1308 1309 1309 1310 if (gfs2_jrnl_flush_reqd(sdp) || t == 0) { 1310 1311 gfs2_ail1_empty(sdp, 0); ··· 1317 1340 test_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags) || 1318 1341 gfs2_ail_flush_reqd(sdp) || 1319 1342 gfs2_jrnl_flush_reqd(sdp) || 1320 - sdp->sd_log_error || 1321 - gfs2_withdrawing_or_withdrawn(sdp) || 1343 + gfs2_withdrawn(sdp) || 1322 1344 kthread_should_stop(), 1323 1345 t); 1324 1346 } 1325 - 1326 - if (gfs2_withdrawing(sdp)) 1327 - gfs2_withdraw(sdp); 1328 1347 1329 1348 return 0; 1330 1349 }
+4 -8
fs/gfs2/lops.c
··· 49 49 if (test_set_buffer_pinned(bh)) 50 50 gfs2_assert_withdraw(sdp, 0); 51 51 if (!buffer_uptodate(bh)) 52 - gfs2_io_error_bh_wd(sdp, bh); 52 + gfs2_io_error_bh(sdp, bh); 53 53 bd = bh->b_private; 54 54 /* If this buffer is in the AIL and it has already been written 55 55 * to in-place disk block, remove it from the AIL. ··· 209 209 if (!cmpxchg(&sdp->sd_log_error, 0, err)) 210 210 fs_err(sdp, "Error %d writing to journal, jid=%u\n", 211 211 err, sdp->sd_jdesc->jd_jid); 212 - gfs2_withdraw_delayed(sdp); 213 - /* prevent more writes to the journal */ 214 - clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); 215 - wake_up(&sdp->sd_logd_waitq); 212 + gfs2_withdraw(sdp); 216 213 } 217 214 218 215 bio_for_each_segment_all(bvec, bio, iter_all) { ··· 484 487 new = bio_alloc(prev->bi_bdev, nr_iovecs, prev->bi_opf, GFP_NOIO); 485 488 bio_clone_blkg_association(new, prev); 486 489 new->bi_iter.bi_sector = bio_end_sector(prev); 487 - bio_chain(new, prev); 490 + bio_chain(prev, new); 488 491 submit_bio(prev); 489 492 return new; 490 493 } ··· 559 562 bio = gfs2_log_alloc_bio(sdp, dblock, gfs2_end_log_read); 560 563 bio->bi_opf = REQ_OP_READ; 561 564 add_block_to_new_bio: 562 - if (!bio_add_folio(bio, folio, bsize, off)) 563 - BUG(); 565 + bio_add_folio_nofail(bio, folio, bsize, off); 564 566 block_added: 565 567 off += bsize; 566 568 if (off == folio_size(folio))
+5 -8
fs/gfs2/meta_io.c
··· 263 263 struct buffer_head *bh, *bhs[2]; 264 264 int num = 0; 265 265 266 - if (gfs2_withdrawing_or_withdrawn(sdp) && 267 - !gfs2_withdraw_in_prog(sdp)) { 266 + if (gfs2_withdrawn(sdp)) { 268 267 *bhp = NULL; 269 268 return -EIO; 270 269 } ··· 302 303 if (unlikely(!buffer_uptodate(bh))) { 303 304 struct gfs2_trans *tr = current->journal_info; 304 305 if (tr && test_bit(TR_TOUCHED, &tr->tr_flags)) 305 - gfs2_io_error_bh_wd(sdp, bh); 306 + gfs2_io_error_bh(sdp, bh); 306 307 brelse(bh); 307 308 *bhp = NULL; 308 309 return -EIO; ··· 321 322 322 323 int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh) 323 324 { 324 - if (gfs2_withdrawing_or_withdrawn(sdp) && 325 - !gfs2_withdraw_in_prog(sdp)) 325 + if (gfs2_withdrawn(sdp)) 326 326 return -EIO; 327 327 328 328 wait_on_buffer(bh); ··· 329 331 if (!buffer_uptodate(bh)) { 330 332 struct gfs2_trans *tr = current->journal_info; 331 333 if (tr && test_bit(TR_TOUCHED, &tr->tr_flags)) 332 - gfs2_io_error_bh_wd(sdp, bh); 334 + gfs2_io_error_bh(sdp, bh); 333 335 return -EIO; 334 336 } 335 - if (gfs2_withdrawing_or_withdrawn(sdp) && 336 - !gfs2_withdraw_in_prog(sdp)) 337 + if (gfs2_withdrawn(sdp)) 337 338 return -EIO; 338 339 339 340 return 0;
+17 -20
fs/gfs2/ops_fstype.c
··· 60 60 gt->gt_new_files_jdata = 0; 61 61 gt->gt_max_readahead = BIT(18); 62 62 gt->gt_complain_secs = 10; 63 + gt->gt_withdraw_helper_timeout = 5; 63 64 } 64 65 65 66 void free_sbd(struct gfs2_sbd *sdp) ··· 93 92 init_waitqueue_head(&sdp->sd_async_glock_wait); 94 93 atomic_set(&sdp->sd_glock_disposal, 0); 95 94 init_completion(&sdp->sd_locking_init); 96 - init_completion(&sdp->sd_wdack); 95 + init_completion(&sdp->sd_withdraw_helper); 97 96 spin_lock_init(&sdp->sd_statfs_spin); 98 97 99 98 spin_lock_init(&sdp->sd_rindex_spin); ··· 371 370 error = gfs2_glock_nq_num(sdp, 372 371 GFS2_MOUNT_LOCK, &gfs2_nondisk_glops, 373 372 LM_ST_EXCLUSIVE, 374 - LM_FLAG_NOEXP | GL_NOCACHE | GL_NOPID, 373 + LM_FLAG_RECOVER | GL_NOCACHE | GL_NOPID, 375 374 mount_gh); 376 375 if (error) { 377 376 fs_err(sdp, "can't acquire mount glock: %d\n", error); ··· 381 380 error = gfs2_glock_nq_num(sdp, 382 381 GFS2_LIVE_LOCK, &gfs2_nondisk_glops, 383 382 LM_ST_SHARED, 384 - LM_FLAG_NOEXP | GL_EXACT | GL_NOPID, 383 + LM_FLAG_RECOVER | GL_EXACT | GL_NOPID, 385 384 &sdp->sd_live_gh); 386 385 if (error) { 387 386 fs_err(sdp, "can't acquire live glock: %d\n", error); ··· 543 542 mutex_lock(&sdp->sd_jindex_mutex); 544 543 545 544 for (;;) { 546 - struct gfs2_inode *jip; 547 - 548 545 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, ji_gh); 549 546 if (error) 550 547 break; ··· 583 584 d_mark_dontcache(jd->jd_inode); 584 585 spin_lock(&sdp->sd_jindex_spin); 585 586 jd->jd_jid = sdp->sd_journals++; 586 - jip = GFS2_I(jd->jd_inode); 587 - jd->jd_no_addr = jip->i_no_addr; 588 587 list_add_tail(&jd->jd_list, &sdp->sd_jindex_list); 589 588 spin_unlock(&sdp->sd_jindex_spin); 590 589 } ··· 742 745 error = gfs2_glock_nq_num(sdp, sdp->sd_lockstruct.ls_jid, 743 746 &gfs2_journal_glops, 744 747 LM_ST_EXCLUSIVE, 745 - LM_FLAG_NOEXP | GL_NOCACHE | GL_NOPID, 748 + LM_FLAG_RECOVER | GL_NOPID, 746 749 &sdp->sd_journal_gh); 747 750 if (error) { 748 751 fs_err(sdp, "can't acquire journal glock: %d\n", error); 
··· 750 753 } 751 754 752 755 ip = GFS2_I(sdp->sd_jdesc->jd_inode); 753 - sdp->sd_jinode_gl = ip->i_gl; 754 756 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 755 - LM_FLAG_NOEXP | GL_EXACT | 757 + LM_FLAG_RECOVER | GL_EXACT | 756 758 GL_NOCACHE | GL_NOPID, 757 759 &sdp->sd_jinode_gh); 758 760 if (error) { ··· 817 821 fail_statfs: 818 822 uninit_statfs(sdp); 819 823 fail_jinode_gh: 820 - /* A withdraw may have done dq/uninit so now we need to check it */ 821 - if (!sdp->sd_args.ar_spectator && 822 - gfs2_holder_initialized(&sdp->sd_jinode_gh)) 824 + if (!sdp->sd_args.ar_spectator) 823 825 gfs2_glock_dq_uninit(&sdp->sd_jinode_gh); 824 826 fail_journal_gh: 825 - if (!sdp->sd_args.ar_spectator && 826 - gfs2_holder_initialized(&sdp->sd_journal_gh)) 827 + if (!sdp->sd_args.ar_spectator) 827 828 gfs2_glock_dq_uninit(&sdp->sd_journal_gh); 828 829 fail_jindex: 829 830 gfs2_jindex_free(sdp); ··· 1033 1040 void gfs2_lm_unmount(struct gfs2_sbd *sdp) 1034 1041 { 1035 1042 const struct lm_lockops *lm = sdp->sd_lockstruct.ls_ops; 1036 - if (!gfs2_withdrawing_or_withdrawn(sdp) && lm->lm_unmount) 1037 - lm->lm_unmount(sdp); 1043 + if (!gfs2_withdrawn(sdp) && lm->lm_unmount) 1044 + lm->lm_unmount(sdp, true); 1038 1045 } 1039 1046 1040 1047 static int wait_on_journal(struct gfs2_sbd *sdp) ··· 1176 1183 1177 1184 mapping = gfs2_aspace(sdp); 1178 1185 mapping->a_ops = &gfs2_rgrp_aops; 1179 - mapping_set_gfp_mask(mapping, GFP_NOFS); 1186 + gfs2_setup_inode(sdp->sd_inode); 1180 1187 1181 1188 error = init_names(sdp, silent); 1182 1189 if (error) ··· 1207 1214 error = gfs2_lm_mount(sdp, silent); 1208 1215 if (error) 1209 1216 goto fail_debug; 1217 + 1218 + INIT_WORK(&sdp->sd_withdraw_work, gfs2_withdraw_func); 1210 1219 1211 1220 error = init_locking(sdp, &mount_gh, DO); 1212 1221 if (error) ··· 1396 1401 }; 1397 1402 1398 1403 enum opt_errors { 1399 - Opt_errors_withdraw = GFS2_ERRORS_WITHDRAW, 1400 - Opt_errors_panic = GFS2_ERRORS_PANIC, 1404 + Opt_errors_withdraw = 
GFS2_ERRORS_WITHDRAW, 1405 + Opt_errors_deactivate = GFS2_ERRORS_DEACTIVATE, 1406 + Opt_errors_panic = GFS2_ERRORS_PANIC, 1401 1407 }; 1402 1408 1403 1409 static const struct constant_table gfs2_param_errors[] = { 1404 1410 {"withdraw", Opt_errors_withdraw }, 1411 + {"deactivate", Opt_errors_deactivate }, 1405 1412 {"panic", Opt_errors_panic }, 1406 1413 {} 1407 1414 };
+31 -35
fs/gfs2/quota.c
··· 125 125 hlist_bl_del_rcu(&qd->qd_hlist); 126 126 spin_unlock_bucket(qd->qd_hash); 127 127 128 - if (!gfs2_withdrawing_or_withdrawn(sdp)) { 128 + if (!gfs2_withdrawn(sdp)) { 129 129 gfs2_assert_warn(sdp, !qd->qd_change); 130 130 gfs2_assert_warn(sdp, !qd->qd_slot_ref); 131 131 gfs2_assert_warn(sdp, !qd->qd_bh_count); ··· 1551 1551 { 1552 1552 if (error == 0 || error == -EROFS) 1553 1553 return; 1554 - if (!gfs2_withdrawing_or_withdrawn(sdp)) { 1554 + if (!gfs2_withdrawn(sdp)) { 1555 1555 if (!cmpxchg(&sdp->sd_log_error, 0, error)) 1556 1556 fs_err(sdp, "gfs2_quotad: %s error %d\n", msg, error); 1557 1557 wake_up(&sdp->sd_logd_waitq); 1558 - } 1559 - } 1560 - 1561 - static void quotad_check_timeo(struct gfs2_sbd *sdp, const char *msg, 1562 - int (*fxn)(struct super_block *sb, int type), 1563 - unsigned long t, unsigned long *timeo, 1564 - unsigned int *new_timeo) 1565 - { 1566 - if (t >= *timeo) { 1567 - int error = fxn(sdp->sd_vfs, 0); 1568 - quotad_error(sdp, msg, error); 1569 - *timeo = gfs2_tune_get_i(&sdp->sd_tune, new_timeo) * HZ; 1570 - } else { 1571 - *timeo -= t; 1572 1558 } 1573 1559 } 1574 1560 ··· 1575 1589 int gfs2_quotad(void *data) 1576 1590 { 1577 1591 struct gfs2_sbd *sdp = data; 1578 - struct gfs2_tune *tune = &sdp->sd_tune; 1579 - unsigned long statfs_timeo = 0; 1580 - unsigned long quotad_timeo = 0; 1581 - unsigned long t = 0; 1592 + unsigned long now = jiffies; 1593 + unsigned long statfs_deadline = now; 1594 + unsigned long quotad_deadline = now; 1582 1595 1583 1596 set_freezable(); 1584 1597 while (!kthread_should_stop()) { 1585 - if (gfs2_withdrawing_or_withdrawn(sdp)) 1598 + unsigned long t; 1599 + 1600 + if (gfs2_withdrawn(sdp)) 1586 1601 break; 1587 1602 1588 - /* Update the master statfs file */ 1589 - if (sdp->sd_statfs_force_sync) { 1590 - int error = gfs2_statfs_sync(sdp->sd_vfs, 0); 1603 + now = jiffies; 1604 + if (sdp->sd_statfs_force_sync || 1605 + time_after(now, statfs_deadline)) { 1606 + unsigned int quantum; 1607 + int error; 
1608 + 1609 + /* Update the master statfs file */ 1610 + error = gfs2_statfs_sync(sdp->sd_vfs, 0); 1591 1611 quotad_error(sdp, "statfs", error); 1592 - statfs_timeo = gfs2_tune_get(sdp, gt_statfs_quantum) * HZ; 1612 + 1613 + quantum = gfs2_tune_get(sdp, gt_statfs_quantum); 1614 + statfs_deadline = now + quantum * HZ; 1593 1615 } 1594 - else 1595 - quotad_check_timeo(sdp, "statfs", gfs2_statfs_sync, t, 1596 - &statfs_timeo, 1597 - &tune->gt_statfs_quantum); 1616 + if (time_after(now, quotad_deadline)) { 1617 + unsigned int quantum; 1618 + int error; 1598 1619 1599 - /* Update quota file */ 1600 - quotad_check_timeo(sdp, "sync", gfs2_quota_sync, t, 1601 - &quotad_timeo, &tune->gt_quota_quantum); 1620 + /* Update the quota file */ 1621 + error = gfs2_quota_sync(sdp->sd_vfs, 0); 1622 + quotad_error(sdp, "sync", error); 1602 1623 1603 - t = min(quotad_timeo, statfs_timeo); 1624 + quantum = gfs2_tune_get(sdp, gt_quota_quantum); 1625 + quotad_deadline = now + quantum * HZ; 1626 + } 1604 1627 1605 - t = wait_event_freezable_timeout(sdp->sd_quota_wait, 1628 + t = min(statfs_deadline - now, quotad_deadline - now); 1629 + wait_event_freezable_timeout(sdp->sd_quota_wait, 1606 1630 sdp->sd_statfs_force_sync || 1607 - gfs2_withdrawing_or_withdrawn(sdp) || 1631 + gfs2_withdrawn(sdp) || 1608 1632 kthread_should_stop(), 1609 1633 t); 1610 1634
+5 -3
fs/gfs2/recovery.c
··· 408 408 int error = 0; 409 409 int jlocked = 0; 410 410 411 - if (gfs2_withdrawing_or_withdrawn(sdp)) { 411 + if (gfs2_withdrawn(sdp)) { 412 412 fs_err(sdp, "jid=%u: Recovery not attempted due to withdraw.\n", 413 413 jd->jd_jid); 414 414 goto fail; ··· 424 424 425 425 error = gfs2_glock_nq_num(sdp, jd->jd_jid, &gfs2_journal_glops, 426 426 LM_ST_EXCLUSIVE, 427 - LM_FLAG_NOEXP | LM_FLAG_TRY | GL_NOCACHE, 427 + LM_FLAG_RECOVER | LM_FLAG_TRY | 428 + GL_NOCACHE, 428 429 &j_gh); 429 430 switch (error) { 430 431 case 0: ··· 441 440 } 442 441 443 442 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 444 - LM_FLAG_NOEXP | GL_NOCACHE, &ji_gh); 443 + LM_FLAG_RECOVER | GL_NOCACHE, 444 + &ji_gh); 445 445 if (error) 446 446 goto fail_gunlock_j; 447 447 } else {
+11 -24
fs/gfs2/super.c
··· 137 137 int error; 138 138 139 139 j_gl->gl_ops->go_inval(j_gl, DIO_METADATA); 140 - if (gfs2_withdrawing_or_withdrawn(sdp)) 140 + if (gfs2_withdrawn(sdp)) 141 141 return -EIO; 142 142 143 143 if (sdp->sd_log_sequence == 0) { ··· 147 147 } 148 148 149 149 error = gfs2_quota_init(sdp); 150 - if (!error && gfs2_withdrawing_or_withdrawn(sdp)) 150 + if (!error && gfs2_withdrawn(sdp)) 151 151 error = -EIO; 152 152 if (!error) 153 153 set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); ··· 351 351 gfs2_freeze_unlock(sdp); 352 352 353 353 error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_EXCLUSIVE, 354 - LM_FLAG_NOEXP | GL_NOPID, 354 + LM_FLAG_RECOVER | GL_NOPID, 355 355 &sdp->sd_freeze_gh); 356 356 if (error) 357 357 goto relock_shared; ··· 491 491 if (unlikely(!ip->i_gl)) 492 492 return; 493 493 494 - if (gfs2_withdrawing_or_withdrawn(sdp)) 494 + if (gfs2_withdrawn(sdp)) 495 495 return; 496 496 if (!gfs2_glock_is_locked_by_me(ip->i_gl)) { 497 497 ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); ··· 597 597 if (!sb_rdonly(sb)) 598 598 gfs2_make_fs_ro(sdp); 599 599 else { 600 - if (gfs2_withdrawing_or_withdrawn(sdp)) 600 + if (gfs2_withdrawn(sdp)) 601 601 gfs2_destroy_threads(sdp); 602 602 603 603 gfs2_quota_cleanup(sdp); 604 604 } 605 605 606 - WARN_ON(gfs2_withdrawing(sdp)); 606 + flush_work(&sdp->sd_withdraw_work); 607 607 608 608 /* At this point, we're through modifying the disk */ 609 609 ··· 749 749 break; 750 750 } 751 751 752 - error = gfs2_do_thaw(sdp, who, freeze_owner); 753 - if (error) 754 - goto out; 752 + (void)gfs2_do_thaw(sdp, who, freeze_owner); 755 753 756 754 if (error == -EBUSY) 757 755 fs_err(sdp, "waiting for recovery before freeze\n"); ··· 776 778 if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { 777 779 gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_FREEZE | 778 780 GFS2_LFC_FREEZE_GO_SYNC); 779 - if (gfs2_withdrawing_or_withdrawn(sdp)) 781 + if (gfs2_withdrawn(sdp)) 780 782 return -EIO; 781 783 } 782 784 return 0; ··· 815 817 
mutex_unlock(&sdp->sd_freeze_mutex); 816 818 deactivate_super(sb); 817 819 return error; 818 - } 819 - 820 - void gfs2_thaw_freeze_initiator(struct super_block *sb) 821 - { 822 - struct gfs2_sbd *sdp = sb->s_fs_info; 823 - 824 - mutex_lock(&sdp->sd_freeze_mutex); 825 - if (!test_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags)) 826 - goto out; 827 - 828 - gfs2_freeze_unlock(sdp); 829 - 830 - out: 831 - mutex_unlock(&sdp->sd_freeze_mutex); 832 820 } 833 821 834 822 /** ··· 1130 1146 switch (args->ar_errors) { 1131 1147 case GFS2_ERRORS_WITHDRAW: 1132 1148 state = "withdraw"; 1149 + break; 1150 + case GFS2_ERRORS_DEACTIVATE: 1151 + state = "deactivate"; 1133 1152 break; 1134 1153 case GFS2_ERRORS_PANIC: 1135 1154 state = "panic";
-1
fs/gfs2/super.h
··· 47 47 void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh); 48 48 int gfs2_statfs_sync(struct super_block *sb, int type); 49 49 void gfs2_freeze_func(struct work_struct *work); 50 - void gfs2_thaw_freeze_initiator(struct super_block *sb); 51 50 52 51 void free_local_statfs_inodes(struct gfs2_sbd *sdp); 53 52 struct inode *find_local_statfs_inode(struct gfs2_sbd *sdp,
+26 -38
fs/gfs2/sys.c
··· 59 59 60 60 static ssize_t id_show(struct gfs2_sbd *sdp, char *buf) 61 61 { 62 - return snprintf(buf, PAGE_SIZE, "%u:%u\n", 62 + return sysfs_emit(buf, "%u:%u\n", 63 63 MAJOR(sdp->sd_vfs->s_dev), MINOR(sdp->sd_vfs->s_dev)); 64 64 } 65 65 ··· 68 68 unsigned long f = sdp->sd_flags; 69 69 ssize_t s; 70 70 71 - s = snprintf(buf, PAGE_SIZE, 71 + s = sysfs_emit(buf, 72 72 "Journal Checked: %d\n" 73 73 "Journal Live: %d\n" 74 74 "Journal ID: %d\n" ··· 84 84 "Force AIL Flush: %d\n" 85 85 "FS Freeze Initiator: %d\n" 86 86 "FS Frozen: %d\n" 87 - "Withdrawing: %d\n" 88 - "Withdraw In Prog: %d\n" 89 - "Remote Withdraw: %d\n" 90 - "Withdraw Recovery: %d\n" 91 87 "Killing: %d\n" 92 88 "sd_log_error: %d\n" 93 89 "sd_log_flush_lock: %d\n" ··· 113 117 test_bit(SDF_FORCE_AIL_FLUSH, &f), 114 118 test_bit(SDF_FREEZE_INITIATOR, &f), 115 119 test_bit(SDF_FROZEN, &f), 116 - test_bit(SDF_WITHDRAWING, &f), 117 - test_bit(SDF_WITHDRAW_IN_PROG, &f), 118 - test_bit(SDF_REMOTE_WITHDRAW, &f), 119 - test_bit(SDF_WITHDRAW_RECOVERY, &f), 120 120 test_bit(SDF_KILL, &f), 121 121 sdp->sd_log_error, 122 122 rwsem_is_locked(&sdp->sd_log_flush_lock), ··· 132 140 133 141 static ssize_t fsname_show(struct gfs2_sbd *sdp, char *buf) 134 142 { 135 - return snprintf(buf, PAGE_SIZE, "%s\n", sdp->sd_fsname); 143 + return sysfs_emit(buf, "%s\n", sdp->sd_fsname); 136 144 } 137 145 138 146 static ssize_t uuid_show(struct gfs2_sbd *sdp, char *buf) ··· 142 150 buf[0] = '\0'; 143 151 if (uuid_is_null(&s->s_uuid)) 144 152 return 0; 145 - return snprintf(buf, PAGE_SIZE, "%pUB\n", &s->s_uuid); 153 + return sysfs_emit(buf, "%pUB\n", &s->s_uuid); 146 154 } 147 155 148 156 static ssize_t freeze_show(struct gfs2_sbd *sdp, char *buf) ··· 150 158 struct super_block *sb = sdp->sd_vfs; 151 159 int frozen = (sb->s_writers.frozen == SB_UNFROZEN) ? 
0 : 1; 152 160 153 - return snprintf(buf, PAGE_SIZE, "%d\n", frozen); 161 + return sysfs_emit(buf, "%d\n", frozen); 154 162 } 155 163 156 164 static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len) ··· 185 193 186 194 static ssize_t withdraw_show(struct gfs2_sbd *sdp, char *buf) 187 195 { 188 - unsigned int b = gfs2_withdrawing_or_withdrawn(sdp); 189 - return snprintf(buf, PAGE_SIZE, "%u\n", b); 196 + unsigned int b = gfs2_withdrawn(sdp); 197 + return sysfs_emit(buf, "%u\n", b); 190 198 } 191 199 192 200 static ssize_t withdraw_store(struct gfs2_sbd *sdp, const char *buf, size_t len) ··· 389 397 static ssize_t proto_name_show(struct gfs2_sbd *sdp, char *buf) 390 398 { 391 399 const struct lm_lockops *ops = sdp->sd_lockstruct.ls_ops; 392 - return sprintf(buf, "%s\n", ops->lm_proto_name); 400 + return sysfs_emit(buf, "%s\n", ops->lm_proto_name); 393 401 } 394 402 395 403 static ssize_t block_show(struct gfs2_sbd *sdp, char *buf) ··· 400 408 401 409 if (test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags)) 402 410 val = 1; 403 - ret = sprintf(buf, "%d\n", val); 411 + ret = sysfs_emit(buf, "%d\n", val); 404 412 return ret; 405 413 } 406 414 ··· 425 433 return len; 426 434 } 427 435 428 - static ssize_t wdack_show(struct gfs2_sbd *sdp, char *buf) 429 - { 430 - int val = completion_done(&sdp->sd_wdack) ? 
1 : 0; 431 - 432 - return sprintf(buf, "%d\n", val); 433 - } 434 - 435 - static ssize_t wdack_store(struct gfs2_sbd *sdp, const char *buf, size_t len) 436 + static ssize_t withdraw_helper_status_store(struct gfs2_sbd *sdp, 437 + const char *buf, 438 + size_t len) 436 439 { 437 440 int ret, val; 438 441 439 442 ret = kstrtoint(buf, 0, &val); 440 443 if (ret) 441 444 return ret; 442 - 443 - if ((val == 1) && 444 - !strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, "lock_dlm")) 445 - complete(&sdp->sd_wdack); 446 - else 445 + if (val < 0 || val > 1) 447 446 return -EINVAL; 447 + 448 + sdp->sd_withdraw_helper_status = val; 449 + complete(&sdp->sd_withdraw_helper); 448 450 return len; 449 451 } 450 452 451 453 static ssize_t lkfirst_show(struct gfs2_sbd *sdp, char *buf) 452 454 { 453 455 struct lm_lockstruct *ls = &sdp->sd_lockstruct; 454 - return sprintf(buf, "%d\n", ls->ls_first); 456 + return sysfs_emit(buf, "%d\n", ls->ls_first); 455 457 } 456 458 457 459 static ssize_t lkfirst_store(struct gfs2_sbd *sdp, const char *buf, size_t len) ··· 478 492 static ssize_t first_done_show(struct gfs2_sbd *sdp, char *buf) 479 493 { 480 494 struct lm_lockstruct *ls = &sdp->sd_lockstruct; 481 - return sprintf(buf, "%d\n", !!test_bit(DFL_FIRST_MOUNT_DONE, &ls->ls_recover_flags)); 495 + return sysfs_emit(buf, "%d\n", !!test_bit(DFL_FIRST_MOUNT_DONE, &ls->ls_recover_flags)); 482 496 } 483 497 484 498 int gfs2_recover_set(struct gfs2_sbd *sdp, unsigned jid) ··· 536 550 static ssize_t recover_done_show(struct gfs2_sbd *sdp, char *buf) 537 551 { 538 552 struct lm_lockstruct *ls = &sdp->sd_lockstruct; 539 - return sprintf(buf, "%d\n", ls->ls_recover_jid_done); 553 + return sysfs_emit(buf, "%d\n", ls->ls_recover_jid_done); 540 554 } 541 555 542 556 static ssize_t recover_status_show(struct gfs2_sbd *sdp, char *buf) 543 557 { 544 558 struct lm_lockstruct *ls = &sdp->sd_lockstruct; 545 - return sprintf(buf, "%d\n", ls->ls_recover_jid_status); 559 + return sysfs_emit(buf, "%d\n", 
ls->ls_recover_jid_status); 546 560 } 547 561 548 562 static ssize_t jid_show(struct gfs2_sbd *sdp, char *buf) 549 563 { 550 - return sprintf(buf, "%d\n", sdp->sd_lockstruct.ls_jid); 564 + return sysfs_emit(buf, "%d\n", sdp->sd_lockstruct.ls_jid); 551 565 } 552 566 553 567 static ssize_t jid_store(struct gfs2_sbd *sdp, const char *buf, size_t len) ··· 585 599 586 600 GDLM_ATTR(proto_name, 0444, proto_name_show, NULL); 587 601 GDLM_ATTR(block, 0644, block_show, block_store); 588 - GDLM_ATTR(withdraw, 0644, wdack_show, wdack_store); 602 + GDLM_ATTR(withdraw, 0200, NULL, withdraw_helper_status_store); 589 603 GDLM_ATTR(jid, 0644, jid_show, jid_store); 590 604 GDLM_ATTR(first, 0644, lkfirst_show, lkfirst_store); 591 605 GDLM_ATTR(first_done, 0444, first_done_show, NULL); ··· 612 626 613 627 static ssize_t quota_scale_show(struct gfs2_sbd *sdp, char *buf) 614 628 { 615 - return snprintf(buf, PAGE_SIZE, "%u %u\n", 629 + return sysfs_emit(buf, "%u %u\n", 616 630 sdp->sd_tune.gt_quota_scale_num, 617 631 sdp->sd_tune.gt_quota_scale_den); 618 632 } ··· 665 679 #define TUNE_ATTR_2(name, store) \ 666 680 static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \ 667 681 { \ 668 - return snprintf(buf, PAGE_SIZE, "%u\n", sdp->sd_tune.gt_##name); \ 682 + return sysfs_emit(buf, "%u\n", sdp->sd_tune.gt_##name); \ 669 683 } \ 670 684 TUNE_ATTR_3(name, name##_show, store) 671 685 ··· 684 698 TUNE_ATTR(new_files_jdata, 0); 685 699 TUNE_ATTR(statfs_quantum, 1); 686 700 TUNE_ATTR_3(quota_scale, quota_scale_show, quota_scale_store); 701 + TUNE_ATTR(withdraw_helper_timeout, 1); 687 702 688 703 static struct attribute *tune_attrs[] = { 689 704 &tune_attr_quota_warn_period.attr, ··· 695 708 &tune_attr_statfs_quantum.attr, 696 709 &tune_attr_quota_scale.attr, 697 710 &tune_attr_new_files_jdata.attr, 711 + &tune_attr_withdraw_helper_timeout.attr, 698 712 NULL, 699 713 }; 700 714
-1
fs/gfs2/trace_gfs2.h
··· 59 59 {(1UL << GLF_LRU), "L" }, \ 60 60 {(1UL << GLF_OBJECT), "o" }, \ 61 61 {(1UL << GLF_BLOCKING), "b" }, \ 62 - {(1UL << GLF_UNLOCKED), "x" }, \ 63 62 {(1UL << GLF_INSTANTIATE_NEEDED), "n" }, \ 64 63 {(1UL << GLF_INSTANTIATE_IN_PROG), "N" }, \ 65 64 {(1UL << GLF_TRY_TO_EVICT), "e" }, \
+17 -15
fs/gfs2/trans.c
··· 49 49 } 50 50 BUG_ON(blocks == 0 && revokes == 0); 51 51 52 - if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) 52 + if (gfs2_withdrawn(sdp)) 53 53 return -EROFS; 54 54 55 55 tr->tr_ip = ip; ··· 85 85 */ 86 86 87 87 down_read(&sdp->sd_log_flush_lock); 88 + if (unlikely(!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))) 89 + goto out_not_live; 88 90 if (gfs2_log_try_reserve(sdp, tr, &extra_revokes)) 89 91 goto reserved; 92 + 90 93 up_read(&sdp->sd_log_flush_lock); 91 94 gfs2_log_reserve(sdp, tr, &extra_revokes); 92 95 down_read(&sdp->sd_log_flush_lock); 96 + if (unlikely(!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))) { 97 + revokes = tr->tr_revokes + extra_revokes; 98 + gfs2_log_release_revokes(sdp, revokes); 99 + gfs2_log_release(sdp, tr->tr_reserved); 100 + goto out_not_live; 101 + } 93 102 94 103 reserved: 95 104 gfs2_log_release_revokes(sdp, extra_revokes); 96 - if (unlikely(!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))) { 97 - gfs2_log_release_revokes(sdp, tr->tr_revokes); 98 - up_read(&sdp->sd_log_flush_lock); 99 - gfs2_log_release(sdp, tr->tr_reserved); 100 - sb_end_intwrite(sdp->sd_vfs); 101 - return -EROFS; 102 - } 103 - 104 105 current->journal_info = tr; 105 - 106 106 return 0; 107 + 108 + out_not_live: 109 + up_read(&sdp->sd_log_flush_lock); 110 + sb_end_intwrite(sdp->sd_vfs); 111 + return -EROFS; 107 112 } 108 113 109 114 int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, ··· 260 255 struct gfs2_bufdata *bd; 261 256 struct gfs2_meta_header *mh; 262 257 struct gfs2_trans *tr = current->journal_info; 263 - bool withdraw = false; 264 258 265 259 lock_buffer(bh); 266 260 if (buffer_pinned(bh)) { ··· 293 289 (unsigned long long)bd->bd_bh->b_blocknr); 294 290 BUG(); 295 291 } 296 - if (gfs2_withdrawing_or_withdrawn(sdp)) { 292 + if (gfs2_withdrawn(sdp)) { 297 293 fs_info(sdp, "GFS2:adding buf while withdrawn! 
0x%llx\n", 298 294 (unsigned long long)bd->bd_bh->b_blocknr); 299 295 goto out_unlock; 300 296 } 301 297 if (unlikely(sb->s_writers.frozen == SB_FREEZE_COMPLETE)) { 302 298 fs_info(sdp, "GFS2:adding buf while frozen\n"); 303 - withdraw = true; 299 + gfs2_withdraw(sdp); 304 300 goto out_unlock; 305 301 } 306 302 gfs2_pin(sdp, bd->bd_bh); ··· 310 306 tr->tr_num_buf_new++; 311 307 out_unlock: 312 308 gfs2_log_unlock(sdp); 313 - if (withdraw) 314 - gfs2_assert_withdraw(sdp, 0); 315 309 out: 316 310 unlock_buffer(bh); 317 311 }
+112 -216
fs/gfs2/util.c
··· 58 58 struct gfs2_inode *ip; 59 59 60 60 ip = GFS2_I(jd->jd_inode); 61 - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_NOEXP | 61 + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_RECOVER | 62 62 GL_EXACT | GL_NOCACHE, &j_gh); 63 63 if (error) { 64 64 if (verbose) ··· 99 99 */ 100 100 int gfs2_freeze_lock_shared(struct gfs2_sbd *sdp) 101 101 { 102 - int flags = LM_FLAG_NOEXP | GL_EXACT; 102 + int flags = LM_FLAG_RECOVER | GL_EXACT; 103 103 int error; 104 104 105 105 error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, flags, ··· 115 115 gfs2_glock_dq_uninit(&sdp->sd_freeze_gh); 116 116 } 117 117 118 - static void signal_our_withdraw(struct gfs2_sbd *sdp) 118 + static void do_withdraw(struct gfs2_sbd *sdp) 119 119 { 120 - struct gfs2_glock *live_gl = sdp->sd_live_gh.gh_gl; 121 - struct inode *inode; 122 - struct gfs2_inode *ip; 123 - struct gfs2_glock *i_gl; 124 - u64 no_formal_ino; 125 - int ret = 0; 126 - int tries; 127 - 128 - if (test_bit(SDF_NORECOVERY, &sdp->sd_flags) || !sdp->sd_jdesc) 120 + down_write(&sdp->sd_log_flush_lock); 121 + if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { 122 + up_write(&sdp->sd_log_flush_lock); 129 123 return; 124 + } 125 + clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); 126 + up_write(&sdp->sd_log_flush_lock); 130 127 131 128 gfs2_ail_drain(sdp); /* frees all transactions */ 132 - inode = sdp->sd_jdesc->jd_inode; 133 - ip = GFS2_I(inode); 134 - i_gl = ip->i_gl; 135 - no_formal_ino = ip->i_no_formal_ino; 136 129 137 - /* Prevent any glock dq until withdraw recovery is complete */ 138 - set_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags); 139 - /* 140 - * Don't tell dlm we're bailing until we have no more buffers in the 141 - * wind. If journal had an IO error, the log code should just purge 142 - * the outstanding buffers rather than submitting new IO. Making the 143 - * file system read-only will flush the journal, etc. 
144 - * 145 - * During a normal unmount, gfs2_make_fs_ro calls gfs2_log_shutdown 146 - * which clears SDF_JOURNAL_LIVE. In a withdraw, we must not write 147 - * any UNMOUNT log header, so we can't call gfs2_log_shutdown, and 148 - * therefore we need to clear SDF_JOURNAL_LIVE manually. 149 - */ 150 - clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); 151 - if (!sb_rdonly(sdp->sd_vfs)) { 152 - bool locked = mutex_trylock(&sdp->sd_freeze_mutex); 130 + wake_up(&sdp->sd_logd_waitq); 131 + wake_up(&sdp->sd_quota_wait); 153 132 154 - wake_up(&sdp->sd_logd_waitq); 155 - wake_up(&sdp->sd_quota_wait); 133 + wait_event_timeout(sdp->sd_log_waitq, 134 + gfs2_log_is_empty(sdp), 135 + HZ * 5); 156 136 157 - wait_event_timeout(sdp->sd_log_waitq, 158 - gfs2_log_is_empty(sdp), 159 - HZ * 5); 160 - 161 - sdp->sd_vfs->s_flags |= SB_RDONLY; 162 - 163 - if (locked) 164 - mutex_unlock(&sdp->sd_freeze_mutex); 165 - 166 - /* 167 - * Dequeue any pending non-system glock holders that can no 168 - * longer be granted because the file system is withdrawn. 169 - */ 170 - gfs2_gl_dq_holders(sdp); 171 - } 172 - 173 - if (sdp->sd_lockstruct.ls_ops->lm_lock == NULL) { /* lock_nolock */ 174 - if (!ret) 175 - ret = -EIO; 176 - clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags); 177 - goto skip_recovery; 178 - } 179 - /* 180 - * Drop the glock for our journal so another node can recover it. 181 - */ 182 - if (gfs2_holder_initialized(&sdp->sd_journal_gh)) { 183 - gfs2_glock_dq_wait(&sdp->sd_journal_gh); 184 - gfs2_holder_uninit(&sdp->sd_journal_gh); 185 - } 186 - sdp->sd_jinode_gh.gh_flags |= GL_NOCACHE; 187 - gfs2_glock_dq(&sdp->sd_jinode_gh); 188 - gfs2_thaw_freeze_initiator(sdp->sd_vfs); 189 - wait_on_bit(&i_gl->gl_flags, GLF_DEMOTE, TASK_UNINTERRUPTIBLE); 137 + sdp->sd_vfs->s_flags |= SB_RDONLY; 190 138 191 139 /* 192 - * holder_uninit to force glock_put, to force dlm to let go 140 + * Dequeue any pending non-system glock holders that can no 141 + * longer be granted because the file system is withdrawn. 
193 142 */ 194 - gfs2_holder_uninit(&sdp->sd_jinode_gh); 195 - 196 - /* 197 - * Note: We need to be careful here: 198 - * Our iput of jd_inode will evict it. The evict will dequeue its 199 - * glock, but the glock dq will wait for the withdraw unless we have 200 - * exception code in glock_dq. 201 - */ 202 - iput(inode); 203 - sdp->sd_jdesc->jd_inode = NULL; 204 - /* 205 - * Wait until the journal inode's glock is freed. This allows try locks 206 - * on other nodes to be successful, otherwise we remain the owner of 207 - * the glock as far as dlm is concerned. 208 - */ 209 - if (i_gl->gl_ops->go_unlocked) { 210 - set_bit(GLF_UNLOCKED, &i_gl->gl_flags); 211 - wait_on_bit(&i_gl->gl_flags, GLF_UNLOCKED, TASK_UNINTERRUPTIBLE); 212 - } 213 - 214 - /* 215 - * Dequeue the "live" glock, but keep a reference so it's never freed. 216 - */ 217 - gfs2_glock_hold(live_gl); 218 - gfs2_glock_dq_wait(&sdp->sd_live_gh); 219 - /* 220 - * We enqueue the "live" glock in EX so that all other nodes 221 - * get a demote request and act on it. We don't really want the 222 - * lock in EX, so we send a "try" lock with 1CB to produce a callback. 223 - */ 224 - fs_warn(sdp, "Requesting recovery of jid %d.\n", 225 - sdp->sd_lockstruct.ls_jid); 226 - gfs2_holder_reinit(LM_ST_EXCLUSIVE, 227 - LM_FLAG_TRY_1CB | LM_FLAG_NOEXP | GL_NOPID, 228 - &sdp->sd_live_gh); 229 - msleep(GL_GLOCK_MAX_HOLD); 230 - /* 231 - * This will likely fail in a cluster, but succeed standalone: 232 - */ 233 - ret = gfs2_glock_nq(&sdp->sd_live_gh); 234 - 235 - gfs2_glock_put(live_gl); /* drop extra reference we acquired */ 236 - clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags); 237 - 238 - /* 239 - * If we actually got the "live" lock in EX mode, there are no other 240 - * nodes available to replay our journal. 241 - */ 242 - if (ret == 0) { 243 - fs_warn(sdp, "No other mounters found.\n"); 244 - /* 245 - * We are about to release the lockspace. 
By keeping live_gl 246 - * locked here, we ensure that the next mounter coming along 247 - * will be a "first" mounter which will perform recovery. 248 - */ 249 - goto skip_recovery; 250 - } 251 - 252 - /* 253 - * At this point our journal is evicted, so we need to get a new inode 254 - * for it. Once done, we need to call gfs2_find_jhead which 255 - * calls gfs2_map_journal_extents to map it for us again. 256 - * 257 - * Note that we don't really want it to look up a FREE block. The 258 - * GFS2_BLKST_FREE simply overrides a block check in gfs2_inode_lookup 259 - * which would otherwise fail because it requires grabbing an rgrp 260 - * glock, which would fail with -EIO because we're withdrawing. 261 - */ 262 - inode = gfs2_inode_lookup(sdp->sd_vfs, DT_UNKNOWN, 263 - sdp->sd_jdesc->jd_no_addr, no_formal_ino, 264 - GFS2_BLKST_FREE); 265 - if (IS_ERR(inode)) { 266 - fs_warn(sdp, "Reprocessing of jid %d failed with %ld.\n", 267 - sdp->sd_lockstruct.ls_jid, PTR_ERR(inode)); 268 - goto skip_recovery; 269 - } 270 - sdp->sd_jdesc->jd_inode = inode; 271 - d_mark_dontcache(inode); 272 - 273 - /* 274 - * Now wait until recovery is complete. 275 - */ 276 - for (tries = 0; tries < 10; tries++) { 277 - ret = check_journal_clean(sdp, sdp->sd_jdesc, false); 278 - if (!ret) 279 - break; 280 - msleep(HZ); 281 - fs_warn(sdp, "Waiting for journal recovery jid %d.\n", 282 - sdp->sd_lockstruct.ls_jid); 283 - } 284 - skip_recovery: 285 - if (!ret) 286 - fs_warn(sdp, "Journal recovery complete for jid %d.\n", 287 - sdp->sd_lockstruct.ls_jid); 288 - else 289 - fs_warn(sdp, "Journal recovery skipped for jid %d until next " 290 - "mount.\n", sdp->sd_lockstruct.ls_jid); 291 - fs_warn(sdp, "Glock dequeues delayed: %lu\n", sdp->sd_glock_dqs_held); 292 - sdp->sd_glock_dqs_held = 0; 293 - wake_up_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY); 143 + gfs2_withdraw_glocks(sdp); 294 144 } 295 145 296 146 void gfs2_lm(struct gfs2_sbd *sdp, const char *fmt, ...) 
··· 159 309 va_end(args); 160 310 } 161 311 162 - void gfs2_withdraw(struct gfs2_sbd *sdp) 312 + /** 313 + * gfs2_offline_uevent - run gfs2_withdraw_helper 314 + * @sdp: The GFS2 superblock 315 + */ 316 + static bool gfs2_offline_uevent(struct gfs2_sbd *sdp) 163 317 { 164 318 struct lm_lockstruct *ls = &sdp->sd_lockstruct; 319 + long timeout; 320 + 321 + /* Skip protocol "lock_nolock" which doesn't require shared storage. */ 322 + if (!ls->ls_ops->lm_lock) 323 + return false; 324 + 325 + /* 326 + * The gfs2_withdraw_helper replies by writing one of the following 327 + * status codes to "/sys$DEVPATH/lock_module/withdraw": 328 + * 329 + * 0 - The shared block device has been marked inactive. Future write 330 + * operations will fail. 331 + * 332 + * 1 - The shared block device may still be active and carry out 333 + * write operations. 334 + * 335 + * If the "offline" uevent isn't reacted upon in time, the event 336 + * handler is assumed to have failed. 337 + */ 338 + 339 + sdp->sd_withdraw_helper_status = -1; 340 + kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE); 341 + timeout = gfs2_tune_get(sdp, gt_withdraw_helper_timeout) * HZ; 342 + wait_for_completion_timeout(&sdp->sd_withdraw_helper, timeout); 343 + if (sdp->sd_withdraw_helper_status == -1) { 344 + fs_err(sdp, "%s timed out\n", "gfs2_withdraw_helper"); 345 + } else { 346 + fs_err(sdp, "%s %s with status %d\n", 347 + "gfs2_withdraw_helper", 348 + sdp->sd_withdraw_helper_status == 0 ? 
349 + "succeeded" : "failed", 350 + sdp->sd_withdraw_helper_status); 351 + } 352 + return sdp->sd_withdraw_helper_status == 0; 353 + } 354 + 355 + void gfs2_withdraw_func(struct work_struct *work) 356 + { 357 + struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_withdraw_work); 358 + struct lm_lockstruct *ls = &sdp->sd_lockstruct; 165 359 const struct lm_lockops *lm = ls->ls_ops; 360 + bool device_inactive; 166 361 167 - if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) { 168 - unsigned long old = READ_ONCE(sdp->sd_flags), new; 362 + if (test_bit(SDF_KILL, &sdp->sd_flags)) 363 + return; 169 364 170 - do { 171 - if (old & BIT(SDF_WITHDRAWN)) { 172 - wait_on_bit(&sdp->sd_flags, 173 - SDF_WITHDRAW_IN_PROG, 174 - TASK_UNINTERRUPTIBLE); 175 - return; 176 - } 177 - new = old | BIT(SDF_WITHDRAWN) | BIT(SDF_WITHDRAW_IN_PROG); 178 - } while (unlikely(!try_cmpxchg(&sdp->sd_flags, &old, new))); 365 + BUG_ON(sdp->sd_args.ar_debug); 179 366 180 - fs_err(sdp, "about to withdraw this file system\n"); 181 - BUG_ON(sdp->sd_args.ar_debug); 367 + /* 368 + * Try to deactivate the shared block device so that no more I/O will 369 + * go through. If successful, we can immediately trigger remote 370 + * recovery. Otherwise, we must first empty out all our local caches. 
371 + */ 182 372 183 - signal_our_withdraw(sdp); 373 + device_inactive = gfs2_offline_uevent(sdp); 184 374 185 - kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE); 375 + if (sdp->sd_args.ar_errors == GFS2_ERRORS_DEACTIVATE && !device_inactive) 376 + panic("GFS2: fsid=%s: panic requested\n", sdp->sd_fsname); 186 377 187 - if (!strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, "lock_dlm")) 188 - wait_for_completion(&sdp->sd_wdack); 189 - 190 - if (lm->lm_unmount) { 191 - fs_err(sdp, "telling LM to unmount\n"); 192 - lm->lm_unmount(sdp); 378 + if (lm->lm_unmount) { 379 + if (device_inactive) { 380 + lm->lm_unmount(sdp, false); 381 + do_withdraw(sdp); 382 + } else { 383 + do_withdraw(sdp); 384 + lm->lm_unmount(sdp, false); 193 385 } 194 - fs_err(sdp, "File system withdrawn\n"); 386 + } else { 387 + do_withdraw(sdp); 388 + } 389 + 390 + fs_err(sdp, "file system withdrawn\n"); 391 + } 392 + 393 + void gfs2_withdraw(struct gfs2_sbd *sdp) 394 + { 395 + if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW || 396 + sdp->sd_args.ar_errors == GFS2_ERRORS_DEACTIVATE) { 397 + if (test_and_set_bit(SDF_WITHDRAWN, &sdp->sd_flags)) 398 + return; 399 + 195 400 dump_stack(); 196 - clear_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags); 197 - smp_mb__after_atomic(); 198 - wake_up_bit(&sdp->sd_flags, SDF_WITHDRAW_IN_PROG); 401 + /* 402 + * There is no need to withdraw when the superblock hasn't been 403 + * fully initialized, yet. 
404 + */ 405 + if (!(sdp->sd_vfs->s_flags & SB_BORN)) 406 + return; 407 + fs_err(sdp, "about to withdraw this file system\n"); 408 + schedule_work(&sdp->sd_withdraw_work); 409 + return; 199 410 } 200 411 201 412 if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC) ··· 268 357 */ 269 358 270 359 void gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion, 271 - const char *function, char *file, unsigned int line, 272 - bool delayed) 360 + const char *function, char *file, unsigned int line) 273 361 { 274 - if (gfs2_withdrawing_or_withdrawn(sdp)) 362 + if (gfs2_withdrawn(sdp)) 275 363 return; 276 364 277 365 fs_err(sdp, ··· 278 368 "function = %s, file = %s, line = %u\n", 279 369 assertion, function, file, line); 280 370 281 - /* 282 - * If errors=panic was specified on mount, it won't help to delay the 283 - * withdraw. 284 - */ 285 - if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC) 286 - delayed = false; 287 - 288 - if (delayed) 289 - gfs2_withdraw_delayed(sdp); 290 - else 291 - gfs2_withdraw(sdp); 371 + gfs2_withdraw(sdp); 292 372 dump_stack(); 293 373 } 294 374 ··· 420 520 } 421 521 422 522 /* 423 - * gfs2_io_error_bh_i - Flag a buffer I/O error 424 - * @withdraw: withdraw the filesystem 523 + * gfs2_io_error_bh_i - Flag a buffer I/O error and withdraw 425 524 */ 426 525 427 526 void gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh, 428 - const char *function, char *file, unsigned int line, 429 - bool withdraw) 527 + const char *function, char *file, unsigned int line) 430 528 { 431 - if (gfs2_withdrawing_or_withdrawn(sdp)) 529 + if (gfs2_withdrawn(sdp)) 432 530 return; 433 531 434 532 fs_err(sdp, "fatal: I/O error - " 435 533 "block = %llu, " 436 534 "function = %s, file = %s, line = %u\n", 437 535 (unsigned long long)bh->b_blocknr, function, file, line); 438 - if (withdraw) 439 - gfs2_withdraw(sdp); 536 + gfs2_withdraw(sdp); 440 537 } 441 -
+9 -47
fs/gfs2/util.h
··· 37 37 38 38 39 39 void gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion, 40 - const char *function, char *file, unsigned int line, 41 - bool delayed); 40 + const char *function, char *file, unsigned int line); 42 41 43 42 #define gfs2_assert_withdraw(sdp, assertion) \ 44 43 ({ \ 45 44 bool _bool = (assertion); \ 46 45 if (unlikely(!_bool)) \ 47 46 gfs2_assert_withdraw_i((sdp), #assertion, \ 48 - __func__, __FILE__, __LINE__, false); \ 49 - !_bool; \ 50 - }) 51 - 52 - #define gfs2_assert_withdraw_delayed(sdp, assertion) \ 53 - ({ \ 54 - bool _bool = (assertion); \ 55 - if (unlikely(!_bool)) \ 56 - gfs2_assert_withdraw_i((sdp), #assertion, \ 57 - __func__, __FILE__, __LINE__, true); \ 47 + __func__, __FILE__, __LINE__); \ 58 48 !_bool; \ 59 49 }) 60 50 ··· 151 161 152 162 153 163 void gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh, 154 - const char *function, char *file, unsigned int line, 155 - bool withdraw); 156 - 157 - #define gfs2_io_error_bh_wd(sdp, bh) \ 158 - gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__, true) 164 + const char *function, char *file, unsigned int line); 159 165 160 166 #define gfs2_io_error_bh(sdp, bh) \ 161 - gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__, false) 167 + gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__) 162 168 163 169 164 170 extern struct kmem_cache *gfs2_glock_cachep; ··· 179 193 } 180 194 181 195 /** 182 - * gfs2_withdraw_delayed - withdraw as soon as possible without deadlocks 196 + * gfs2_withdrawn - test whether the file system is withdrawn 183 197 * @sdp: the superblock 184 198 */ 185 - static inline void gfs2_withdraw_delayed(struct gfs2_sbd *sdp) 199 + static inline bool gfs2_withdrawn(struct gfs2_sbd *sdp) 186 200 { 187 - set_bit(SDF_WITHDRAWING, &sdp->sd_flags); 188 - } 189 - 190 - /** 191 - * gfs2_withdrawing_or_withdrawn - test whether the file system is withdrawing 192 - * or withdrawn 193 - * @sdp: the superblock 194 - */ 195 - static 
inline bool gfs2_withdrawing_or_withdrawn(struct gfs2_sbd *sdp) 196 - { 197 - return unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags) || 198 - test_bit(SDF_WITHDRAWING, &sdp->sd_flags)); 199 - } 200 - 201 - /** 202 - * gfs2_withdrawing - check if a withdraw is pending 203 - * @sdp: the superblock 204 - */ 205 - static inline bool gfs2_withdrawing(struct gfs2_sbd *sdp) 206 - { 207 - return unlikely(test_bit(SDF_WITHDRAWING, &sdp->sd_flags) && 208 - !test_bit(SDF_WITHDRAWN, &sdp->sd_flags)); 209 - } 210 - 211 - static inline bool gfs2_withdraw_in_prog(struct gfs2_sbd *sdp) 212 - { 213 - return unlikely(test_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags)); 201 + return unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags)); 214 202 } 215 203 216 204 #define gfs2_tune_get(sdp, field) \ ··· 192 232 193 233 __printf(2, 3) 194 234 void gfs2_lm(struct gfs2_sbd *sdp, const char *fmt, ...); 235 + 236 + void gfs2_withdraw_func(struct work_struct *work); 195 237 void gfs2_withdraw(struct gfs2_sbd *sdp); 196 238 197 239 #endif /* __UTIL_DOT_H__ */