Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-linus-20181123' of git://git.kernel.dk/linux-block

Pull block fix from Jens Axboe:
"Just a single fix for this week, fixing an issue with nvme-fc"

* tag 'for-linus-20181123' of git://git.kernel.dk/linux-block:
nvme-fc: resolve io failures during connect

+64 -11
+64 -11
drivers/nvme/host/fc.c
··· 152 152 153 153 bool ioq_live; 154 154 bool assoc_active; 155 + atomic_t err_work_active; 155 156 u64 association_id; 156 157 157 158 struct list_head ctrl_list; /* rport->ctrl_list */ ··· 161 160 struct blk_mq_tag_set tag_set; 162 161 163 162 struct delayed_work connect_work; 163 + struct work_struct err_work; 164 164 165 165 struct kref ref; 166 166 u32 flags; ··· 1533 1531 struct nvme_fc_fcp_op *aen_op = ctrl->aen_ops; 1534 1532 int i; 1535 1533 1534 + /* ensure we've initialized the ops once */ 1535 + if (!(aen_op->flags & FCOP_FLAGS_AEN)) 1536 + return; 1537 + 1536 1538 for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) 1537 1539 __nvme_fc_abort_op(ctrl, aen_op); 1538 1540 } ··· 2055 2049 static void 2056 2050 nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) 2057 2051 { 2058 - /* only proceed if in LIVE state - e.g. on first error */ 2052 + int active; 2053 + 2054 + /* 2055 + * if an error (io timeout, etc) while (re)connecting, 2056 + * it's an error on creating the new association. 2057 + * Start the error recovery thread if it hasn't already 2058 + * been started. It is expected there could be multiple 2059 + * ios hitting this path before things are cleaned up. 2060 + */ 2061 + if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) { 2062 + active = atomic_xchg(&ctrl->err_work_active, 1); 2063 + if (!active && !schedule_work(&ctrl->err_work)) { 2064 + atomic_set(&ctrl->err_work_active, 0); 2065 + WARN_ON(1); 2066 + } 2067 + return; 2068 + } 2069 + 2070 + /* Otherwise, only proceed if in LIVE state - e.g. on first error */ 2059 2071 if (ctrl->ctrl.state != NVME_CTRL_LIVE) 2060 2072 return; 2061 2073 ··· 2838 2814 { 2839 2815 struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); 2840 2816 2817 + cancel_work_sync(&ctrl->err_work); 2841 2818 cancel_delayed_work_sync(&ctrl->connect_work); 2842 2819 /* 2843 2820 * kill the association on the link side. this will block ··· 2891 2866 } 2892 2867 2893 2868 static void 2869 + __nvme_fc_terminate_io(struct nvme_fc_ctrl *ctrl) 2870 + { 2871 + nvme_stop_keep_alive(&ctrl->ctrl); 2872 + 2873 + /* will block will waiting for io to terminate */ 2874 + nvme_fc_delete_association(ctrl); 2875 + 2876 + if (ctrl->ctrl.state != NVME_CTRL_CONNECTING && 2877 + !nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) 2878 + dev_err(ctrl->ctrl.device, 2879 + "NVME-FC{%d}: error_recovery: Couldn't change state " 2880 + "to CONNECTING\n", ctrl->cnum); 2881 + } 2882 + 2883 + static void 2894 2884 nvme_fc_reset_ctrl_work(struct work_struct *work) 2895 2885 { 2896 2886 struct nvme_fc_ctrl *ctrl = 2897 2887 container_of(work, struct nvme_fc_ctrl, ctrl.reset_work); 2898 2888 int ret; 2899 2889 2890 + __nvme_fc_terminate_io(ctrl); 2891 + 2900 2892 nvme_stop_ctrl(&ctrl->ctrl); 2901 - 2902 - /* will block will waiting for io to terminate */ 2903 - nvme_fc_delete_association(ctrl); 2904 - 2905 - if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) { 2906 - dev_err(ctrl->ctrl.device, 2907 - "NVME-FC{%d}: error_recovery: Couldn't change state " 2908 - "to CONNECTING\n", ctrl->cnum); 2909 - return; 2910 - } 2911 2893 2912 2894 if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE) 2913 2895 ret = nvme_fc_create_association(ctrl); ··· 2927 2895 dev_info(ctrl->ctrl.device, 2928 2896 "NVME-FC{%d}: controller reset complete\n", 2929 2897 ctrl->cnum); 2898 + } 2899 + 2900 + static void 2901 + nvme_fc_connect_err_work(struct work_struct *work) 2902 + { 2903 + struct nvme_fc_ctrl *ctrl = 2904 + container_of(work, struct nvme_fc_ctrl, err_work); 2905 + 2906 + __nvme_fc_terminate_io(ctrl); 2907 + 2908 + atomic_set(&ctrl->err_work_active, 0); 2909 + 2910 + /* 2911 + * Rescheduling the connection after recovering 2912 + * from the io error is left to the reconnect work 2913 + * item, which is what should have stalled waiting on 2914 + * the io that had the error that scheduled this work. 2915 + */ 2930 2916 } 2931 2917 2932 2918 static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = { ··· 3057 3007 ctrl->cnum = idx; 3058 3008 ctrl->ioq_live = false; 3059 3009 ctrl->assoc_active = false; 3010 + atomic_set(&ctrl->err_work_active, 0); 3060 3011 init_waitqueue_head(&ctrl->ioabort_wait); 3061 3012 3062 3013 get_device(ctrl->dev); ··· 3065 3014 3066 3015 INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work); 3067 3016 INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work); 3017 + INIT_WORK(&ctrl->err_work, nvme_fc_connect_err_work); 3068 3018 spin_lock_init(&ctrl->lock); 3069 3019 3070 3020 /* io queue count */ ··· 3155 3103 fail_ctrl: 3156 3104 nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING); 3157 3105 cancel_work_sync(&ctrl->ctrl.reset_work); 3106 + cancel_work_sync(&ctrl->err_work); 3158 3107 cancel_delayed_work_sync(&ctrl->connect_work); 3159 3108 3160 3109 ctrl->ctrl.opts = NULL;