Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

scsi: lpfc: Fix scheduling call while in softirq context in lpfc_unreg_rpi

The following call trace was seen during HBA reset testing:

BUG: scheduling while atomic: swapper/2/0/0x10000100
...
Call Trace:
dump_stack+0x19/0x1b
__schedule_bug+0x64/0x72
__schedule+0x782/0x840
__cond_resched+0x26/0x30
_cond_resched+0x3a/0x50
mempool_alloc+0xa0/0x170
lpfc_unreg_rpi+0x151/0x630 [lpfc]
lpfc_sli_abts_recover_port+0x171/0x190 [lpfc]
lpfc_sli4_abts_err_handler+0xb2/0x1f0 [lpfc]
lpfc_sli4_io_xri_aborted+0x256/0x300 [lpfc]
lpfc_sli4_sp_handle_abort_xri_wcqe.isra.51+0xa3/0x190 [lpfc]
lpfc_sli4_fp_handle_cqe+0x89/0x4d0 [lpfc]
__lpfc_sli4_process_cq+0xdb/0x2e0 [lpfc]
__lpfc_sli4_hba_process_cq+0x41/0x100 [lpfc]
lpfc_cq_poll_hdler+0x1a/0x30 [lpfc]
irq_poll_softirq+0xc7/0x100
__do_softirq+0xf5/0x280
call_softirq+0x1c/0x30
do_softirq+0x65/0xa0
irq_exit+0x105/0x110
do_IRQ+0x56/0xf0
common_interrupt+0x16a/0x16a

The conversion to blk_io_poll, made for better interrupt latency in normal
cases, introduced this code path. It is executed when I/O aborts or logouts
are seen, and it attempts to allocate memory for a mailbox command to be
issued. The allocation is GFP_KERNEL, thus it could attempt to sleep.

Fix by creating a work element that performs the event handling for the
remote port. The mailbox commands and other items are then performed in
the work element, not the irq. This is a much better method, as the "irq"
routine does not stall while performing all this deep handling code.

Ensure that allocation failures are handled and send LOGO on failure.

Additionally, enlarge the mailbox memory pool to reduce the possibility of
additional allocation in this path.

Link: https://lore.kernel.org/r/20201020202719.54726-3-james.smart@broadcom.com
Fixes: 317aeb83c92b ("scsi: lpfc: Add blk_io_poll support for latency improvment")
Cc: <stable@vger.kernel.org> # v5.9+
Co-developed-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>

authored by

James Smart and committed by
Martin K. Petersen
e7dab164 62e3a931

+140 -46
+1 -1
drivers/scsi/lpfc/lpfc.h
··· 753 753 #define HBA_SP_QUEUE_EVT 0x8 /* Slow-path qevt posted to worker thread*/ 754 754 #define HBA_POST_RECEIVE_BUFFER 0x10 /* Rcv buffers need to be posted */ 755 755 #define HBA_PERSISTENT_TOPO 0x20 /* Persistent topology support in hba */ 756 - #define ELS_XRI_ABORT_EVENT 0x40 756 + #define ELS_XRI_ABORT_EVENT 0x40 /* ELS_XRI abort event was queued */ 757 757 #define ASYNC_EVENT 0x80 758 758 #define LINK_DISABLED 0x100 /* Link disabled by user */ 759 759 #define FCF_TS_INPROG 0x200 /* FCF table scan in progress */
+2
drivers/scsi/lpfc/lpfc_disc.h
··· 41 41 LPFC_EVT_DEV_LOSS, 42 42 LPFC_EVT_FASTPATH_MGMT_EVT, 43 43 LPFC_EVT_RESET_HBA, 44 + LPFC_EVT_RECOVER_PORT 44 45 }; 45 46 46 47 /* structure used to queue event to the discovery tasklet */ ··· 129 128 struct lpfc_vport *vport; 130 129 struct lpfc_work_evt els_retry_evt; 131 130 struct lpfc_work_evt dev_loss_evt; 131 + struct lpfc_work_evt recovery_evt; 132 132 struct kref kref; 133 133 atomic_t cmd_pending; 134 134 uint32_t cmd_qdepth;
+35
drivers/scsi/lpfc/lpfc_hbadisc.c
··· 552 552 fcf_inuse, 553 553 nlp_did); 554 554 break; 555 + case LPFC_EVT_RECOVER_PORT: 556 + ndlp = (struct lpfc_nodelist *)(evtp->evt_arg1); 557 + lpfc_sli_abts_recover_port(ndlp->vport, ndlp); 558 + free_evt = 0; 559 + /* decrement the node reference count held for 560 + * this queued work 561 + */ 562 + lpfc_nlp_put(ndlp); 563 + break; 555 564 case LPFC_EVT_ONLINE: 556 565 if (phba->link_state < LPFC_LINK_DOWN) 557 566 *(int *) (evtp->evt_arg1) = lpfc_online(phba); ··· 4524 4515 INIT_LIST_HEAD(&ndlp->els_retry_evt.evt_listp); 4525 4516 INIT_LIST_HEAD(&ndlp->dev_loss_evt.evt_listp); 4526 4517 timer_setup(&ndlp->nlp_delayfunc, lpfc_els_retry_delay, 0); 4518 + INIT_LIST_HEAD(&ndlp->recovery_evt.evt_listp); 4519 + 4527 4520 ndlp->nlp_DID = did; 4528 4521 ndlp->vport = vport; 4529 4522 ndlp->phba = vport->phba; ··· 5022 5011 mempool_free(mbox, phba->mbox_mem_pool); 5023 5012 acc_plogi = 1; 5024 5013 } 5014 + } else { 5015 + lpfc_printf_vlog(vport, KERN_INFO, 5016 + LOG_NODE | LOG_DISCOVERY, 5017 + "1444 Failed to allocate mempool " 5018 + "unreg_rpi UNREG x%x, " 5019 + "DID x%x, flag x%x, " 5020 + "ndlp x%px\n", 5021 + ndlp->nlp_rpi, ndlp->nlp_DID, 5022 + ndlp->nlp_flag, ndlp); 5023 + 5024 + /* Because mempool_alloc failed, we 5025 + * will issue a LOGO here and keep the rpi alive if 5026 + * not unloading. 5027 + */ 5028 + if (!(vport->load_flag & FC_UNLOADING)) { 5029 + ndlp->nlp_flag &= ~NLP_UNREG_INP; 5030 + lpfc_issue_els_logo(vport, ndlp, 0); 5031 + ndlp->nlp_prev_state = ndlp->nlp_state; 5032 + lpfc_nlp_set_state(vport, ndlp, 5033 + NLP_STE_NPR_NODE); 5034 + } 5035 + 5036 + return 1; 5025 5037 } 5026 5038 lpfc_no_rpi(phba, ndlp); 5027 5039 out: ··· 5248 5214 5249 5215 list_del_init(&ndlp->els_retry_evt.evt_listp); 5250 5216 list_del_init(&ndlp->dev_loss_evt.evt_listp); 5217 + list_del_init(&ndlp->recovery_evt.evt_listp); 5251 5218 lpfc_cleanup_vports_rrqs(vport, ndlp); 5252 5219 if (phba->sli_rev == LPFC_SLI_REV4) 5253 5220 ndlp->nlp_flag |= NLP_RELEASE_RPI;
+31 -23
drivers/scsi/lpfc/lpfc_init.c
··· 5958 5958 void lpfc_sli4_async_event_proc(struct lpfc_hba *phba) 5959 5959 { 5960 5960 struct lpfc_cq_event *cq_event; 5961 + unsigned long iflags; 5961 5962 5962 5963 /* First, declare the async event has been handled */ 5963 - spin_lock_irq(&phba->hbalock); 5964 + spin_lock_irqsave(&phba->hbalock, iflags); 5964 5965 phba->hba_flag &= ~ASYNC_EVENT; 5965 - spin_unlock_irq(&phba->hbalock); 5966 + spin_unlock_irqrestore(&phba->hbalock, iflags); 5967 + 5966 5968 /* Now, handle all the async events */ 5969 + spin_lock_irqsave(&phba->sli4_hba.asynce_list_lock, iflags); 5967 5970 while (!list_empty(&phba->sli4_hba.sp_asynce_work_queue)) { 5968 - /* Get the first event from the head of the event queue */ 5969 - spin_lock_irq(&phba->hbalock); 5970 5971 list_remove_head(&phba->sli4_hba.sp_asynce_work_queue, 5971 5972 cq_event, struct lpfc_cq_event, list); 5972 - spin_unlock_irq(&phba->hbalock); 5973 + spin_unlock_irqrestore(&phba->sli4_hba.asynce_list_lock, 5974 + iflags); 5975 + 5973 5976 /* Process the asynchronous event */ 5974 5977 switch (bf_get(lpfc_trailer_code, &cq_event->cqe.mcqe_cmpl)) { 5975 5978 case LPFC_TRAILER_CODE_LINK: ··· 6004 6001 &cq_event->cqe.mcqe_cmpl)); 6005 6002 break; 6006 6003 } 6004 + 6007 6005 /* Free the completion event processed to the free pool */ 6008 6006 lpfc_sli4_cq_event_release(phba, cq_event); 6007 + spin_lock_irqsave(&phba->sli4_hba.asynce_list_lock, iflags); 6009 6008 } 6009 + spin_unlock_irqrestore(&phba->sli4_hba.asynce_list_lock, iflags); 6010 6010 } 6011 6011 6012 6012 /** ··· 6636 6630 /* This abort list used by worker thread */ 6637 6631 spin_lock_init(&phba->sli4_hba.sgl_list_lock); 6638 6632 spin_lock_init(&phba->sli4_hba.nvmet_io_wait_lock); 6633 + spin_lock_init(&phba->sli4_hba.asynce_list_lock); 6634 + spin_lock_init(&phba->sli4_hba.els_xri_abrt_list_lock); 6639 6635 6640 6636 /* 6641 6637 * Initialize driver internal slow-path work queues ··· 6649 6641 INIT_LIST_HEAD(&phba->sli4_hba.sp_queue_event); 6650 6642 /* 
Asynchronous event CQ Event work queue list */ 6651 6643 INIT_LIST_HEAD(&phba->sli4_hba.sp_asynce_work_queue); 6652 - /* Fast-path XRI aborted CQ Event work queue list */ 6653 - INIT_LIST_HEAD(&phba->sli4_hba.sp_fcp_xri_aborted_work_queue); 6654 6644 /* Slow-path XRI aborted CQ Event work queue list */ 6655 6645 INIT_LIST_HEAD(&phba->sli4_hba.sp_els_xri_aborted_work_queue); 6656 6646 /* Receive queue CQ Event work queue list */ ··· 10179 10173 static void 10180 10174 lpfc_sli4_cq_event_release_all(struct lpfc_hba *phba) 10181 10175 { 10182 - LIST_HEAD(cqelist); 10183 - struct lpfc_cq_event *cqe; 10176 + LIST_HEAD(cq_event_list); 10177 + struct lpfc_cq_event *cq_event; 10184 10178 unsigned long iflags; 10185 10179 10186 10180 /* Retrieve all the pending WCQEs from pending WCQE lists */ 10187 - spin_lock_irqsave(&phba->hbalock, iflags); 10188 - /* Pending FCP XRI abort events */ 10189 - list_splice_init(&phba->sli4_hba.sp_fcp_xri_aborted_work_queue, 10190 - &cqelist); 10191 - /* Pending ELS XRI abort events */ 10192 - list_splice_init(&phba->sli4_hba.sp_els_xri_aborted_work_queue, 10193 - &cqelist); 10194 - /* Pending asynnc events */ 10195 - list_splice_init(&phba->sli4_hba.sp_asynce_work_queue, 10196 - &cqelist); 10197 - spin_unlock_irqrestore(&phba->hbalock, iflags); 10198 10181 10199 - while (!list_empty(&cqelist)) { 10200 - list_remove_head(&cqelist, cqe, struct lpfc_cq_event, list); 10201 - lpfc_sli4_cq_event_release(phba, cqe); 10182 + /* Pending ELS XRI abort events */ 10183 + spin_lock_irqsave(&phba->sli4_hba.els_xri_abrt_list_lock, iflags); 10184 + list_splice_init(&phba->sli4_hba.sp_els_xri_aborted_work_queue, 10185 + &cq_event_list); 10186 + spin_unlock_irqrestore(&phba->sli4_hba.els_xri_abrt_list_lock, iflags); 10187 + 10188 + /* Pending asynnc events */ 10189 + spin_lock_irqsave(&phba->sli4_hba.asynce_list_lock, iflags); 10190 + list_splice_init(&phba->sli4_hba.sp_asynce_work_queue, 10191 + &cq_event_list); 10192 + 
spin_unlock_irqrestore(&phba->sli4_hba.asynce_list_lock, iflags); 10193 + 10194 + while (!list_empty(&cq_event_list)) { 10195 + list_remove_head(&cq_event_list, cq_event, 10196 + struct lpfc_cq_event, list); 10197 + lpfc_sli4_cq_event_release(phba, cq_event); 10202 10198 } 10203 10199 } 10204 10200
+3 -2
drivers/scsi/lpfc/lpfc_mem.c
··· 46 46 #define LPFC_MEM_POOL_SIZE 64 /* max elem in non-DMA safety pool */ 47 47 #define LPFC_DEVICE_DATA_POOL_SIZE 64 /* max elements in device data pool */ 48 48 #define LPFC_RRQ_POOL_SIZE 256 /* max elements in non-DMA pool */ 49 + #define LPFC_MBX_POOL_SIZE 256 /* max elements in MBX non-DMA pool */ 49 50 50 51 int 51 52 lpfc_mem_alloc_active_rrq_pool_s4(struct lpfc_hba *phba) { ··· 112 111 pool->current_count++; 113 112 } 114 113 115 - phba->mbox_mem_pool = mempool_create_kmalloc_pool(LPFC_MEM_POOL_SIZE, 116 - sizeof(LPFC_MBOXQ_t)); 114 + phba->mbox_mem_pool = mempool_create_kmalloc_pool(LPFC_MBX_POOL_SIZE, 115 + sizeof(LPFC_MBOXQ_t)); 117 116 if (!phba->mbox_mem_pool) 118 117 goto fail_free_mbuf_pool; 119 118
+13 -5
drivers/scsi/lpfc/lpfc_nvme.c
··· 2280 2280 int ret, i, pending = 0; 2281 2281 struct lpfc_sli_ring *pring; 2282 2282 struct lpfc_hba *phba = vport->phba; 2283 + struct lpfc_sli4_hdw_queue *qp; 2284 + int abts_scsi, abts_nvme; 2283 2285 2284 2286 /* Host transport has to clean up and confirm requiring an indefinite 2285 2287 * wait. Print a message if a 10 second wait expires and renew the ··· 2292 2290 ret = wait_for_completion_timeout(lport_unreg_cmp, wait_tmo); 2293 2291 if (unlikely(!ret)) { 2294 2292 pending = 0; 2293 + abts_scsi = 0; 2294 + abts_nvme = 0; 2295 2295 for (i = 0; i < phba->cfg_hdw_queue; i++) { 2296 - pring = phba->sli4_hba.hdwq[i].io_wq->pring; 2296 + qp = &phba->sli4_hba.hdwq[i]; 2297 + pring = qp->io_wq->pring; 2297 2298 if (!pring) 2298 2299 continue; 2299 - if (pring->txcmplq_cnt) 2300 - pending += pring->txcmplq_cnt; 2300 + pending += pring->txcmplq_cnt; 2301 + abts_scsi += qp->abts_scsi_io_bufs; 2302 + abts_nvme += qp->abts_nvme_io_bufs; 2301 2303 } 2302 2304 lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT, 2303 2305 "6176 Lport x%px Localport x%px wait " 2304 - "timed out. Pending %d. Renewing.\n", 2305 - lport, vport->localport, pending); 2306 + "timed out. Pending %d [%d:%d]. " 2307 + "Renewing.\n", 2308 + lport, vport->localport, pending, 2309 + abts_scsi, abts_nvme); 2306 2310 continue; 2307 2311 } 2308 2312 break;
+52 -12
drivers/scsi/lpfc/lpfc_sli.c
··· 10367 10367 return 0; 10368 10368 } 10369 10369 10370 + static void 10371 + lpfc_sli_post_recovery_event(struct lpfc_hba *phba, 10372 + struct lpfc_nodelist *ndlp) 10373 + { 10374 + unsigned long iflags; 10375 + struct lpfc_work_evt *evtp = &ndlp->recovery_evt; 10376 + 10377 + spin_lock_irqsave(&phba->hbalock, iflags); 10378 + if (!list_empty(&evtp->evt_listp)) { 10379 + spin_unlock_irqrestore(&phba->hbalock, iflags); 10380 + return; 10381 + } 10382 + 10383 + /* Incrementing the reference count until the queued work is done. */ 10384 + evtp->evt_arg1 = lpfc_nlp_get(ndlp); 10385 + if (!evtp->evt_arg1) { 10386 + spin_unlock_irqrestore(&phba->hbalock, iflags); 10387 + return; 10388 + } 10389 + evtp->evt = LPFC_EVT_RECOVER_PORT; 10390 + list_add_tail(&evtp->evt_listp, &phba->work_list); 10391 + spin_unlock_irqrestore(&phba->hbalock, iflags); 10392 + 10393 + lpfc_worker_wake_up(phba); 10394 + } 10395 + 10370 10396 /* lpfc_sli_abts_err_handler - handle a failed ABTS request from an SLI3 port. 10371 10397 * @phba: Pointer to HBA context object. 10372 10398 * @iocbq: Pointer to iocb object. 
··· 10483 10457 ext_status = axri->parameter & IOERR_PARAM_MASK; 10484 10458 if ((bf_get(lpfc_wcqe_xa_status, axri) == IOSTAT_LOCAL_REJECT) && 10485 10459 ((ext_status == IOERR_SEQUENCE_TIMEOUT) || (ext_status == 0))) 10486 - lpfc_sli_abts_recover_port(vport, ndlp); 10460 + lpfc_sli_post_recovery_event(phba, ndlp); 10487 10461 } 10488 10462 10489 10463 /** ··· 13091 13065 void lpfc_sli4_els_xri_abort_event_proc(struct lpfc_hba *phba) 13092 13066 { 13093 13067 struct lpfc_cq_event *cq_event; 13068 + unsigned long iflags; 13094 13069 13095 13070 /* First, declare the els xri abort event has been handled */ 13096 - spin_lock_irq(&phba->hbalock); 13071 + spin_lock_irqsave(&phba->hbalock, iflags); 13097 13072 phba->hba_flag &= ~ELS_XRI_ABORT_EVENT; 13098 - spin_unlock_irq(&phba->hbalock); 13073 + spin_unlock_irqrestore(&phba->hbalock, iflags); 13074 + 13099 13075 /* Now, handle all the els xri abort events */ 13076 + spin_lock_irqsave(&phba->sli4_hba.els_xri_abrt_list_lock, iflags); 13100 13077 while (!list_empty(&phba->sli4_hba.sp_els_xri_aborted_work_queue)) { 13101 13078 /* Get the first event from the head of the event queue */ 13102 - spin_lock_irq(&phba->hbalock); 13103 13079 list_remove_head(&phba->sli4_hba.sp_els_xri_aborted_work_queue, 13104 13080 cq_event, struct lpfc_cq_event, list); 13105 - spin_unlock_irq(&phba->hbalock); 13081 + spin_unlock_irqrestore(&phba->sli4_hba.els_xri_abrt_list_lock, 13082 + iflags); 13106 13083 /* Notify aborted XRI for ELS work queue */ 13107 13084 lpfc_sli4_els_xri_aborted(phba, &cq_event->cqe.wcqe_axri); 13085 + 13108 13086 /* Free the event processed back to the free pool */ 13109 13087 lpfc_sli4_cq_event_release(phba, cq_event); 13088 + spin_lock_irqsave(&phba->sli4_hba.els_xri_abrt_list_lock, 13089 + iflags); 13110 13090 } 13091 + spin_unlock_irqrestore(&phba->sli4_hba.els_xri_abrt_list_lock, iflags); 13111 13092 } 13112 13093 13113 13094 /** ··· 13325 13292 cq_event = lpfc_cq_event_setup(phba, mcqe, sizeof(struct 
lpfc_mcqe)); 13326 13293 if (!cq_event) 13327 13294 return false; 13328 - spin_lock_irqsave(&phba->hbalock, iflags); 13295 + 13296 + spin_lock_irqsave(&phba->sli4_hba.asynce_list_lock, iflags); 13329 13297 list_add_tail(&cq_event->list, &phba->sli4_hba.sp_asynce_work_queue); 13298 + spin_unlock_irqrestore(&phba->sli4_hba.asynce_list_lock, iflags); 13299 + 13330 13300 /* Set the async event flag */ 13301 + spin_lock_irqsave(&phba->hbalock, iflags); 13331 13302 phba->hba_flag |= ASYNC_EVENT; 13332 13303 spin_unlock_irqrestore(&phba->hbalock, iflags); 13333 13304 ··· 13606 13569 break; 13607 13570 case LPFC_NVME_LS: /* NVME LS uses ELS resources */ 13608 13571 case LPFC_ELS: 13609 - cq_event = lpfc_cq_event_setup( 13610 - phba, wcqe, sizeof(struct sli4_wcqe_xri_aborted)); 13611 - if (!cq_event) 13612 - return false; 13572 + cq_event = lpfc_cq_event_setup(phba, wcqe, sizeof(*wcqe)); 13573 + if (!cq_event) { 13574 + workposted = false; 13575 + break; 13576 + } 13613 13577 cq_event->hdwq = cq->hdwq; 13614 - spin_lock_irqsave(&phba->hbalock, iflags); 13578 + spin_lock_irqsave(&phba->sli4_hba.els_xri_abrt_list_lock, 13579 + iflags); 13615 13580 list_add_tail(&cq_event->list, 13616 13581 &phba->sli4_hba.sp_els_xri_aborted_work_queue); 13617 13582 /* Set the els xri abort event flag */ 13618 13583 phba->hba_flag |= ELS_XRI_ABORT_EVENT; 13619 - spin_unlock_irqrestore(&phba->hbalock, iflags); 13584 + spin_unlock_irqrestore(&phba->sli4_hba.els_xri_abrt_list_lock, 13585 + iflags); 13620 13586 workposted = true; 13621 13587 break; 13622 13588 default:
+3 -3
drivers/scsi/lpfc/lpfc_sli4.h
··· 920 920 struct list_head sp_queue_event; 921 921 struct list_head sp_cqe_event_pool; 922 922 struct list_head sp_asynce_work_queue; 923 - struct list_head sp_fcp_xri_aborted_work_queue; 923 + spinlock_t asynce_list_lock; /* protect sp_asynce_work_queue list */ 924 924 struct list_head sp_els_xri_aborted_work_queue; 925 + spinlock_t els_xri_abrt_list_lock; /* protect els_xri_aborted list */ 925 926 struct list_head sp_unsol_work_queue; 926 927 struct lpfc_sli4_link link_state; 927 928 struct lpfc_sli4_lnk_info lnk_info; ··· 1104 1103 void lpfc_sli4_fcf_redisc_event_proc(struct lpfc_hba *); 1105 1104 int lpfc_sli4_resume_rpi(struct lpfc_nodelist *, 1106 1105 void (*)(struct lpfc_hba *, LPFC_MBOXQ_t *), void *); 1107 - void lpfc_sli4_fcp_xri_abort_event_proc(struct lpfc_hba *); 1108 - void lpfc_sli4_els_xri_abort_event_proc(struct lpfc_hba *); 1106 + void lpfc_sli4_els_xri_abort_event_proc(struct lpfc_hba *phba); 1109 1107 void lpfc_sli4_nvme_xri_aborted(struct lpfc_hba *phba, 1110 1108 struct sli4_wcqe_xri_aborted *axri, 1111 1109 struct lpfc_io_buf *lpfc_ncmd);