Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'block-5.15-2021-10-29' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:

- NVMe pull request:
    - fix nvmet-tcp header digest verification (Amit Engel)
    - fix a memory leak in nvmet-tcp when releasing a queue (Maurizio Lombardi)
    - fix nvme-tcp H2CData PDU send accounting again (Sagi Grimberg)
    - fix digest pointer calculation in nvme-tcp and nvmet-tcp (Varun Prakash)
    - fix possible nvme-tcp req->offset corruption (Varun Prakash)

- Queue drain ordering fix (Ming)

- Partition check regression for zoned devices (Shin'ichiro)

- Zone queue restart fix (Naohiro)

* tag 'block-5.15-2021-10-29' of git://git.kernel.dk/linux-block:
    block: Fix partition check for host-aware zoned block devices
    nvmet-tcp: fix header digest verification
    nvmet-tcp: fix data digest pointer calculation
    nvme-tcp: fix data digest pointer calculation
    nvme-tcp: fix possible req->offset corruption
    block: schedule queue restart after BLK_STS_ZONE_RESOURCE
    block: drain queue after disk is removed from sysfs
    nvme-tcp: fix H2CData PDU send accounting (again)
    nvmet-tcp: fix a memory leak when releasing a queue

+51 -20
+9 -4
block/blk-mq.c
··· 1325 1325 int errors, queued; 1326 1326 blk_status_t ret = BLK_STS_OK; 1327 1327 LIST_HEAD(zone_list); 1328 + bool needs_resource = false; 1328 1329 1329 1330 if (list_empty(list)) 1330 1331 return false; ··· 1371 1370 queued++; 1372 1371 break; 1373 1372 case BLK_STS_RESOURCE: 1373 + needs_resource = true; 1374 + fallthrough; 1374 1375 case BLK_STS_DEV_RESOURCE: 1375 1376 blk_mq_handle_dev_resource(rq, list); 1376 1377 goto out; ··· 1383 1380 * accept. 1384 1381 */ 1385 1382 blk_mq_handle_zone_resource(rq, &zone_list); 1383 + needs_resource = true; 1386 1384 break; 1387 1385 default: 1388 1386 errors++; ··· 1410 1406 /* For non-shared tags, the RESTART check will suffice */ 1411 1407 bool no_tag = prep == PREP_DISPATCH_NO_TAG && 1412 1408 (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED); 1413 - bool no_budget_avail = prep == PREP_DISPATCH_NO_BUDGET; 1414 1409 1415 1410 if (nr_budgets) 1416 1411 blk_mq_release_budgets(q, list); ··· 1450 1447 * If driver returns BLK_STS_RESOURCE and SCHED_RESTART 1451 1448 * bit is set, run queue after a delay to avoid IO stalls 1452 1449 * that could otherwise occur if the queue is idle. We'll do 1453 - * similar if we couldn't get budget and SCHED_RESTART is set. 1450 + * similar if we couldn't get budget or couldn't lock a zone 1451 + * and SCHED_RESTART is set. 1454 1452 */ 1455 1453 needs_restart = blk_mq_sched_needs_restart(hctx); 1454 + if (prep == PREP_DISPATCH_NO_BUDGET) 1455 + needs_resource = true; 1456 1456 if (!needs_restart || 1457 1457 (no_tag && list_empty_careful(&hctx->dispatch_wait.entry))) 1458 1458 blk_mq_run_hw_queue(hctx, true); 1459 - else if (needs_restart && (ret == BLK_STS_RESOURCE || 1460 - no_budget_avail)) 1459 + else if (needs_restart && needs_resource) 1461 1460 blk_mq_delay_run_hw_queue(hctx, BLK_MQ_RESOURCE_DELAY); 1462 1461 1463 1462 blk_mq_update_dispatch_busy(hctx, true);
+19 -1
block/blk-settings.c
··· 842 842 } 843 843 EXPORT_SYMBOL_GPL(blk_queue_can_use_dma_map_merging); 844 844 845 + static bool disk_has_partitions(struct gendisk *disk) 846 + { 847 + unsigned long idx; 848 + struct block_device *part; 849 + bool ret = false; 850 + 851 + rcu_read_lock(); 852 + xa_for_each(&disk->part_tbl, idx, part) { 853 + if (bdev_is_partition(part)) { 854 + ret = true; 855 + break; 856 + } 857 + } 858 + rcu_read_unlock(); 859 + 860 + return ret; 861 + } 862 + 845 863 /** 846 864 * blk_queue_set_zoned - configure a disk queue zoned model. 847 865 * @disk: the gendisk of the queue to configure ··· 894 876 * we do nothing special as far as the block layer is concerned. 895 877 */ 896 878 if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED) || 897 - !xa_empty(&disk->part_tbl)) 879 + disk_has_partitions(disk)) 898 880 model = BLK_ZONED_NONE; 899 881 break; 900 882 case BLK_ZONED_NONE:
+12 -10
block/genhd.c
··· 588 588 * Prevent new I/O from crossing bio_queue_enter(). 589 589 */ 590 590 blk_queue_start_drain(q); 591 - blk_mq_freeze_queue_wait(q); 592 - 593 - rq_qos_exit(q); 594 - blk_sync_queue(q); 595 - blk_flush_integrity(); 596 - /* 597 - * Allow using passthrough request again after the queue is torn down. 598 - */ 599 - blk_queue_flag_clear(QUEUE_FLAG_INIT_DONE, q); 600 - __blk_mq_unfreeze_queue(q, true); 601 591 602 592 if (!(disk->flags & GENHD_FL_HIDDEN)) { 603 593 sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); ··· 610 620 sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk))); 611 621 pm_runtime_set_memalloc_noio(disk_to_dev(disk), false); 612 622 device_del(disk_to_dev(disk)); 623 + 624 + blk_mq_freeze_queue_wait(q); 625 + 626 + rq_qos_exit(q); 627 + blk_sync_queue(q); 628 + blk_flush_integrity(); 629 + /* 630 + * Allow using passthrough request again after the queue is torn down. 631 + */ 632 + blk_queue_flag_clear(QUEUE_FLAG_INIT_DONE, q); 633 + __blk_mq_unfreeze_queue(q, true); 634 + 613 635 } 614 636 EXPORT_SYMBOL(del_gendisk); 615 637
+6 -3
drivers/nvme/host/tcp.c
··· 926 926 static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) 927 927 { 928 928 struct nvme_tcp_queue *queue = req->queue; 929 + int req_data_len = req->data_len; 929 930 930 931 while (true) { 931 932 struct page *page = nvme_tcp_req_cur_page(req); 932 933 size_t offset = nvme_tcp_req_cur_offset(req); 933 934 size_t len = nvme_tcp_req_cur_length(req); 934 935 bool last = nvme_tcp_pdu_last_send(req, len); 936 + int req_data_sent = req->data_sent; 935 937 int ret, flags = MSG_DONTWAIT; 936 938 937 939 if (last && !queue->data_digest && !nvme_tcp_queue_more(queue)) ··· 960 958 * in the request where we don't want to modify it as we may 961 959 * compete with the RX path completing the request. 962 960 */ 963 - if (req->data_sent + ret < req->data_len) 961 + if (req_data_sent + ret < req_data_len) 964 962 nvme_tcp_advance_req(req, ret); 965 963 966 964 /* fully successful last send in current PDU */ ··· 1050 1048 static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req) 1051 1049 { 1052 1050 struct nvme_tcp_queue *queue = req->queue; 1051 + size_t offset = req->offset; 1053 1052 int ret; 1054 1053 struct msghdr msg = { .msg_flags = MSG_DONTWAIT }; 1055 1054 struct kvec iov = { 1056 - .iov_base = &req->ddgst + req->offset, 1055 + .iov_base = (u8 *)&req->ddgst + req->offset, 1057 1056 .iov_len = NVME_TCP_DIGEST_LENGTH - req->offset 1058 1057 }; 1059 1058 ··· 1067 1064 if (unlikely(ret <= 0)) 1068 1065 return ret; 1069 1066 1070 - if (req->offset + ret == NVME_TCP_DIGEST_LENGTH) { 1067 + if (offset + ret == NVME_TCP_DIGEST_LENGTH) { 1071 1068 nvme_tcp_done_send_req(queue); 1072 1069 return 1; 1073 1070 }
+5 -2
drivers/nvme/target/tcp.c
··· 702 702 struct nvmet_tcp_queue *queue = cmd->queue; 703 703 struct msghdr msg = { .msg_flags = MSG_DONTWAIT }; 704 704 struct kvec iov = { 705 - .iov_base = &cmd->exp_ddgst + cmd->offset, 705 + .iov_base = (u8 *)&cmd->exp_ddgst + cmd->offset, 706 706 .iov_len = NVME_TCP_DIGEST_LENGTH - cmd->offset 707 707 }; 708 708 int ret; ··· 1096 1096 } 1097 1097 1098 1098 if (queue->hdr_digest && 1099 - nvmet_tcp_verify_hdgst(queue, &queue->pdu, queue->offset)) { 1099 + nvmet_tcp_verify_hdgst(queue, &queue->pdu, hdr->hlen)) { 1100 1100 nvmet_tcp_fatal_error(queue); /* fatal */ 1101 1101 return -EPROTO; 1102 1102 } ··· 1428 1428 1429 1429 static void nvmet_tcp_release_queue_work(struct work_struct *w) 1430 1430 { 1431 + struct page *page; 1431 1432 struct nvmet_tcp_queue *queue = 1432 1433 container_of(w, struct nvmet_tcp_queue, release_work); 1433 1434 ··· 1448 1447 nvmet_tcp_free_crypto(queue); 1449 1448 ida_simple_remove(&nvmet_tcp_queue_ida, queue->idx); 1450 1449 1450 + page = virt_to_head_page(queue->pf_cache.va); 1451 + __page_frag_cache_drain(page, queue->pf_cache.pagecnt_bias); 1451 1452 kfree(queue); 1452 1453 } 1453 1454