Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'block-5.19-2022-06-24' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:

- Series fixing issues with sysfs locking and name reuse (Christoph)

- NVMe pull request via Christoph:
    - Fix the mixed up CRIMS/CRWMS constants (Joel Granados)
    - Add another broken identifier quirk (Leo Savernik)
    - Fix up a quirk because Samsung reuses PCI IDs over different
      products (Christoph Hellwig)

- Remove old WARN_ON() that doesn't apply anymore (Li)

- Fix for using a stale cached request value for rq-qos throttling
mechanisms that may schedule(), like iocost (me)

- Remove unused parameter to blk_independent_access_range() (Damien)

* tag 'block-5.19-2022-06-24' of git://git.kernel.dk/linux-block:
block: remove WARN_ON() from bd_link_disk_holder
nvme: move the Samsung X5 quirk entry to the core quirks
nvme: fix the CRIMS and CRWMS definitions to match the spec
nvme: add a bogus subsystem NQN quirk for Micron MTFDKBA2T0TFH
block: pop cached rq before potentially blocking rq_qos_throttle()
block: remove queue from struct blk_independent_access_range
block: freeze the queue earlier in del_gendisk
block: remove per-disk debugfs files in blk_unregister_queue
block: serialize all debugfs operations using q->debugfs_mutex
block: disable the elevator in del_gendisk

+91 -105
-13
block/blk-core.c
··· 322 322 blk_mq_exit_queue(q); 323 323 } 324 324 325 - /* 326 - * In theory, request pool of sched_tags belongs to request queue. 327 - * However, the current implementation requires tag_set for freeing 328 - * requests, so free the pool now. 329 - * 330 - * Queue has become frozen, there can't be any in-queue requests, so 331 - * it is safe to free requests now. 332 - */ 333 - mutex_lock(&q->sysfs_lock); 334 - if (q->elevator) 335 - blk_mq_sched_free_rqs(q); 336 - mutex_unlock(&q->sysfs_lock); 337 - 338 325 /* @q is and will stay empty, shutdown and put */ 339 326 blk_put_queue(q); 340 327 }
-1
block/blk-ia-ranges.c
··· 144 144 } 145 145 146 146 for (i = 0; i < iars->nr_ia_ranges; i++) { 147 - iars->ia_range[i].queue = q; 148 147 ret = kobject_init_and_add(&iars->ia_range[i].kobj, 149 148 &blk_ia_range_ktype, &iars->kobj, 150 149 "%d", i);
+18 -11
block/blk-mq-debugfs.c
··· 711 711 } 712 712 } 713 713 714 - void blk_mq_debugfs_unregister(struct request_queue *q) 715 - { 716 - q->sched_debugfs_dir = NULL; 717 - } 718 - 719 714 static void blk_mq_debugfs_register_ctx(struct blk_mq_hw_ctx *hctx, 720 715 struct blk_mq_ctx *ctx) 721 716 { ··· 741 746 742 747 void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx) 743 748 { 749 + if (!hctx->queue->debugfs_dir) 750 + return; 744 751 debugfs_remove_recursive(hctx->debugfs_dir); 745 752 hctx->sched_debugfs_dir = NULL; 746 753 hctx->debugfs_dir = NULL; ··· 770 773 { 771 774 struct elevator_type *e = q->elevator->type; 772 775 776 + lockdep_assert_held(&q->debugfs_mutex); 777 + 773 778 /* 774 779 * If the parent directory has not been created yet, return, we will be 775 780 * called again later on and the directory/files will be created then. ··· 789 790 790 791 void blk_mq_debugfs_unregister_sched(struct request_queue *q) 791 792 { 793 + lockdep_assert_held(&q->debugfs_mutex); 794 + 792 795 debugfs_remove_recursive(q->sched_debugfs_dir); 793 796 q->sched_debugfs_dir = NULL; 794 797 } ··· 812 811 813 812 void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos) 814 813 { 814 + lockdep_assert_held(&rqos->q->debugfs_mutex); 815 + 816 + if (!rqos->q->debugfs_dir) 817 + return; 815 818 debugfs_remove_recursive(rqos->debugfs_dir); 816 819 rqos->debugfs_dir = NULL; 817 820 } ··· 824 819 { 825 820 struct request_queue *q = rqos->q; 826 821 const char *dir_name = rq_qos_id_to_name(rqos->id); 822 + 823 + lockdep_assert_held(&q->debugfs_mutex); 827 824 828 825 if (rqos->debugfs_dir || !rqos->ops->debugfs_attrs) 829 826 return; ··· 840 833 debugfs_create_files(rqos->debugfs_dir, rqos, rqos->ops->debugfs_attrs); 841 834 } 842 835 843 - void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q) 844 - { 845 - debugfs_remove_recursive(q->rqos_debugfs_dir); 846 - q->rqos_debugfs_dir = NULL; 847 - } 848 - 849 836 void blk_mq_debugfs_register_sched_hctx(struct request_queue *q, 850 837 struct 
blk_mq_hw_ctx *hctx) 851 838 { 852 839 struct elevator_type *e = q->elevator->type; 840 + 841 + lockdep_assert_held(&q->debugfs_mutex); 853 842 854 843 /* 855 844 * If the parent debugfs directory has not been created yet, return; ··· 866 863 867 864 void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx) 868 865 { 866 + lockdep_assert_held(&hctx->queue->debugfs_mutex); 867 + 868 + if (!hctx->queue->debugfs_dir) 869 + return; 869 870 debugfs_remove_recursive(hctx->sched_debugfs_dir); 870 871 hctx->sched_debugfs_dir = NULL; 871 872 }
-10
block/blk-mq-debugfs.h
··· 21 21 int blk_mq_debugfs_rq_show(struct seq_file *m, void *v); 22 22 23 23 void blk_mq_debugfs_register(struct request_queue *q); 24 - void blk_mq_debugfs_unregister(struct request_queue *q); 25 24 void blk_mq_debugfs_register_hctx(struct request_queue *q, 26 25 struct blk_mq_hw_ctx *hctx); 27 26 void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx); ··· 35 36 36 37 void blk_mq_debugfs_register_rqos(struct rq_qos *rqos); 37 38 void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos); 38 - void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q); 39 39 #else 40 40 static inline void blk_mq_debugfs_register(struct request_queue *q) 41 - { 42 - } 43 - 44 - static inline void blk_mq_debugfs_unregister(struct request_queue *q) 45 41 { 46 42 } 47 43 ··· 79 85 } 80 86 81 87 static inline void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos) 82 - { 83 - } 84 - 85 - static inline void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q) 86 88 { 87 89 } 88 90 #endif
+11
block/blk-mq-sched.c
··· 594 594 if (ret) 595 595 goto err_free_map_and_rqs; 596 596 597 + mutex_lock(&q->debugfs_mutex); 597 598 blk_mq_debugfs_register_sched(q); 599 + mutex_unlock(&q->debugfs_mutex); 598 600 599 601 queue_for_each_hw_ctx(q, hctx, i) { 600 602 if (e->ops.init_hctx) { ··· 609 607 return ret; 610 608 } 611 609 } 610 + mutex_lock(&q->debugfs_mutex); 612 611 blk_mq_debugfs_register_sched_hctx(q, hctx); 612 + mutex_unlock(&q->debugfs_mutex); 613 613 } 614 614 615 615 return 0; ··· 652 648 unsigned int flags = 0; 653 649 654 650 queue_for_each_hw_ctx(q, hctx, i) { 651 + mutex_lock(&q->debugfs_mutex); 655 652 blk_mq_debugfs_unregister_sched_hctx(hctx); 653 + mutex_unlock(&q->debugfs_mutex); 654 + 656 655 if (e->type->ops.exit_hctx && hctx->sched_data) { 657 656 e->type->ops.exit_hctx(hctx, i); 658 657 hctx->sched_data = NULL; 659 658 } 660 659 flags = hctx->flags; 661 660 } 661 + 662 + mutex_lock(&q->debugfs_mutex); 662 663 blk_mq_debugfs_unregister_sched(q); 664 + mutex_unlock(&q->debugfs_mutex); 665 + 663 666 if (e->type->ops.exit_sched) 664 667 e->type->ops.exit_sched(e); 665 668 blk_mq_sched_tags_teardown(q, flags);
+8 -3
block/blk-mq.c
··· 2765 2765 return NULL; 2766 2766 } 2767 2767 2768 - rq_qos_throttle(q, *bio); 2769 - 2770 2768 if (blk_mq_get_hctx_type((*bio)->bi_opf) != rq->mq_hctx->type) 2771 2769 return NULL; 2772 2770 if (op_is_flush(rq->cmd_flags) != op_is_flush((*bio)->bi_opf)) 2773 2771 return NULL; 2774 2772 2775 - rq->cmd_flags = (*bio)->bi_opf; 2773 + /* 2774 + * If any qos ->throttle() end up blocking, we will have flushed the 2775 + * plug and hence killed the cached_rq list as well. Pop this entry 2776 + * before we throttle. 2777 + */ 2776 2778 plug->cached_rq = rq_list_next(rq); 2779 + rq_qos_throttle(q, *bio); 2780 + 2781 + rq->cmd_flags = (*bio)->bi_opf; 2777 2782 INIT_LIST_HEAD(&rq->queuelist); 2778 2783 return rq; 2779 2784 }
-2
block/blk-rq-qos.c
··· 294 294 295 295 void rq_qos_exit(struct request_queue *q) 296 296 { 297 - blk_mq_debugfs_unregister_queue_rqos(q); 298 - 299 297 while (q->rq_qos) { 300 298 struct rq_qos *rqos = q->rq_qos; 301 299 q->rq_qos = rqos->next;
+6 -1
block/blk-rq-qos.h
··· 104 104 105 105 blk_mq_unfreeze_queue(q); 106 106 107 - if (rqos->ops->debugfs_attrs) 107 + if (rqos->ops->debugfs_attrs) { 108 + mutex_lock(&q->debugfs_mutex); 108 109 blk_mq_debugfs_register_rqos(rqos); 110 + mutex_unlock(&q->debugfs_mutex); 111 + } 109 112 } 110 113 111 114 static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos) ··· 132 129 133 130 blk_mq_unfreeze_queue(q); 134 131 132 + mutex_lock(&q->debugfs_mutex); 135 133 blk_mq_debugfs_unregister_rqos(rqos); 134 + mutex_unlock(&q->debugfs_mutex); 136 135 } 137 136 138 137 typedef bool (acquire_inflight_cb_t)(struct rq_wait *rqw, void *private_data);
+14 -16
block/blk-sysfs.c
··· 779 779 if (queue_is_mq(q)) 780 780 blk_mq_release(q); 781 781 782 - blk_trace_shutdown(q); 783 - mutex_lock(&q->debugfs_mutex); 784 - debugfs_remove_recursive(q->debugfs_dir); 785 - mutex_unlock(&q->debugfs_mutex); 786 - 787 - if (queue_is_mq(q)) 788 - blk_mq_debugfs_unregister(q); 789 - 790 782 bioset_exit(&q->bio_split); 791 783 792 784 if (blk_queue_has_srcu(q)) ··· 828 836 goto unlock; 829 837 } 830 838 839 + if (queue_is_mq(q)) 840 + __blk_mq_register_dev(dev, q); 841 + mutex_lock(&q->sysfs_lock); 842 + 831 843 mutex_lock(&q->debugfs_mutex); 832 844 q->debugfs_dir = debugfs_create_dir(kobject_name(q->kobj.parent), 833 845 blk_debugfs_root); 834 - mutex_unlock(&q->debugfs_mutex); 835 - 836 - if (queue_is_mq(q)) { 837 - __blk_mq_register_dev(dev, q); 846 + if (queue_is_mq(q)) 838 847 blk_mq_debugfs_register(q); 839 - } 840 - 841 - mutex_lock(&q->sysfs_lock); 848 + mutex_unlock(&q->debugfs_mutex); 842 849 843 850 ret = disk_register_independent_access_ranges(disk, NULL); 844 851 if (ret) ··· 939 948 /* Now that we've deleted all child objects, we can delete the queue. */ 940 949 kobject_uevent(&q->kobj, KOBJ_REMOVE); 941 950 kobject_del(&q->kobj); 942 - 943 951 mutex_unlock(&q->sysfs_dir_lock); 952 + 953 + mutex_lock(&q->debugfs_mutex); 954 + blk_trace_shutdown(q); 955 + debugfs_remove_recursive(q->debugfs_dir); 956 + q->debugfs_dir = NULL; 957 + q->sched_debugfs_dir = NULL; 958 + q->rqos_debugfs_dir = NULL; 959 + mutex_unlock(&q->debugfs_mutex); 944 960 945 961 kobject_put(&disk_to_dev(disk)->kobj); 946 962 }
+12 -30
block/genhd.c
··· 623 623 * Prevent new I/O from crossing bio_queue_enter(). 624 624 */ 625 625 blk_queue_start_drain(q); 626 + blk_mq_freeze_queue_wait(q); 626 627 627 628 if (!(disk->flags & GENHD_FL_HIDDEN)) { 628 629 sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); ··· 647 646 pm_runtime_set_memalloc_noio(disk_to_dev(disk), false); 648 647 device_del(disk_to_dev(disk)); 649 648 650 - blk_mq_freeze_queue_wait(q); 651 - 652 649 blk_throtl_cancel_bios(disk->queue); 653 650 654 651 blk_sync_queue(q); 655 652 blk_flush_integrity(); 653 + blk_mq_cancel_work_sync(q); 654 + 655 + blk_mq_quiesce_queue(q); 656 + if (q->elevator) { 657 + mutex_lock(&q->sysfs_lock); 658 + elevator_exit(q); 659 + mutex_unlock(&q->sysfs_lock); 660 + } 661 + rq_qos_exit(q); 662 + blk_mq_unquiesce_queue(q); 663 + 656 664 /* 657 665 * Allow using passthrough request again after the queue is torn down. 658 666 */ ··· 1130 1120 NULL 1131 1121 }; 1132 1122 1133 - static void disk_release_mq(struct request_queue *q) 1134 - { 1135 - blk_mq_cancel_work_sync(q); 1136 - 1137 - /* 1138 - * There can't be any non non-passthrough bios in flight here, but 1139 - * requests stay around longer, including passthrough ones so we 1140 - * still need to freeze the queue here. 1141 - */ 1142 - blk_mq_freeze_queue(q); 1143 - 1144 - /* 1145 - * Since the I/O scheduler exit code may access cgroup information, 1146 - * perform I/O scheduler exit before disassociating from the block 1147 - * cgroup controller. 
1148 - */ 1149 - if (q->elevator) { 1150 - mutex_lock(&q->sysfs_lock); 1151 - elevator_exit(q); 1152 - mutex_unlock(&q->sysfs_lock); 1153 - } 1154 - rq_qos_exit(q); 1155 - __blk_mq_unfreeze_queue(q, true); 1156 - } 1157 - 1158 1123 /** 1159 1124 * disk_release - releases all allocated resources of the gendisk 1160 1125 * @dev: the device representing this disk ··· 1150 1165 1151 1166 might_sleep(); 1152 1167 WARN_ON_ONCE(disk_live(disk)); 1153 - 1154 - if (queue_is_mq(disk->queue)) 1155 - disk_release_mq(disk->queue); 1156 1168 1157 1169 blkcg_exit_queue(disk->queue); 1158 1170
-4
block/holder.c
··· 79 79 80 80 WARN_ON_ONCE(!bdev->bd_holder); 81 81 82 - /* FIXME: remove the following once add_disk() handles errors */ 83 - if (WARN_ON(!bdev->bd_holder_dir)) 84 - goto out_unlock; 85 - 86 82 holder = bd_find_holder_disk(bdev, disk); 87 83 if (holder) { 88 84 holder->refcnt++;
+14
drivers/nvme/host/core.c
··· 2546 2546 .vid = 0x1e0f, 2547 2547 .mn = "KCD6XVUL6T40", 2548 2548 .quirks = NVME_QUIRK_NO_APST, 2549 + }, 2550 + { 2551 + /* 2552 + * The external Samsung X5 SSD fails initialization without a 2553 + * delay before checking if it is ready and has a whole set of 2554 + * other problems. To make this even more interesting, it 2555 + * shares the PCI ID with internal Samsung 970 Evo Plus that 2556 + * does not need or want these quirks. 2557 + */ 2558 + .vid = 0x144d, 2559 + .mn = "Samsung Portable SSD X5", 2560 + .quirks = NVME_QUIRK_DELAY_BEFORE_CHK_RDY | 2561 + NVME_QUIRK_NO_DEEPEST_PS | 2562 + NVME_QUIRK_IGNORE_DEV_SUBNQN, 2549 2563 } 2550 2564 }; 2551 2565
+2 -4
drivers/nvme/host/pci.c
··· 3474 3474 { PCI_DEVICE(0x1cc1, 0x8201), /* ADATA SX8200PNP 512GB */ 3475 3475 .driver_data = NVME_QUIRK_NO_DEEPEST_PS | 3476 3476 NVME_QUIRK_IGNORE_DEV_SUBNQN, }, 3477 + { PCI_DEVICE(0x1344, 0x5407), /* Micron Technology Inc NVMe SSD */ 3478 + .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN }, 3477 3479 { PCI_DEVICE(0x1c5c, 0x1504), /* SK Hynix PC400 */ 3478 3480 .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, 3479 3481 { PCI_DEVICE(0x1c5c, 0x174a), /* SK Hynix P31 SSD */ ··· 3526 3524 NVME_QUIRK_128_BYTES_SQES | 3527 3525 NVME_QUIRK_SHARED_TAGS | 3528 3526 NVME_QUIRK_SKIP_CID_GEN }, 3529 - { PCI_DEVICE(0x144d, 0xa808), /* Samsung X5 */ 3530 - .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY| 3531 - NVME_QUIRK_NO_DEEPEST_PS | 3532 - NVME_QUIRK_IGNORE_DEV_SUBNQN, }, 3533 3527 { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, 3534 3528 { 0, } 3535 3529 };
+4 -5
include/linux/blkdev.h
··· 342 342 */ 343 343 struct blk_independent_access_range { 344 344 struct kobject kobj; 345 - struct request_queue *queue; 346 345 sector_t sector; 347 346 sector_t nr_sectors; 348 347 }; ··· 481 482 #endif /* CONFIG_BLK_DEV_ZONED */ 482 483 483 484 int node; 484 - struct mutex debugfs_mutex; 485 485 #ifdef CONFIG_BLK_DEV_IO_TRACE 486 486 struct blk_trace __rcu *blk_trace; 487 487 #endif ··· 524 526 struct bio_set bio_split; 525 527 526 528 struct dentry *debugfs_dir; 527 - 528 - #ifdef CONFIG_BLK_DEBUG_FS 529 529 struct dentry *sched_debugfs_dir; 530 530 struct dentry *rqos_debugfs_dir; 531 - #endif 531 + /* 532 + * Serializes all debugfs metadata operations using the above dentries. 533 + */ 534 + struct mutex debugfs_mutex; 532 535 533 536 bool mq_sysfs_init_done; 534 537
+2 -2
include/linux/nvme.h
··· 233 233 }; 234 234 235 235 enum { 236 - NVME_CAP_CRMS_CRIMS = 1ULL << 59, 237 - NVME_CAP_CRMS_CRWMS = 1ULL << 60, 236 + NVME_CAP_CRMS_CRWMS = 1ULL << 59, 237 + NVME_CAP_CRMS_CRIMS = 1ULL << 60, 238 238 }; 239 239 240 240 struct nvme_id_power_state {
-3
kernel/trace/blktrace.c
··· 770 770 **/ 771 771 void blk_trace_shutdown(struct request_queue *q) 772 772 { 773 - mutex_lock(&q->debugfs_mutex); 774 773 if (rcu_dereference_protected(q->blk_trace, 775 774 lockdep_is_held(&q->debugfs_mutex))) { 776 775 __blk_trace_startstop(q, 0); 777 776 __blk_trace_remove(q); 778 777 } 779 - 780 - mutex_unlock(&q->debugfs_mutex); 781 778 } 782 779 783 780 #ifdef CONFIG_BLK_CGROUP