Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'for-linus' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:
"A collection of fixes since the merge window;

- fix for a double elevator module release, from Chao Yu. Ancient bug.

- the splice() MORE flag fix from Christophe Leroy.

- a fix for NVMe, fixing a patch that went in during the merge window.
From Keith.

- two fixes for blk-mq CPU hotplug handling, from Ming Lei.

- bdi vs blockdev lifetime fix from Neil Brown, fixing an oops in md.

- two blk-mq fixes from Shaohua, fixing a race on queue stop and a
bad merge issue with FUA writes.

- division-by-zero fix for writeback from Tejun.

- a block bounce page accounting fix, making sure we inc/dec after
bouncing so that pre/post IO pages match up. From Wang YanQing"

* 'for-linus' of git://git.kernel.dk/linux-block:
splice: sendfile() at once fails for big files
blk-mq: don't lose requests if a stopped queue restarts
blk-mq: fix FUA request hang
block: destroy bdi before blockdev is unregistered.
block:bounce: fix call inc_|dec_zone_page_state on different pages confuse value of NR_BOUNCE
elevator: fix double release of elevator module
writeback: use |1 instead of +1 to protect against div by zero
blk-mq: fix CPU hotplug handling
blk-mq: fix race between timeout and CPU hotplug
NVMe: Fix VPD B0 max sectors translation

+60 -41
+2
block/blk-core.c
··· 552 552 q->queue_lock = &q->__queue_lock; 553 553 spin_unlock_irq(lock); 554 554 555 + bdi_destroy(&q->backing_dev_info); 556 + 555 557 /* @q is and will stay empty, shutdown and put */ 556 558 blk_put_queue(q); 557 559 }
+36 -24
block/blk-mq.c
··· 677 677 data.next = blk_rq_timeout(round_jiffies_up(data.next)); 678 678 mod_timer(&q->timeout, data.next); 679 679 } else { 680 - queue_for_each_hw_ctx(q, hctx, i) 681 - blk_mq_tag_idle(hctx); 680 + queue_for_each_hw_ctx(q, hctx, i) { 681 + /* the hctx may be unmapped, so check it here */ 682 + if (blk_mq_hw_queue_mapped(hctx)) 683 + blk_mq_tag_idle(hctx); 684 + } 682 685 } 683 686 } 684 687 ··· 858 855 spin_lock(&hctx->lock); 859 856 list_splice(&rq_list, &hctx->dispatch); 860 857 spin_unlock(&hctx->lock); 858 + /* 859 + * the queue is expected stopped with BLK_MQ_RQ_QUEUE_BUSY, but 860 + * it's possible the queue is stopped and restarted again 861 + * before this. Queue restart will dispatch requests. And since 862 + * requests in rq_list aren't added into hctx->dispatch yet, 863 + * the requests in rq_list might get lost. 864 + * 865 + * blk_mq_run_hw_queue() already checks the STOPPED bit 866 + **/ 867 + blk_mq_run_hw_queue(hctx, true); 861 868 } 862 869 } 863 870 ··· 1584 1571 return NOTIFY_OK; 1585 1572 } 1586 1573 1587 - static int blk_mq_hctx_cpu_online(struct blk_mq_hw_ctx *hctx, int cpu) 1588 - { 1589 - struct request_queue *q = hctx->queue; 1590 - struct blk_mq_tag_set *set = q->tag_set; 1591 - 1592 - if (set->tags[hctx->queue_num]) 1593 - return NOTIFY_OK; 1594 - 1595 - set->tags[hctx->queue_num] = blk_mq_init_rq_map(set, hctx->queue_num); 1596 - if (!set->tags[hctx->queue_num]) 1597 - return NOTIFY_STOP; 1598 - 1599 - hctx->tags = set->tags[hctx->queue_num]; 1600 - return NOTIFY_OK; 1601 - } 1602 - 1603 1574 static int blk_mq_hctx_notify(void *data, unsigned long action, 1604 1575 unsigned int cpu) 1605 1576 { ··· 1591 1594 1592 1595 if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) 1593 1596 return blk_mq_hctx_cpu_offline(hctx, cpu); 1594 - else if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) 1595 - return blk_mq_hctx_cpu_online(hctx, cpu); 1597 + 1598 + /* 1599 + * In case of CPU online, tags may be reallocated 1600 + * in 
blk_mq_map_swqueue() after mapping is updated. 1601 + */ 1596 1602 1597 1603 return NOTIFY_OK; 1598 1604 } ··· 1775 1775 unsigned int i; 1776 1776 struct blk_mq_hw_ctx *hctx; 1777 1777 struct blk_mq_ctx *ctx; 1778 + struct blk_mq_tag_set *set = q->tag_set; 1778 1779 1779 1780 queue_for_each_hw_ctx(q, hctx, i) { 1780 1781 cpumask_clear(hctx->cpumask); ··· 1804 1803 * disable it and free the request entries. 1805 1804 */ 1806 1805 if (!hctx->nr_ctx) { 1807 - struct blk_mq_tag_set *set = q->tag_set; 1808 - 1809 1806 if (set->tags[i]) { 1810 1807 blk_mq_free_rq_map(set, set->tags[i], i); 1811 1808 set->tags[i] = NULL; 1812 - hctx->tags = NULL; 1813 1809 } 1810 + hctx->tags = NULL; 1814 1811 continue; 1815 1812 } 1813 + 1814 + /* unmapped hw queue can be remapped after CPU topo changed */ 1815 + if (!set->tags[i]) 1816 + set->tags[i] = blk_mq_init_rq_map(set, i); 1817 + hctx->tags = set->tags[i]; 1818 + WARN_ON(!hctx->tags); 1816 1819 1817 1820 /* 1818 1821 * Set the map size to the number of mapped software queues. ··· 2095 2090 */ 2096 2091 list_for_each_entry(q, &all_q_list, all_q_node) 2097 2092 blk_mq_freeze_queue_start(q); 2098 - list_for_each_entry(q, &all_q_list, all_q_node) 2093 + list_for_each_entry(q, &all_q_list, all_q_node) { 2099 2094 blk_mq_freeze_queue_wait(q); 2095 + 2096 + /* 2097 + * timeout handler can't touch hw queue during the 2098 + * reinitialization 2099 + */ 2100 + del_timer_sync(&q->timeout); 2101 + } 2100 2102 2101 2103 list_for_each_entry(q, &all_q_list, all_q_node) 2102 2104 blk_mq_queue_reinit(q);
-2
block/blk-sysfs.c
··· 522 522 523 523 blk_trace_shutdown(q); 524 524 525 - bdi_destroy(&q->backing_dev_info); 526 - 527 525 ida_simple_remove(&blk_queue_ida, q->id); 528 526 call_rcu(&q->rcu_head, blk_free_queue_rcu); 529 527 }
+1 -1
block/bounce.c
··· 221 221 if (page_to_pfn(page) <= queue_bounce_pfn(q) && !force) 222 222 continue; 223 223 224 - inc_zone_page_state(to->bv_page, NR_BOUNCE); 225 224 to->bv_page = mempool_alloc(pool, q->bounce_gfp); 225 + inc_zone_page_state(to->bv_page, NR_BOUNCE); 226 226 227 227 if (rw == WRITE) { 228 228 char *vto, *vfrom;
+1 -5
block/elevator.c
··· 157 157 158 158 eq = kzalloc_node(sizeof(*eq), GFP_KERNEL, q->node); 159 159 if (unlikely(!eq)) 160 - goto err; 160 + return NULL; 161 161 162 162 eq->type = e; 163 163 kobject_init(&eq->kobj, &elv_ktype); ··· 165 165 hash_init(eq->hash); 166 166 167 167 return eq; 168 - err: 169 - kfree(eq); 170 - elevator_put(e); 171 - return NULL; 172 168 } 173 169 EXPORT_SYMBOL(elevator_alloc); 174 170
+1 -1
drivers/block/loop.c
··· 1620 1620 1621 1621 static void loop_remove(struct loop_device *lo) 1622 1622 { 1623 - del_gendisk(lo->lo_disk); 1624 1623 blk_cleanup_queue(lo->lo_queue); 1624 + del_gendisk(lo->lo_disk); 1625 1625 blk_mq_free_tag_set(&lo->tag_set); 1626 1626 put_disk(lo->lo_disk); 1627 1627 kfree(lo);
+2 -1
drivers/block/nvme-scsi.c
··· 944 944 static int nvme_trans_bdev_limits_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, 945 945 u8 *inq_response, int alloc_len) 946 946 { 947 - __be32 max_sectors = cpu_to_be32(queue_max_hw_sectors(ns->queue)); 947 + __be32 max_sectors = cpu_to_be32( 948 + nvme_block_nr(ns, queue_max_hw_sectors(ns->queue))); 948 949 __be32 max_discard = cpu_to_be32(ns->queue->limits.max_discard_sectors); 949 950 __be32 discard_desc_count = cpu_to_be32(0x100); 950 951
+2 -2
drivers/md/md.c
··· 4818 4818 if (mddev->sysfs_state) 4819 4819 sysfs_put(mddev->sysfs_state); 4820 4820 4821 + if (mddev->queue) 4822 + blk_cleanup_queue(mddev->queue); 4821 4823 if (mddev->gendisk) { 4822 4824 del_gendisk(mddev->gendisk); 4823 4825 put_disk(mddev->gendisk); 4824 4826 } 4825 - if (mddev->queue) 4826 - blk_cleanup_queue(mddev->queue); 4827 4827 4828 4828 kfree(mddev); 4829 4829 }
+11 -1
fs/splice.c
··· 1161 1161 long ret, bytes; 1162 1162 umode_t i_mode; 1163 1163 size_t len; 1164 - int i, flags; 1164 + int i, flags, more; 1165 1165 1166 1166 /* 1167 1167 * We require the input being a regular file, as we don't want to ··· 1204 1204 * Don't block on output, we have to drain the direct pipe. 1205 1205 */ 1206 1206 sd->flags &= ~SPLICE_F_NONBLOCK; 1207 + more = sd->flags & SPLICE_F_MORE; 1207 1208 1208 1209 while (len) { 1209 1210 size_t read_len; ··· 1217 1216 read_len = ret; 1218 1217 sd->total_len = read_len; 1219 1218 1219 + /* 1220 + * If more data is pending, set SPLICE_F_MORE 1221 + * If this is the last data and SPLICE_F_MORE was not set 1222 + * initially, clears it. 1223 + */ 1224 + if (read_len < len) 1225 + sd->flags |= SPLICE_F_MORE; 1226 + else if (!more) 1227 + sd->flags &= ~SPLICE_F_MORE; 1220 1228 /* 1221 1229 * NOTE: nonblocking mode only applies to the input. We 1222 1230 * must not do the output in nonblocking mode as then we
+1 -1
include/linux/blk_types.h
··· 220 220 221 221 /* This mask is used for both bio and request merge checking */ 222 222 #define REQ_NOMERGE_FLAGS \ 223 - (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA) 223 + (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA | REQ_FLUSH_SEQ) 224 224 225 225 #define REQ_RAHEAD (1ULL << __REQ_RAHEAD) 226 226 #define REQ_THROTTLED (1ULL << __REQ_THROTTLED)
+3 -3
mm/page-writeback.c
··· 580 580 long x; 581 581 582 582 x = div64_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT, 583 - limit - setpoint + 1); 583 + (limit - setpoint) | 1); 584 584 pos_ratio = x; 585 585 pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT; 586 586 pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT; ··· 807 807 * scale global setpoint to bdi's: 808 808 * bdi_setpoint = setpoint * bdi_thresh / thresh 809 809 */ 810 - x = div_u64((u64)bdi_thresh << 16, thresh + 1); 810 + x = div_u64((u64)bdi_thresh << 16, thresh | 1); 811 811 bdi_setpoint = setpoint * (u64)x >> 16; 812 812 /* 813 813 * Use span=(8*write_bw) in single bdi case as indicated by ··· 822 822 823 823 if (bdi_dirty < x_intercept - span / 4) { 824 824 pos_ratio = div64_u64(pos_ratio * (x_intercept - bdi_dirty), 825 - x_intercept - bdi_setpoint + 1); 825 + (x_intercept - bdi_setpoint) | 1); 826 826 } else 827 827 pos_ratio /= 4; 828 828