Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'block-5.7-2020-05-09' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:

- a small series fixing a use-after-free of bdi name (Christoph, Yufen)

- NVMe fix for a regression with the smaller CQ update (Alexey)

- NVMe fix for a hang at namespace scanning error recovery (Sagi)

- fix race with blk-iocost iocg->abs_vdebt updates (Tejun)

* tag 'block-5.7-2020-05-09' of git://git.kernel.dk/linux-block:
nvme: fix possible hang when ns scanning fails during error recovery
nvme-pci: fix "slimmer CQ head update"
bdi: add a ->dev_name field to struct backing_dev_info
bdi: use bdi_dev_name() to get device name
bdi: move bdi_dev_name out of line
vboxsf: don't use the source name in the bdi name
iocost: protect iocg->abs_vdebt with iocg->waitq.lock

+107 -68
+4 -2
block/bfq-iosched.c
··· 123 123 #include <linux/ioprio.h> 124 124 #include <linux/sbitmap.h> 125 125 #include <linux/delay.h> 126 + #include <linux/backing-dev.h> 126 127 127 128 #include "blk.h" 128 129 #include "blk-mq.h" ··· 4977 4976 ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio); 4978 4977 switch (ioprio_class) { 4979 4978 default: 4980 - dev_err(bfqq->bfqd->queue->backing_dev_info->dev, 4981 - "bfq: bad prio class %d\n", ioprio_class); 4979 + pr_err("bdi %s: bfq: bad prio class %d\n", 4980 + bdi_dev_name(bfqq->bfqd->queue->backing_dev_info), 4981 + ioprio_class); 4982 4982 /* fall through */ 4983 4983 case IOPRIO_CLASS_NONE: 4984 4984 /*
+1 -1
block/blk-cgroup.c
··· 496 496 { 497 497 /* some drivers (floppy) instantiate a queue w/o disk registered */ 498 498 if (blkg->q->backing_dev_info->dev) 499 - return dev_name(blkg->q->backing_dev_info->dev); 499 + return bdi_dev_name(blkg->q->backing_dev_info); 500 500 return NULL; 501 501 } 502 502
+71 -46
block/blk-iocost.c
··· 466 466 */ 467 467 atomic64_t vtime; 468 468 atomic64_t done_vtime; 469 - atomic64_t abs_vdebt; 469 + u64 abs_vdebt; 470 470 u64 last_vtime; 471 471 472 472 /* ··· 1142 1142 struct iocg_wake_ctx ctx = { .iocg = iocg }; 1143 1143 u64 margin_ns = (u64)(ioc->period_us * 1144 1144 WAITQ_TIMER_MARGIN_PCT / 100) * NSEC_PER_USEC; 1145 - u64 abs_vdebt, vdebt, vshortage, expires, oexpires; 1145 + u64 vdebt, vshortage, expires, oexpires; 1146 1146 s64 vbudget; 1147 1147 u32 hw_inuse; 1148 1148 ··· 1152 1152 vbudget = now->vnow - atomic64_read(&iocg->vtime); 1153 1153 1154 1154 /* pay off debt */ 1155 - abs_vdebt = atomic64_read(&iocg->abs_vdebt); 1156 - vdebt = abs_cost_to_cost(abs_vdebt, hw_inuse); 1155 + vdebt = abs_cost_to_cost(iocg->abs_vdebt, hw_inuse); 1157 1156 if (vdebt && vbudget > 0) { 1158 1157 u64 delta = min_t(u64, vbudget, vdebt); 1159 1158 u64 abs_delta = min(cost_to_abs_cost(delta, hw_inuse), 1160 - abs_vdebt); 1159 + iocg->abs_vdebt); 1161 1160 1162 1161 atomic64_add(delta, &iocg->vtime); 1163 1162 atomic64_add(delta, &iocg->done_vtime); 1164 - atomic64_sub(abs_delta, &iocg->abs_vdebt); 1165 - if (WARN_ON_ONCE(atomic64_read(&iocg->abs_vdebt) < 0)) 1166 - atomic64_set(&iocg->abs_vdebt, 0); 1163 + iocg->abs_vdebt -= abs_delta; 1167 1164 } 1168 1165 1169 1166 /* ··· 1216 1219 u64 expires, oexpires; 1217 1220 u32 hw_inuse; 1218 1221 1222 + lockdep_assert_held(&iocg->waitq.lock); 1223 + 1219 1224 /* debt-adjust vtime */ 1220 1225 current_hweight(iocg, NULL, &hw_inuse); 1221 - vtime += abs_cost_to_cost(atomic64_read(&iocg->abs_vdebt), hw_inuse); 1226 + vtime += abs_cost_to_cost(iocg->abs_vdebt, hw_inuse); 1222 1227 1223 - /* clear or maintain depending on the overage */ 1224 - if (time_before_eq64(vtime, now->vnow)) { 1228 + /* 1229 + * Clear or maintain depending on the overage. Non-zero vdebt is what 1230 + * guarantees that @iocg is online and future iocg_kick_delay() will 1231 + * clear use_delay. Don't leave it on when there's no vdebt. 
1232 + */ 1233 + if (!iocg->abs_vdebt || time_before_eq64(vtime, now->vnow)) { 1225 1234 blkcg_clear_delay(blkg); 1226 1235 return false; 1227 1236 } ··· 1261 1258 { 1262 1259 struct ioc_gq *iocg = container_of(timer, struct ioc_gq, delay_timer); 1263 1260 struct ioc_now now; 1261 + unsigned long flags; 1264 1262 1263 + spin_lock_irqsave(&iocg->waitq.lock, flags); 1265 1264 ioc_now(iocg->ioc, &now); 1266 1265 iocg_kick_delay(iocg, &now, 0); 1266 + spin_unlock_irqrestore(&iocg->waitq.lock, flags); 1267 1267 1268 1268 return HRTIMER_NORESTART; 1269 1269 } ··· 1374 1368 * should have woken up in the last period and expire idle iocgs. 1375 1369 */ 1376 1370 list_for_each_entry_safe(iocg, tiocg, &ioc->active_iocgs, active_list) { 1377 - if (!waitqueue_active(&iocg->waitq) && 1378 - !atomic64_read(&iocg->abs_vdebt) && !iocg_is_idle(iocg)) 1371 + if (!waitqueue_active(&iocg->waitq) && !iocg->abs_vdebt && 1372 + !iocg_is_idle(iocg)) 1379 1373 continue; 1380 1374 1381 1375 spin_lock(&iocg->waitq.lock); 1382 1376 1383 - if (waitqueue_active(&iocg->waitq) || 1384 - atomic64_read(&iocg->abs_vdebt)) { 1377 + if (waitqueue_active(&iocg->waitq) || iocg->abs_vdebt) { 1385 1378 /* might be oversleeping vtime / hweight changes, kick */ 1386 1379 iocg_kick_waitq(iocg, &now); 1387 1380 iocg_kick_delay(iocg, &now, 0); ··· 1723 1718 * tests are racy but the races aren't systemic - we only miss once 1724 1719 * in a while which is fine. 1725 1720 */ 1726 - if (!waitqueue_active(&iocg->waitq) && 1727 - !atomic64_read(&iocg->abs_vdebt) && 1721 + if (!waitqueue_active(&iocg->waitq) && !iocg->abs_vdebt && 1728 1722 time_before_eq64(vtime + cost, now.vnow)) { 1729 1723 iocg_commit_bio(iocg, bio, cost); 1730 1724 return; 1731 1725 } 1732 1726 1733 1727 /* 1734 - * We're over budget. If @bio has to be issued regardless, 1735 - * remember the abs_cost instead of advancing vtime. 1736 - * iocg_kick_waitq() will pay off the debt before waking more IOs. 
1728 + * We activated above but w/o any synchronization. Deactivation is 1729 + * synchronized with waitq.lock and we won't get deactivated as long 1730 + * as we're waiting or has debt, so we're good if we're activated 1731 + * here. In the unlikely case that we aren't, just issue the IO. 1732 + */ 1733 + spin_lock_irq(&iocg->waitq.lock); 1734 + 1735 + if (unlikely(list_empty(&iocg->active_list))) { 1736 + spin_unlock_irq(&iocg->waitq.lock); 1737 + iocg_commit_bio(iocg, bio, cost); 1738 + return; 1739 + } 1740 + 1741 + /* 1742 + * We're over budget. If @bio has to be issued regardless, remember 1743 + * the abs_cost instead of advancing vtime. iocg_kick_waitq() will pay 1744 + * off the debt before waking more IOs. 1745 + * 1737 1746 * This way, the debt is continuously paid off each period with the 1738 - * actual budget available to the cgroup. If we just wound vtime, 1739 - * we would incorrectly use the current hw_inuse for the entire 1740 - * amount which, for example, can lead to the cgroup staying 1741 - * blocked for a long time even with substantially raised hw_inuse. 1747 + * actual budget available to the cgroup. If we just wound vtime, we 1748 + * would incorrectly use the current hw_inuse for the entire amount 1749 + * which, for example, can lead to the cgroup staying blocked for a 1750 + * long time even with substantially raised hw_inuse. 1751 + * 1752 + * An iocg with vdebt should stay online so that the timer can keep 1753 + * deducting its vdebt and [de]activate use_delay mechanism 1754 + * accordingly. We don't want to race against the timer trying to 1755 + * clear them and leave @iocg inactive w/ dangling use_delay heavily 1756 + * penalizing the cgroup and its descendants. 
1742 1757 */ 1743 1758 if (bio_issue_as_root_blkg(bio) || fatal_signal_pending(current)) { 1744 - atomic64_add(abs_cost, &iocg->abs_vdebt); 1759 + iocg->abs_vdebt += abs_cost; 1745 1760 if (iocg_kick_delay(iocg, &now, cost)) 1746 1761 blkcg_schedule_throttle(rqos->q, 1747 1762 (bio->bi_opf & REQ_SWAP) == REQ_SWAP); 1763 + spin_unlock_irq(&iocg->waitq.lock); 1748 1764 return; 1749 1765 } 1750 1766 ··· 1782 1756 * All waiters are on iocg->waitq and the wait states are 1783 1757 * synchronized using waitq.lock. 1784 1758 */ 1785 - spin_lock_irq(&iocg->waitq.lock); 1786 - 1787 - /* 1788 - * We activated above but w/o any synchronization. Deactivation is 1789 - * synchronized with waitq.lock and we won't get deactivated as 1790 - * long as we're waiting, so we're good if we're activated here. 1791 - * In the unlikely case that we are deactivated, just issue the IO. 1792 - */ 1793 - if (unlikely(list_empty(&iocg->active_list))) { 1794 - spin_unlock_irq(&iocg->waitq.lock); 1795 - iocg_commit_bio(iocg, bio, cost); 1796 - return; 1797 - } 1798 - 1799 1759 init_waitqueue_func_entry(&wait.wait, iocg_wake_fn); 1800 1760 wait.wait.private = current; 1801 1761 wait.bio = bio; ··· 1813 1801 struct ioc_now now; 1814 1802 u32 hw_inuse; 1815 1803 u64 abs_cost, cost; 1804 + unsigned long flags; 1816 1805 1817 1806 /* bypass if disabled or for root cgroup */ 1818 1807 if (!ioc->enabled || !iocg->level) ··· 1833 1820 iocg->cursor = bio_end; 1834 1821 1835 1822 /* 1836 - * Charge if there's enough vtime budget and the existing request 1837 - * has cost assigned. Otherwise, account it as debt. See debt 1838 - * handling in ioc_rqos_throttle() for details. 1823 + * Charge if there's enough vtime budget and the existing request has 1824 + * cost assigned. 
1839 1825 */ 1840 1826 if (rq->bio && rq->bio->bi_iocost_cost && 1841 - time_before_eq64(atomic64_read(&iocg->vtime) + cost, now.vnow)) 1827 + time_before_eq64(atomic64_read(&iocg->vtime) + cost, now.vnow)) { 1842 1828 iocg_commit_bio(iocg, bio, cost); 1843 - else 1844 - atomic64_add(abs_cost, &iocg->abs_vdebt); 1829 + return; 1830 + } 1831 + 1832 + /* 1833 + * Otherwise, account it as debt if @iocg is online, which it should 1834 + * be for the vast majority of cases. See debt handling in 1835 + * ioc_rqos_throttle() for details. 1836 + */ 1837 + spin_lock_irqsave(&iocg->waitq.lock, flags); 1838 + if (likely(!list_empty(&iocg->active_list))) { 1839 + iocg->abs_vdebt += abs_cost; 1840 + iocg_kick_delay(iocg, &now, cost); 1841 + } else { 1842 + iocg_commit_bio(iocg, bio, cost); 1843 + } 1844 + spin_unlock_irqrestore(&iocg->waitq.lock, flags); 1845 1845 } 1846 1846 1847 1847 static void ioc_rqos_done_bio(struct rq_qos *rqos, struct bio *bio) ··· 2024 1998 iocg->ioc = ioc; 2025 1999 atomic64_set(&iocg->vtime, now.vnow); 2026 2000 atomic64_set(&iocg->done_vtime, now.vnow); 2027 - atomic64_set(&iocg->abs_vdebt, 0); 2028 2001 atomic64_set(&iocg->active_period, atomic64_read(&ioc->cur_period)); 2029 2002 INIT_LIST_HEAD(&iocg->active_list); 2030 2003 iocg->hweight_active = HWEIGHT_WHOLE;
+1 -1
drivers/nvme/host/core.c
··· 1110 1110 * Don't treat an error as fatal, as we potentially already 1111 1111 * have a NGUID or EUI-64. 1112 1112 */ 1113 - if (status > 0) 1113 + if (status > 0 && !(status & NVME_SC_DNR)) 1114 1114 status = 0; 1115 1115 goto free_data; 1116 1116 }
+5 -1
drivers/nvme/host/pci.c
··· 973 973 974 974 static inline void nvme_update_cq_head(struct nvme_queue *nvmeq) 975 975 { 976 - if (++nvmeq->cq_head == nvmeq->q_depth) { 976 + u16 tmp = nvmeq->cq_head + 1; 977 + 978 + if (tmp == nvmeq->q_depth) { 977 979 nvmeq->cq_head = 0; 978 980 nvmeq->cq_phase ^= 1; 981 + } else { 982 + nvmeq->cq_head = tmp; 979 983 } 980 984 } 981 985
+1 -1
fs/ceph/debugfs.c
··· 271 271 &congestion_kb_fops); 272 272 273 273 snprintf(name, sizeof(name), "../../bdi/%s", 274 - dev_name(fsc->sb->s_bdi->dev)); 274 + bdi_dev_name(fsc->sb->s_bdi)); 275 275 fsc->debugfs_bdi = 276 276 debugfs_create_symlink("bdi", 277 277 fsc->client->debugfs_dir,
+1 -1
fs/vboxsf/super.c
··· 164 164 goto fail_free; 165 165 } 166 166 167 - err = super_setup_bdi_name(sb, "vboxsf-%s.%d", fc->source, sbi->bdi_id); 167 + err = super_setup_bdi_name(sb, "vboxsf-%d", sbi->bdi_id); 168 168 if (err) 169 169 goto fail_free; 170 170
+1
include/linux/backing-dev-defs.h
··· 219 219 wait_queue_head_t wb_waitq; 220 220 221 221 struct device *dev; 222 + char dev_name[64]; 222 223 struct device *owner; 223 224 224 225 struct timer_list laptop_mode_wb_timer;
+1 -8
include/linux/backing-dev.h
··· 505 505 (1 << WB_async_congested)); 506 506 } 507 507 508 - extern const char *bdi_unknown_name; 509 - 510 - static inline const char *bdi_dev_name(struct backing_dev_info *bdi) 511 - { 512 - if (!bdi || !bdi->dev) 513 - return bdi_unknown_name; 514 - return dev_name(bdi->dev); 515 - } 508 + const char *bdi_dev_name(struct backing_dev_info *bdi); 516 509 517 510 #endif /* _LINUX_BACKING_DEV_H */
+4 -4
include/trace/events/wbt.h
··· 33 33 ), 34 34 35 35 TP_fast_assign( 36 - strlcpy(__entry->name, dev_name(bdi->dev), 36 + strlcpy(__entry->name, bdi_dev_name(bdi), 37 37 ARRAY_SIZE(__entry->name)); 38 38 __entry->rmean = stat[0].mean; 39 39 __entry->rmin = stat[0].min; ··· 68 68 ), 69 69 70 70 TP_fast_assign( 71 - strlcpy(__entry->name, dev_name(bdi->dev), 71 + strlcpy(__entry->name, bdi_dev_name(bdi), 72 72 ARRAY_SIZE(__entry->name)); 73 73 __entry->lat = div_u64(lat, 1000); 74 74 ), ··· 105 105 ), 106 106 107 107 TP_fast_assign( 108 - strlcpy(__entry->name, dev_name(bdi->dev), 108 + strlcpy(__entry->name, bdi_dev_name(bdi), 109 109 ARRAY_SIZE(__entry->name)); 110 110 __entry->msg = msg; 111 111 __entry->step = step; ··· 141 141 ), 142 142 143 143 TP_fast_assign( 144 - strlcpy(__entry->name, dev_name(bdi->dev), 144 + strlcpy(__entry->name, bdi_dev_name(bdi), 145 145 ARRAY_SIZE(__entry->name)); 146 146 __entry->status = status; 147 147 __entry->step = step;
+11 -2
mm/backing-dev.c
··· 21 21 EXPORT_SYMBOL_GPL(noop_backing_dev_info); 22 22 23 23 static struct class *bdi_class; 24 - const char *bdi_unknown_name = "(unknown)"; 24 + static const char *bdi_unknown_name = "(unknown)"; 25 25 26 26 /* 27 27 * bdi_lock protects bdi_tree and updates to bdi_list. bdi_list has RCU ··· 938 938 if (bdi->dev) /* The driver needs to use separate queues per device */ 939 939 return 0; 940 940 941 - dev = device_create_vargs(bdi_class, NULL, MKDEV(0, 0), bdi, fmt, args); 941 + vsnprintf(bdi->dev_name, sizeof(bdi->dev_name), fmt, args); 942 + dev = device_create(bdi_class, NULL, MKDEV(0, 0), bdi, bdi->dev_name); 942 943 if (IS_ERR(dev)) 943 944 return PTR_ERR(dev); 944 945 ··· 1043 1042 kref_put(&bdi->refcnt, release_bdi); 1044 1043 } 1045 1044 EXPORT_SYMBOL(bdi_put); 1045 + 1046 + const char *bdi_dev_name(struct backing_dev_info *bdi) 1047 + { 1048 + if (!bdi || !bdi->dev) 1049 + return bdi_unknown_name; 1050 + return bdi->dev_name; 1051 + } 1052 + EXPORT_SYMBOL_GPL(bdi_dev_name); 1046 1053 1047 1054 static wait_queue_head_t congestion_wqh[2] = { 1048 1055 __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),
+6 -1
tools/cgroup/iocost_monitor.py
··· 159 159 else: 160 160 self.inflight_pct = 0 161 161 162 - self.debt_ms = iocg.abs_vdebt.counter.value_() / VTIME_PER_USEC / 1000 162 + # vdebt used to be an atomic64_t and is now u64, support both 163 + try: 164 + self.debt_ms = iocg.abs_vdebt.counter.value_() / VTIME_PER_USEC / 1000 165 + except: 166 + self.debt_ms = iocg.abs_vdebt.value_() / VTIME_PER_USEC / 1000 167 + 163 168 self.use_delay = blkg.use_delay.counter.value_() 164 169 self.delay_ms = blkg.delay_nsec.counter.value_() / 1_000_000 165 170