Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'block-5.14-2021-07-30' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:

- gendisk freeing fix (Christoph)

- blk-iocost wake ordering fix (Tejun)

- tag allocation error handling fix (John)

- loop locking fix. While this isn't the prettiest fix in the world,
nobody has a good alternative for 5.14. This is likely something to
revisit for 5.15. (Tetsuo)

* tag 'block-5.14-2021-07-30' of git://git.kernel.dk/linux-block:
block: delay freeing the gendisk
blk-iocost: fix operation ordering in iocg_wake_fn()
blk-mq-sched: Fix blk_mq_sched_alloc_tags() error handling
loop: reintroduce global lock for safe loop_validate_file() traversal

+110 -51
+6 -5
block/blk-iocost.c
··· 1440 1440 return -1; 1441 1441 1442 1442 iocg_commit_bio(ctx->iocg, wait->bio, wait->abs_cost, cost); 1443 + wait->committed = true; 1443 1444 1444 1445 /* 1445 1446 * autoremove_wake_function() removes the wait entry only when it 1446 - * actually changed the task state. We want the wait always 1447 - * removed. Remove explicitly and use default_wake_function(). 1447 + * actually changed the task state. We want the wait always removed. 1448 + * Remove explicitly and use default_wake_function(). Note that the 1449 + * order of operations is important as finish_wait() tests whether 1450 + * @wq_entry is removed without grabbing the lock. 1448 1451 */ 1449 - list_del_init(&wq_entry->entry); 1450 - wait->committed = true; 1451 - 1452 1452 default_wake_function(wq_entry, mode, flags, key); 1453 + list_del_init_careful(&wq_entry->entry); 1453 1454 return 0; 1454 1455 } 1455 1456
+4 -13
block/blk-mq-sched.c
··· 515 515 percpu_ref_put(&q->q_usage_counter); 516 516 } 517 517 518 - static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set, 519 - struct blk_mq_hw_ctx *hctx, 520 - unsigned int hctx_idx) 521 - { 522 - if (hctx->sched_tags) { 523 - blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx); 524 - blk_mq_free_rq_map(hctx->sched_tags, set->flags); 525 - hctx->sched_tags = NULL; 526 - } 527 - } 528 - 529 518 static int blk_mq_sched_alloc_tags(struct request_queue *q, 530 519 struct blk_mq_hw_ctx *hctx, 531 520 unsigned int hctx_idx) ··· 528 539 return -ENOMEM; 529 540 530 541 ret = blk_mq_alloc_rqs(set, hctx->sched_tags, hctx_idx, q->nr_requests); 531 - if (ret) 532 - blk_mq_sched_free_tags(set, hctx, hctx_idx); 542 + if (ret) { 543 + blk_mq_free_rq_map(hctx->sched_tags, set->flags); 544 + hctx->sched_tags = NULL; 545 + } 533 546 534 547 return ret; 535 548 }
+1 -2
block/genhd.c
··· 1079 1079 disk_release_events(disk); 1080 1080 kfree(disk->random); 1081 1081 xa_destroy(&disk->part_tbl); 1082 - bdput(disk->part0); 1083 1082 if (test_bit(GD_QUEUE_REF, &disk->state) && disk->queue) 1084 1083 blk_put_queue(disk->queue); 1085 - kfree(disk); 1084 + bdput(disk->part0); /* frees the disk */ 1086 1085 } 1087 1086 struct class block_class = { 1088 1087 .name = "block",
+97 -31
drivers/block/loop.c
··· 88 88 89 89 static DEFINE_IDR(loop_index_idr); 90 90 static DEFINE_MUTEX(loop_ctl_mutex); 91 + static DEFINE_MUTEX(loop_validate_mutex); 92 + 93 + /** 94 + * loop_global_lock_killable() - take locks for safe loop_validate_file() test 95 + * 96 + * @lo: struct loop_device 97 + * @global: true if @lo is about to bind another "struct loop_device", false otherwise 98 + * 99 + * Returns 0 on success, -EINTR otherwise. 100 + * 101 + * Since loop_validate_file() traverses on other "struct loop_device" if 102 + * is_loop_device() is true, we need a global lock for serializing concurrent 103 + * loop_configure()/loop_change_fd()/__loop_clr_fd() calls. 104 + */ 105 + static int loop_global_lock_killable(struct loop_device *lo, bool global) 106 + { 107 + int err; 108 + 109 + if (global) { 110 + err = mutex_lock_killable(&loop_validate_mutex); 111 + if (err) 112 + return err; 113 + } 114 + err = mutex_lock_killable(&lo->lo_mutex); 115 + if (err && global) 116 + mutex_unlock(&loop_validate_mutex); 117 + return err; 118 + } 119 + 120 + /** 121 + * loop_global_unlock() - release locks taken by loop_global_lock_killable() 122 + * 123 + * @lo: struct loop_device 124 + * @global: true if @lo was about to bind another "struct loop_device", false otherwise 125 + */ 126 + static void loop_global_unlock(struct loop_device *lo, bool global) 127 + { 128 + mutex_unlock(&lo->lo_mutex); 129 + if (global) 130 + mutex_unlock(&loop_validate_mutex); 131 + } 91 132 92 133 static int max_part; 93 134 static int part_shift; ··· 713 672 while (is_loop_device(f)) { 714 673 struct loop_device *l; 715 674 675 + lockdep_assert_held(&loop_validate_mutex); 716 676 if (f->f_mapping->host->i_rdev == bdev->bd_dev) 717 677 return -EBADF; 718 678 719 679 l = I_BDEV(f->f_mapping->host)->bd_disk->private_data; 720 - if (l->lo_state != Lo_bound) { 680 + if (l->lo_state != Lo_bound) 721 681 return -EINVAL; 722 - } 682 + /* Order wrt setting lo->lo_backing_file in loop_configure(). 
*/ 683 + rmb(); 723 684 f = l->lo_backing_file; 724 685 } 725 686 if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode)) ··· 740 697 static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, 741 698 unsigned int arg) 742 699 { 743 - struct file *file = NULL, *old_file; 744 - int error; 745 - bool partscan; 700 + struct file *file = fget(arg); 701 + struct file *old_file; 702 + int error; 703 + bool partscan; 704 + bool is_loop; 746 705 747 - error = mutex_lock_killable(&lo->lo_mutex); 706 + if (!file) 707 + return -EBADF; 708 + is_loop = is_loop_device(file); 709 + error = loop_global_lock_killable(lo, is_loop); 748 710 if (error) 749 - return error; 711 + goto out_putf; 750 712 error = -ENXIO; 751 713 if (lo->lo_state != Lo_bound) 752 714 goto out_err; ··· 759 711 /* the loop device has to be read-only */ 760 712 error = -EINVAL; 761 713 if (!(lo->lo_flags & LO_FLAGS_READ_ONLY)) 762 - goto out_err; 763 - 764 - error = -EBADF; 765 - file = fget(arg); 766 - if (!file) 767 714 goto out_err; 768 715 769 716 error = loop_validate_file(file, bdev); ··· 783 740 loop_update_dio(lo); 784 741 blk_mq_unfreeze_queue(lo->lo_queue); 785 742 partscan = lo->lo_flags & LO_FLAGS_PARTSCAN; 786 - mutex_unlock(&lo->lo_mutex); 743 + loop_global_unlock(lo, is_loop); 744 + 745 + /* 746 + * Flush loop_validate_file() before fput(), for l->lo_backing_file 747 + * might be pointing at old_file which might be the last reference. 
748 + */ 749 + if (!is_loop) { 750 + mutex_lock(&loop_validate_mutex); 751 + mutex_unlock(&loop_validate_mutex); 752 + } 787 753 /* 788 754 * We must drop file reference outside of lo_mutex as dropping 789 755 * the file ref can take open_mutex which creates circular locking ··· 804 752 return 0; 805 753 806 754 out_err: 807 - mutex_unlock(&lo->lo_mutex); 808 - if (file) 809 - fput(file); 755 + loop_global_unlock(lo, is_loop); 756 + out_putf: 757 + fput(file); 810 758 return error; 811 759 } 812 760 ··· 1188 1136 struct block_device *bdev, 1189 1137 const struct loop_config *config) 1190 1138 { 1191 - struct file *file; 1192 - struct inode *inode; 1139 + struct file *file = fget(config->fd); 1140 + struct inode *inode; 1193 1141 struct address_space *mapping; 1194 - int error; 1195 - loff_t size; 1196 - bool partscan; 1197 - unsigned short bsize; 1142 + int error; 1143 + loff_t size; 1144 + bool partscan; 1145 + unsigned short bsize; 1146 + bool is_loop; 1147 + 1148 + if (!file) 1149 + return -EBADF; 1150 + is_loop = is_loop_device(file); 1198 1151 1199 1152 /* This is safe, since we have a reference from open(). */ 1200 1153 __module_get(THIS_MODULE); 1201 - 1202 - error = -EBADF; 1203 - file = fget(config->fd); 1204 - if (!file) 1205 - goto out; 1206 1154 1207 1155 /* 1208 1156 * If we don't hold exclusive handle for the device, upgrade to it ··· 1214 1162 goto out_putf; 1215 1163 } 1216 1164 1217 - error = mutex_lock_killable(&lo->lo_mutex); 1165 + error = loop_global_lock_killable(lo, is_loop); 1218 1166 if (error) 1219 1167 goto out_bdev; 1220 1168 ··· 1294 1242 size = get_loop_size(lo, file); 1295 1243 loop_set_size(lo, size); 1296 1244 1245 + /* Order wrt reading lo_state in loop_validate_file(). */ 1246 + wmb(); 1247 + 1297 1248 lo->lo_state = Lo_bound; 1298 1249 if (part_shift) 1299 1250 lo->lo_flags |= LO_FLAGS_PARTSCAN; ··· 1308 1253 * put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev). 
1309 1254 */ 1310 1255 bdgrab(bdev); 1311 - mutex_unlock(&lo->lo_mutex); 1256 + loop_global_unlock(lo, is_loop); 1312 1257 if (partscan) 1313 1258 loop_reread_partitions(lo); 1314 1259 if (!(mode & FMODE_EXCL)) ··· 1316 1261 return 0; 1317 1262 1318 1263 out_unlock: 1319 - mutex_unlock(&lo->lo_mutex); 1264 + loop_global_unlock(lo, is_loop); 1320 1265 out_bdev: 1321 1266 if (!(mode & FMODE_EXCL)) 1322 1267 bd_abort_claiming(bdev, loop_configure); 1323 1268 out_putf: 1324 1269 fput(file); 1325 - out: 1326 1270 /* This is safe: open() is still holding a reference. */ 1327 1271 module_put(THIS_MODULE); 1328 1272 return error; ··· 1336 1282 bool partscan = false; 1337 1283 int lo_number; 1338 1284 struct loop_worker *pos, *worker; 1285 + 1286 + /* 1287 + * Flush loop_configure() and loop_change_fd(). It is acceptable for 1288 + * loop_validate_file() to succeed, for actual clear operation has not 1289 + * started yet. 1290 + */ 1291 + mutex_lock(&loop_validate_mutex); 1292 + mutex_unlock(&loop_validate_mutex); 1293 + /* 1294 + * loop_validate_file() now fails because l->lo_state != Lo_bound 1295 + * became visible. 1296 + */ 1339 1297 1340 1298 mutex_lock(&lo->lo_mutex); 1341 1299 if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) {
+2
fs/block_dev.c
··· 812 812 free_percpu(bdev->bd_stats); 813 813 kfree(bdev->bd_meta_info); 814 814 815 + if (!bdev_is_partition(bdev)) 816 + kfree(bdev->bd_disk); 815 817 kmem_cache_free(bdev_cachep, BDEV_I(inode)); 816 818 } 817 819