Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'block-5.15-2021-09-11' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:

- NVMe pull request from Christoph:
- fix nvmet command set reporting for passthrough controllers (Adam Manzanares)
- update a MAINTAINERS email address (Chaitanya Kulkarni)
- set QUEUE_FLAG_NOWAIT for nvme-multipath (me)
- handle errors from add_disk() (Luis Chamberlain)
- update the keep alive interval when kato is modified (Tatsuya Sasaki)
- fix a buffer overrun in nvmet_subsys_attr_serial (Hannes Reinecke)
- do not reset transport on data digest errors in nvme-tcp (Daniel Wagner)
- only call synchronize_srcu when clearing current path (Daniel Wagner)
- revalidate paths during rescan (Hannes Reinecke)

- Split fs/block_dev.c into block/fops.c and block/bdev.c, which has
been long overdue. Do this now, before -rc1, to avoid annoying
conflicts later (Christoph)

- blk-throtl use-after-free fix (Li)

- Improve plug depth for multi-device plugs, greatly increasing md
resync performance (Song)

- blkdev_show() locking fix (Tetsuo)

- n64cart error check fix (Yang)

* tag 'block-5.15-2021-09-11' of git://git.kernel.dk/linux-block:
n64cart: fix return value check in n64cart_probe()
blk-mq: allow 4x BLK_MAX_REQUEST_COUNT at blk_plug for multiple_queues
block: move fs/block_dev.c to block/bdev.c
block: split out operations on block special files
blk-throttle: fix UAF by deleting timer in blk_throtl_exit()
block: genhd: don't call blkdev_show() with major_names_lock held
nvme: update MAINTAINERS email address
nvme: add error handling support for add_disk()
nvme: only call synchronize_srcu when clearing current path
nvme: update keep alive interval when kato is modified
nvme-tcp: Do not reset transport on data digest errors
nvmet: fixup buffer overrun in nvmet_subsys_attr_serial()
nvmet: return bool from nvmet_passthru_ctrl and nvmet_is_passthru_req
nvmet: looks at the passthrough controller when initializing CAP
nvme: move nvme_multi_css into nvme.h
nvme-multipath: revalidate paths during rescan
nvme-multipath: set QUEUE_FLAG_NOWAIT

+805 -682
+3
Documentation/core-api/kernel-api.rst
··· 315 315 .. kernel-doc:: block/genhd.c 316 316 :export: 317 317 318 + .. kernel-doc:: block/bdev.c 319 + :export: 320 + 318 321 Char devices 319 322 ============ 320 323
-3
Documentation/filesystems/api-summary.rst
··· 71 71 .. kernel-doc:: fs/fs-writeback.c 72 72 :export: 73 73 74 - .. kernel-doc:: fs/block_dev.c 75 - :export: 76 - 77 74 .. kernel-doc:: fs/anon_inodes.c 78 75 :export: 79 76
+1 -2
MAINTAINERS
··· 3313 3313 T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git 3314 3314 F: block/ 3315 3315 F: drivers/block/ 3316 - F: fs/block_dev.c 3317 3316 F: include/linux/blk* 3318 3317 F: kernel/trace/blktrace.c 3319 3318 F: lib/sbitmap.c ··· 13408 13409 NVM EXPRESS TARGET DRIVER 13409 13410 M: Christoph Hellwig <hch@lst.de> 13410 13411 M: Sagi Grimberg <sagi@grimberg.me> 13411 - M: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com> 13412 + M: Chaitanya Kulkarni <kch@nvidia.com> 13412 13413 L: linux-nvme@lists.infradead.org 13413 13414 S: Supported 13414 13415 W: http://git.infradead.org/nvme.git
+1 -1
block/Makefile
··· 3 3 # Makefile for the kernel block layer 4 4 # 5 5 6 - obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-sysfs.o \ 6 + obj-$(CONFIG_BLOCK) := bdev.o fops.o bio.o elevator.o blk-core.o blk-sysfs.o \ 7 7 blk-flush.o blk-settings.o blk-ioc.o blk-map.o \ 8 8 blk-exec.o blk-merge.o blk-timeout.o \ 9 9 blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
+13 -1
block/blk-mq.c
··· 2135 2135 } 2136 2136 } 2137 2137 2138 + /* 2139 + * Allow 4x BLK_MAX_REQUEST_COUNT requests on plug queue for multiple 2140 + * queues. This is important for md arrays to benefit from merging 2141 + * requests. 2142 + */ 2143 + static inline unsigned short blk_plug_max_rq_count(struct blk_plug *plug) 2144 + { 2145 + if (plug->multiple_queues) 2146 + return BLK_MAX_REQUEST_COUNT * 4; 2147 + return BLK_MAX_REQUEST_COUNT; 2148 + } 2149 + 2138 2150 /** 2139 2151 * blk_mq_submit_bio - Create and send a request to block device. 2140 2152 * @bio: Bio pointer. ··· 2243 2231 else 2244 2232 last = list_entry_rq(plug->mq_list.prev); 2245 2233 2246 - if (request_count >= BLK_MAX_REQUEST_COUNT || (last && 2234 + if (request_count >= blk_plug_max_rq_count(plug) || (last && 2247 2235 blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE)) { 2248 2236 blk_flush_plug_list(plug, false); 2249 2237 trace_block_plug(q);
+1
block/blk-throttle.c
··· 2458 2458 void blk_throtl_exit(struct request_queue *q) 2459 2459 { 2460 2460 BUG_ON(!q->td); 2461 + del_timer_sync(&q->td->service_queue.pending_timer); 2461 2462 throtl_shutdown_wq(q); 2462 2463 blkcg_deactivate_policy(q, &blkcg_policy_throtl); 2463 2464 free_percpu(q->td->latency_buckets[READ]);
+2
block/blk.h
··· 373 373 bio->bi_opf &= ~REQ_HIPRI; 374 374 } 375 375 376 + extern const struct address_space_operations def_blk_aops; 377 + 376 378 #endif /* BLK_INTERNAL_H */
+640
block/fops.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * Copyright (C) 1991, 1992 Linus Torvalds 4 + * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE 5 + * Copyright (C) 2016 - 2020 Christoph Hellwig 6 + */ 7 + #include <linux/init.h> 8 + #include <linux/mm.h> 9 + #include <linux/blkdev.h> 10 + #include <linux/buffer_head.h> 11 + #include <linux/mpage.h> 12 + #include <linux/uio.h> 13 + #include <linux/namei.h> 14 + #include <linux/task_io_accounting_ops.h> 15 + #include <linux/falloc.h> 16 + #include <linux/suspend.h> 17 + #include "blk.h" 18 + 19 + static struct inode *bdev_file_inode(struct file *file) 20 + { 21 + return file->f_mapping->host; 22 + } 23 + 24 + static int blkdev_get_block(struct inode *inode, sector_t iblock, 25 + struct buffer_head *bh, int create) 26 + { 27 + bh->b_bdev = I_BDEV(inode); 28 + bh->b_blocknr = iblock; 29 + set_buffer_mapped(bh); 30 + return 0; 31 + } 32 + 33 + static unsigned int dio_bio_write_op(struct kiocb *iocb) 34 + { 35 + unsigned int op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE; 36 + 37 + /* avoid the need for a I/O completion work item */ 38 + if (iocb->ki_flags & IOCB_DSYNC) 39 + op |= REQ_FUA; 40 + return op; 41 + } 42 + 43 + #define DIO_INLINE_BIO_VECS 4 44 + 45 + static void blkdev_bio_end_io_simple(struct bio *bio) 46 + { 47 + struct task_struct *waiter = bio->bi_private; 48 + 49 + WRITE_ONCE(bio->bi_private, NULL); 50 + blk_wake_io_task(waiter); 51 + } 52 + 53 + static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb, 54 + struct iov_iter *iter, unsigned int nr_pages) 55 + { 56 + struct file *file = iocb->ki_filp; 57 + struct block_device *bdev = I_BDEV(bdev_file_inode(file)); 58 + struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs; 59 + loff_t pos = iocb->ki_pos; 60 + bool should_dirty = false; 61 + struct bio bio; 62 + ssize_t ret; 63 + blk_qc_t qc; 64 + 65 + if ((pos | iov_iter_alignment(iter)) & 66 + (bdev_logical_block_size(bdev) - 1)) 67 + return -EINVAL; 68 + 69 + if (nr_pages <= 
DIO_INLINE_BIO_VECS) 70 + vecs = inline_vecs; 71 + else { 72 + vecs = kmalloc_array(nr_pages, sizeof(struct bio_vec), 73 + GFP_KERNEL); 74 + if (!vecs) 75 + return -ENOMEM; 76 + } 77 + 78 + bio_init(&bio, vecs, nr_pages); 79 + bio_set_dev(&bio, bdev); 80 + bio.bi_iter.bi_sector = pos >> 9; 81 + bio.bi_write_hint = iocb->ki_hint; 82 + bio.bi_private = current; 83 + bio.bi_end_io = blkdev_bio_end_io_simple; 84 + bio.bi_ioprio = iocb->ki_ioprio; 85 + 86 + ret = bio_iov_iter_get_pages(&bio, iter); 87 + if (unlikely(ret)) 88 + goto out; 89 + ret = bio.bi_iter.bi_size; 90 + 91 + if (iov_iter_rw(iter) == READ) { 92 + bio.bi_opf = REQ_OP_READ; 93 + if (iter_is_iovec(iter)) 94 + should_dirty = true; 95 + } else { 96 + bio.bi_opf = dio_bio_write_op(iocb); 97 + task_io_account_write(ret); 98 + } 99 + if (iocb->ki_flags & IOCB_NOWAIT) 100 + bio.bi_opf |= REQ_NOWAIT; 101 + if (iocb->ki_flags & IOCB_HIPRI) 102 + bio_set_polled(&bio, iocb); 103 + 104 + qc = submit_bio(&bio); 105 + for (;;) { 106 + set_current_state(TASK_UNINTERRUPTIBLE); 107 + if (!READ_ONCE(bio.bi_private)) 108 + break; 109 + if (!(iocb->ki_flags & IOCB_HIPRI) || 110 + !blk_poll(bdev_get_queue(bdev), qc, true)) 111 + blk_io_schedule(); 112 + } 113 + __set_current_state(TASK_RUNNING); 114 + 115 + bio_release_pages(&bio, should_dirty); 116 + if (unlikely(bio.bi_status)) 117 + ret = blk_status_to_errno(bio.bi_status); 118 + 119 + out: 120 + if (vecs != inline_vecs) 121 + kfree(vecs); 122 + 123 + bio_uninit(&bio); 124 + 125 + return ret; 126 + } 127 + 128 + struct blkdev_dio { 129 + union { 130 + struct kiocb *iocb; 131 + struct task_struct *waiter; 132 + }; 133 + size_t size; 134 + atomic_t ref; 135 + bool multi_bio : 1; 136 + bool should_dirty : 1; 137 + bool is_sync : 1; 138 + struct bio bio; 139 + }; 140 + 141 + static struct bio_set blkdev_dio_pool; 142 + 143 + static int blkdev_iopoll(struct kiocb *kiocb, bool wait) 144 + { 145 + struct block_device *bdev = I_BDEV(kiocb->ki_filp->f_mapping->host); 146 + struct 
request_queue *q = bdev_get_queue(bdev); 147 + 148 + return blk_poll(q, READ_ONCE(kiocb->ki_cookie), wait); 149 + } 150 + 151 + static void blkdev_bio_end_io(struct bio *bio) 152 + { 153 + struct blkdev_dio *dio = bio->bi_private; 154 + bool should_dirty = dio->should_dirty; 155 + 156 + if (bio->bi_status && !dio->bio.bi_status) 157 + dio->bio.bi_status = bio->bi_status; 158 + 159 + if (!dio->multi_bio || atomic_dec_and_test(&dio->ref)) { 160 + if (!dio->is_sync) { 161 + struct kiocb *iocb = dio->iocb; 162 + ssize_t ret; 163 + 164 + if (likely(!dio->bio.bi_status)) { 165 + ret = dio->size; 166 + iocb->ki_pos += ret; 167 + } else { 168 + ret = blk_status_to_errno(dio->bio.bi_status); 169 + } 170 + 171 + dio->iocb->ki_complete(iocb, ret, 0); 172 + if (dio->multi_bio) 173 + bio_put(&dio->bio); 174 + } else { 175 + struct task_struct *waiter = dio->waiter; 176 + 177 + WRITE_ONCE(dio->waiter, NULL); 178 + blk_wake_io_task(waiter); 179 + } 180 + } 181 + 182 + if (should_dirty) { 183 + bio_check_pages_dirty(bio); 184 + } else { 185 + bio_release_pages(bio, false); 186 + bio_put(bio); 187 + } 188 + } 189 + 190 + static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, 191 + unsigned int nr_pages) 192 + { 193 + struct file *file = iocb->ki_filp; 194 + struct inode *inode = bdev_file_inode(file); 195 + struct block_device *bdev = I_BDEV(inode); 196 + struct blk_plug plug; 197 + struct blkdev_dio *dio; 198 + struct bio *bio; 199 + bool is_poll = (iocb->ki_flags & IOCB_HIPRI) != 0; 200 + bool is_read = (iov_iter_rw(iter) == READ), is_sync; 201 + loff_t pos = iocb->ki_pos; 202 + blk_qc_t qc = BLK_QC_T_NONE; 203 + int ret = 0; 204 + 205 + if ((pos | iov_iter_alignment(iter)) & 206 + (bdev_logical_block_size(bdev) - 1)) 207 + return -EINVAL; 208 + 209 + bio = bio_alloc_kiocb(iocb, nr_pages, &blkdev_dio_pool); 210 + 211 + dio = container_of(bio, struct blkdev_dio, bio); 212 + dio->is_sync = is_sync = is_sync_kiocb(iocb); 213 + if (dio->is_sync) { 214 + 
dio->waiter = current; 215 + bio_get(bio); 216 + } else { 217 + dio->iocb = iocb; 218 + } 219 + 220 + dio->size = 0; 221 + dio->multi_bio = false; 222 + dio->should_dirty = is_read && iter_is_iovec(iter); 223 + 224 + /* 225 + * Don't plug for HIPRI/polled IO, as those should go straight 226 + * to issue 227 + */ 228 + if (!is_poll) 229 + blk_start_plug(&plug); 230 + 231 + for (;;) { 232 + bio_set_dev(bio, bdev); 233 + bio->bi_iter.bi_sector = pos >> 9; 234 + bio->bi_write_hint = iocb->ki_hint; 235 + bio->bi_private = dio; 236 + bio->bi_end_io = blkdev_bio_end_io; 237 + bio->bi_ioprio = iocb->ki_ioprio; 238 + 239 + ret = bio_iov_iter_get_pages(bio, iter); 240 + if (unlikely(ret)) { 241 + bio->bi_status = BLK_STS_IOERR; 242 + bio_endio(bio); 243 + break; 244 + } 245 + 246 + if (is_read) { 247 + bio->bi_opf = REQ_OP_READ; 248 + if (dio->should_dirty) 249 + bio_set_pages_dirty(bio); 250 + } else { 251 + bio->bi_opf = dio_bio_write_op(iocb); 252 + task_io_account_write(bio->bi_iter.bi_size); 253 + } 254 + if (iocb->ki_flags & IOCB_NOWAIT) 255 + bio->bi_opf |= REQ_NOWAIT; 256 + 257 + dio->size += bio->bi_iter.bi_size; 258 + pos += bio->bi_iter.bi_size; 259 + 260 + nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS); 261 + if (!nr_pages) { 262 + bool polled = false; 263 + 264 + if (iocb->ki_flags & IOCB_HIPRI) { 265 + bio_set_polled(bio, iocb); 266 + polled = true; 267 + } 268 + 269 + qc = submit_bio(bio); 270 + 271 + if (polled) 272 + WRITE_ONCE(iocb->ki_cookie, qc); 273 + break; 274 + } 275 + 276 + if (!dio->multi_bio) { 277 + /* 278 + * AIO needs an extra reference to ensure the dio 279 + * structure which is embedded into the first bio 280 + * stays around. 
281 + */ 282 + if (!is_sync) 283 + bio_get(bio); 284 + dio->multi_bio = true; 285 + atomic_set(&dio->ref, 2); 286 + } else { 287 + atomic_inc(&dio->ref); 288 + } 289 + 290 + submit_bio(bio); 291 + bio = bio_alloc(GFP_KERNEL, nr_pages); 292 + } 293 + 294 + if (!is_poll) 295 + blk_finish_plug(&plug); 296 + 297 + if (!is_sync) 298 + return -EIOCBQUEUED; 299 + 300 + for (;;) { 301 + set_current_state(TASK_UNINTERRUPTIBLE); 302 + if (!READ_ONCE(dio->waiter)) 303 + break; 304 + 305 + if (!(iocb->ki_flags & IOCB_HIPRI) || 306 + !blk_poll(bdev_get_queue(bdev), qc, true)) 307 + blk_io_schedule(); 308 + } 309 + __set_current_state(TASK_RUNNING); 310 + 311 + if (!ret) 312 + ret = blk_status_to_errno(dio->bio.bi_status); 313 + if (likely(!ret)) 314 + ret = dio->size; 315 + 316 + bio_put(&dio->bio); 317 + return ret; 318 + } 319 + 320 + static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter) 321 + { 322 + unsigned int nr_pages; 323 + 324 + if (!iov_iter_count(iter)) 325 + return 0; 326 + 327 + nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1); 328 + if (is_sync_kiocb(iocb) && nr_pages <= BIO_MAX_VECS) 329 + return __blkdev_direct_IO_simple(iocb, iter, nr_pages); 330 + 331 + return __blkdev_direct_IO(iocb, iter, bio_max_segs(nr_pages)); 332 + } 333 + 334 + static int blkdev_writepage(struct page *page, struct writeback_control *wbc) 335 + { 336 + return block_write_full_page(page, blkdev_get_block, wbc); 337 + } 338 + 339 + static int blkdev_readpage(struct file * file, struct page * page) 340 + { 341 + return block_read_full_page(page, blkdev_get_block); 342 + } 343 + 344 + static void blkdev_readahead(struct readahead_control *rac) 345 + { 346 + mpage_readahead(rac, blkdev_get_block); 347 + } 348 + 349 + static int blkdev_write_begin(struct file *file, struct address_space *mapping, 350 + loff_t pos, unsigned len, unsigned flags, struct page **pagep, 351 + void **fsdata) 352 + { 353 + return block_write_begin(mapping, pos, len, flags, pagep, 354 + 
blkdev_get_block); 355 + } 356 + 357 + static int blkdev_write_end(struct file *file, struct address_space *mapping, 358 + loff_t pos, unsigned len, unsigned copied, struct page *page, 359 + void *fsdata) 360 + { 361 + int ret; 362 + ret = block_write_end(file, mapping, pos, len, copied, page, fsdata); 363 + 364 + unlock_page(page); 365 + put_page(page); 366 + 367 + return ret; 368 + } 369 + 370 + static int blkdev_writepages(struct address_space *mapping, 371 + struct writeback_control *wbc) 372 + { 373 + return generic_writepages(mapping, wbc); 374 + } 375 + 376 + const struct address_space_operations def_blk_aops = { 377 + .set_page_dirty = __set_page_dirty_buffers, 378 + .readpage = blkdev_readpage, 379 + .readahead = blkdev_readahead, 380 + .writepage = blkdev_writepage, 381 + .write_begin = blkdev_write_begin, 382 + .write_end = blkdev_write_end, 383 + .writepages = blkdev_writepages, 384 + .direct_IO = blkdev_direct_IO, 385 + .migratepage = buffer_migrate_page_norefs, 386 + .is_dirty_writeback = buffer_check_dirty_writeback, 387 + }; 388 + 389 + /* 390 + * for a block special file file_inode(file)->i_size is zero 391 + * so we compute the size by hand (just as in block_read/write above) 392 + */ 393 + static loff_t blkdev_llseek(struct file *file, loff_t offset, int whence) 394 + { 395 + struct inode *bd_inode = bdev_file_inode(file); 396 + loff_t retval; 397 + 398 + inode_lock(bd_inode); 399 + retval = fixed_size_llseek(file, offset, whence, i_size_read(bd_inode)); 400 + inode_unlock(bd_inode); 401 + return retval; 402 + } 403 + 404 + static int blkdev_fsync(struct file *filp, loff_t start, loff_t end, 405 + int datasync) 406 + { 407 + struct inode *bd_inode = bdev_file_inode(filp); 408 + struct block_device *bdev = I_BDEV(bd_inode); 409 + int error; 410 + 411 + error = file_write_and_wait_range(filp, start, end); 412 + if (error) 413 + return error; 414 + 415 + /* 416 + * There is no need to serialise calls to blkdev_issue_flush with 417 + * i_mutex and 
doing so causes performance issues with concurrent 418 + * O_SYNC writers to a block device. 419 + */ 420 + error = blkdev_issue_flush(bdev); 421 + if (error == -EOPNOTSUPP) 422 + error = 0; 423 + 424 + return error; 425 + } 426 + 427 + static int blkdev_open(struct inode *inode, struct file *filp) 428 + { 429 + struct block_device *bdev; 430 + 431 + /* 432 + * Preserve backwards compatibility and allow large file access 433 + * even if userspace doesn't ask for it explicitly. Some mkfs 434 + * binary needs it. We might want to drop this workaround 435 + * during an unstable branch. 436 + */ 437 + filp->f_flags |= O_LARGEFILE; 438 + filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC; 439 + 440 + if (filp->f_flags & O_NDELAY) 441 + filp->f_mode |= FMODE_NDELAY; 442 + if (filp->f_flags & O_EXCL) 443 + filp->f_mode |= FMODE_EXCL; 444 + if ((filp->f_flags & O_ACCMODE) == 3) 445 + filp->f_mode |= FMODE_WRITE_IOCTL; 446 + 447 + bdev = blkdev_get_by_dev(inode->i_rdev, filp->f_mode, filp); 448 + if (IS_ERR(bdev)) 449 + return PTR_ERR(bdev); 450 + filp->f_mapping = bdev->bd_inode->i_mapping; 451 + filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping); 452 + return 0; 453 + } 454 + 455 + static int blkdev_close(struct inode *inode, struct file *filp) 456 + { 457 + struct block_device *bdev = I_BDEV(bdev_file_inode(filp)); 458 + 459 + blkdev_put(bdev, filp->f_mode); 460 + return 0; 461 + } 462 + 463 + static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) 464 + { 465 + struct block_device *bdev = I_BDEV(bdev_file_inode(file)); 466 + fmode_t mode = file->f_mode; 467 + 468 + /* 469 + * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have 470 + * to updated it before every ioctl. 471 + */ 472 + if (file->f_flags & O_NDELAY) 473 + mode |= FMODE_NDELAY; 474 + else 475 + mode &= ~FMODE_NDELAY; 476 + 477 + return blkdev_ioctl(bdev, mode, cmd, arg); 478 + } 479 + 480 + /* 481 + * Write data to the block device. 
Only intended for the block device itself 482 + * and the raw driver which basically is a fake block device. 483 + * 484 + * Does not take i_mutex for the write and thus is not for general purpose 485 + * use. 486 + */ 487 + static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) 488 + { 489 + struct file *file = iocb->ki_filp; 490 + struct inode *bd_inode = bdev_file_inode(file); 491 + loff_t size = i_size_read(bd_inode); 492 + struct blk_plug plug; 493 + size_t shorted = 0; 494 + ssize_t ret; 495 + 496 + if (bdev_read_only(I_BDEV(bd_inode))) 497 + return -EPERM; 498 + 499 + if (IS_SWAPFILE(bd_inode) && !is_hibernate_resume_dev(bd_inode->i_rdev)) 500 + return -ETXTBSY; 501 + 502 + if (!iov_iter_count(from)) 503 + return 0; 504 + 505 + if (iocb->ki_pos >= size) 506 + return -ENOSPC; 507 + 508 + if ((iocb->ki_flags & (IOCB_NOWAIT | IOCB_DIRECT)) == IOCB_NOWAIT) 509 + return -EOPNOTSUPP; 510 + 511 + size -= iocb->ki_pos; 512 + if (iov_iter_count(from) > size) { 513 + shorted = iov_iter_count(from) - size; 514 + iov_iter_truncate(from, size); 515 + } 516 + 517 + blk_start_plug(&plug); 518 + ret = __generic_file_write_iter(iocb, from); 519 + if (ret > 0) 520 + ret = generic_write_sync(iocb, ret); 521 + iov_iter_reexpand(from, iov_iter_count(from) + shorted); 522 + blk_finish_plug(&plug); 523 + return ret; 524 + } 525 + 526 + static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) 527 + { 528 + struct file *file = iocb->ki_filp; 529 + struct inode *bd_inode = bdev_file_inode(file); 530 + loff_t size = i_size_read(bd_inode); 531 + loff_t pos = iocb->ki_pos; 532 + size_t shorted = 0; 533 + ssize_t ret; 534 + 535 + if (pos >= size) 536 + return 0; 537 + 538 + size -= pos; 539 + if (iov_iter_count(to) > size) { 540 + shorted = iov_iter_count(to) - size; 541 + iov_iter_truncate(to, size); 542 + } 543 + 544 + ret = generic_file_read_iter(iocb, to); 545 + iov_iter_reexpand(to, iov_iter_count(to) + shorted); 546 + return ret; 547 + } 548 + 
549 + #define BLKDEV_FALLOC_FL_SUPPORTED \ 550 + (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \ 551 + FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE) 552 + 553 + static long blkdev_fallocate(struct file *file, int mode, loff_t start, 554 + loff_t len) 555 + { 556 + struct block_device *bdev = I_BDEV(bdev_file_inode(file)); 557 + loff_t end = start + len - 1; 558 + loff_t isize; 559 + int error; 560 + 561 + /* Fail if we don't recognize the flags. */ 562 + if (mode & ~BLKDEV_FALLOC_FL_SUPPORTED) 563 + return -EOPNOTSUPP; 564 + 565 + /* Don't go off the end of the device. */ 566 + isize = i_size_read(bdev->bd_inode); 567 + if (start >= isize) 568 + return -EINVAL; 569 + if (end >= isize) { 570 + if (mode & FALLOC_FL_KEEP_SIZE) { 571 + len = isize - start; 572 + end = start + len - 1; 573 + } else 574 + return -EINVAL; 575 + } 576 + 577 + /* 578 + * Don't allow IO that isn't aligned to logical block size. 579 + */ 580 + if ((start | len) & (bdev_logical_block_size(bdev) - 1)) 581 + return -EINVAL; 582 + 583 + /* Invalidate the page cache, including dirty pages. 
*/ 584 + error = truncate_bdev_range(bdev, file->f_mode, start, end); 585 + if (error) 586 + return error; 587 + 588 + switch (mode) { 589 + case FALLOC_FL_ZERO_RANGE: 590 + case FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE: 591 + error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9, 592 + GFP_KERNEL, BLKDEV_ZERO_NOUNMAP); 593 + break; 594 + case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE: 595 + error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9, 596 + GFP_KERNEL, BLKDEV_ZERO_NOFALLBACK); 597 + break; 598 + case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE: 599 + error = blkdev_issue_discard(bdev, start >> 9, len >> 9, 600 + GFP_KERNEL, 0); 601 + break; 602 + default: 603 + return -EOPNOTSUPP; 604 + } 605 + if (error) 606 + return error; 607 + 608 + /* 609 + * Invalidate the page cache again; if someone wandered in and dirtied 610 + * a page, we just discard it - userspace has no way of knowing whether 611 + * the write happened before or after discard completing... 612 + */ 613 + return truncate_bdev_range(bdev, file->f_mode, start, end); 614 + } 615 + 616 + const struct file_operations def_blk_fops = { 617 + .open = blkdev_open, 618 + .release = blkdev_close, 619 + .llseek = blkdev_llseek, 620 + .read_iter = blkdev_read_iter, 621 + .write_iter = blkdev_write_iter, 622 + .iopoll = blkdev_iopoll, 623 + .mmap = generic_file_mmap, 624 + .fsync = blkdev_fsync, 625 + .unlocked_ioctl = block_ioctl, 626 + #ifdef CONFIG_COMPAT 627 + .compat_ioctl = compat_blkdev_ioctl, 628 + #endif 629 + .splice_read = generic_file_splice_read, 630 + .splice_write = iter_file_splice_write, 631 + .fallocate = blkdev_fallocate, 632 + }; 633 + 634 + static __init int blkdev_init(void) 635 + { 636 + return bioset_init(&blkdev_dio_pool, 4, 637 + offsetof(struct blkdev_dio, bio), 638 + BIOSET_NEED_BVECS|BIOSET_PERCPU_CACHE); 639 + } 640 + module_init(blkdev_init);
+7 -2
block/genhd.c
··· 183 183 void (*probe)(dev_t devt); 184 184 } *major_names[BLKDEV_MAJOR_HASH_SIZE]; 185 185 static DEFINE_MUTEX(major_names_lock); 186 + static DEFINE_SPINLOCK(major_names_spinlock); 186 187 187 188 /* index in the above - for now: assume no multimajor ranges */ 188 189 static inline int major_to_index(unsigned major) ··· 196 195 { 197 196 struct blk_major_name *dp; 198 197 199 - mutex_lock(&major_names_lock); 198 + spin_lock(&major_names_spinlock); 200 199 for (dp = major_names[major_to_index(offset)]; dp; dp = dp->next) 201 200 if (dp->major == offset) 202 201 seq_printf(seqf, "%3d %s\n", dp->major, dp->name); 203 - mutex_unlock(&major_names_lock); 202 + spin_unlock(&major_names_spinlock); 204 203 } 205 204 #endif /* CONFIG_PROC_FS */ 206 205 ··· 272 271 p->next = NULL; 273 272 index = major_to_index(major); 274 273 274 + spin_lock(&major_names_spinlock); 275 275 for (n = &major_names[index]; *n; n = &(*n)->next) { 276 276 if ((*n)->major == major) 277 277 break; ··· 281 279 *n = p; 282 280 else 283 281 ret = -EBUSY; 282 + spin_unlock(&major_names_spinlock); 284 283 285 284 if (ret < 0) { 286 285 printk("register_blkdev: cannot get major %u for %s\n", ··· 301 298 int index = major_to_index(major); 302 299 303 300 mutex_lock(&major_names_lock); 301 + spin_lock(&major_names_spinlock); 304 302 for (n = &major_names[index]; *n; n = &(*n)->next) 305 303 if ((*n)->major == major) 306 304 break; ··· 311 307 p = *n; 312 308 *n = p->next; 313 309 } 310 + spin_unlock(&major_names_spinlock); 314 311 mutex_unlock(&major_names_lock); 315 312 kfree(p); 316 313 }
+2 -2
drivers/block/n64cart.c
··· 129 129 } 130 130 131 131 reg_base = devm_platform_ioremap_resource(pdev, 0); 132 - if (!reg_base) 133 - return -EINVAL; 132 + if (IS_ERR(reg_base)) 133 + return PTR_ERR(reg_base); 134 134 135 135 disk = blk_alloc_disk(NUMA_NO_NODE); 136 136 if (!disk)
+57 -11
drivers/nvme/host/core.c
··· 116 116 static void nvme_put_subsystem(struct nvme_subsystem *subsys); 117 117 static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl, 118 118 unsigned nsid); 119 + static void nvme_update_keep_alive(struct nvme_ctrl *ctrl, 120 + struct nvme_command *cmd); 119 121 120 122 /* 121 123 * Prepare a queue for teardown. ··· 1154 1152 return effects; 1155 1153 } 1156 1154 1157 - static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects) 1155 + static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects, 1156 + struct nvme_command *cmd, int status) 1158 1157 { 1159 1158 if (effects & NVME_CMD_EFFECTS_CSE_MASK) { 1160 1159 nvme_unfreeze(ctrl); ··· 1169 1166 if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC)) { 1170 1167 nvme_queue_scan(ctrl); 1171 1168 flush_work(&ctrl->scan_work); 1169 + } 1170 + 1171 + switch (cmd->common.opcode) { 1172 + case nvme_admin_set_features: 1173 + switch (le32_to_cpu(cmd->common.cdw10) & 0xFF) { 1174 + case NVME_FEAT_KATO: 1175 + /* 1176 + * Keep alive commands interval on the host should be 1177 + * updated when KATO is modified by Set Features 1178 + * commands. 
1179 + */ 1180 + if (!status) 1181 + nvme_update_keep_alive(ctrl, cmd); 1182 + break; 1183 + default: 1184 + break; 1185 + } 1186 + break; 1187 + default: 1188 + break; 1172 1189 } 1173 1190 } 1174 1191 ··· 1204 1181 effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode); 1205 1182 ret = nvme_execute_rq(disk, rq, false); 1206 1183 if (effects) /* nothing to be done for zero cmd effects */ 1207 - nvme_passthru_end(ctrl, effects); 1184 + nvme_passthru_end(ctrl, effects, cmd, ret); 1208 1185 1209 1186 return ret; 1210 1187 } ··· 1292 1269 } 1293 1270 EXPORT_SYMBOL_GPL(nvme_stop_keep_alive); 1294 1271 1272 + static void nvme_update_keep_alive(struct nvme_ctrl *ctrl, 1273 + struct nvme_command *cmd) 1274 + { 1275 + unsigned int new_kato = 1276 + DIV_ROUND_UP(le32_to_cpu(cmd->common.cdw11), 1000); 1277 + 1278 + dev_info(ctrl->device, 1279 + "keep alive interval updated from %u ms to %u ms\n", 1280 + ctrl->kato * 1000 / 2, new_kato * 1000 / 2); 1281 + 1282 + nvme_stop_keep_alive(ctrl); 1283 + ctrl->kato = new_kato; 1284 + nvme_start_keep_alive(ctrl); 1285 + } 1286 + 1295 1287 /* 1296 1288 * In NVMe 1.0 the CNS field was just a binary controller or namespace 1297 1289 * flag, thus sending any new CNS opcodes has a big chance of not working. 
··· 1338 1300 if (error) 1339 1301 kfree(*id); 1340 1302 return error; 1341 - } 1342 - 1343 - static bool nvme_multi_css(struct nvme_ctrl *ctrl) 1344 - { 1345 - return (ctrl->ctrl_config & NVME_CC_CSS_MASK) == NVME_CC_CSS_CSI; 1346 1303 } 1347 1304 1348 1305 static int nvme_process_ns_desc(struct nvme_ctrl *ctrl, struct nvme_ns_ids *ids, ··· 1907 1874 goto out_unfreeze; 1908 1875 } 1909 1876 1877 + set_bit(NVME_NS_READY, &ns->flags); 1910 1878 blk_mq_unfreeze_queue(ns->disk->queue); 1911 1879 1912 1880 if (blk_queue_is_zoned(ns->queue)) { ··· 1919 1885 if (nvme_ns_head_multipath(ns->head)) { 1920 1886 blk_mq_freeze_queue(ns->head->disk->queue); 1921 1887 nvme_update_disk_info(ns->head->disk, ns, id); 1888 + nvme_mpath_revalidate_paths(ns); 1922 1889 blk_stack_limits(&ns->head->disk->queue->limits, 1923 1890 &ns->queue->limits, 0); 1924 1891 disk_update_readahead(ns->head->disk); ··· 3798 3763 3799 3764 nvme_get_ctrl(ctrl); 3800 3765 3801 - device_add_disk(ctrl->device, ns->disk, nvme_ns_id_attr_groups); 3766 + if (device_add_disk(ctrl->device, ns->disk, nvme_ns_id_attr_groups)) 3767 + goto out_cleanup_ns_from_list; 3768 + 3802 3769 if (!nvme_ns_head_multipath(ns->head)) 3803 3770 nvme_add_ns_cdev(ns); 3804 3771 ··· 3810 3773 3811 3774 return; 3812 3775 3776 + out_cleanup_ns_from_list: 3777 + nvme_put_ctrl(ctrl); 3778 + down_write(&ctrl->namespaces_rwsem); 3779 + list_del_init(&ns->list); 3780 + up_write(&ctrl->namespaces_rwsem); 3813 3781 out_unlink_ns: 3814 3782 mutex_lock(&ctrl->subsys->lock); 3815 3783 list_del_rcu(&ns->siblings); ··· 3837 3795 if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags)) 3838 3796 return; 3839 3797 3798 + clear_bit(NVME_NS_READY, &ns->flags); 3840 3799 set_capacity(ns->disk, 0); 3841 3800 nvme_fault_inject_fini(&ns->fault_inject); 3842 3801 ··· 3845 3802 list_del_rcu(&ns->siblings); 3846 3803 mutex_unlock(&ns->ctrl->subsys->lock); 3847 3804 3848 - synchronize_rcu(); /* guarantee not available in head->list */ 3849 - 
nvme_mpath_clear_current_path(ns); 3850 - synchronize_srcu(&ns->head->srcu); /* wait for concurrent submissions */ 3805 + /* guarantee not available in head->list */ 3806 + synchronize_rcu(); 3807 + 3808 + /* wait for concurrent submissions */ 3809 + if (nvme_mpath_clear_current_path(ns)) 3810 + synchronize_srcu(&ns->head->srcu); 3851 3811 3852 3812 if (!nvme_ns_head_multipath(ns->head)) 3853 3813 nvme_cdev_del(&ns->cdev, &ns->cdev_device);
+18 -1
drivers/nvme/host/multipath.c
··· 147 147 mutex_unlock(&ctrl->scan_lock); 148 148 } 149 149 150 + void nvme_mpath_revalidate_paths(struct nvme_ns *ns) 151 + { 152 + struct nvme_ns_head *head = ns->head; 153 + sector_t capacity = get_capacity(head->disk); 154 + int node; 155 + 156 + list_for_each_entry_rcu(ns, &head->list, siblings) { 157 + if (capacity != get_capacity(ns->disk)) 158 + clear_bit(NVME_NS_READY, &ns->flags); 159 + } 160 + 161 + for_each_node(node) 162 + rcu_assign_pointer(head->current_path[node], NULL); 163 + } 164 + 150 165 static bool nvme_path_is_disabled(struct nvme_ns *ns) 151 166 { 152 167 /* ··· 173 158 ns->ctrl->state != NVME_CTRL_DELETING) 174 159 return true; 175 160 if (test_bit(NVME_NS_ANA_PENDING, &ns->flags) || 176 - test_bit(NVME_NS_REMOVING, &ns->flags)) 161 + !test_bit(NVME_NS_READY, &ns->flags)) 177 162 return true; 178 163 return false; 179 164 } ··· 480 465 ctrl->subsys->instance, head->instance); 481 466 482 467 blk_queue_flag_set(QUEUE_FLAG_NONROT, head->disk->queue); 468 + blk_queue_flag_set(QUEUE_FLAG_NOWAIT, head->disk->queue); 469 + 483 470 /* set to a default value of 512 until the disk is validated */ 484 471 blk_queue_logical_block_size(head->disk->queue, 512); 485 472 blk_set_stacking_limits(&head->disk->queue->limits);
+10
drivers/nvme/host/nvme.h
··· 456 456 #define NVME_NS_DEAD 1 457 457 #define NVME_NS_ANA_PENDING 2 458 458 #define NVME_NS_FORCE_RO 3 459 + #define NVME_NS_READY 4 459 460 460 461 struct cdev cdev; 461 462 struct device cdev_device; ··· 749 748 void nvme_mpath_uninit(struct nvme_ctrl *ctrl); 750 749 void nvme_mpath_stop(struct nvme_ctrl *ctrl); 751 750 bool nvme_mpath_clear_current_path(struct nvme_ns *ns); 751 + void nvme_mpath_revalidate_paths(struct nvme_ns *ns); 752 752 void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl); 753 753 void nvme_mpath_shutdown_disk(struct nvme_ns_head *head); 754 754 ··· 796 794 static inline bool nvme_mpath_clear_current_path(struct nvme_ns *ns) 797 795 { 798 796 return false; 797 + } 798 + static inline void nvme_mpath_revalidate_paths(struct nvme_ns *ns) 799 + { 799 800 } 800 801 static inline void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl) 801 802 { ··· 891 886 struct nvme_ctrl *nvme_ctrl_from_file(struct file *file); 892 887 struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid); 893 888 void nvme_put_ns(struct nvme_ns *ns); 889 + 890 + static inline bool nvme_multi_css(struct nvme_ctrl *ctrl) 891 + { 892 + return (ctrl->ctrl_config & NVME_CC_CSS_MASK) == NVME_CC_CSS_CSI; 893 + } 894 894 895 895 #endif /* _NVME_H */
+18 -4
drivers/nvme/host/tcp.c
··· 45 45 u32 pdu_len; 46 46 u32 pdu_sent; 47 47 u16 ttag; 48 + __le16 status; 48 49 struct list_head entry; 49 50 struct llist_node lentry; 50 51 __le32 ddgst; ··· 486 485 static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue, 487 486 struct nvme_completion *cqe) 488 487 { 488 + struct nvme_tcp_request *req; 489 489 struct request *rq; 490 490 491 491 rq = nvme_find_rq(nvme_tcp_tagset(queue), cqe->command_id); ··· 498 496 return -EINVAL; 499 497 } 500 498 501 - if (!nvme_try_complete_req(rq, cqe->status, cqe->result)) 499 + req = blk_mq_rq_to_pdu(rq); 500 + if (req->status == cpu_to_le16(NVME_SC_SUCCESS)) 501 + req->status = cqe->status; 502 + 503 + if (!nvme_try_complete_req(rq, req->status, cqe->result)) 502 504 nvme_complete_rq(rq); 503 505 queue->nr_cqe++; 504 506 ··· 764 758 queue->ddgst_remaining = NVME_TCP_DIGEST_LENGTH; 765 759 } else { 766 760 if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) { 767 - nvme_tcp_end_request(rq, NVME_SC_SUCCESS); 761 + nvme_tcp_end_request(rq, 762 + le16_to_cpu(req->status)); 768 763 queue->nr_cqe++; 769 764 } 770 765 nvme_tcp_init_recv_ctx(queue); ··· 795 788 return 0; 796 789 797 790 if (queue->recv_ddgst != queue->exp_ddgst) { 791 + struct request *rq = nvme_cid_to_rq(nvme_tcp_tagset(queue), 792 + pdu->command_id); 793 + struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); 794 + 795 + req->status = cpu_to_le16(NVME_SC_DATA_XFER_ERROR); 796 + 798 797 dev_err(queue->ctrl->ctrl.device, 799 798 "data digest error: recv %#x expected %#x\n", 800 799 le32_to_cpu(queue->recv_ddgst), 801 800 le32_to_cpu(queue->exp_ddgst)); 802 - return -EIO; 803 801 } 804 802 805 803 if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) { 806 804 struct request *rq = nvme_cid_to_rq(nvme_tcp_tagset(queue), 807 805 pdu->command_id); 806 + struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); 808 807 809 - nvme_tcp_end_request(rq, NVME_SC_SUCCESS); 808 + nvme_tcp_end_request(rq, le16_to_cpu(req->status)); 810 809 queue->nr_cqe++; 811 810 } 812 811 ··· 
2306 2293 return ret; 2307 2294 2308 2295 req->state = NVME_TCP_SEND_CMD_PDU; 2296 + req->status = cpu_to_le16(NVME_SC_SUCCESS); 2309 2297 req->offset = 0; 2310 2298 req->data_sent = 0; 2311 2299 req->pdu_len = 0;
+1 -1
drivers/nvme/target/admin-cmd.c
··· 1015 1015 if (unlikely(ret)) 1016 1016 return ret; 1017 1017 1018 - if (nvmet_req_passthru_ctrl(req)) 1018 + if (nvmet_is_passthru_req(req)) 1019 1019 return nvmet_parse_passthru_admin_cmd(req); 1020 1020 1021 1021 switch (cmd->common.opcode) {
+3 -2
drivers/nvme/target/configfs.c
··· 1028 1028 } 1029 1029 1030 1030 /* passthru subsystems use the underlying controller's version */ 1031 - if (nvmet_passthru_ctrl(subsys)) 1031 + if (nvmet_is_passthru_subsys(subsys)) 1032 1032 return -EINVAL; 1033 1033 1034 1034 ret = sscanf(page, "%d.%d.%d\n", &major, &minor, &tertiary); ··· 1067 1067 { 1068 1068 struct nvmet_subsys *subsys = to_subsys(item); 1069 1069 1070 - return snprintf(page, PAGE_SIZE, "%s\n", subsys->serial); 1070 + return snprintf(page, PAGE_SIZE, "%*s\n", 1071 + NVMET_SN_MAX_SIZE, subsys->serial); 1071 1072 } 1072 1073 1073 1074 static ssize_t
+6 -4
drivers/nvme/target/core.c
··· 553 553 mutex_lock(&subsys->lock); 554 554 ret = 0; 555 555 556 - if (nvmet_passthru_ctrl(subsys)) { 556 + if (nvmet_is_passthru_subsys(subsys)) { 557 557 pr_info("cannot enable both passthru and regular namespaces for a single subsystem"); 558 558 goto out_unlock; 559 559 } ··· 869 869 if (unlikely(ret)) 870 870 return ret; 871 871 872 - if (nvmet_req_passthru_ctrl(req)) 872 + if (nvmet_is_passthru_req(req)) 873 873 return nvmet_parse_passthru_io_cmd(req); 874 874 875 875 ret = nvmet_req_find_ns(req); ··· 1206 1206 ctrl->cap |= (15ULL << 24); 1207 1207 /* maximum queue entries supported: */ 1208 1208 ctrl->cap |= NVMET_QUEUE_SIZE - 1; 1209 + 1210 + if (nvmet_is_passthru_subsys(ctrl->subsys)) 1211 + nvmet_passthrough_override_cap(ctrl); 1209 1212 } 1210 1213 1211 1214 struct nvmet_ctrl *nvmet_ctrl_find_get(const char *subsysnqn, ··· 1366 1363 goto out_put_subsystem; 1367 1364 mutex_init(&ctrl->lock); 1368 1365 1369 - nvmet_init_cap(ctrl); 1370 - 1371 1366 ctrl->port = req->port; 1372 1367 1373 1368 INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work); ··· 1379 1378 1380 1379 kref_init(&ctrl->ref); 1381 1380 ctrl->subsys = subsys; 1381 + nvmet_init_cap(ctrl); 1382 1382 WRITE_ONCE(ctrl->aen_enabled, NVMET_AEN_CFG_OPTIONAL); 1383 1383 1384 1384 ctrl->changed_ns_list = kmalloc_array(NVME_MAX_CHANGED_NAMESPACES,
+6 -5
drivers/nvme/target/nvmet.h
··· 582 582 void nvmet_passthru_ctrl_disable(struct nvmet_subsys *subsys); 583 583 u16 nvmet_parse_passthru_admin_cmd(struct nvmet_req *req); 584 584 u16 nvmet_parse_passthru_io_cmd(struct nvmet_req *req); 585 - static inline struct nvme_ctrl *nvmet_passthru_ctrl(struct nvmet_subsys *subsys) 585 + static inline bool nvmet_is_passthru_subsys(struct nvmet_subsys *subsys) 586 586 { 587 587 return subsys->passthru_ctrl; 588 588 } ··· 601 601 { 602 602 return 0; 603 603 } 604 - static inline struct nvme_ctrl *nvmet_passthru_ctrl(struct nvmet_subsys *subsys) 604 + static inline bool nvmet_is_passthru_subsys(struct nvmet_subsys *subsys) 605 605 { 606 606 return NULL; 607 607 } 608 608 #endif /* CONFIG_NVME_TARGET_PASSTHRU */ 609 609 610 - static inline struct nvme_ctrl * 611 - nvmet_req_passthru_ctrl(struct nvmet_req *req) 610 + static inline bool nvmet_is_passthru_req(struct nvmet_req *req) 612 611 { 613 - return nvmet_passthru_ctrl(nvmet_req_subsys(req)); 612 + return nvmet_is_passthru_subsys(nvmet_req_subsys(req)); 614 613 } 614 + 615 + void nvmet_passthrough_override_cap(struct nvmet_ctrl *ctrl); 615 616 616 617 u16 errno_to_nvme_status(struct nvmet_req *req, int errno); 617 618 u16 nvmet_report_invalid_opcode(struct nvmet_req *req);
+12 -2
drivers/nvme/target/passthru.c
··· 20 20 */ 21 21 static DEFINE_XARRAY(passthru_subsystems); 22 22 23 + void nvmet_passthrough_override_cap(struct nvmet_ctrl *ctrl) 24 + { 25 + /* 26 + * Multiple command set support can only be declared if the underlying 27 + * controller actually supports it. 28 + */ 29 + if (!nvme_multi_css(ctrl->subsys->passthru_ctrl)) 30 + ctrl->cap &= ~(1ULL << 43); 31 + } 32 + 23 33 static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req) 24 34 { 25 35 struct nvmet_ctrl *ctrl = req->sq->ctrl; ··· 228 218 229 219 static void nvmet_passthru_execute_cmd(struct nvmet_req *req) 230 220 { 231 - struct nvme_ctrl *ctrl = nvmet_req_passthru_ctrl(req); 221 + struct nvme_ctrl *ctrl = nvmet_req_subsys(req)->passthru_ctrl; 232 222 struct request_queue *q = ctrl->admin_q; 233 223 struct nvme_ns *ns = NULL; 234 224 struct request *rq = NULL; ··· 309 299 */ 310 300 static void nvmet_passthru_set_host_behaviour(struct nvmet_req *req) 311 301 { 312 - struct nvme_ctrl *ctrl = nvmet_req_passthru_ctrl(req); 302 + struct nvme_ctrl *ctrl = nvmet_req_subsys(req)->passthru_ctrl; 313 303 struct nvme_feat_host_behavior *host; 314 304 u16 status = NVME_SC_INTERNAL; 315 305 int ret;
+1 -1
fs/Makefile
··· 17 17 kernel_read_file.o remap_range.o 18 18 19 19 ifeq ($(CONFIG_BLOCK),y) 20 - obj-y += buffer.o block_dev.o direct-io.o mpage.o 20 + obj-y += buffer.o direct-io.o mpage.o 21 21 else 22 22 obj-y += no-block.o 23 23 endif
+2 -639
fs/block_dev.c block/bdev.c
··· 7 7 8 8 #include <linux/init.h> 9 9 #include <linux/mm.h> 10 - #include <linux/fcntl.h> 11 10 #include <linux/slab.h> 12 11 #include <linux/kmod.h> 13 12 #include <linux/major.h> 14 13 #include <linux/device_cgroup.h> 15 - #include <linux/highmem.h> 16 14 #include <linux/blkdev.h> 17 15 #include <linux/backing-dev.h> 18 16 #include <linux/module.h> ··· 18 20 #include <linux/magic.h> 19 21 #include <linux/buffer_head.h> 20 22 #include <linux/swap.h> 21 - #include <linux/pagevec.h> 22 23 #include <linux/writeback.h> 23 - #include <linux/mpage.h> 24 24 #include <linux/mount.h> 25 25 #include <linux/pseudo_fs.h> 26 26 #include <linux/uio.h> 27 27 #include <linux/namei.h> 28 - #include <linux/log2.h> 29 28 #include <linux/cleancache.h> 30 - #include <linux/task_io_accounting_ops.h> 31 - #include <linux/falloc.h> 32 29 #include <linux/part_stat.h> 33 30 #include <linux/uaccess.h> 34 - #include <linux/suspend.h> 35 - #include "internal.h" 36 - #include "../block/blk.h" 31 + #include "../fs/internal.h" 32 + #include "blk.h" 37 33 38 34 struct bdev_inode { 39 35 struct block_device bdev; 40 36 struct inode vfs_inode; 41 37 }; 42 - 43 - static const struct address_space_operations def_blk_aops; 44 38 45 39 static inline struct bdev_inode *BDEV_I(struct inode *inode) 46 40 { ··· 184 194 185 195 EXPORT_SYMBOL(sb_min_blocksize); 186 196 187 - static int 188 - blkdev_get_block(struct inode *inode, sector_t iblock, 189 - struct buffer_head *bh, int create) 190 - { 191 - bh->b_bdev = I_BDEV(inode); 192 - bh->b_blocknr = iblock; 193 - set_buffer_mapped(bh); 194 - return 0; 195 - } 196 - 197 - static struct inode *bdev_file_inode(struct file *file) 198 - { 199 - return file->f_mapping->host; 200 - } 201 - 202 - static unsigned int dio_bio_write_op(struct kiocb *iocb) 203 - { 204 - unsigned int op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE; 205 - 206 - /* avoid the need for a I/O completion work item */ 207 - if (iocb->ki_flags & IOCB_DSYNC) 208 - op |= REQ_FUA; 209 - return op; 210 - 
} 211 - 212 - #define DIO_INLINE_BIO_VECS 4 213 - 214 - static void blkdev_bio_end_io_simple(struct bio *bio) 215 - { 216 - struct task_struct *waiter = bio->bi_private; 217 - 218 - WRITE_ONCE(bio->bi_private, NULL); 219 - blk_wake_io_task(waiter); 220 - } 221 - 222 - static ssize_t 223 - __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, 224 - unsigned int nr_pages) 225 - { 226 - struct file *file = iocb->ki_filp; 227 - struct block_device *bdev = I_BDEV(bdev_file_inode(file)); 228 - struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs; 229 - loff_t pos = iocb->ki_pos; 230 - bool should_dirty = false; 231 - struct bio bio; 232 - ssize_t ret; 233 - blk_qc_t qc; 234 - 235 - if ((pos | iov_iter_alignment(iter)) & 236 - (bdev_logical_block_size(bdev) - 1)) 237 - return -EINVAL; 238 - 239 - if (nr_pages <= DIO_INLINE_BIO_VECS) 240 - vecs = inline_vecs; 241 - else { 242 - vecs = kmalloc_array(nr_pages, sizeof(struct bio_vec), 243 - GFP_KERNEL); 244 - if (!vecs) 245 - return -ENOMEM; 246 - } 247 - 248 - bio_init(&bio, vecs, nr_pages); 249 - bio_set_dev(&bio, bdev); 250 - bio.bi_iter.bi_sector = pos >> 9; 251 - bio.bi_write_hint = iocb->ki_hint; 252 - bio.bi_private = current; 253 - bio.bi_end_io = blkdev_bio_end_io_simple; 254 - bio.bi_ioprio = iocb->ki_ioprio; 255 - 256 - ret = bio_iov_iter_get_pages(&bio, iter); 257 - if (unlikely(ret)) 258 - goto out; 259 - ret = bio.bi_iter.bi_size; 260 - 261 - if (iov_iter_rw(iter) == READ) { 262 - bio.bi_opf = REQ_OP_READ; 263 - if (iter_is_iovec(iter)) 264 - should_dirty = true; 265 - } else { 266 - bio.bi_opf = dio_bio_write_op(iocb); 267 - task_io_account_write(ret); 268 - } 269 - if (iocb->ki_flags & IOCB_NOWAIT) 270 - bio.bi_opf |= REQ_NOWAIT; 271 - if (iocb->ki_flags & IOCB_HIPRI) 272 - bio_set_polled(&bio, iocb); 273 - 274 - qc = submit_bio(&bio); 275 - for (;;) { 276 - set_current_state(TASK_UNINTERRUPTIBLE); 277 - if (!READ_ONCE(bio.bi_private)) 278 - break; 279 - if (!(iocb->ki_flags & IOCB_HIPRI) || 
280 - !blk_poll(bdev_get_queue(bdev), qc, true)) 281 - blk_io_schedule(); 282 - } 283 - __set_current_state(TASK_RUNNING); 284 - 285 - bio_release_pages(&bio, should_dirty); 286 - if (unlikely(bio.bi_status)) 287 - ret = blk_status_to_errno(bio.bi_status); 288 - 289 - out: 290 - if (vecs != inline_vecs) 291 - kfree(vecs); 292 - 293 - bio_uninit(&bio); 294 - 295 - return ret; 296 - } 297 - 298 - struct blkdev_dio { 299 - union { 300 - struct kiocb *iocb; 301 - struct task_struct *waiter; 302 - }; 303 - size_t size; 304 - atomic_t ref; 305 - bool multi_bio : 1; 306 - bool should_dirty : 1; 307 - bool is_sync : 1; 308 - struct bio bio; 309 - }; 310 - 311 - static struct bio_set blkdev_dio_pool; 312 - 313 - static int blkdev_iopoll(struct kiocb *kiocb, bool wait) 314 - { 315 - struct block_device *bdev = I_BDEV(kiocb->ki_filp->f_mapping->host); 316 - struct request_queue *q = bdev_get_queue(bdev); 317 - 318 - return blk_poll(q, READ_ONCE(kiocb->ki_cookie), wait); 319 - } 320 - 321 - static void blkdev_bio_end_io(struct bio *bio) 322 - { 323 - struct blkdev_dio *dio = bio->bi_private; 324 - bool should_dirty = dio->should_dirty; 325 - 326 - if (bio->bi_status && !dio->bio.bi_status) 327 - dio->bio.bi_status = bio->bi_status; 328 - 329 - if (!dio->multi_bio || atomic_dec_and_test(&dio->ref)) { 330 - if (!dio->is_sync) { 331 - struct kiocb *iocb = dio->iocb; 332 - ssize_t ret; 333 - 334 - if (likely(!dio->bio.bi_status)) { 335 - ret = dio->size; 336 - iocb->ki_pos += ret; 337 - } else { 338 - ret = blk_status_to_errno(dio->bio.bi_status); 339 - } 340 - 341 - dio->iocb->ki_complete(iocb, ret, 0); 342 - if (dio->multi_bio) 343 - bio_put(&dio->bio); 344 - } else { 345 - struct task_struct *waiter = dio->waiter; 346 - 347 - WRITE_ONCE(dio->waiter, NULL); 348 - blk_wake_io_task(waiter); 349 - } 350 - } 351 - 352 - if (should_dirty) { 353 - bio_check_pages_dirty(bio); 354 - } else { 355 - bio_release_pages(bio, false); 356 - bio_put(bio); 357 - } 358 - } 359 - 360 - static 
ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, 361 - unsigned int nr_pages) 362 - { 363 - struct file *file = iocb->ki_filp; 364 - struct inode *inode = bdev_file_inode(file); 365 - struct block_device *bdev = I_BDEV(inode); 366 - struct blk_plug plug; 367 - struct blkdev_dio *dio; 368 - struct bio *bio; 369 - bool is_poll = (iocb->ki_flags & IOCB_HIPRI) != 0; 370 - bool is_read = (iov_iter_rw(iter) == READ), is_sync; 371 - loff_t pos = iocb->ki_pos; 372 - blk_qc_t qc = BLK_QC_T_NONE; 373 - int ret = 0; 374 - 375 - if ((pos | iov_iter_alignment(iter)) & 376 - (bdev_logical_block_size(bdev) - 1)) 377 - return -EINVAL; 378 - 379 - bio = bio_alloc_kiocb(iocb, nr_pages, &blkdev_dio_pool); 380 - 381 - dio = container_of(bio, struct blkdev_dio, bio); 382 - dio->is_sync = is_sync = is_sync_kiocb(iocb); 383 - if (dio->is_sync) { 384 - dio->waiter = current; 385 - bio_get(bio); 386 - } else { 387 - dio->iocb = iocb; 388 - } 389 - 390 - dio->size = 0; 391 - dio->multi_bio = false; 392 - dio->should_dirty = is_read && iter_is_iovec(iter); 393 - 394 - /* 395 - * Don't plug for HIPRI/polled IO, as those should go straight 396 - * to issue 397 - */ 398 - if (!is_poll) 399 - blk_start_plug(&plug); 400 - 401 - for (;;) { 402 - bio_set_dev(bio, bdev); 403 - bio->bi_iter.bi_sector = pos >> 9; 404 - bio->bi_write_hint = iocb->ki_hint; 405 - bio->bi_private = dio; 406 - bio->bi_end_io = blkdev_bio_end_io; 407 - bio->bi_ioprio = iocb->ki_ioprio; 408 - 409 - ret = bio_iov_iter_get_pages(bio, iter); 410 - if (unlikely(ret)) { 411 - bio->bi_status = BLK_STS_IOERR; 412 - bio_endio(bio); 413 - break; 414 - } 415 - 416 - if (is_read) { 417 - bio->bi_opf = REQ_OP_READ; 418 - if (dio->should_dirty) 419 - bio_set_pages_dirty(bio); 420 - } else { 421 - bio->bi_opf = dio_bio_write_op(iocb); 422 - task_io_account_write(bio->bi_iter.bi_size); 423 - } 424 - if (iocb->ki_flags & IOCB_NOWAIT) 425 - bio->bi_opf |= REQ_NOWAIT; 426 - 427 - dio->size += bio->bi_iter.bi_size; 428 - 
pos += bio->bi_iter.bi_size; 429 - 430 - nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS); 431 - if (!nr_pages) { 432 - bool polled = false; 433 - 434 - if (iocb->ki_flags & IOCB_HIPRI) { 435 - bio_set_polled(bio, iocb); 436 - polled = true; 437 - } 438 - 439 - qc = submit_bio(bio); 440 - 441 - if (polled) 442 - WRITE_ONCE(iocb->ki_cookie, qc); 443 - break; 444 - } 445 - 446 - if (!dio->multi_bio) { 447 - /* 448 - * AIO needs an extra reference to ensure the dio 449 - * structure which is embedded into the first bio 450 - * stays around. 451 - */ 452 - if (!is_sync) 453 - bio_get(bio); 454 - dio->multi_bio = true; 455 - atomic_set(&dio->ref, 2); 456 - } else { 457 - atomic_inc(&dio->ref); 458 - } 459 - 460 - submit_bio(bio); 461 - bio = bio_alloc(GFP_KERNEL, nr_pages); 462 - } 463 - 464 - if (!is_poll) 465 - blk_finish_plug(&plug); 466 - 467 - if (!is_sync) 468 - return -EIOCBQUEUED; 469 - 470 - for (;;) { 471 - set_current_state(TASK_UNINTERRUPTIBLE); 472 - if (!READ_ONCE(dio->waiter)) 473 - break; 474 - 475 - if (!(iocb->ki_flags & IOCB_HIPRI) || 476 - !blk_poll(bdev_get_queue(bdev), qc, true)) 477 - blk_io_schedule(); 478 - } 479 - __set_current_state(TASK_RUNNING); 480 - 481 - if (!ret) 482 - ret = blk_status_to_errno(dio->bio.bi_status); 483 - if (likely(!ret)) 484 - ret = dio->size; 485 - 486 - bio_put(&dio->bio); 487 - return ret; 488 - } 489 - 490 - static ssize_t 491 - blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter) 492 - { 493 - unsigned int nr_pages; 494 - 495 - if (!iov_iter_count(iter)) 496 - return 0; 497 - 498 - nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1); 499 - if (is_sync_kiocb(iocb) && nr_pages <= BIO_MAX_VECS) 500 - return __blkdev_direct_IO_simple(iocb, iter, nr_pages); 501 - 502 - return __blkdev_direct_IO(iocb, iter, bio_max_segs(nr_pages)); 503 - } 504 - 505 - static __init int blkdev_init(void) 506 - { 507 - return bioset_init(&blkdev_dio_pool, 4, 508 - offsetof(struct blkdev_dio, bio), 509 - 
BIOSET_NEED_BVECS|BIOSET_PERCPU_CACHE); 510 - } 511 - module_init(blkdev_init); 512 - 513 197 int __sync_blockdev(struct block_device *bdev, int wait) 514 198 { 515 199 if (!bdev) ··· 300 636 return error; 301 637 } 302 638 EXPORT_SYMBOL(thaw_bdev); 303 - 304 - static int blkdev_writepage(struct page *page, struct writeback_control *wbc) 305 - { 306 - return block_write_full_page(page, blkdev_get_block, wbc); 307 - } 308 - 309 - static int blkdev_readpage(struct file * file, struct page * page) 310 - { 311 - return block_read_full_page(page, blkdev_get_block); 312 - } 313 - 314 - static void blkdev_readahead(struct readahead_control *rac) 315 - { 316 - mpage_readahead(rac, blkdev_get_block); 317 - } 318 - 319 - static int blkdev_write_begin(struct file *file, struct address_space *mapping, 320 - loff_t pos, unsigned len, unsigned flags, 321 - struct page **pagep, void **fsdata) 322 - { 323 - return block_write_begin(mapping, pos, len, flags, pagep, 324 - blkdev_get_block); 325 - } 326 - 327 - static int blkdev_write_end(struct file *file, struct address_space *mapping, 328 - loff_t pos, unsigned len, unsigned copied, 329 - struct page *page, void *fsdata) 330 - { 331 - int ret; 332 - ret = block_write_end(file, mapping, pos, len, copied, page, fsdata); 333 - 334 - unlock_page(page); 335 - put_page(page); 336 - 337 - return ret; 338 - } 339 - 340 - /* 341 - * private llseek: 342 - * for a block special file file_inode(file)->i_size is zero 343 - * so we compute the size by hand (just as in block_read/write above) 344 - */ 345 - static loff_t block_llseek(struct file *file, loff_t offset, int whence) 346 - { 347 - struct inode *bd_inode = bdev_file_inode(file); 348 - loff_t retval; 349 - 350 - inode_lock(bd_inode); 351 - retval = fixed_size_llseek(file, offset, whence, i_size_read(bd_inode)); 352 - inode_unlock(bd_inode); 353 - return retval; 354 - } 355 - 356 - static int blkdev_fsync(struct file *filp, loff_t start, loff_t end, 357 - int datasync) 358 - { 359 - 
struct inode *bd_inode = bdev_file_inode(filp); 360 - struct block_device *bdev = I_BDEV(bd_inode); 361 - int error; 362 - 363 - error = file_write_and_wait_range(filp, start, end); 364 - if (error) 365 - return error; 366 - 367 - /* 368 - * There is no need to serialise calls to blkdev_issue_flush with 369 - * i_mutex and doing so causes performance issues with concurrent 370 - * O_SYNC writers to a block device. 371 - */ 372 - error = blkdev_issue_flush(bdev); 373 - if (error == -EOPNOTSUPP) 374 - error = 0; 375 - 376 - return error; 377 - } 378 639 379 640 /** 380 641 * bdev_read_page() - Start reading a page from a block device ··· 894 1305 } 895 1306 EXPORT_SYMBOL(blkdev_get_by_path); 896 1307 897 - static int blkdev_open(struct inode * inode, struct file * filp) 898 - { 899 - struct block_device *bdev; 900 - 901 - /* 902 - * Preserve backwards compatibility and allow large file access 903 - * even if userspace doesn't ask for it explicitly. Some mkfs 904 - * binary needs it. We might want to drop this workaround 905 - * during an unstable branch. 
906 - */ 907 - filp->f_flags |= O_LARGEFILE; 908 - 909 - filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC; 910 - 911 - if (filp->f_flags & O_NDELAY) 912 - filp->f_mode |= FMODE_NDELAY; 913 - if (filp->f_flags & O_EXCL) 914 - filp->f_mode |= FMODE_EXCL; 915 - if ((filp->f_flags & O_ACCMODE) == 3) 916 - filp->f_mode |= FMODE_WRITE_IOCTL; 917 - 918 - bdev = blkdev_get_by_dev(inode->i_rdev, filp->f_mode, filp); 919 - if (IS_ERR(bdev)) 920 - return PTR_ERR(bdev); 921 - filp->f_mapping = bdev->bd_inode->i_mapping; 922 - filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping); 923 - return 0; 924 - } 925 - 926 1308 void blkdev_put(struct block_device *bdev, fmode_t mode) 927 1309 { 928 1310 struct gendisk *disk = bdev->bd_disk; ··· 956 1396 blkdev_put_no_open(bdev); 957 1397 } 958 1398 EXPORT_SYMBOL(blkdev_put); 959 - 960 - static int blkdev_close(struct inode * inode, struct file * filp) 961 - { 962 - struct block_device *bdev = I_BDEV(bdev_file_inode(filp)); 963 - blkdev_put(bdev, filp->f_mode); 964 - return 0; 965 - } 966 - 967 - static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) 968 - { 969 - struct block_device *bdev = I_BDEV(bdev_file_inode(file)); 970 - fmode_t mode = file->f_mode; 971 - 972 - /* 973 - * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have 974 - * to updated it before every ioctl. 975 - */ 976 - if (file->f_flags & O_NDELAY) 977 - mode |= FMODE_NDELAY; 978 - else 979 - mode &= ~FMODE_NDELAY; 980 - 981 - return blkdev_ioctl(bdev, mode, cmd, arg); 982 - } 983 - 984 - /* 985 - * Write data to the block device. Only intended for the block device itself 986 - * and the raw driver which basically is a fake block device. 987 - * 988 - * Does not take i_mutex for the write and thus is not for general purpose 989 - * use. 
990 - */ 991 - static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) 992 - { 993 - struct file *file = iocb->ki_filp; 994 - struct inode *bd_inode = bdev_file_inode(file); 995 - loff_t size = i_size_read(bd_inode); 996 - struct blk_plug plug; 997 - size_t shorted = 0; 998 - ssize_t ret; 999 - 1000 - if (bdev_read_only(I_BDEV(bd_inode))) 1001 - return -EPERM; 1002 - 1003 - if (IS_SWAPFILE(bd_inode) && !is_hibernate_resume_dev(bd_inode->i_rdev)) 1004 - return -ETXTBSY; 1005 - 1006 - if (!iov_iter_count(from)) 1007 - return 0; 1008 - 1009 - if (iocb->ki_pos >= size) 1010 - return -ENOSPC; 1011 - 1012 - if ((iocb->ki_flags & (IOCB_NOWAIT | IOCB_DIRECT)) == IOCB_NOWAIT) 1013 - return -EOPNOTSUPP; 1014 - 1015 - size -= iocb->ki_pos; 1016 - if (iov_iter_count(from) > size) { 1017 - shorted = iov_iter_count(from) - size; 1018 - iov_iter_truncate(from, size); 1019 - } 1020 - 1021 - blk_start_plug(&plug); 1022 - ret = __generic_file_write_iter(iocb, from); 1023 - if (ret > 0) 1024 - ret = generic_write_sync(iocb, ret); 1025 - iov_iter_reexpand(from, iov_iter_count(from) + shorted); 1026 - blk_finish_plug(&plug); 1027 - return ret; 1028 - } 1029 - 1030 - static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) 1031 - { 1032 - struct file *file = iocb->ki_filp; 1033 - struct inode *bd_inode = bdev_file_inode(file); 1034 - loff_t size = i_size_read(bd_inode); 1035 - loff_t pos = iocb->ki_pos; 1036 - size_t shorted = 0; 1037 - ssize_t ret; 1038 - 1039 - if (pos >= size) 1040 - return 0; 1041 - 1042 - size -= pos; 1043 - if (iov_iter_count(to) > size) { 1044 - shorted = iov_iter_count(to) - size; 1045 - iov_iter_truncate(to, size); 1046 - } 1047 - 1048 - ret = generic_file_read_iter(iocb, to); 1049 - iov_iter_reexpand(to, iov_iter_count(to) + shorted); 1050 - return ret; 1051 - } 1052 - 1053 - static int blkdev_writepages(struct address_space *mapping, 1054 - struct writeback_control *wbc) 1055 - { 1056 - return generic_writepages(mapping, 
wbc); 1057 - } 1058 - 1059 - static const struct address_space_operations def_blk_aops = { 1060 - .set_page_dirty = __set_page_dirty_buffers, 1061 - .readpage = blkdev_readpage, 1062 - .readahead = blkdev_readahead, 1063 - .writepage = blkdev_writepage, 1064 - .write_begin = blkdev_write_begin, 1065 - .write_end = blkdev_write_end, 1066 - .writepages = blkdev_writepages, 1067 - .direct_IO = blkdev_direct_IO, 1068 - .migratepage = buffer_migrate_page_norefs, 1069 - .is_dirty_writeback = buffer_check_dirty_writeback, 1070 - }; 1071 - 1072 - #define BLKDEV_FALLOC_FL_SUPPORTED \ 1073 - (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \ 1074 - FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE) 1075 - 1076 - static long blkdev_fallocate(struct file *file, int mode, loff_t start, 1077 - loff_t len) 1078 - { 1079 - struct block_device *bdev = I_BDEV(bdev_file_inode(file)); 1080 - loff_t end = start + len - 1; 1081 - loff_t isize; 1082 - int error; 1083 - 1084 - /* Fail if we don't recognize the flags. */ 1085 - if (mode & ~BLKDEV_FALLOC_FL_SUPPORTED) 1086 - return -EOPNOTSUPP; 1087 - 1088 - /* Don't go off the end of the device. */ 1089 - isize = i_size_read(bdev->bd_inode); 1090 - if (start >= isize) 1091 - return -EINVAL; 1092 - if (end >= isize) { 1093 - if (mode & FALLOC_FL_KEEP_SIZE) { 1094 - len = isize - start; 1095 - end = start + len - 1; 1096 - } else 1097 - return -EINVAL; 1098 - } 1099 - 1100 - /* 1101 - * Don't allow IO that isn't aligned to logical block size. 1102 - */ 1103 - if ((start | len) & (bdev_logical_block_size(bdev) - 1)) 1104 - return -EINVAL; 1105 - 1106 - /* Invalidate the page cache, including dirty pages. 
*/ 1107 - error = truncate_bdev_range(bdev, file->f_mode, start, end); 1108 - if (error) 1109 - return error; 1110 - 1111 - switch (mode) { 1112 - case FALLOC_FL_ZERO_RANGE: 1113 - case FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE: 1114 - error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9, 1115 - GFP_KERNEL, BLKDEV_ZERO_NOUNMAP); 1116 - break; 1117 - case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE: 1118 - error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9, 1119 - GFP_KERNEL, BLKDEV_ZERO_NOFALLBACK); 1120 - break; 1121 - case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE: 1122 - error = blkdev_issue_discard(bdev, start >> 9, len >> 9, 1123 - GFP_KERNEL, 0); 1124 - break; 1125 - default: 1126 - return -EOPNOTSUPP; 1127 - } 1128 - if (error) 1129 - return error; 1130 - 1131 - /* 1132 - * Invalidate the page cache again; if someone wandered in and dirtied 1133 - * a page, we just discard it - userspace has no way of knowing whether 1134 - * the write happened before or after discard completing... 1135 - */ 1136 - return truncate_bdev_range(bdev, file->f_mode, start, end); 1137 - } 1138 - 1139 - const struct file_operations def_blk_fops = { 1140 - .open = blkdev_open, 1141 - .release = blkdev_close, 1142 - .llseek = block_llseek, 1143 - .read_iter = blkdev_read_iter, 1144 - .write_iter = blkdev_write_iter, 1145 - .iopoll = blkdev_iopoll, 1146 - .mmap = generic_file_mmap, 1147 - .fsync = blkdev_fsync, 1148 - .unlocked_ioctl = block_ioctl, 1149 - #ifdef CONFIG_COMPAT 1150 - .compat_ioctl = compat_blkdev_ioctl, 1151 - #endif 1152 - .splice_read = generic_file_splice_read, 1153 - .splice_write = iter_file_splice_write, 1154 - .fallocate = blkdev_fallocate, 1155 - }; 1156 1399 1157 1400 /** 1158 1401 * lookup_bdev - lookup a struct block_device by name
+1 -1
fs/internal.h
··· 18 18 struct pipe_inode_info; 19 19 20 20 /* 21 - * block_dev.c 21 + * block/bdev.c 22 22 */ 23 23 #ifdef CONFIG_BLOCK 24 24 extern void __init bdev_cache_init(void);