Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
blkdev.h at ee9dce44362b2d8132c32964656ab6dff7dfbc6a (1908 lines, 56 kB)
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Portions Copyright (C) 1992 Drew Eckhardt
 */
#ifndef _LINUX_BLKDEV_H
#define _LINUX_BLKDEV_H

#include <linux/types.h>
#include <linux/blk_types.h>
#include <linux/device.h>
#include <linux/list.h>
#include <linux/llist.h>
#include <linux/minmax.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/completion.h>
#include <linux/wait.h>
#include <linux/bio.h>
#include <linux/gfp.h>
#include <linux/kdev_t.h>
#include <linux/rcupdate.h>
#include <linux/percpu-refcount.h>
#include <linux/blkzoned.h>
#include <linux/sched.h>
#include <linux/sbitmap.h>
#include <linux/uuid.h>
#include <linux/xarray.h>
#include <linux/file.h>
#include <linux/lockdep.h>

struct module;
struct request_queue;
struct elevator_queue;
struct blk_trace;
struct request;
struct sg_io_hdr;
struct blkcg_gq;
struct blk_flush_queue;
struct kiocb;
struct pr_ops;
struct rq_qos;
struct hd_geometry;
struct blk_report_zones_args;
struct blk_queue_stats;
struct blk_stat_callback;
struct blk_crypto_profile;

extern const struct device_type disk_type;
extern const struct device_type part_type;
extern const struct class block_class;

/*
 * Maximum number of blkcg policies allowed to be registered concurrently.
 * Defined here to simplify include dependency.
 */
#define BLKCG_MAX_POLS		6

#define DISK_MAX_PARTS			256
#define DISK_NAME_LEN			32

#define PARTITION_META_INFO_VOLNAMELTH	64
/*
 * Enough for the string representation of any kind of UUID plus NULL.
 * EFI UUID is 36 characters. MSDOS UUID is 11 characters.
 */
#define PARTITION_META_INFO_UUIDLTH	(UUID_STRING_LEN + 1)

struct partition_meta_info {
	char uuid[PARTITION_META_INFO_UUIDLTH];
	u8 volname[PARTITION_META_INFO_VOLNAMELTH];
};

/**
 * DOC: genhd capability flags
 *
 * ``GENHD_FL_REMOVABLE``: indicates that the block device gives access to
 * removable media. When set, the device remains present even when media is not
 * inserted. Shall not be set for devices which are removed entirely when the
 * media is removed.
 *
 * ``GENHD_FL_HIDDEN``: the block device is hidden; it doesn't produce events,
 * doesn't appear in sysfs, and can't be opened from userspace or using
 * blkdev_get*. Used for the underlying components of multipath devices.
 *
 * ``GENHD_FL_NO_PART``: partition support is disabled. The kernel will not
 * scan for partitions from add_disk, and users can't add partitions manually.
 *
 */
enum {
	GENHD_FL_REMOVABLE	= 1 << 0,
	GENHD_FL_HIDDEN		= 1 << 1,
	GENHD_FL_NO_PART	= 1 << 2,
};

enum {
	DISK_EVENT_MEDIA_CHANGE		= 1 << 0, /* media changed */
	DISK_EVENT_EJECT_REQUEST	= 1 << 1, /* eject requested */
};

enum {
	/* Poll even if events_poll_msecs is unset */
	DISK_EVENT_FLAG_POLL			= 1 << 0,
	/* Forward events to udev */
	DISK_EVENT_FLAG_UEVENT			= 1 << 1,
	/* Block event polling when open for exclusive write */
	DISK_EVENT_FLAG_BLOCK_ON_EXCL_WRITE	= 1 << 2,
};

struct disk_events;
struct badblocks;

enum blk_integrity_checksum {
	BLK_INTEGRITY_CSUM_NONE		= 0,
	BLK_INTEGRITY_CSUM_IP		= 1,
	BLK_INTEGRITY_CSUM_CRC		= 2,
	BLK_INTEGRITY_CSUM_CRC64	= 3,
} __packed;

struct blk_integrity {
	unsigned char				flags;
	enum blk_integrity_checksum		csum_type;
	unsigned char				metadata_size;
	unsigned char				pi_offset;
	unsigned char				interval_exp;
	unsigned char				tag_size;
	unsigned char				pi_tuple_size;
};

typedef unsigned int __bitwise blk_mode_t;

/* open for reading */
#define BLK_OPEN_READ			((__force blk_mode_t)(1 << 0))
/* open for writing */
#define BLK_OPEN_WRITE			((__force blk_mode_t)(1 << 1))
/* open exclusively (vs other exclusive openers) */
#define BLK_OPEN_EXCL			((__force blk_mode_t)(1 << 2))
/* opened with O_NDELAY */
#define BLK_OPEN_NDELAY			((__force blk_mode_t)(1 << 3))
/* open for "writes" only for ioctls (special hack for floppy.c) */
#define BLK_OPEN_WRITE_IOCTL		((__force blk_mode_t)(1 << 4))
/* open is exclusive wrt all other BLK_OPEN_WRITE opens to the device */
#define BLK_OPEN_RESTRICT_WRITES	((__force blk_mode_t)(1 << 5))
/* return partition scanning errors */
#define BLK_OPEN_STRICT_SCAN		((__force blk_mode_t)(1 << 6))

struct gendisk {
	/*
	 * major/first_minor/minors should not be set by any new driver, the
	 * block core will take care of allocating them automatically.
	 */
	int major;
	int first_minor;
	int minors;

	char disk_name[DISK_NAME_LEN];	/* name of major driver */

	unsigned short events;		/* supported events */
	unsigned short event_flags;	/* flags related to event processing */

	struct xarray part_tbl;
	struct block_device *part0;

	const struct block_device_operations *fops;
	struct request_queue *queue;
	void *private_data;

	struct bio_set bio_split;

	int flags;
	unsigned long state;
#define GD_NEED_PART_SCAN		0
#define GD_READ_ONLY			1
#define GD_DEAD				2
#define GD_NATIVE_CAPACITY		3
#define GD_ADDED			4
#define GD_SUPPRESS_PART_SCAN		5
#define GD_OWNS_QUEUE			6
#define GD_ZONE_APPEND_USED		7

	struct mutex open_mutex;	/* open/close mutex */
	unsigned open_partitions;	/* number of open partitions */

	struct backing_dev_info	*bdi;
	struct kobject queue_kobj;	/* the queue/ directory */
	struct kobject *slave_dir;
#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED
	struct list_head slave_bdevs;
#endif
	struct timer_rand_state *random;
	struct disk_events *ev;

#ifdef CONFIG_BLK_DEV_ZONED
	/*
	 * Zoned block device information. Reads of this information must be
	 * protected with blk_queue_enter() / blk_queue_exit(). Modifying this
	 * information is only allowed while no requests are being processed.
	 * See also blk_mq_freeze_queue() and blk_mq_unfreeze_queue().
198 */ 199 unsigned int nr_zones; 200 unsigned int zone_capacity; 201 unsigned int last_zone_capacity; 202 u8 __rcu *zones_cond; 203 unsigned int zone_wplugs_hash_bits; 204 atomic_t nr_zone_wplugs; 205 spinlock_t zone_wplugs_hash_lock; 206 struct mempool *zone_wplugs_pool; 207 struct hlist_head *zone_wplugs_hash; 208 struct workqueue_struct *zone_wplugs_wq; 209 spinlock_t zone_wplugs_list_lock; 210 struct list_head zone_wplugs_list; 211 struct task_struct *zone_wplugs_worker; 212 struct completion zone_wplugs_worker_bio_done; 213#endif /* CONFIG_BLK_DEV_ZONED */ 214 215#if IS_ENABLED(CONFIG_CDROM) 216 struct cdrom_device_info *cdi; 217#endif 218 int node_id; 219 struct badblocks *bb; 220 struct lockdep_map lockdep_map; 221 u64 diskseq; 222 blk_mode_t open_mode; 223 224 /* 225 * Independent sector access ranges. This is always NULL for 226 * devices that do not have multiple independent access ranges. 227 */ 228 struct blk_independent_access_ranges *ia_ranges; 229 230 struct mutex rqos_state_mutex; /* rqos state change mutex */ 231}; 232 233/** 234 * disk_openers - returns how many openers are there for a disk 235 * @disk: disk to check 236 * 237 * This returns the number of openers for a disk. Note that this value is only 238 * stable if disk->open_mutex is held. 239 * 240 * Note: Due to a quirk in the block layer open code, each open partition is 241 * only counted once even if there are multiple openers. 242 */ 243static inline unsigned int disk_openers(struct gendisk *disk) 244{ 245 return atomic_read(&disk->part0->bd_openers); 246} 247 248/** 249 * disk_has_partscan - return %true if partition scanning is enabled on a disk 250 * @disk: disk to check 251 * 252 * Returns %true if partitions scanning is enabled for @disk, or %false if 253 * partition scanning is disabled either permanently or temporarily. 254 */ 255static inline bool disk_has_partscan(struct gendisk *disk) 256{ 257 return !(disk->flags & (GENHD_FL_NO_PART | GENHD_FL_HIDDEN)) && 258 !test_bit(GD_SUPPRESS_PART_SCAN, &disk->state); 259} 260 261/* 262 * The gendisk is refcounted by the part0 block_device, and the bd_device 263 * therein is also used for device model presentation in sysfs. 264 */ 265#define dev_to_disk(device) \ 266 (dev_to_bdev(device)->bd_disk) 267#define disk_to_dev(disk) \ 268 (&((disk)->part0->bd_device)) 269 270#if IS_REACHABLE(CONFIG_CDROM) 271#define disk_to_cdi(disk) ((disk)->cdi) 272#else 273#define disk_to_cdi(disk) NULL 274#endif 275 276static inline dev_t disk_devt(struct gendisk *disk) 277{ 278 return MKDEV(disk->major, disk->first_minor); 279} 280 281#ifdef CONFIG_TRANSPARENT_HUGEPAGE 282/* 283 * We should strive for 1 << (PAGE_SHIFT + MAX_PAGECACHE_ORDER) 284 * however we constrain this to what we can validate and test. 
285 */ 286#define BLK_MAX_BLOCK_SIZE SZ_64K 287#else 288#define BLK_MAX_BLOCK_SIZE PAGE_SIZE 289#endif 290 291 292/* blk_validate_limits() validates bsize, so drivers don't usually need to */ 293static inline int blk_validate_block_size(unsigned long bsize) 294{ 295 if (bsize < 512 || bsize > BLK_MAX_BLOCK_SIZE || !is_power_of_2(bsize)) 296 return -EINVAL; 297 298 return 0; 299} 300 301static inline bool blk_op_is_passthrough(blk_opf_t op) 302{ 303 op &= REQ_OP_MASK; 304 return op == REQ_OP_DRV_IN || op == REQ_OP_DRV_OUT; 305} 306 307/* flags set by the driver in queue_limits.features */ 308typedef unsigned int __bitwise blk_features_t; 309 310/* supports a volatile write cache */ 311#define BLK_FEAT_WRITE_CACHE ((__force blk_features_t)(1u << 0)) 312 313/* supports passing on the FUA bit */ 314#define BLK_FEAT_FUA ((__force blk_features_t)(1u << 1)) 315 316/* rotational device (hard drive or floppy) */ 317#define BLK_FEAT_ROTATIONAL ((__force blk_features_t)(1u << 2)) 318 319/* contributes to the random number pool */ 320#define BLK_FEAT_ADD_RANDOM ((__force blk_features_t)(1u << 3)) 321 322/* do disk/partitions IO accounting */ 323#define BLK_FEAT_IO_STAT ((__force blk_features_t)(1u << 4)) 324 325/* don't modify data until writeback is done */ 326#define BLK_FEAT_STABLE_WRITES ((__force blk_features_t)(1u << 5)) 327 328/* always completes in submit context */ 329#define BLK_FEAT_SYNCHRONOUS ((__force blk_features_t)(1u << 6)) 330 331/* supports REQ_NOWAIT */ 332#define BLK_FEAT_NOWAIT ((__force blk_features_t)(1u << 7)) 333 334/* supports DAX */ 335#define BLK_FEAT_DAX ((__force blk_features_t)(1u << 8)) 336 337/* supports I/O polling */ 338#define BLK_FEAT_POLL ((__force blk_features_t)(1u << 9)) 339 340/* is a zoned device */ 341#define BLK_FEAT_ZONED ((__force blk_features_t)(1u << 10)) 342 343/* supports PCI(e) p2p requests */ 344#define BLK_FEAT_PCI_P2PDMA ((__force blk_features_t)(1u << 12)) 345 346/* skip this queue in blk_mq_(un)quiesce_tagset */ 347#define BLK_FEAT_SKIP_TAGSET_QUIESCE ((__force blk_features_t)(1u << 13)) 348 349/* atomic writes enabled */ 350#define BLK_FEAT_ATOMIC_WRITES ((__force blk_features_t)(1u << 14)) 351 352/* undocumented magic for bcache */ 353#define BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE \ 354 ((__force blk_features_t)(1u << 15)) 355 356/* 357 * Flags automatically inherited when stacking limits. 
358 */ 359#define BLK_FEAT_INHERIT_MASK \ 360 (BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA | BLK_FEAT_ROTATIONAL | \ 361 BLK_FEAT_STABLE_WRITES | BLK_FEAT_ZONED | \ 362 BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE) 363 364/* internal flags in queue_limits.flags */ 365typedef unsigned int __bitwise blk_flags_t; 366 367/* do not send FLUSH/FUA commands despite advertising a write cache */ 368#define BLK_FLAG_WRITE_CACHE_DISABLED ((__force blk_flags_t)(1u << 0)) 369 370/* I/O topology is misaligned */ 371#define BLK_FLAG_MISALIGNED ((__force blk_flags_t)(1u << 1)) 372 373/* passthrough command IO accounting */ 374#define BLK_FLAG_IOSTATS_PASSTHROUGH ((__force blk_flags_t)(1u << 2)) 375 376struct queue_limits { 377 blk_features_t features; 378 blk_flags_t flags; 379 unsigned long seg_boundary_mask; 380 unsigned long virt_boundary_mask; 381 382 unsigned int max_hw_sectors; 383 unsigned int max_dev_sectors; 384 unsigned int chunk_sectors; 385 unsigned int max_sectors; 386 unsigned int max_user_sectors; 387 unsigned int max_segment_size; 388 unsigned int max_fast_segment_size; 389 unsigned int physical_block_size; 390 unsigned int logical_block_size; 391 unsigned int alignment_offset; 392 unsigned int io_min; 393 unsigned int io_opt; 394 unsigned int max_discard_sectors; 395 unsigned int max_hw_discard_sectors; 396 unsigned int max_user_discard_sectors; 397 unsigned int max_secure_erase_sectors; 398 unsigned int max_write_zeroes_sectors; 399 unsigned int max_wzeroes_unmap_sectors; 400 unsigned int max_hw_wzeroes_unmap_sectors; 401 unsigned int max_user_wzeroes_unmap_sectors; 402 unsigned int max_hw_zone_append_sectors; 403 unsigned int max_zone_append_sectors; 404 unsigned int discard_granularity; 405 unsigned int discard_alignment; 406 unsigned int zone_write_granularity; 407 408 /* atomic write limits */ 409 unsigned int atomic_write_hw_max; 410 unsigned int atomic_write_max_sectors; 411 unsigned int atomic_write_hw_boundary; 412 unsigned int atomic_write_boundary_sectors; 413 unsigned int atomic_write_hw_unit_min; 414 unsigned int atomic_write_unit_min; 415 unsigned int atomic_write_hw_unit_max; 416 unsigned int atomic_write_unit_max; 417 418 unsigned short max_segments; 419 unsigned short max_integrity_segments; 420 unsigned short max_discard_segments; 421 422 unsigned short max_write_streams; 423 unsigned int write_stream_granularity; 424 425 unsigned int max_open_zones; 426 unsigned int max_active_zones; 427 428 /* 429 * Drivers that set dma_alignment to less than 511 must be prepared to 430 * handle individual bvec's that are not a multiple of a SECTOR_SIZE 431 * due to possible offsets. 
432 */ 433 unsigned int dma_alignment; 434 unsigned int dma_pad_mask; 435 436 struct blk_integrity integrity; 437}; 438 439typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx, 440 void *data); 441 442int disk_report_zone(struct gendisk *disk, struct blk_zone *zone, 443 unsigned int idx, struct blk_report_zones_args *args); 444 445int blkdev_get_zone_info(struct block_device *bdev, sector_t sector, 446 struct blk_zone *zone); 447 448#define BLK_ALL_ZONES ((unsigned int)-1) 449int blkdev_report_zones(struct block_device *bdev, sector_t sector, 450 unsigned int nr_zones, report_zones_cb cb, void *data); 451int blkdev_report_zones_cached(struct block_device *bdev, sector_t sector, 452 unsigned int nr_zones, report_zones_cb cb, void *data); 453int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op, 454 sector_t sectors, sector_t nr_sectors); 455int blk_revalidate_disk_zones(struct gendisk *disk); 456 457/* 458 * Independent access ranges: struct blk_independent_access_range describes 459 * a range of contiguous sectors that can be accessed using device command 460 * execution resources that are independent from the resources used for 461 * other access ranges. This is typically found with single-LUN multi-actuator 462 * HDDs where each access range is served by a different set of heads. 463 * The set of independent ranges supported by the device is defined using 464 * struct blk_independent_access_ranges. The independent ranges must not overlap 465 * and must include all sectors within the disk capacity (no sector holes 466 * allowed). 467 * For a device with multiple ranges, requests targeting sectors in different 468 * ranges can be executed in parallel. A request can straddle an access range 469 * boundary. 470 */ 471struct blk_independent_access_range { 472 struct kobject kobj; 473 sector_t sector; 474 sector_t nr_sectors; 475}; 476 477struct blk_independent_access_ranges { 478 struct kobject kobj; 479 bool sysfs_registered; 480 unsigned int nr_ia_ranges; 481 struct blk_independent_access_range ia_range[]; 482}; 483 484struct request_queue { 485 /* 486 * The queue owner gets to use this for whatever they like. 487 * ll_rw_blk doesn't touch it. 488 */ 489 void *queuedata; 490 491 struct elevator_queue *elevator; 492 493 const struct blk_mq_ops *mq_ops; 494 495 /* sw queues */ 496 struct blk_mq_ctx __percpu *queue_ctx; 497 498 /* 499 * various queue flags, see QUEUE_* below 500 */ 501 unsigned long queue_flags; 502 503 unsigned int __data_racy rq_timeout; 504 505 unsigned int queue_depth; 506 507 refcount_t refs; 508 509 /* hw dispatch queues */ 510 unsigned int nr_hw_queues; 511 struct blk_mq_hw_ctx * __rcu *queue_hw_ctx __counted_by_ptr(nr_hw_queues); 512 513 struct percpu_ref q_usage_counter; 514 struct lock_class_key io_lock_cls_key; 515 struct lockdep_map io_lockdep_map; 516 517 struct lock_class_key q_lock_cls_key; 518 struct lockdep_map q_lockdep_map; 519 520 struct request *last_merge; 521 522 spinlock_t queue_lock; 523 524 int quiesce_depth; 525 526 struct gendisk *disk; 527 528 /* 529 * mq queue kobject 530 */ 531 struct kobject *mq_kobj; 532 533 struct queue_limits limits; 534 535#ifdef CONFIG_PM 536 struct device *dev; 537 enum rpm_status rpm_status; 538#endif 539 540 /* 541 * Number of contexts that have called blk_set_pm_only(). If this 542 * counter is above zero then only RQF_PM requests are processed. 
543 */ 544 atomic_t pm_only; 545 546 struct blk_queue_stats *stats; 547 struct rq_qos *rq_qos; 548 struct mutex rq_qos_mutex; 549 550 /* 551 * ida allocated id for this queue. Used to index queues from 552 * ioctx. 553 */ 554 int id; 555 556 /* 557 * queue settings 558 */ 559 unsigned int nr_requests; /* Max # of requests */ 560 unsigned int async_depth; /* Max # of async requests */ 561 562#ifdef CONFIG_BLK_INLINE_ENCRYPTION 563 struct blk_crypto_profile *crypto_profile; 564 struct kobject *crypto_kobject; 565#endif 566 567 struct timer_list timeout; 568 struct work_struct timeout_work; 569 570 atomic_t nr_active_requests_shared_tags; 571 572 struct blk_mq_tags *sched_shared_tags; 573 574 struct list_head icq_list; 575#ifdef CONFIG_BLK_CGROUP 576 DECLARE_BITMAP (blkcg_pols, BLKCG_MAX_POLS); 577 struct blkcg_gq *root_blkg; 578 struct list_head blkg_list; 579 struct mutex blkcg_mutex; 580#endif 581 582 int node; 583 584 spinlock_t requeue_lock; 585 struct list_head requeue_list; 586 struct delayed_work requeue_work; 587 588#ifdef CONFIG_BLK_DEV_IO_TRACE 589 struct blk_trace __rcu *blk_trace; 590#endif 591 /* 592 * for flush operations 593 */ 594 struct blk_flush_queue *fq; 595 struct list_head flush_list; 596 597 /* 598 * Protects against I/O scheduler switching, particularly when updating 599 * q->elevator. Since the elevator update code path may also modify q-> 600 * nr_requests and wbt latency, this lock also protects the sysfs attrs 601 * nr_requests and wbt_lat_usec. Additionally the nr_hw_queues update 602 * may modify hctx tags, reserved-tags and cpumask, so this lock also 603 * helps protect the hctx sysfs/debugfs attrs. To ensure proper locking 604 * order during an elevator or nr_hw_queue update, first freeze the 605 * queue, then acquire ->elevator_lock. 606 */ 607 struct mutex elevator_lock; 608 609 struct mutex sysfs_lock; 610 /* 611 * Protects queue limits and also sysfs attribute read_ahead_kb. 612 */ 613 struct mutex limits_lock; 614 615 /* 616 * for reusing dead hctx instance in case of updating 617 * nr_hw_queues 618 */ 619 struct list_head unused_hctx_list; 620 spinlock_t unused_hctx_lock; 621 622 int mq_freeze_depth; 623 624#ifdef CONFIG_BLK_DEV_THROTTLING 625 /* Throttle data */ 626 struct throtl_data *td; 627#endif 628 struct rcu_head rcu_head; 629#ifdef CONFIG_LOCKDEP 630 struct task_struct *mq_freeze_owner; 631 int mq_freeze_owner_depth; 632 /* 633 * Records disk & queue state in current context, used in unfreeze 634 * queue 635 */ 636 bool mq_freeze_disk_dead; 637 bool mq_freeze_queue_dying; 638#endif 639 wait_queue_head_t mq_freeze_wq; 640 /* 641 * Protect concurrent access to q_usage_counter by 642 * percpu_ref_kill() and percpu_ref_reinit(). 643 */ 644 struct mutex mq_freeze_lock; 645 646 struct blk_mq_tag_set *tag_set; 647 struct list_head tag_set_list; 648 649 struct dentry *debugfs_dir; 650 struct dentry *sched_debugfs_dir; 651 struct dentry *rqos_debugfs_dir; 652 /* 653 * Serializes all debugfs metadata operations using the above dentries. 
654 */ 655 struct mutex debugfs_mutex; 656}; 657 658/* Keep blk_queue_flag_name[] in sync with the definitions below */ 659enum { 660 QUEUE_FLAG_DYING, /* queue being torn down */ 661 QUEUE_FLAG_NOMERGES, /* disable merge attempts */ 662 QUEUE_FLAG_SAME_COMP, /* complete on same CPU-group */ 663 QUEUE_FLAG_FAIL_IO, /* fake timeout */ 664 QUEUE_FLAG_NOXMERGES, /* No extended merges */ 665 QUEUE_FLAG_SAME_FORCE, /* force complete on same CPU */ 666 QUEUE_FLAG_INIT_DONE, /* queue is initialized */ 667 QUEUE_FLAG_STATS, /* track IO start and completion times */ 668 QUEUE_FLAG_REGISTERED, /* queue has been registered to a disk */ 669 QUEUE_FLAG_QUIESCED, /* queue has been quiesced */ 670 QUEUE_FLAG_RQ_ALLOC_TIME, /* record rq->alloc_time_ns */ 671 QUEUE_FLAG_HCTX_ACTIVE, /* at least one blk-mq hctx is active */ 672 QUEUE_FLAG_SQ_SCHED, /* single queue style io dispatch */ 673 QUEUE_FLAG_DISABLE_WBT_DEF, /* for sched to disable/enable wbt */ 674 QUEUE_FLAG_NO_ELV_SWITCH, /* can't switch elevator any more */ 675 QUEUE_FLAG_QOS_ENABLED, /* qos is enabled */ 676 QUEUE_FLAG_BIO_ISSUE_TIME, /* record bio->issue_time_ns */ 677 QUEUE_FLAG_ZONED_QD1_WRITES, /* Limit zoned devices writes to QD=1 */ 678 QUEUE_FLAG_MAX 679}; 680 681#define QUEUE_FLAG_MQ_DEFAULT (1UL << QUEUE_FLAG_SAME_COMP) 682 683void blk_queue_flag_set(unsigned int flag, struct request_queue *q); 684void blk_queue_flag_clear(unsigned int flag, struct request_queue *q); 685 686#define blk_queue_dying(q) test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags) 687#define blk_queue_init_done(q) test_bit(QUEUE_FLAG_INIT_DONE, &(q)->queue_flags) 688#define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) 689#define blk_queue_noxmerges(q) \ 690 test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) 691#define blk_queue_rot(q) ((q)->limits.features & BLK_FEAT_ROTATIONAL) 692#define blk_queue_io_stat(q) ((q)->limits.features & BLK_FEAT_IO_STAT) 693#define blk_queue_passthrough_stat(q) \ 694 ((q)->limits.flags & BLK_FLAG_IOSTATS_PASSTHROUGH) 695#define blk_queue_dax(q) ((q)->limits.features & BLK_FEAT_DAX) 696#define blk_queue_pci_p2pdma(q) ((q)->limits.features & BLK_FEAT_PCI_P2PDMA) 697#ifdef CONFIG_BLK_RQ_ALLOC_TIME 698#define blk_queue_rq_alloc_time(q) \ 699 test_bit(QUEUE_FLAG_RQ_ALLOC_TIME, &(q)->queue_flags) 700#else 701#define blk_queue_rq_alloc_time(q) false 702#endif 703 704#define blk_noretry_request(rq) \ 705 ((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \ 706 REQ_FAILFAST_DRIVER)) 707#define blk_queue_quiesced(q) test_bit(QUEUE_FLAG_QUIESCED, &(q)->queue_flags) 708#define blk_queue_pm_only(q) atomic_read(&(q)->pm_only) 709#define blk_queue_registered(q) test_bit(QUEUE_FLAG_REGISTERED, &(q)->queue_flags) 710#define blk_queue_sq_sched(q) test_bit(QUEUE_FLAG_SQ_SCHED, &(q)->queue_flags) 711#define blk_queue_skip_tagset_quiesce(q) \ 712 ((q)->limits.features & BLK_FEAT_SKIP_TAGSET_QUIESCE) 713#define blk_queue_disable_wbt(q) \ 714 test_bit(QUEUE_FLAG_DISABLE_WBT_DEF, &(q)->queue_flags) 715#define blk_queue_no_elv_switch(q) \ 716 test_bit(QUEUE_FLAG_NO_ELV_SWITCH, &(q)->queue_flags) 717#define blk_queue_zoned_qd1_writes(q) \ 718 test_bit(QUEUE_FLAG_ZONED_QD1_WRITES, &(q)->queue_flags) 719 720extern void blk_set_pm_only(struct request_queue *q); 721extern void blk_clear_pm_only(struct request_queue *q); 722 723#define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) 724 725#define dma_map_bvec(dev, bv, dir, attrs) \ 726 dma_map_page_attrs(dev, (bv)->bv_page, (bv)->bv_offset, (bv)->bv_len, \ 727 (dir), 
(attrs)) 728 729static inline bool queue_is_mq(struct request_queue *q) 730{ 731 return q->mq_ops; 732} 733 734#ifdef CONFIG_PM 735static inline enum rpm_status queue_rpm_status(struct request_queue *q) 736{ 737 return q->rpm_status; 738} 739#else 740static inline enum rpm_status queue_rpm_status(struct request_queue *q) 741{ 742 return RPM_ACTIVE; 743} 744#endif 745 746static inline bool blk_queue_is_zoned(struct request_queue *q) 747{ 748 return IS_ENABLED(CONFIG_BLK_DEV_ZONED) && 749 (q->limits.features & BLK_FEAT_ZONED); 750} 751 752static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector) 753{ 754 if (!blk_queue_is_zoned(disk->queue)) 755 return 0; 756 return sector >> ilog2(disk->queue->limits.chunk_sectors); 757} 758 759static inline unsigned int bdev_max_open_zones(struct block_device *bdev) 760{ 761 return bdev->bd_disk->queue->limits.max_open_zones; 762} 763 764static inline unsigned int bdev_max_active_zones(struct block_device *bdev) 765{ 766 return bdev->bd_disk->queue->limits.max_active_zones; 767} 768 769static inline unsigned int blk_queue_depth(struct request_queue *q) 770{ 771 if (q->queue_depth) 772 return q->queue_depth; 773 774 return q->nr_requests; 775} 776 777/* 778 * default timeout for SG_IO if none specified 779 */ 780#define BLK_DEFAULT_SG_TIMEOUT (60 * HZ) 781#define BLK_MIN_SG_TIMEOUT (7 * HZ) 782 783/* This should not be used directly - use rq_for_each_segment */ 784#define for_each_bio(_bio) \ 785 for (; _bio; _bio = _bio->bi_next) 786 787int __must_check add_disk_fwnode(struct device *parent, struct gendisk *disk, 788 const struct attribute_group **groups, 789 struct fwnode_handle *fwnode); 790int __must_check device_add_disk(struct device *parent, struct gendisk *disk, 791 const struct attribute_group **groups); 792static inline int __must_check add_disk(struct gendisk *disk) 793{ 794 return device_add_disk(NULL, disk, NULL); 795} 796void del_gendisk(struct gendisk *gp); 797void invalidate_disk(struct gendisk *disk); 798void set_disk_ro(struct gendisk *disk, bool read_only); 799void disk_uevent(struct gendisk *disk, enum kobject_action action); 800 801static inline u8 bdev_partno(const struct block_device *bdev) 802{ 803 return atomic_read(&bdev->__bd_flags) & BD_PARTNO; 804} 805 806static inline bool bdev_test_flag(const struct block_device *bdev, unsigned flag) 807{ 808 return atomic_read(&bdev->__bd_flags) & flag; 809} 810 811static inline void bdev_set_flag(struct block_device *bdev, unsigned flag) 812{ 813 atomic_or(flag, &bdev->__bd_flags); 814} 815 816static inline void bdev_clear_flag(struct block_device *bdev, unsigned flag) 817{ 818 atomic_andnot(flag, &bdev->__bd_flags); 819} 820 821static inline bool get_disk_ro(struct gendisk *disk) 822{ 823 return bdev_test_flag(disk->part0, BD_READ_ONLY) || 824 test_bit(GD_READ_ONLY, &disk->state); 825} 826 827static inline bool bdev_read_only(struct block_device *bdev) 828{ 829 return bdev_test_flag(bdev, BD_READ_ONLY) || get_disk_ro(bdev->bd_disk); 830} 831 832bool set_capacity_and_notify(struct gendisk *disk, sector_t size); 833void disk_force_media_change(struct gendisk *disk); 834void bdev_mark_dead(struct block_device *bdev, bool surprise); 835 836void add_disk_randomness(struct gendisk *disk) __latent_entropy; 837void rand_initialize_disk(struct gendisk *disk); 838 839static inline sector_t get_start_sect(struct block_device *bdev) 840{ 841 return bdev->bd_start_sect; 842} 843 844static inline sector_t bdev_nr_sectors(struct block_device *bdev) 845{ 846 return bdev->bd_nr_sectors; 
847} 848 849static inline loff_t bdev_nr_bytes(struct block_device *bdev) 850{ 851 return (loff_t)bdev_nr_sectors(bdev) << SECTOR_SHIFT; 852} 853 854static inline sector_t get_capacity(struct gendisk *disk) 855{ 856 return bdev_nr_sectors(disk->part0); 857} 858 859static inline u64 sb_bdev_nr_blocks(struct super_block *sb) 860{ 861 return bdev_nr_sectors(sb->s_bdev) >> 862 (sb->s_blocksize_bits - SECTOR_SHIFT); 863} 864 865#ifdef CONFIG_BLK_DEV_ZONED 866static inline unsigned int disk_nr_zones(struct gendisk *disk) 867{ 868 return disk->nr_zones; 869} 870 871/** 872 * bio_needs_zone_write_plugging - Check if a BIO needs to be handled with zone 873 * write plugging 874 * @bio: The BIO being submitted 875 * 876 * Return true whenever @bio execution needs to be handled through zone 877 * write plugging (using blk_zone_plug_bio()). Return false otherwise. 878 */ 879static inline bool bio_needs_zone_write_plugging(struct bio *bio) 880{ 881 enum req_op op = bio_op(bio); 882 883 /* 884 * Only zoned block devices have a zone write plug hash table. But not 885 * all of them have one (e.g. DM devices may not need one). 886 */ 887 if (!bio->bi_bdev->bd_disk->zone_wplugs_hash) 888 return false; 889 890 /* Only write operations need zone write plugging. */ 891 if (!op_is_write(op)) 892 return false; 893 894 /* Ignore empty flush */ 895 if (op_is_flush(bio->bi_opf) && !bio_sectors(bio)) 896 return false; 897 898 /* Ignore BIOs that already have been handled by zone write plugging. */ 899 if (bio_flagged(bio, BIO_ZONE_WRITE_PLUGGING)) 900 return false; 901 902 /* 903 * All zone write operations must be handled through zone write plugging 904 * using blk_zone_plug_bio(). 905 */ 906 switch (op) { 907 case REQ_OP_ZONE_APPEND: 908 case REQ_OP_WRITE: 909 case REQ_OP_WRITE_ZEROES: 910 case REQ_OP_ZONE_FINISH: 911 case REQ_OP_ZONE_RESET: 912 case REQ_OP_ZONE_RESET_ALL: 913 return true; 914 default: 915 return false; 916 } 917} 918 919bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs); 920 921/** 922 * disk_zone_capacity - returns the zone capacity of zone containing @sector 923 * @disk: disk to work with 924 * @sector: sector number within the querying zone 925 * 926 * Returns the zone capacity of a zone containing @sector. @sector can be any 927 * sector in the zone. 
928 */ 929static inline unsigned int disk_zone_capacity(struct gendisk *disk, 930 sector_t sector) 931{ 932 sector_t zone_sectors = disk->queue->limits.chunk_sectors; 933 934 if (sector + zone_sectors >= get_capacity(disk)) 935 return disk->last_zone_capacity; 936 return disk->zone_capacity; 937} 938static inline unsigned int bdev_zone_capacity(struct block_device *bdev, 939 sector_t pos) 940{ 941 return disk_zone_capacity(bdev->bd_disk, pos); 942} 943 944bool bdev_zone_is_seq(struct block_device *bdev, sector_t sector); 945 946#else /* CONFIG_BLK_DEV_ZONED */ 947static inline unsigned int disk_nr_zones(struct gendisk *disk) 948{ 949 return 0; 950} 951 952static inline bool bdev_zone_is_seq(struct block_device *bdev, sector_t sector) 953{ 954 return false; 955} 956 957static inline bool bio_needs_zone_write_plugging(struct bio *bio) 958{ 959 return false; 960} 961 962static inline bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs) 963{ 964 return false; 965} 966#endif /* CONFIG_BLK_DEV_ZONED */ 967 968static inline unsigned int bdev_nr_zones(struct block_device *bdev) 969{ 970 return disk_nr_zones(bdev->bd_disk); 971} 972 973int bdev_disk_changed(struct gendisk *disk, bool invalidate); 974 975void put_disk(struct gendisk *disk); 976struct gendisk *__blk_alloc_disk(struct queue_limits *lim, int node, 977 struct lock_class_key *lkclass); 978 979/** 980 * blk_alloc_disk - allocate a gendisk structure 981 * @lim: queue limits to be used for this disk. 982 * @node_id: numa node to allocate on 983 * 984 * Allocate and pre-initialize a gendisk structure for use with BIO based 985 * drivers. 986 * 987 * Returns an ERR_PTR on error, else the allocated disk. 988 * 989 * Context: can sleep 990 */ 991#define blk_alloc_disk(lim, node_id) \ 992({ \ 993 static struct lock_class_key __key; \ 994 \ 995 __blk_alloc_disk(lim, node_id, &__key); \ 996}) 997 998int __register_blkdev(unsigned int major, const char *name, 999 void (*probe)(dev_t devt)); 1000#define register_blkdev(major, name) \ 1001 __register_blkdev(major, name, NULL) 1002void unregister_blkdev(unsigned int major, const char *name); 1003 1004bool disk_check_media_change(struct gendisk *disk); 1005void set_capacity(struct gendisk *disk, sector_t size); 1006 1007#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED 1008int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk); 1009void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk); 1010#else 1011static inline int bd_link_disk_holder(struct block_device *bdev, 1012 struct gendisk *disk) 1013{ 1014 return 0; 1015} 1016static inline void bd_unlink_disk_holder(struct block_device *bdev, 1017 struct gendisk *disk) 1018{ 1019} 1020#endif /* CONFIG_BLOCK_HOLDER_DEPRECATED */ 1021 1022dev_t part_devt(struct gendisk *disk, u8 partno); 1023void inc_diskseq(struct gendisk *disk); 1024void blk_request_module(dev_t devt); 1025 1026extern int blk_register_queue(struct gendisk *disk); 1027extern void blk_unregister_queue(struct gendisk *disk); 1028void submit_bio_noacct(struct bio *bio); 1029struct bio *bio_split_to_limits(struct bio *bio); 1030struct bio *bio_submit_split_bioset(struct bio *bio, unsigned int split_sectors, 1031 struct bio_set *bs); 1032 1033extern int blk_lld_busy(struct request_queue *q); 1034extern int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags); 1035extern void blk_queue_exit(struct request_queue *q); 1036extern void blk_sync_queue(struct request_queue *q); 1037 1038/* Convert a request operation REQ_OP_name into the string "name" */ 
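
/*
 * Editor's illustrative sketch (not part of blkdev.h): how a simple BIO-based
 * driver might typically use blk_alloc_disk(), set_capacity() and add_disk()
 * described above, and tear the disk down again with del_gendisk() and
 * put_disk(). The example_* names, the struct example_dev container and the
 * chosen queue_limits values are hypothetical; example_fops is assumed to be
 * the driver's own struct block_device_operations, defined elsewhere in the
 * driver. Error handling is abbreviated.
 */
struct example_dev {
	struct gendisk *disk;
};

static int example_create_disk(struct example_dev *dev, sector_t capacity)
{
	struct queue_limits lim = {
		.logical_block_size	= 512,
		.features		= BLK_FEAT_WRITE_CACHE,
	};
	struct gendisk *disk;
	int ret;

	/* Allocates the gendisk and its queue; returns an ERR_PTR on failure. */
	disk = blk_alloc_disk(&lim, NUMA_NO_NODE);
	if (IS_ERR(disk))
		return PTR_ERR(disk);

	snprintf(disk->disk_name, DISK_NAME_LEN, "example0");
	disk->fops = &example_fops;	/* driver-provided block_device_operations */
	disk->private_data = dev;
	set_capacity(disk, capacity);

	/* add_disk() is __must_check: registration can fail and must be handled. */
	ret = add_disk(disk);
	if (ret) {
		put_disk(disk);
		return ret;
	}

	dev->disk = disk;
	return 0;
}

static void example_destroy_disk(struct example_dev *dev)
{
	del_gendisk(dev->disk);	/* unregister from the block layer */
	put_disk(dev->disk);	/* drop the reference from blk_alloc_disk() */
}
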
1039extern const char *blk_op_str(enum req_op op); 1040 1041int blk_status_to_errno(blk_status_t status); 1042blk_status_t errno_to_blk_status(int errno); 1043const char *blk_status_to_str(blk_status_t status); 1044 1045/* only poll the hardware once, don't continue until a completion was found */ 1046#define BLK_POLL_ONESHOT (1 << 0) 1047int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags); 1048int iocb_bio_iopoll(struct kiocb *kiocb, struct io_comp_batch *iob, 1049 unsigned int flags); 1050 1051static inline struct request_queue *bdev_get_queue(struct block_device *bdev) 1052{ 1053 return bdev->bd_queue; /* this is never NULL */ 1054} 1055 1056/* Convert a zone condition BLK_ZONE_COND_name into the string "name" */ 1057const char *blk_zone_cond_str(enum blk_zone_cond zone_cond); 1058 1059static inline unsigned int bio_zone_no(struct bio *bio) 1060{ 1061 return disk_zone_no(bio->bi_bdev->bd_disk, bio->bi_iter.bi_sector); 1062} 1063 1064static inline bool bio_straddles_zones(struct bio *bio) 1065{ 1066 return bio_sectors(bio) && 1067 bio_zone_no(bio) != 1068 disk_zone_no(bio->bi_bdev->bd_disk, bio_end_sector(bio) - 1); 1069} 1070 1071/* 1072 * Return how much within the boundary is left to be used for I/O at a given 1073 * offset. 1074 */ 1075static inline unsigned int blk_boundary_sectors_left(sector_t offset, 1076 unsigned int boundary_sectors) 1077{ 1078 if (unlikely(!is_power_of_2(boundary_sectors))) 1079 return boundary_sectors - sector_div(offset, boundary_sectors); 1080 return boundary_sectors - (offset & (boundary_sectors - 1)); 1081} 1082 1083/** 1084 * queue_limits_start_update - start an atomic update of queue limits 1085 * @q: queue to update 1086 * 1087 * This functions starts an atomic update of the queue limits. It takes a lock 1088 * to prevent other updates and returns a snapshot of the current limits that 1089 * the caller can modify. The caller must call queue_limits_commit_update() 1090 * to finish the update. 1091 * 1092 * Context: process context. 1093 */ 1094static inline struct queue_limits 1095queue_limits_start_update(struct request_queue *q) 1096{ 1097 mutex_lock(&q->limits_lock); 1098 return q->limits; 1099} 1100int queue_limits_commit_update_frozen(struct request_queue *q, 1101 struct queue_limits *lim); 1102int queue_limits_commit_update(struct request_queue *q, 1103 struct queue_limits *lim); 1104int queue_limits_set(struct request_queue *q, struct queue_limits *lim); 1105int blk_validate_limits(struct queue_limits *lim); 1106 1107/** 1108 * queue_limits_cancel_update - cancel an atomic update of queue limits 1109 * @q: queue to update 1110 * 1111 * This functions cancels an atomic update of the queue limits started by 1112 * queue_limits_start_update() and should be used when an error occurs after 1113 * starting update. 1114 */ 1115static inline void queue_limits_cancel_update(struct request_queue *q) 1116{ 1117 mutex_unlock(&q->limits_lock); 1118} 1119 1120/* 1121 * These helpers are for drivers that have sloppy feature negotiation and might 1122 * have to disable DISCARD, WRITE_ZEROES or SECURE_DISCARD from the I/O 1123 * completion handler when the device returned an indicator that the respective 1124 * feature is not actually supported. They are racy and the driver needs to 1125 * cope with that. Try to avoid this scheme if you can. 
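 *
 * Editor's illustrative sketch of that pattern (an addition, not part of the
 * original comment): a driver's completion path might do something like
 *
 *	if (error == BLK_STS_NOTSUPP)
 *		blk_queue_disable_write_zeroes(rq->q);
 *
 * where "error" and "rq" stand for the hypothetical status and request at
 * hand.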
1126 */ 1127static inline void blk_queue_disable_discard(struct request_queue *q) 1128{ 1129 q->limits.max_discard_sectors = 0; 1130} 1131 1132static inline void blk_queue_disable_secure_erase(struct request_queue *q) 1133{ 1134 q->limits.max_secure_erase_sectors = 0; 1135} 1136 1137static inline void blk_queue_disable_write_zeroes(struct request_queue *q) 1138{ 1139 q->limits.max_write_zeroes_sectors = 0; 1140 q->limits.max_wzeroes_unmap_sectors = 0; 1141} 1142 1143/* 1144 * Access functions for manipulating queue properties 1145 */ 1146extern void blk_set_queue_depth(struct request_queue *q, unsigned int depth); 1147extern void blk_set_stacking_limits(struct queue_limits *lim); 1148extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, 1149 sector_t offset); 1150void queue_limits_stack_bdev(struct queue_limits *t, struct block_device *bdev, 1151 sector_t offset, const char *pfx); 1152extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); 1153 1154struct blk_independent_access_ranges * 1155disk_alloc_independent_access_ranges(struct gendisk *disk, int nr_ia_ranges); 1156void disk_set_independent_access_ranges(struct gendisk *disk, 1157 struct blk_independent_access_ranges *iars); 1158 1159bool __must_check blk_get_queue(struct request_queue *); 1160extern void blk_put_queue(struct request_queue *); 1161 1162void blk_mark_disk_dead(struct gendisk *disk); 1163 1164struct rq_list { 1165 struct request *head; 1166 struct request *tail; 1167}; 1168 1169#ifdef CONFIG_BLOCK 1170/* 1171 * blk_plug permits building a queue of related requests by holding the I/O 1172 * fragments for a short period. This allows merging of sequential requests 1173 * into single larger request. As the requests are moved from a per-task list to 1174 * the device's request_queue in a batch, this results in improved scalability 1175 * as the lock contention for request_queue lock is reduced. 1176 * 1177 * It is ok not to disable preemption when adding the request to the plug list 1178 * or when attempting a merge. For details, please see schedule() where 1179 * blk_flush_plug() is called. 
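 *
 * Editor's illustrative sketch of typical usage (an addition, not part of the
 * original comment):
 *
 *	struct blk_plug plug;
 *
 *	blk_start_plug(&plug);
 *	... submit a batch of related bios, e.g. via submit_bio_noacct() ...
 *	blk_finish_plug(&plug);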
1180 */ 1181struct blk_plug { 1182 struct rq_list mq_list; /* blk-mq requests */ 1183 1184 /* if ios_left is > 1, we can batch tag/rq allocations */ 1185 struct rq_list cached_rqs; 1186 u64 cur_ktime; 1187 unsigned short nr_ios; 1188 1189 unsigned short rq_count; 1190 1191 bool multiple_queues; 1192 bool has_elevator; 1193 1194 struct list_head cb_list; /* md requires an unplug callback */ 1195}; 1196 1197struct blk_plug_cb; 1198typedef void (*blk_plug_cb_fn)(struct blk_plug_cb *, bool); 1199struct blk_plug_cb { 1200 struct list_head list; 1201 blk_plug_cb_fn callback; 1202 void *data; 1203}; 1204extern struct blk_plug_cb *blk_check_plugged(blk_plug_cb_fn unplug, 1205 void *data, int size); 1206extern void blk_start_plug(struct blk_plug *); 1207extern void blk_start_plug_nr_ios(struct blk_plug *, unsigned short); 1208extern void blk_finish_plug(struct blk_plug *); 1209 1210void __blk_flush_plug(struct blk_plug *plug, bool from_schedule); 1211static inline void blk_flush_plug(struct blk_plug *plug, bool async) 1212{ 1213 if (plug) 1214 __blk_flush_plug(plug, async); 1215} 1216 1217/* 1218 * tsk == current here 1219 */ 1220static inline void blk_plug_invalidate_ts(struct task_struct *tsk) 1221{ 1222 struct blk_plug *plug = tsk->plug; 1223 1224 if (plug) 1225 plug->cur_ktime = 0; 1226 current->flags &= ~PF_BLOCK_TS; 1227} 1228 1229int blkdev_issue_flush(struct block_device *bdev); 1230long nr_blockdev_pages(void); 1231#else /* CONFIG_BLOCK */ 1232struct blk_plug { 1233}; 1234 1235static inline void blk_start_plug_nr_ios(struct blk_plug *plug, 1236 unsigned short nr_ios) 1237{ 1238} 1239 1240static inline void blk_start_plug(struct blk_plug *plug) 1241{ 1242} 1243 1244static inline void blk_finish_plug(struct blk_plug *plug) 1245{ 1246} 1247 1248static inline void blk_flush_plug(struct blk_plug *plug, bool async) 1249{ 1250} 1251 1252static inline void blk_plug_invalidate_ts(struct task_struct *tsk) 1253{ 1254} 1255 1256static inline int blkdev_issue_flush(struct block_device *bdev) 1257{ 1258 return 0; 1259} 1260 1261static inline long nr_blockdev_pages(void) 1262{ 1263 return 0; 1264} 1265#endif /* CONFIG_BLOCK */ 1266 1267extern void blk_io_schedule(void); 1268 1269int blkdev_issue_discard(struct block_device *bdev, sector_t sector, 1270 sector_t nr_sects, gfp_t gfp_mask); 1271void __blkdev_issue_discard(struct block_device *bdev, sector_t sector, 1272 sector_t nr_sects, gfp_t gfp_mask, struct bio **biop); 1273int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector, 1274 sector_t nr_sects, gfp_t gfp); 1275 1276#define BLKDEV_ZERO_NOUNMAP (1 << 0) /* do not free blocks */ 1277#define BLKDEV_ZERO_NOFALLBACK (1 << 1) /* don't write explicit zeroes */ 1278#define BLKDEV_ZERO_KILLABLE (1 << 2) /* interruptible by fatal signals */ 1279 1280extern int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, 1281 sector_t nr_sects, gfp_t gfp_mask, struct bio **biop, 1282 unsigned flags); 1283extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, 1284 sector_t nr_sects, gfp_t gfp_mask, unsigned flags); 1285 1286static inline int sb_issue_discard(struct super_block *sb, sector_t block, 1287 sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags) 1288{ 1289 return blkdev_issue_discard(sb->s_bdev, 1290 block << (sb->s_blocksize_bits - 1291 SECTOR_SHIFT), 1292 nr_blocks << (sb->s_blocksize_bits - 1293 SECTOR_SHIFT), 1294 gfp_mask); 1295} 1296static inline int sb_issue_zeroout(struct super_block *sb, sector_t block, 1297 sector_t nr_blocks, gfp_t gfp_mask) 
1298{ 1299 return blkdev_issue_zeroout(sb->s_bdev, 1300 block << (sb->s_blocksize_bits - 1301 SECTOR_SHIFT), 1302 nr_blocks << (sb->s_blocksize_bits - 1303 SECTOR_SHIFT), 1304 gfp_mask, 0); 1305} 1306 1307static inline bool bdev_is_partition(struct block_device *bdev) 1308{ 1309 return bdev_partno(bdev) != 0; 1310} 1311 1312enum blk_default_limits { 1313 BLK_MAX_SEGMENTS = 128, 1314 BLK_SAFE_MAX_SECTORS = 255, 1315 BLK_MAX_SEGMENT_SIZE = 65536, 1316 BLK_SEG_BOUNDARY_MASK = 0xFFFFFFFFUL, 1317}; 1318 1319static inline struct queue_limits *bdev_limits(struct block_device *bdev) 1320{ 1321 return &bdev_get_queue(bdev)->limits; 1322} 1323 1324static inline unsigned long queue_segment_boundary(const struct request_queue *q) 1325{ 1326 return q->limits.seg_boundary_mask; 1327} 1328 1329static inline unsigned long queue_virt_boundary(const struct request_queue *q) 1330{ 1331 return q->limits.virt_boundary_mask; 1332} 1333 1334static inline unsigned int queue_max_sectors(const struct request_queue *q) 1335{ 1336 return q->limits.max_sectors; 1337} 1338 1339static inline unsigned int queue_max_bytes(struct request_queue *q) 1340{ 1341 return min_t(unsigned int, queue_max_sectors(q), INT_MAX >> 9) << 9; 1342} 1343 1344static inline unsigned int queue_max_hw_sectors(const struct request_queue *q) 1345{ 1346 return q->limits.max_hw_sectors; 1347} 1348 1349static inline unsigned short queue_max_segments(const struct request_queue *q) 1350{ 1351 return q->limits.max_segments; 1352} 1353 1354static inline unsigned short queue_max_discard_segments(const struct request_queue *q) 1355{ 1356 return q->limits.max_discard_segments; 1357} 1358 1359static inline unsigned int queue_max_segment_size(const struct request_queue *q) 1360{ 1361 return q->limits.max_segment_size; 1362} 1363 1364static inline bool queue_emulates_zone_append(struct request_queue *q) 1365{ 1366 return blk_queue_is_zoned(q) && !q->limits.max_hw_zone_append_sectors; 1367} 1368 1369static inline bool bdev_emulates_zone_append(struct block_device *bdev) 1370{ 1371 return queue_emulates_zone_append(bdev_get_queue(bdev)); 1372} 1373 1374static inline unsigned int 1375bdev_max_zone_append_sectors(struct block_device *bdev) 1376{ 1377 return bdev_limits(bdev)->max_zone_append_sectors; 1378} 1379 1380static inline unsigned int bdev_max_segments(struct block_device *bdev) 1381{ 1382 return queue_max_segments(bdev_get_queue(bdev)); 1383} 1384 1385static inline unsigned short bdev_max_write_streams(struct block_device *bdev) 1386{ 1387 if (bdev_is_partition(bdev)) 1388 return 0; 1389 return bdev_limits(bdev)->max_write_streams; 1390} 1391 1392static inline unsigned queue_logical_block_size(const struct request_queue *q) 1393{ 1394 return q->limits.logical_block_size; 1395} 1396 1397static inline unsigned int bdev_logical_block_size(struct block_device *bdev) 1398{ 1399 return queue_logical_block_size(bdev_get_queue(bdev)); 1400} 1401 1402static inline unsigned int queue_physical_block_size(const struct request_queue *q) 1403{ 1404 return q->limits.physical_block_size; 1405} 1406 1407static inline unsigned int bdev_physical_block_size(struct block_device *bdev) 1408{ 1409 return queue_physical_block_size(bdev_get_queue(bdev)); 1410} 1411 1412static inline unsigned int queue_io_min(const struct request_queue *q) 1413{ 1414 return q->limits.io_min; 1415} 1416 1417static inline unsigned int bdev_io_min(struct block_device *bdev) 1418{ 1419 return queue_io_min(bdev_get_queue(bdev)); 1420} 1421 1422static inline unsigned int queue_io_opt(const struct 
request_queue *q) 1423{ 1424 return q->limits.io_opt; 1425} 1426 1427static inline unsigned int bdev_io_opt(struct block_device *bdev) 1428{ 1429 return queue_io_opt(bdev_get_queue(bdev)); 1430} 1431 1432static inline unsigned int 1433queue_zone_write_granularity(const struct request_queue *q) 1434{ 1435 return q->limits.zone_write_granularity; 1436} 1437 1438static inline unsigned int 1439bdev_zone_write_granularity(struct block_device *bdev) 1440{ 1441 return queue_zone_write_granularity(bdev_get_queue(bdev)); 1442} 1443 1444int bdev_alignment_offset(struct block_device *bdev); 1445unsigned int bdev_discard_alignment(struct block_device *bdev); 1446 1447static inline unsigned int bdev_max_discard_sectors(struct block_device *bdev) 1448{ 1449 return bdev_limits(bdev)->max_discard_sectors; 1450} 1451 1452static inline unsigned int bdev_discard_granularity(struct block_device *bdev) 1453{ 1454 return bdev_limits(bdev)->discard_granularity; 1455} 1456 1457static inline unsigned int 1458bdev_max_secure_erase_sectors(struct block_device *bdev) 1459{ 1460 return bdev_limits(bdev)->max_secure_erase_sectors; 1461} 1462 1463static inline unsigned int bdev_write_zeroes_sectors(struct block_device *bdev) 1464{ 1465 return bdev_limits(bdev)->max_write_zeroes_sectors; 1466} 1467 1468static inline unsigned int 1469bdev_write_zeroes_unmap_sectors(struct block_device *bdev) 1470{ 1471 return bdev_limits(bdev)->max_wzeroes_unmap_sectors; 1472} 1473 1474static inline bool bdev_rot(struct block_device *bdev) 1475{ 1476 return blk_queue_rot(bdev_get_queue(bdev)); 1477} 1478 1479static inline bool bdev_synchronous(struct block_device *bdev) 1480{ 1481 return bdev->bd_disk->queue->limits.features & BLK_FEAT_SYNCHRONOUS; 1482} 1483 1484static inline bool bdev_has_integrity_csum(struct block_device *bdev) 1485{ 1486 struct queue_limits *lim = bdev_limits(bdev); 1487 1488 return IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) && 1489 lim->integrity.csum_type != BLK_INTEGRITY_CSUM_NONE; 1490} 1491 1492static inline bool bdev_stable_writes(struct block_device *bdev) 1493{ 1494 return bdev_has_integrity_csum(bdev) || 1495 (bdev_limits(bdev)->features & BLK_FEAT_STABLE_WRITES); 1496} 1497 1498static inline bool blk_queue_write_cache(struct request_queue *q) 1499{ 1500 return (q->limits.features & BLK_FEAT_WRITE_CACHE) && 1501 !(q->limits.flags & BLK_FLAG_WRITE_CACHE_DISABLED); 1502} 1503 1504static inline bool bdev_write_cache(struct block_device *bdev) 1505{ 1506 return blk_queue_write_cache(bdev_get_queue(bdev)); 1507} 1508 1509static inline bool bdev_fua(struct block_device *bdev) 1510{ 1511 return bdev_limits(bdev)->features & BLK_FEAT_FUA; 1512} 1513 1514static inline bool bdev_nowait(struct block_device *bdev) 1515{ 1516 return bdev->bd_disk->queue->limits.features & BLK_FEAT_NOWAIT; 1517} 1518 1519static inline bool bdev_is_zoned(struct block_device *bdev) 1520{ 1521 return blk_queue_is_zoned(bdev_get_queue(bdev)); 1522} 1523 1524static inline unsigned int bdev_zone_no(struct block_device *bdev, sector_t sec) 1525{ 1526 return disk_zone_no(bdev->bd_disk, sec); 1527} 1528 1529static inline sector_t bdev_zone_sectors(struct block_device *bdev) 1530{ 1531 struct request_queue *q = bdev_get_queue(bdev); 1532 1533 if (!blk_queue_is_zoned(q)) 1534 return 0; 1535 return q->limits.chunk_sectors; 1536} 1537 1538static inline sector_t bdev_zone_start(struct block_device *bdev, 1539 sector_t sector) 1540{ 1541 return sector & ~(bdev_zone_sectors(bdev) - 1); 1542} 1543 1544static inline sector_t bdev_offset_from_zone_start(struct 
block_device *bdev, 1545 sector_t sector) 1546{ 1547 return sector & (bdev_zone_sectors(bdev) - 1); 1548} 1549 1550static inline sector_t bio_offset_from_zone_start(struct bio *bio) 1551{ 1552 return bdev_offset_from_zone_start(bio->bi_bdev, 1553 bio->bi_iter.bi_sector); 1554} 1555 1556static inline bool bdev_is_zone_start(struct block_device *bdev, 1557 sector_t sector) 1558{ 1559 return bdev_offset_from_zone_start(bdev, sector) == 0; 1560} 1561 1562/* Check whether @sector is a multiple of the zone size. */ 1563static inline bool bdev_is_zone_aligned(struct block_device *bdev, 1564 sector_t sector) 1565{ 1566 return bdev_is_zone_start(bdev, sector); 1567} 1568 1569int blk_zone_issue_zeroout(struct block_device *bdev, sector_t sector, 1570 sector_t nr_sects, gfp_t gfp_mask); 1571 1572static inline unsigned int queue_dma_alignment(const struct request_queue *q) 1573{ 1574 return q->limits.dma_alignment; 1575} 1576 1577static inline unsigned int 1578queue_atomic_write_unit_max_bytes(const struct request_queue *q) 1579{ 1580 return q->limits.atomic_write_unit_max; 1581} 1582 1583static inline unsigned int 1584queue_atomic_write_unit_min_bytes(const struct request_queue *q) 1585{ 1586 return q->limits.atomic_write_unit_min; 1587} 1588 1589static inline unsigned int 1590queue_atomic_write_boundary_bytes(const struct request_queue *q) 1591{ 1592 return q->limits.atomic_write_boundary_sectors << SECTOR_SHIFT; 1593} 1594 1595static inline unsigned int 1596queue_atomic_write_max_bytes(const struct request_queue *q) 1597{ 1598 return q->limits.atomic_write_max_sectors << SECTOR_SHIFT; 1599} 1600 1601static inline unsigned int bdev_dma_alignment(struct block_device *bdev) 1602{ 1603 return queue_dma_alignment(bdev_get_queue(bdev)); 1604} 1605 1606static inline unsigned int 1607blk_lim_dma_alignment_and_pad(struct queue_limits *lim) 1608{ 1609 return lim->dma_alignment | lim->dma_pad_mask; 1610} 1611 1612static inline bool blk_rq_aligned(struct request_queue *q, unsigned long addr, 1613 unsigned int len) 1614{ 1615 unsigned int alignment = blk_lim_dma_alignment_and_pad(&q->limits); 1616 1617 return !(addr & alignment) && !(len & alignment); 1618} 1619 1620/* assumes size > 256 */ 1621static inline unsigned int blksize_bits(unsigned int size) 1622{ 1623 return order_base_2(size >> SECTOR_SHIFT) + SECTOR_SHIFT; 1624} 1625 1626int kblockd_schedule_work(struct work_struct *work); 1627int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay); 1628 1629#define MODULE_ALIAS_BLOCKDEV(major,minor) \ 1630 MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor)) 1631#define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \ 1632 MODULE_ALIAS("block-major-" __stringify(major) "-*") 1633 1634#ifdef CONFIG_BLK_INLINE_ENCRYPTION 1635 1636bool blk_crypto_register(struct blk_crypto_profile *profile, 1637 struct request_queue *q); 1638 1639#else /* CONFIG_BLK_INLINE_ENCRYPTION */ 1640 1641static inline bool blk_crypto_register(struct blk_crypto_profile *profile, 1642 struct request_queue *q) 1643{ 1644 return true; 1645} 1646 1647#endif /* CONFIG_BLK_INLINE_ENCRYPTION */ 1648 1649enum blk_unique_id { 1650 /* these match the Designator Types specified in SPC */ 1651 BLK_UID_T10 = 1, 1652 BLK_UID_EUI64 = 2, 1653 BLK_UID_NAA = 3, 1654}; 1655 1656struct block_device_operations { 1657 void (*submit_bio)(struct bio *bio); 1658 int (*poll_bio)(struct bio *bio, struct io_comp_batch *iob, 1659 unsigned int flags); 1660 int (*open)(struct gendisk *disk, blk_mode_t mode); 1661 void 
(*release)(struct gendisk *disk); 1662 int (*ioctl)(struct block_device *bdev, blk_mode_t mode, 1663 unsigned cmd, unsigned long arg); 1664 int (*compat_ioctl)(struct block_device *bdev, blk_mode_t mode, 1665 unsigned cmd, unsigned long arg); 1666 unsigned int (*check_events) (struct gendisk *disk, 1667 unsigned int clearing); 1668 void (*unlock_native_capacity) (struct gendisk *); 1669 int (*getgeo)(struct gendisk *, struct hd_geometry *); 1670 int (*set_read_only)(struct block_device *bdev, bool ro); 1671 void (*free_disk)(struct gendisk *disk); 1672 /* this callback is with swap_lock and sometimes page table lock held */ 1673 void (*swap_slot_free_notify) (struct block_device *, unsigned long); 1674 int (*report_zones)(struct gendisk *, sector_t sector, 1675 unsigned int nr_zones, 1676 struct blk_report_zones_args *args); 1677 char *(*devnode)(struct gendisk *disk, umode_t *mode); 1678 /* returns the length of the identifier or a negative errno: */ 1679 int (*get_unique_id)(struct gendisk *disk, u8 id[16], 1680 enum blk_unique_id id_type); 1681 struct module *owner; 1682 const struct pr_ops *pr_ops; 1683 1684 /* 1685 * Special callback for probing GPT entry at a given sector. 1686 * Needed by Android devices, used by GPT scanner and MMC blk 1687 * driver. 1688 */ 1689 int (*alternative_gpt_sector)(struct gendisk *disk, sector_t *sector); 1690}; 1691 1692#ifdef CONFIG_COMPAT 1693extern int blkdev_compat_ptr_ioctl(struct block_device *, blk_mode_t, 1694 unsigned int, unsigned long); 1695#else 1696#define blkdev_compat_ptr_ioctl NULL 1697#endif 1698 1699static inline void blk_wake_io_task(struct task_struct *waiter) 1700{ 1701 /* 1702 * If we're polling, the task itself is doing the completions. For 1703 * that case, we don't need to signal a wakeup, it's enough to just 1704 * mark us as RUNNING. 1705 */ 1706 if (waiter == current) 1707 __set_current_state(TASK_RUNNING); 1708 else 1709 wake_up_process(waiter); 1710} 1711 1712unsigned long bdev_start_io_acct(struct block_device *bdev, enum req_op op, 1713 unsigned long start_time); 1714void bdev_end_io_acct(struct block_device *bdev, enum req_op op, 1715 unsigned int sectors, unsigned long start_time); 1716 1717unsigned long bio_start_io_acct(struct bio *bio); 1718void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time, 1719 struct block_device *orig_bdev); 1720 1721/** 1722 * bio_end_io_acct - end I/O accounting for bio based drivers 1723 * @bio: bio to end account for 1724 * @start_time: start time returned by bio_start_io_acct() 1725 */ 1726static inline void bio_end_io_acct(struct bio *bio, unsigned long start_time) 1727{ 1728 return bio_end_io_acct_remapped(bio, start_time, bio->bi_bdev); 1729} 1730 1731int bdev_validate_blocksize(struct block_device *bdev, int block_size); 1732int set_blocksize(struct file *file, int size); 1733 1734int lookup_bdev(const char *pathname, dev_t *dev); 1735 1736void blkdev_show(struct seq_file *seqf, off_t offset); 1737 1738#define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */ 1739#define BDEVT_SIZE 10 /* Largest string for MAJ:MIN for blkdev */ 1740#ifdef CONFIG_BLOCK 1741#define BLKDEV_MAJOR_MAX 512 1742#else 1743#define BLKDEV_MAJOR_MAX 0 1744#endif 1745 1746struct blk_holder_ops { 1747 void (*mark_dead)(struct block_device *bdev, bool surprise); 1748 1749 /* 1750 * Sync the file system mounted on the block device. 1751 */ 1752 void (*sync)(struct block_device *bdev); 1753 1754 /* 1755 * Freeze the file system mounted on the block device. 
1756 */ 1757 int (*freeze)(struct block_device *bdev); 1758 1759 /* 1760 * Thaw the file system mounted on the block device. 1761 */ 1762 int (*thaw)(struct block_device *bdev); 1763}; 1764 1765/* 1766 * For filesystems using @fs_holder_ops, the @holder argument passed to 1767 * helpers used to open and claim block devices via 1768 * bd_prepare_to_claim() must point to a superblock. 1769 */ 1770extern const struct blk_holder_ops fs_holder_ops; 1771 1772/* 1773 * Return the correct open flags for blkdev_get_by_* for super block flags 1774 * as stored in sb->s_flags. 1775 */ 1776#define sb_open_mode(flags) \ 1777 (BLK_OPEN_READ | BLK_OPEN_RESTRICT_WRITES | \ 1778 (((flags) & SB_RDONLY) ? 0 : BLK_OPEN_WRITE)) 1779 1780struct file *bdev_file_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, 1781 const struct blk_holder_ops *hops); 1782struct file *bdev_file_open_by_path(const char *path, blk_mode_t mode, 1783 void *holder, const struct blk_holder_ops *hops); 1784int bd_prepare_to_claim(struct block_device *bdev, void *holder, 1785 const struct blk_holder_ops *hops); 1786void bd_abort_claiming(struct block_device *bdev, void *holder); 1787 1788struct block_device *I_BDEV(struct inode *inode); 1789struct block_device *file_bdev(struct file *bdev_file); 1790bool disk_live(struct gendisk *disk); 1791unsigned int block_size(struct block_device *bdev); 1792 1793#ifdef CONFIG_BLOCK 1794void invalidate_bdev(struct block_device *bdev); 1795int sync_blockdev(struct block_device *bdev); 1796int sync_blockdev_range(struct block_device *bdev, loff_t lstart, loff_t lend); 1797int sync_blockdev_nowait(struct block_device *bdev); 1798void sync_bdevs(bool wait); 1799void bdev_statx(const struct path *path, struct kstat *stat, u32 request_mask); 1800void printk_all_partitions(void); 1801int __init early_lookup_bdev(const char *pathname, dev_t *dev); 1802#else 1803static inline void invalidate_bdev(struct block_device *bdev) 1804{ 1805} 1806static inline int sync_blockdev(struct block_device *bdev) 1807{ 1808 return 0; 1809} 1810static inline int sync_blockdev_nowait(struct block_device *bdev) 1811{ 1812 return 0; 1813} 1814static inline void sync_bdevs(bool wait) 1815{ 1816} 1817static inline void bdev_statx(const struct path *path, struct kstat *stat, 1818 u32 request_mask) 1819{ 1820} 1821static inline void printk_all_partitions(void) 1822{ 1823} 1824static inline int early_lookup_bdev(const char *pathname, dev_t *dev) 1825{ 1826 return -EINVAL; 1827} 1828#endif /* CONFIG_BLOCK */ 1829 1830int bdev_freeze(struct block_device *bdev); 1831int bdev_thaw(struct block_device *bdev); 1832void bdev_fput(struct file *bdev_file); 1833 1834struct io_comp_batch { 1835 struct rq_list req_list; 1836 bool need_ts; 1837 void (*complete)(struct io_comp_batch *); 1838 void *poll_ctx; 1839}; 1840 1841static inline bool blk_atomic_write_start_sect_aligned(sector_t sector, 1842 struct queue_limits *limits) 1843{ 1844 unsigned int alignment = max(limits->atomic_write_hw_unit_min, 1845 limits->atomic_write_hw_boundary); 1846 1847 return IS_ALIGNED(sector, alignment >> SECTOR_SHIFT); 1848} 1849 1850static inline bool bdev_can_atomic_write(struct block_device *bdev) 1851{ 1852 struct request_queue *bd_queue = bdev->bd_queue; 1853 struct queue_limits *limits = &bd_queue->limits; 1854 1855 if (!limits->atomic_write_unit_min) 1856 return false; 1857 1858 if (bdev_is_partition(bdev)) 1859 return blk_atomic_write_start_sect_aligned(bdev->bd_start_sect, 1860 limits); 1861 1862 return true; 1863} 1864 1865static inline unsigned int 
1866bdev_atomic_write_unit_min_bytes(struct block_device *bdev) 1867{ 1868 if (!bdev_can_atomic_write(bdev)) 1869 return 0; 1870 return queue_atomic_write_unit_min_bytes(bdev_get_queue(bdev)); 1871} 1872 1873static inline unsigned int 1874bdev_atomic_write_unit_max_bytes(struct block_device *bdev) 1875{ 1876 if (!bdev_can_atomic_write(bdev)) 1877 return 0; 1878 return queue_atomic_write_unit_max_bytes(bdev_get_queue(bdev)); 1879} 1880 1881static inline int bio_split_rw_at(struct bio *bio, 1882 const struct queue_limits *lim, 1883 unsigned *segs, unsigned max_bytes) 1884{ 1885 return bio_split_io_at(bio, lim, segs, max_bytes, lim->dma_alignment); 1886} 1887 1888/* 1889 * Maximum contiguous integrity buffer allocation. 1890 */ 1891#define BLK_INTEGRITY_MAX_SIZE SZ_2M 1892 1893/* 1894 * Maximum size of I/O that needs a block layer integrity buffer. Limited 1895 * by the number of intervals for which we can fit the integrity buffer into 1896 * the buffer size. Because the buffer is a single segment it is also limited 1897 * by the maximum segment size. 1898 */ 1899static inline unsigned int max_integrity_io_size(struct queue_limits *lim) 1900{ 1901 return min_t(unsigned int, lim->max_segment_size, 1902 (BLK_INTEGRITY_MAX_SIZE / lim->integrity.metadata_size) << 1903 lim->integrity.interval_exp); 1904} 1905 1906#define DEFINE_IO_COMP_BATCH(name) struct io_comp_batch name = { } 1907 1908#endif /* _LINUX_BLKDEV_H */
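
/*
 * Editor's illustrative sketches appended after the header (not part of
 * blkdev.h). They show, under stated assumptions, how two of the interfaces
 * documented above are normally used from driver code; the example_* names
 * and the concrete values are hypothetical.
 *
 * 1) The atomic queue-limits update described at queue_limits_start_update():
 *    take a snapshot of q->limits, modify it, then commit the result (which
 *    validates the limits and releases q->limits_lock).
 */
static int example_shrink_max_sectors(struct request_queue *q)
{
	struct queue_limits lim;

	lim = queue_limits_start_update(q);	/* locks q->limits_lock */
	lim.max_hw_sectors = 1024;		/* modify the private snapshot */
	return queue_limits_commit_update(q, &lim); /* validate, apply, unlock */
}

/*
 * 2) Zeroing a range of a block device with blkdev_issue_zeroout(). The
 *    BLKDEV_ZERO_NOFALLBACK flag asks the helper to fail rather than fall
 *    back to writing explicit zeroes when the device cannot offload the
 *    operation; the sector range here is arbitrary.
 */
static int example_zero_range(struct block_device *bdev)
{
	return blkdev_issue_zeroout(bdev, 0, 128, GFP_KERNEL,
				    BLKDEV_ZERO_NOFALLBACK);
}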