Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

blk-integrity: support arbitrary buffer alignment

A bio segment may have partial interval block data with the rest
continuing into the next segments because direct-io data payloads only
need to align in memory to the device's DMA limits.

At the same time, the protection information may also be split in
multiple segments. The most likely way that may happen is if two
requests merge, or if we're directly using the io_uring user metadata.
The generate/verify helpers, however, only ever accessed the first bip_vec.

Further, the protection fields may be unaligned relative to the user space
buffer, or there may be odd additional opaque bytes in front of or behind
the protection information within the metadata region.

Change up the iteration to allow spanning multiple segments. This patch
is mostly a re-write of the protection information handling to allow any
arbitrary alignments, so it's probably easier to review the end result
rather than the diff.

Many controllers are not able to handle interval data composed of
multiple segments when PI is used, so this patch introduces a new
integrity limit that a low level driver can set to notify that it is
capable; it defaults to false. The nvme driver is the first one to enable it
in this patch. Everyone else will force DMA alignment to the logical
block size as before to ensure interval data is always aligned within a
single segment.

Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
Link: https://patch.msgid.link/20260313144701.1221652-2-kbusch@meta.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>

authored by

Keith Busch and committed by
Jens Axboe
203247c5 3dbaacf6

+484 -384
+8 -4
block/blk-settings.c
··· 189 189 } 190 190 191 191 /* 192 - * The PI generation / validation helpers do not expect intervals to 193 - * straddle multiple bio_vecs. Enforce alignment so that those are 192 + * Some IO controllers can not handle data intervals straddling 193 + * multiple bio_vecs. For those, enforce alignment so that those are 194 194 * never generated, and that each buffer is aligned as expected. 195 195 */ 196 - if (bi->csum_type) { 196 + if (!(bi->flags & BLK_SPLIT_INTERVAL_CAPABLE) && bi->csum_type) { 197 197 lim->dma_alignment = max(lim->dma_alignment, 198 198 (1U << bi->interval_exp) - 1); 199 199 } ··· 992 992 if ((ti->flags & BLK_INTEGRITY_REF_TAG) != 993 993 (bi->flags & BLK_INTEGRITY_REF_TAG)) 994 994 goto incompatible; 995 + if ((ti->flags & BLK_SPLIT_INTERVAL_CAPABLE) && 996 + !(bi->flags & BLK_SPLIT_INTERVAL_CAPABLE)) 997 + ti->flags &= ~BLK_SPLIT_INTERVAL_CAPABLE; 995 998 } else { 996 999 ti->flags = BLK_INTEGRITY_STACKED; 997 1000 ti->flags |= (bi->flags & BLK_INTEGRITY_DEVICE_CAPABLE) | 998 - (bi->flags & BLK_INTEGRITY_REF_TAG); 1001 + (bi->flags & BLK_INTEGRITY_REF_TAG) | 1002 + (bi->flags & BLK_SPLIT_INTERVAL_CAPABLE); 999 1003 ti->csum_type = bi->csum_type; 1000 1004 ti->pi_tuple_size = bi->pi_tuple_size; 1001 1005 ti->metadata_size = bi->metadata_size;
+474 -380
block/t10-pi.c
··· 12 12 #include <linux/unaligned.h> 13 13 #include "blk.h" 14 14 15 - struct blk_integrity_iter { 16 - void *prot_buf; 17 - void *data_buf; 18 - sector_t seed; 19 - unsigned int data_size; 20 - unsigned short interval; 21 - const char *disk_name; 15 + #define APP_TAG_ESCAPE 0xffff 16 + #define REF_TAG_ESCAPE 0xffffffff 17 + 18 + /* 19 + * This union is used for onstack allocations when the pi field is split across 20 + * segments. blk_validate_integrity_limits() guarantees pi_tuple_size matches 21 + * the sizeof one of these two types. 22 + */ 23 + union pi_tuple { 24 + struct crc64_pi_tuple crc64_pi; 25 + struct t10_pi_tuple t10_pi; 22 26 }; 23 27 24 - static __be16 t10_pi_csum(__be16 csum, void *data, unsigned int len, 25 - unsigned char csum_type) 28 + struct blk_integrity_iter { 29 + struct bio *bio; 30 + struct bio_integrity_payload *bip; 31 + struct blk_integrity *bi; 32 + struct bvec_iter data_iter; 33 + struct bvec_iter prot_iter; 34 + unsigned int interval_remaining; 35 + u64 seed; 36 + u64 csum; 37 + }; 38 + 39 + static void blk_calculate_guard(struct blk_integrity_iter *iter, void *data, 40 + unsigned int len) 26 41 { 27 - if (csum_type == BLK_INTEGRITY_CSUM_IP) 28 - return (__force __be16)ip_compute_csum(data, len); 29 - return cpu_to_be16(crc_t10dif_update(be16_to_cpu(csum), data, len)); 42 + switch (iter->bi->csum_type) { 43 + case BLK_INTEGRITY_CSUM_CRC64: 44 + iter->csum = crc64_nvme(iter->csum, data, len); 45 + break; 46 + case BLK_INTEGRITY_CSUM_CRC: 47 + iter->csum = crc_t10dif_update(iter->csum, data, len); 48 + break; 49 + case BLK_INTEGRITY_CSUM_IP: 50 + iter->csum = (__force u32)csum_partial(data, len, 51 + (__force __wsum)iter->csum); 52 + break; 53 + default: 54 + WARN_ON_ONCE(1); 55 + iter->csum = U64_MAX; 56 + break; 57 + } 58 + } 59 + 60 + static void blk_integrity_csum_finish(struct blk_integrity_iter *iter) 61 + { 62 + switch (iter->bi->csum_type) { 63 + case BLK_INTEGRITY_CSUM_IP: 64 + iter->csum = (__force u16)csum_fold((__force 
__wsum)iter->csum); 65 + break; 66 + default: 67 + break; 68 + } 30 69 } 31 70 32 71 /* 33 - * Type 1 and Type 2 protection use the same format: 16 bit guard tag, 34 - * 16 bit app tag, 32 bit reference tag. Type 3 does not define the ref 35 - * tag. 72 + * Update the csum for formats that have metadata padding in front of the data 73 + * integrity field 36 74 */ 37 - static void t10_pi_generate(struct blk_integrity_iter *iter, 38 - struct blk_integrity *bi) 75 + static void blk_integrity_csum_offset(struct blk_integrity_iter *iter) 39 76 { 40 - u8 offset = bi->pi_offset; 41 - unsigned int i; 77 + unsigned int offset = iter->bi->pi_offset; 78 + struct bio_vec *bvec = iter->bip->bip_vec; 42 79 43 - for (i = 0 ; i < iter->data_size ; i += iter->interval) { 44 - struct t10_pi_tuple *pi = iter->prot_buf + offset; 80 + while (offset > 0) { 81 + struct bio_vec pbv = bvec_iter_bvec(bvec, iter->prot_iter); 82 + unsigned int len = min(pbv.bv_len, offset); 83 + void *prot_buf = bvec_kmap_local(&pbv); 45 84 46 - pi->guard_tag = t10_pi_csum(0, iter->data_buf, iter->interval, 47 - bi->csum_type); 48 - if (offset) 49 - pi->guard_tag = t10_pi_csum(pi->guard_tag, 50 - iter->prot_buf, offset, bi->csum_type); 51 - pi->app_tag = 0; 85 + blk_calculate_guard(iter, prot_buf, len); 86 + kunmap_local(prot_buf); 87 + offset -= len; 88 + bvec_iter_advance_single(bvec, &iter->prot_iter, len); 89 + } 90 + blk_integrity_csum_finish(iter); 91 + } 52 92 53 - if (bi->flags & BLK_INTEGRITY_REF_TAG) 54 - pi->ref_tag = cpu_to_be32(lower_32_bits(iter->seed)); 55 - else 56 - pi->ref_tag = 0; 93 + static void blk_integrity_copy_from_tuple(struct bio_integrity_payload *bip, 94 + struct bvec_iter *iter, void *tuple, 95 + unsigned int tuple_size) 96 + { 97 + while (tuple_size) { 98 + struct bio_vec pbv = bvec_iter_bvec(bip->bip_vec, *iter); 99 + unsigned int len = min(tuple_size, pbv.bv_len); 100 + void *prot_buf = bvec_kmap_local(&pbv); 57 101 58 - iter->data_buf += iter->interval; 59 - iter->prot_buf += 
bi->metadata_size; 60 - iter->seed++; 102 + memcpy(prot_buf, tuple, len); 103 + kunmap_local(prot_buf); 104 + bvec_iter_advance_single(bip->bip_vec, iter, len); 105 + tuple_size -= len; 106 + tuple += len; 61 107 } 62 108 } 63 109 64 - static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter, 65 - struct blk_integrity *bi) 110 + static void blk_integrity_copy_to_tuple(struct bio_integrity_payload *bip, 111 + struct bvec_iter *iter, void *tuple, 112 + unsigned int tuple_size) 66 113 { 67 - u8 offset = bi->pi_offset; 68 - unsigned int i; 114 + while (tuple_size) { 115 + struct bio_vec pbv = bvec_iter_bvec(bip->bip_vec, *iter); 116 + unsigned int len = min(tuple_size, pbv.bv_len); 117 + void *prot_buf = bvec_kmap_local(&pbv); 69 118 70 - for (i = 0 ; i < iter->data_size ; i += iter->interval) { 71 - struct t10_pi_tuple *pi = iter->prot_buf + offset; 72 - __be16 csum; 73 - 74 - if (bi->flags & BLK_INTEGRITY_REF_TAG) { 75 - if (pi->app_tag == T10_PI_APP_ESCAPE) 76 - goto next; 77 - 78 - if (be32_to_cpu(pi->ref_tag) != 79 - lower_32_bits(iter->seed)) { 80 - pr_err("%s: ref tag error at location %llu " \ 81 - "(rcvd %u)\n", iter->disk_name, 82 - (unsigned long long) 83 - iter->seed, be32_to_cpu(pi->ref_tag)); 84 - return BLK_STS_PROTECTION; 85 - } 86 - } else { 87 - if (pi->app_tag == T10_PI_APP_ESCAPE && 88 - pi->ref_tag == T10_PI_REF_ESCAPE) 89 - goto next; 90 - } 91 - 92 - csum = t10_pi_csum(0, iter->data_buf, iter->interval, 93 - bi->csum_type); 94 - if (offset) 95 - csum = t10_pi_csum(csum, iter->prot_buf, offset, 96 - bi->csum_type); 97 - 98 - if (pi->guard_tag != csum) { 99 - pr_err("%s: guard tag error at sector %llu " \ 100 - "(rcvd %04x, want %04x)\n", iter->disk_name, 101 - (unsigned long long)iter->seed, 102 - be16_to_cpu(pi->guard_tag), be16_to_cpu(csum)); 103 - return BLK_STS_PROTECTION; 104 - } 105 - 106 - next: 107 - iter->data_buf += iter->interval; 108 - iter->prot_buf += bi->metadata_size; 109 - iter->seed++; 110 - } 111 - 112 - return 
BLK_STS_OK; 113 - } 114 - 115 - /** 116 - * t10_pi_type1_prepare - prepare PI prior submitting request to device 117 - * @rq: request with PI that should be prepared 118 - * 119 - * For Type 1/Type 2, the virtual start sector is the one that was 120 - * originally submitted by the block layer for the ref_tag usage. Due to 121 - * partitioning, MD/DM cloning, etc. the actual physical start sector is 122 - * likely to be different. Remap protection information to match the 123 - * physical LBA. 124 - */ 125 - static void t10_pi_type1_prepare(struct request *rq) 126 - { 127 - struct blk_integrity *bi = &rq->q->limits.integrity; 128 - const int tuple_sz = bi->metadata_size; 129 - u32 ref_tag = t10_pi_ref_tag(rq); 130 - u8 offset = bi->pi_offset; 131 - struct bio *bio; 132 - 133 - __rq_for_each_bio(bio, rq) { 134 - struct bio_integrity_payload *bip = bio_integrity(bio); 135 - u32 virt = bip_get_seed(bip) & 0xffffffff; 136 - struct bio_vec iv; 137 - struct bvec_iter iter; 138 - 139 - /* Already remapped? */ 140 - if (bip->bip_flags & BIP_MAPPED_INTEGRITY) 141 - break; 142 - 143 - bip_for_each_vec(iv, bip, iter) { 144 - unsigned int j; 145 - void *p; 146 - 147 - p = bvec_kmap_local(&iv); 148 - for (j = 0; j < iv.bv_len; j += tuple_sz) { 149 - struct t10_pi_tuple *pi = p + offset; 150 - 151 - if (be32_to_cpu(pi->ref_tag) == virt) 152 - pi->ref_tag = cpu_to_be32(ref_tag); 153 - virt++; 154 - ref_tag++; 155 - p += tuple_sz; 156 - } 157 - kunmap_local(p); 158 - } 159 - 160 - bip->bip_flags |= BIP_MAPPED_INTEGRITY; 161 - } 162 - } 163 - 164 - /** 165 - * t10_pi_type1_complete - prepare PI prior returning request to the blk layer 166 - * @rq: request with PI that should be prepared 167 - * @nr_bytes: total bytes to prepare 168 - * 169 - * For Type 1/Type 2, the virtual start sector is the one that was 170 - * originally submitted by the block layer for the ref_tag usage. Due to 171 - * partitioning, MD/DM cloning, etc. 
the actual physical start sector is 172 - * likely to be different. Since the physical start sector was submitted 173 - * to the device, we should remap it back to virtual values expected by the 174 - * block layer. 175 - */ 176 - static void t10_pi_type1_complete(struct request *rq, unsigned int nr_bytes) 177 - { 178 - struct blk_integrity *bi = &rq->q->limits.integrity; 179 - unsigned intervals = nr_bytes >> bi->interval_exp; 180 - const int tuple_sz = bi->metadata_size; 181 - u32 ref_tag = t10_pi_ref_tag(rq); 182 - u8 offset = bi->pi_offset; 183 - struct bio *bio; 184 - 185 - __rq_for_each_bio(bio, rq) { 186 - struct bio_integrity_payload *bip = bio_integrity(bio); 187 - u32 virt = bip_get_seed(bip) & 0xffffffff; 188 - struct bio_vec iv; 189 - struct bvec_iter iter; 190 - 191 - bip_for_each_vec(iv, bip, iter) { 192 - unsigned int j; 193 - void *p; 194 - 195 - p = bvec_kmap_local(&iv); 196 - for (j = 0; j < iv.bv_len && intervals; j += tuple_sz) { 197 - struct t10_pi_tuple *pi = p + offset; 198 - 199 - if (be32_to_cpu(pi->ref_tag) == ref_tag) 200 - pi->ref_tag = cpu_to_be32(virt); 201 - virt++; 202 - ref_tag++; 203 - intervals--; 204 - p += tuple_sz; 205 - } 206 - kunmap_local(p); 207 - } 208 - } 209 - } 210 - 211 - static __be64 ext_pi_crc64(u64 crc, void *data, unsigned int len) 212 - { 213 - return cpu_to_be64(crc64_nvme(crc, data, len)); 214 - } 215 - 216 - static void ext_pi_crc64_generate(struct blk_integrity_iter *iter, 217 - struct blk_integrity *bi) 218 - { 219 - u8 offset = bi->pi_offset; 220 - unsigned int i; 221 - 222 - for (i = 0 ; i < iter->data_size ; i += iter->interval) { 223 - struct crc64_pi_tuple *pi = iter->prot_buf + offset; 224 - 225 - pi->guard_tag = ext_pi_crc64(0, iter->data_buf, iter->interval); 226 - if (offset) 227 - pi->guard_tag = ext_pi_crc64(be64_to_cpu(pi->guard_tag), 228 - iter->prot_buf, offset); 229 - pi->app_tag = 0; 230 - 231 - if (bi->flags & BLK_INTEGRITY_REF_TAG) 232 - put_unaligned_be48(iter->seed, pi->ref_tag); 233 - 
else 234 - put_unaligned_be48(0ULL, pi->ref_tag); 235 - 236 - iter->data_buf += iter->interval; 237 - iter->prot_buf += bi->metadata_size; 238 - iter->seed++; 119 + memcpy(tuple, prot_buf, len); 120 + kunmap_local(prot_buf); 121 + bvec_iter_advance_single(bip->bip_vec, iter, len); 122 + tuple_size -= len; 123 + tuple += len; 239 124 } 240 125 } 241 126 ··· 131 246 return memcmp(ref_tag, ref_escape, sizeof(ref_escape)) == 0; 132 247 } 133 248 134 - static blk_status_t ext_pi_crc64_verify(struct blk_integrity_iter *iter, 135 - struct blk_integrity *bi) 249 + static blk_status_t blk_verify_ext_pi(struct blk_integrity_iter *iter, 250 + struct crc64_pi_tuple *pi) 136 251 { 137 - u8 offset = bi->pi_offset; 138 - unsigned int i; 252 + u64 seed = lower_48_bits(iter->seed); 253 + u64 guard = get_unaligned_be64(&pi->guard_tag); 254 + u64 ref = get_unaligned_be48(pi->ref_tag); 255 + u16 app = get_unaligned_be16(&pi->app_tag); 139 256 140 - for (i = 0; i < iter->data_size; i += iter->interval) { 141 - struct crc64_pi_tuple *pi = iter->prot_buf + offset; 142 - u64 ref, seed; 143 - __be64 csum; 144 - 145 - if (bi->flags & BLK_INTEGRITY_REF_TAG) { 146 - if (pi->app_tag == T10_PI_APP_ESCAPE) 147 - goto next; 148 - 149 - ref = get_unaligned_be48(pi->ref_tag); 150 - seed = lower_48_bits(iter->seed); 151 - if (ref != seed) { 152 - pr_err("%s: ref tag error at location %llu (rcvd %llu)\n", 153 - iter->disk_name, seed, ref); 154 - return BLK_STS_PROTECTION; 155 - } 156 - } else { 157 - if (pi->app_tag == T10_PI_APP_ESCAPE && 158 - ext_pi_ref_escape(pi->ref_tag)) 159 - goto next; 160 - } 161 - 162 - csum = ext_pi_crc64(0, iter->data_buf, iter->interval); 163 - if (offset) 164 - csum = ext_pi_crc64(be64_to_cpu(csum), iter->prot_buf, 165 - offset); 166 - 167 - if (pi->guard_tag != csum) { 168 - pr_err("%s: guard tag error at sector %llu " \ 169 - "(rcvd %016llx, want %016llx)\n", 170 - iter->disk_name, (unsigned long long)iter->seed, 171 - be64_to_cpu(pi->guard_tag), be64_to_cpu(csum)); 
257 + if (iter->bi->flags & BLK_INTEGRITY_REF_TAG) { 258 + if (app == APP_TAG_ESCAPE) 259 + return BLK_STS_OK; 260 + if (ref != seed) { 261 + pr_err("%s: ref tag error at location %llu (rcvd %llu)\n", 262 + iter->bio->bi_bdev->bd_disk->disk_name, seed, 263 + ref); 172 264 return BLK_STS_PROTECTION; 173 265 } 266 + } else if (app == APP_TAG_ESCAPE && ext_pi_ref_escape(pi->ref_tag)) { 267 + return BLK_STS_OK; 268 + } 174 269 175 - next: 176 - iter->data_buf += iter->interval; 177 - iter->prot_buf += bi->metadata_size; 178 - iter->seed++; 270 + if (guard != iter->csum) { 271 + pr_err("%s: guard tag error at sector %llu (rcvd %016llx, want %016llx)\n", 272 + iter->bio->bi_bdev->bd_disk->disk_name, iter->seed, 273 + guard, iter->csum); 274 + return BLK_STS_PROTECTION; 179 275 } 180 276 181 277 return BLK_STS_OK; 182 278 } 183 279 184 - static void ext_pi_type1_prepare(struct request *rq) 280 + static blk_status_t blk_verify_pi(struct blk_integrity_iter *iter, 281 + struct t10_pi_tuple *pi, u16 guard) 185 282 { 186 - struct blk_integrity *bi = &rq->q->limits.integrity; 187 - const int tuple_sz = bi->metadata_size; 188 - u64 ref_tag = ext_pi_ref_tag(rq); 189 - u8 offset = bi->pi_offset; 190 - struct bio *bio; 283 + u32 seed = lower_32_bits(iter->seed); 284 + u32 ref = get_unaligned_be32(&pi->ref_tag); 285 + u16 app = get_unaligned_be16(&pi->app_tag); 191 286 192 - __rq_for_each_bio(bio, rq) { 193 - struct bio_integrity_payload *bip = bio_integrity(bio); 194 - u64 virt = lower_48_bits(bip_get_seed(bip)); 195 - struct bio_vec iv; 196 - struct bvec_iter iter; 197 - 198 - /* Already remapped? 
*/ 199 - if (bip->bip_flags & BIP_MAPPED_INTEGRITY) 200 - break; 201 - 202 - bip_for_each_vec(iv, bip, iter) { 203 - unsigned int j; 204 - void *p; 205 - 206 - p = bvec_kmap_local(&iv); 207 - for (j = 0; j < iv.bv_len; j += tuple_sz) { 208 - struct crc64_pi_tuple *pi = p + offset; 209 - u64 ref = get_unaligned_be48(pi->ref_tag); 210 - 211 - if (ref == virt) 212 - put_unaligned_be48(ref_tag, pi->ref_tag); 213 - virt++; 214 - ref_tag++; 215 - p += tuple_sz; 216 - } 217 - kunmap_local(p); 287 + if (iter->bi->flags & BLK_INTEGRITY_REF_TAG) { 288 + if (app == APP_TAG_ESCAPE) 289 + return BLK_STS_OK; 290 + if (ref != seed) { 291 + pr_err("%s: ref tag error at location %u (rcvd %u)\n", 292 + iter->bio->bi_bdev->bd_disk->disk_name, seed, 293 + ref); 294 + return BLK_STS_PROTECTION; 218 295 } 296 + } else if (app == APP_TAG_ESCAPE && ref == REF_TAG_ESCAPE) { 297 + return BLK_STS_OK; 298 + } 219 299 220 - bip->bip_flags |= BIP_MAPPED_INTEGRITY; 300 + if (guard != (u16)iter->csum) { 301 + pr_err("%s: guard tag error at sector %llu (rcvd %04x, want %04x)\n", 302 + iter->bio->bi_bdev->bd_disk->disk_name, iter->seed, 303 + guard, (u16)iter->csum); 304 + return BLK_STS_PROTECTION; 305 + } 306 + 307 + return BLK_STS_OK; 308 + } 309 + 310 + static blk_status_t blk_verify_t10_pi(struct blk_integrity_iter *iter, 311 + struct t10_pi_tuple *pi) 312 + { 313 + u16 guard = get_unaligned_be16(&pi->guard_tag); 314 + 315 + return blk_verify_pi(iter, pi, guard); 316 + } 317 + 318 + static blk_status_t blk_verify_ip_pi(struct blk_integrity_iter *iter, 319 + struct t10_pi_tuple *pi) 320 + { 321 + u16 guard = get_unaligned((u16 *)&pi->guard_tag); 322 + 323 + return blk_verify_pi(iter, pi, guard); 324 + } 325 + 326 + static blk_status_t blk_integrity_verify(struct blk_integrity_iter *iter, 327 + union pi_tuple *tuple) 328 + { 329 + switch (iter->bi->csum_type) { 330 + case BLK_INTEGRITY_CSUM_CRC64: 331 + return blk_verify_ext_pi(iter, &tuple->crc64_pi); 332 + case BLK_INTEGRITY_CSUM_CRC: 333 + 
return blk_verify_t10_pi(iter, &tuple->t10_pi); 334 + case BLK_INTEGRITY_CSUM_IP: 335 + return blk_verify_ip_pi(iter, &tuple->t10_pi); 336 + default: 337 + return BLK_STS_OK; 221 338 } 222 339 } 223 340 224 - static void ext_pi_type1_complete(struct request *rq, unsigned int nr_bytes) 341 + static void blk_set_ext_pi(struct blk_integrity_iter *iter, 342 + struct crc64_pi_tuple *pi) 225 343 { 226 - struct blk_integrity *bi = &rq->q->limits.integrity; 227 - unsigned intervals = nr_bytes >> bi->interval_exp; 228 - const int tuple_sz = bi->metadata_size; 229 - u64 ref_tag = ext_pi_ref_tag(rq); 230 - u8 offset = bi->pi_offset; 231 - struct bio *bio; 344 + put_unaligned_be64(iter->csum, &pi->guard_tag); 345 + put_unaligned_be16(0, &pi->app_tag); 346 + put_unaligned_be48(iter->seed, &pi->ref_tag); 347 + } 232 348 233 - __rq_for_each_bio(bio, rq) { 234 - struct bio_integrity_payload *bip = bio_integrity(bio); 235 - u64 virt = lower_48_bits(bip_get_seed(bip)); 236 - struct bio_vec iv; 237 - struct bvec_iter iter; 349 + static void blk_set_pi(struct blk_integrity_iter *iter, 350 + struct t10_pi_tuple *pi, __be16 csum) 351 + { 352 + put_unaligned(csum, &pi->guard_tag); 353 + put_unaligned_be16(0, &pi->app_tag); 354 + put_unaligned_be32(iter->seed, &pi->ref_tag); 355 + } 238 356 239 - bip_for_each_vec(iv, bip, iter) { 240 - unsigned int j; 241 - void *p; 357 + static void blk_set_t10_pi(struct blk_integrity_iter *iter, 358 + struct t10_pi_tuple *pi) 359 + { 360 + blk_set_pi(iter, pi, cpu_to_be16((u16)iter->csum)); 361 + } 242 362 243 - p = bvec_kmap_local(&iv); 244 - for (j = 0; j < iv.bv_len && intervals; j += tuple_sz) { 245 - struct crc64_pi_tuple *pi = p + offset; 246 - u64 ref = get_unaligned_be48(pi->ref_tag); 363 + static void blk_set_ip_pi(struct blk_integrity_iter *iter, 364 + struct t10_pi_tuple *pi) 365 + { 366 + blk_set_pi(iter, pi, (__force __be16)(u16)iter->csum); 367 + } 247 368 248 - if (ref == ref_tag) 249 - put_unaligned_be48(virt, pi->ref_tag); 250 - virt++; 
251 - ref_tag++; 252 - intervals--; 253 - p += tuple_sz; 254 - } 255 - kunmap_local(p); 256 - } 369 + static void blk_integrity_set(struct blk_integrity_iter *iter, 370 + union pi_tuple *tuple) 371 + { 372 + switch (iter->bi->csum_type) { 373 + case BLK_INTEGRITY_CSUM_CRC64: 374 + return blk_set_ext_pi(iter, &tuple->crc64_pi); 375 + case BLK_INTEGRITY_CSUM_CRC: 376 + return blk_set_t10_pi(iter, &tuple->t10_pi); 377 + case BLK_INTEGRITY_CSUM_IP: 378 + return blk_set_ip_pi(iter, &tuple->t10_pi); 379 + default: 380 + WARN_ON_ONCE(1); 381 + return; 257 382 } 383 + } 384 + 385 + static blk_status_t blk_integrity_interval(struct blk_integrity_iter *iter, 386 + bool verify) 387 + { 388 + blk_status_t ret = BLK_STS_OK; 389 + union pi_tuple tuple; 390 + void *ptuple = &tuple; 391 + struct bio_vec pbv; 392 + 393 + blk_integrity_csum_offset(iter); 394 + pbv = bvec_iter_bvec(iter->bip->bip_vec, iter->prot_iter); 395 + if (pbv.bv_len >= iter->bi->pi_tuple_size) { 396 + ptuple = bvec_kmap_local(&pbv); 397 + bvec_iter_advance_single(iter->bip->bip_vec, &iter->prot_iter, 398 + iter->bi->metadata_size - iter->bi->pi_offset); 399 + } else if (verify) { 400 + blk_integrity_copy_to_tuple(iter->bip, &iter->prot_iter, 401 + ptuple, iter->bi->pi_tuple_size); 402 + } 403 + 404 + if (verify) 405 + ret = blk_integrity_verify(iter, ptuple); 406 + else 407 + blk_integrity_set(iter, ptuple); 408 + 409 + if (likely(ptuple != &tuple)) { 410 + kunmap_local(ptuple); 411 + } else if (!verify) { 412 + blk_integrity_copy_from_tuple(iter->bip, &iter->prot_iter, 413 + ptuple, iter->bi->pi_tuple_size); 414 + } 415 + 416 + iter->interval_remaining = 1 << iter->bi->interval_exp; 417 + iter->csum = 0; 418 + iter->seed++; 419 + return ret; 420 + } 421 + 422 + static blk_status_t blk_integrity_iterate(struct bio *bio, 423 + struct bvec_iter *data_iter, 424 + bool verify) 425 + { 426 + struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk); 427 + struct bio_integrity_payload *bip = 
bio_integrity(bio); 428 + struct blk_integrity_iter iter = { 429 + .bio = bio, 430 + .bip = bip, 431 + .bi = bi, 432 + .data_iter = *data_iter, 433 + .prot_iter = bip->bip_iter, 434 + .interval_remaining = 1 << bi->interval_exp, 435 + .seed = data_iter->bi_sector, 436 + .csum = 0, 437 + }; 438 + blk_status_t ret = BLK_STS_OK; 439 + 440 + while (iter.data_iter.bi_size && ret == BLK_STS_OK) { 441 + struct bio_vec bv = bvec_iter_bvec(iter.bio->bi_io_vec, 442 + iter.data_iter); 443 + void *kaddr = bvec_kmap_local(&bv); 444 + void *data = kaddr; 445 + unsigned int len; 446 + 447 + bvec_iter_advance_single(iter.bio->bi_io_vec, &iter.data_iter, 448 + bv.bv_len); 449 + while (bv.bv_len && ret == BLK_STS_OK) { 450 + len = min(iter.interval_remaining, bv.bv_len); 451 + blk_calculate_guard(&iter, data, len); 452 + bv.bv_len -= len; 453 + data += len; 454 + iter.interval_remaining -= len; 455 + if (!iter.interval_remaining) 456 + ret = blk_integrity_interval(&iter, verify); 457 + } 458 + kunmap_local(kaddr); 459 + } 460 + 461 + return ret; 258 462 } 259 463 260 464 void bio_integrity_generate(struct bio *bio) 261 465 { 262 466 struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk); 263 - struct bio_integrity_payload *bip = bio_integrity(bio); 264 - struct blk_integrity_iter iter; 265 - struct bvec_iter bviter; 266 - struct bio_vec bv; 267 467 268 - iter.disk_name = bio->bi_bdev->bd_disk->disk_name; 269 - iter.interval = 1 << bi->interval_exp; 270 - iter.seed = bio->bi_iter.bi_sector; 271 - iter.prot_buf = bvec_virt(bip->bip_vec); 272 - bio_for_each_segment(bv, bio, bviter) { 273 - void *kaddr = bvec_kmap_local(&bv); 274 - 275 - iter.data_buf = kaddr; 276 - iter.data_size = bv.bv_len; 277 - switch (bi->csum_type) { 278 - case BLK_INTEGRITY_CSUM_CRC64: 279 - ext_pi_crc64_generate(&iter, bi); 280 - break; 281 - case BLK_INTEGRITY_CSUM_CRC: 282 - case BLK_INTEGRITY_CSUM_IP: 283 - t10_pi_generate(&iter, bi); 284 - break; 285 - default: 286 - break; 287 - } 288 - 
kunmap_local(kaddr); 468 + switch (bi->csum_type) { 469 + case BLK_INTEGRITY_CSUM_CRC64: 470 + case BLK_INTEGRITY_CSUM_CRC: 471 + case BLK_INTEGRITY_CSUM_IP: 472 + blk_integrity_iterate(bio, &bio->bi_iter, false); 473 + break; 474 + default: 475 + break; 289 476 } 290 477 } 291 478 292 479 blk_status_t bio_integrity_verify(struct bio *bio, struct bvec_iter *saved_iter) 293 480 { 294 481 struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk); 295 - struct bio_integrity_payload *bip = bio_integrity(bio); 296 - struct blk_integrity_iter iter; 297 - struct bvec_iter bviter; 298 - struct bio_vec bv; 299 482 300 - /* 301 - * At the moment verify is called bi_iter has been advanced during split 302 - * and completion, so use the copy created during submission here. 303 - */ 304 - iter.disk_name = bio->bi_bdev->bd_disk->disk_name; 305 - iter.interval = 1 << bi->interval_exp; 306 - iter.seed = saved_iter->bi_sector; 307 - iter.prot_buf = bvec_virt(bip->bip_vec); 308 - __bio_for_each_segment(bv, bio, bviter, *saved_iter) { 309 - void *kaddr = bvec_kmap_local(&bv); 310 - blk_status_t ret = BLK_STS_OK; 311 - 312 - iter.data_buf = kaddr; 313 - iter.data_size = bv.bv_len; 314 - switch (bi->csum_type) { 315 - case BLK_INTEGRITY_CSUM_CRC64: 316 - ret = ext_pi_crc64_verify(&iter, bi); 317 - break; 318 - case BLK_INTEGRITY_CSUM_CRC: 319 - case BLK_INTEGRITY_CSUM_IP: 320 - ret = t10_pi_verify(&iter, bi); 321 - break; 322 - default: 323 - break; 324 - } 325 - kunmap_local(kaddr); 326 - 327 - if (ret) 328 - return ret; 483 + switch (bi->csum_type) { 484 + case BLK_INTEGRITY_CSUM_CRC64: 485 + case BLK_INTEGRITY_CSUM_CRC: 486 + case BLK_INTEGRITY_CSUM_IP: 487 + return blk_integrity_iterate(bio, saved_iter, true); 488 + default: 489 + break; 329 490 } 330 491 331 492 return BLK_STS_OK; 332 493 } 333 494 334 - void blk_integrity_prepare(struct request *rq) 495 + /* 496 + * Advance @iter past the protection offset for protection formats that 497 + * contain front padding on the 
metadata region. 498 + */ 499 + static void blk_pi_advance_offset(struct blk_integrity *bi, 500 + struct bio_integrity_payload *bip, 501 + struct bvec_iter *iter) 502 + { 503 + unsigned int offset = bi->pi_offset; 504 + 505 + while (offset > 0) { 506 + struct bio_vec bv = mp_bvec_iter_bvec(bip->bip_vec, *iter); 507 + unsigned int len = min(bv.bv_len, offset); 508 + 509 + bvec_iter_advance_single(bip->bip_vec, iter, len); 510 + offset -= len; 511 + } 512 + } 513 + 514 + static void *blk_tuple_remap_begin(union pi_tuple *tuple, 515 + struct blk_integrity *bi, 516 + struct bio_integrity_payload *bip, 517 + struct bvec_iter *iter) 518 + { 519 + struct bvec_iter titer; 520 + struct bio_vec pbv; 521 + 522 + blk_pi_advance_offset(bi, bip, iter); 523 + pbv = bvec_iter_bvec(bip->bip_vec, *iter); 524 + if (likely(pbv.bv_len >= bi->pi_tuple_size)) 525 + return bvec_kmap_local(&pbv); 526 + 527 + /* 528 + * We need to preserve the state of the original iter for the 529 + * copy_from_tuple at the end, so make a temp iter for here. 
530 + */ 531 + titer = *iter; 532 + blk_integrity_copy_to_tuple(bip, &titer, tuple, bi->pi_tuple_size); 533 + return tuple; 534 + } 535 + 536 + static void blk_tuple_remap_end(union pi_tuple *tuple, void *ptuple, 537 + struct blk_integrity *bi, 538 + struct bio_integrity_payload *bip, 539 + struct bvec_iter *iter) 540 + { 541 + unsigned int len = bi->metadata_size - bi->pi_offset; 542 + 543 + if (likely(ptuple != tuple)) { 544 + kunmap_local(ptuple); 545 + } else { 546 + blk_integrity_copy_from_tuple(bip, iter, ptuple, 547 + bi->pi_tuple_size); 548 + len -= bi->pi_tuple_size; 549 + } 550 + 551 + bvec_iter_advance(bip->bip_vec, iter, len); 552 + } 553 + 554 + static void blk_set_ext_unmap_ref(struct crc64_pi_tuple *pi, u64 virt, 555 + u64 ref_tag) 556 + { 557 + u64 ref = get_unaligned_be48(&pi->ref_tag); 558 + 559 + if (ref == lower_48_bits(ref_tag) && ref != lower_48_bits(virt)) 560 + put_unaligned_be48(virt, pi->ref_tag); 561 + } 562 + 563 + static void blk_set_t10_unmap_ref(struct t10_pi_tuple *pi, u32 virt, 564 + u32 ref_tag) 565 + { 566 + u32 ref = get_unaligned_be32(&pi->ref_tag); 567 + 568 + if (ref == ref_tag && ref != virt) 569 + put_unaligned_be32(virt, &pi->ref_tag); 570 + } 571 + 572 + static void blk_reftag_remap_complete(struct blk_integrity *bi, 573 + union pi_tuple *tuple, u64 virt, u64 ref) 574 + { 575 + switch (bi->csum_type) { 576 + case BLK_INTEGRITY_CSUM_CRC64: 577 + blk_set_ext_unmap_ref(&tuple->crc64_pi, virt, ref); 578 + break; 579 + case BLK_INTEGRITY_CSUM_CRC: 580 + case BLK_INTEGRITY_CSUM_IP: 581 + blk_set_t10_unmap_ref(&tuple->t10_pi, virt, ref); 582 + break; 583 + default: 584 + WARN_ON_ONCE(1); 585 + break; 586 + } 587 + } 588 + 589 + static void blk_set_ext_map_ref(struct crc64_pi_tuple *pi, u64 virt, 590 + u64 ref_tag) 591 + { 592 + u64 ref = get_unaligned_be48(&pi->ref_tag); 593 + 594 + if (ref == lower_48_bits(virt) && ref != ref_tag) 595 + put_unaligned_be48(ref_tag, pi->ref_tag); 596 + } 597 + 598 + static void 
blk_set_t10_map_ref(struct t10_pi_tuple *pi, u32 virt, u32 ref_tag) 599 + { 600 + u32 ref = get_unaligned_be32(&pi->ref_tag); 601 + 602 + if (ref == virt && ref != ref_tag) 603 + put_unaligned_be32(ref_tag, &pi->ref_tag); 604 + } 605 + 606 + static void blk_reftag_remap_prepare(struct blk_integrity *bi, 607 + union pi_tuple *tuple, 608 + u64 virt, u64 ref) 609 + { 610 + switch (bi->csum_type) { 611 + case BLK_INTEGRITY_CSUM_CRC64: 612 + blk_set_ext_map_ref(&tuple->crc64_pi, virt, ref); 613 + break; 614 + case BLK_INTEGRITY_CSUM_CRC: 615 + case BLK_INTEGRITY_CSUM_IP: 616 + blk_set_t10_map_ref(&tuple->t10_pi, virt, ref); 617 + break; 618 + default: 619 + WARN_ON_ONCE(1); 620 + break; 621 + } 622 + } 623 + 624 + static void __blk_reftag_remap(struct bio *bio, struct blk_integrity *bi, 625 + unsigned *intervals, u64 *ref, bool prep) 626 + { 627 + struct bio_integrity_payload *bip = bio_integrity(bio); 628 + struct bvec_iter iter = bip->bip_iter; 629 + u64 virt = bip_get_seed(bip); 630 + union pi_tuple *ptuple; 631 + union pi_tuple tuple; 632 + 633 + if (prep && bip->bip_flags & BIP_MAPPED_INTEGRITY) { 634 + *ref += bio->bi_iter.bi_size >> bi->interval_exp; 635 + return; 636 + } 637 + 638 + while (iter.bi_size && *intervals) { 639 + ptuple = blk_tuple_remap_begin(&tuple, bi, bip, &iter); 640 + 641 + if (prep) 642 + blk_reftag_remap_prepare(bi, ptuple, virt, *ref); 643 + else 644 + blk_reftag_remap_complete(bi, ptuple, virt, *ref); 645 + 646 + blk_tuple_remap_end(&tuple, ptuple, bi, bip, &iter); 647 + (*intervals)--; 648 + (*ref)++; 649 + virt++; 650 + } 651 + 652 + if (prep) 653 + bip->bip_flags |= BIP_MAPPED_INTEGRITY; 654 + } 655 + 656 + static void blk_integrity_remap(struct request *rq, unsigned int nr_bytes, 657 + bool prep) 335 658 { 336 659 struct blk_integrity *bi = &rq->q->limits.integrity; 660 + u64 ref = blk_rq_pos(rq) >> (bi->interval_exp - SECTOR_SHIFT); 661 + unsigned intervals = nr_bytes >> bi->interval_exp; 662 + struct bio *bio; 337 663 338 664 if 
(!(bi->flags & BLK_INTEGRITY_REF_TAG)) 339 665 return; 340 666 341 - if (bi->csum_type == BLK_INTEGRITY_CSUM_CRC64) 342 - ext_pi_type1_prepare(rq); 343 - else 344 - t10_pi_type1_prepare(rq); 667 + __rq_for_each_bio(bio, rq) { 668 + __blk_reftag_remap(bio, bi, &intervals, &ref, prep); 669 + if (!intervals) 670 + break; 671 + } 672 + } 673 + 674 + void blk_integrity_prepare(struct request *rq) 675 + { 676 + blk_integrity_remap(rq, blk_rq_bytes(rq), true); 345 677 } 346 678 347 679 void blk_integrity_complete(struct request *rq, unsigned int nr_bytes) 348 680 { 349 - struct blk_integrity *bi = &rq->q->limits.integrity; 350 - 351 - if (!(bi->flags & BLK_INTEGRITY_REF_TAG)) 352 - return; 353 - 354 - if (bi->csum_type == BLK_INTEGRITY_CSUM_CRC64) 355 - ext_pi_type1_complete(rq, nr_bytes); 356 - else 357 - t10_pi_type1_complete(rq, nr_bytes); 681 + blk_integrity_remap(rq, nr_bytes, false); 358 682 }
+1
drivers/nvme/host/core.c
··· 1875 1875 break; 1876 1876 } 1877 1877 1878 + bi->flags |= BLK_SPLIT_INTERVAL_CAPABLE; 1878 1879 bi->metadata_size = head->ms; 1879 1880 if (bi->csum_type) { 1880 1881 bi->pi_tuple_size = head->pi_size;
+1
include/linux/blk-integrity.h
··· 14 14 BLK_INTEGRITY_DEVICE_CAPABLE = 1 << 2, 15 15 BLK_INTEGRITY_REF_TAG = 1 << 3, 16 16 BLK_INTEGRITY_STACKED = 1 << 4, 17 + BLK_SPLIT_INTERVAL_CAPABLE = 1 << 5, 17 18 }; 18 19 19 20 const char *blk_integrity_profile_name(struct blk_integrity *bi);