Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge branch 'libnvdimm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm

Pull libnvdimm fixes from Dan Williams:
"These fixes are all tagged for -stable and have received a build
success notification from the kbuild robot.

- NVDIMM namespaces, configured to enforce 1GB alignment, fail to
initialize on platforms that mis-align the start or end of the
physical address range.

- The Linux implementation of the BTT (Block Translation Table) is
incompatible with the UEFI 2.7 definition of the BTT format. The
BTT layers a software atomic sector semantic on top of an NVDIMM
namespace. Linux needs to be compatible with the UEFI definition to
enable boot support or any pre-OS access of data on a BTT enabled
namespace.

- A fix for ACPI SMART notification events, this allows a userspace
monitor to register for health events rather than poll. This has
been broken since it was initially merged as the unit test
inadvertently worked around the problem. The urgency for fixing
this during the -rc series is driven by how expensive it is to poll
for this data (System Management Mode entry)"

* 'libnvdimm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm:
libnvdimm, btt: Fix an incompatibility in the log layout
libnvdimm, btt: add a couple of missing kernel-doc lines
libnvdimm, dax: fix 1GB-aligned namespaces vs physical misalignment
libnvdimm, pfn: fix start_pad handling for aligned namespaces
acpi, nfit: fix health event notification
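The SMART fix in this pull restores event-driven monitoring: when acpi_nvdimm_notify fires, the kernel signals the DIMM's sysfs "flags" attribute and any waiter sleeping in poll(2) wakes up, instead of paying a System Management Mode entry per sample. Below is a minimal userspace sketch of that pattern, assuming a health-flags attribute at /sys/bus/nd/devices/nmem0/nfit/flags (the device name is illustrative); sysfs notification semantics require a read before each poll to arm the next event.

/* Hypothetical monitor: sleep on nfit health events instead of polling.
 * The sysfs path is illustrative; substitute the target DIMM for nmem0. */
#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[64];
	int fd = open("/sys/bus/nd/devices/nmem0/nfit/flags", O_RDONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	for (;;) {
		struct pollfd pfd = { .fd = fd, .events = POLLPRI | POLLERR };

		/* sysfs: read to consume the current value, arming the next event */
		if (pread(fd, buf, sizeof(buf) - 1, 0) < 0)
			break;
		if (poll(&pfd, 1, -1) < 0)
			break;
		printf("health event: re-read SMART data now\n");
	}
	close(fd);
	return 0;
}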

+236 -41
+8 -1
drivers/acpi/nfit/core.c
···
 			dev_name(&adev_dimm->dev));
 		return -ENXIO;
 	}
+	/*
+	 * Record nfit_mem for the notification path to track back to
+	 * the nfit sysfs attributes for this dimm device object.
+	 */
+	dev_set_drvdata(&adev_dimm->dev, nfit_mem);
 
 	/*
 	 * Until standardization materializes we need to consider 4
···
 			sysfs_put(nfit_mem->flags_attr);
 			nfit_mem->flags_attr = NULL;
 		}
-		if (adev_dimm)
+		if (adev_dimm) {
 			acpi_remove_notify_handler(adev_dimm->handle,
 					ACPI_DEVICE_NOTIFY, acpi_nvdimm_notify);
+			dev_set_drvdata(&adev_dimm->dev, NULL);
+		}
 	}
 	mutex_unlock(&acpi_desc->init_mutex);
 }
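For context, the drvdata recorded above is what the notify side walks back through. The sketch below paraphrases that consumer rather than quoting the driver (locking and error paths elided; NFIT_NOTIFY_DIMM_HEALTH is the health-event code the driver checks for): the handler maps the ACPI dimm device back to its nfit_mem and signals the sysfs attribute, which wakes poll(2) waiters.

/* Paraphrased consumer of the drvdata set above (simplified sketch):
 * look up the nfit_mem recorded at init time and poke the per-dimm
 * sysfs "flags" node so userspace monitors wake up. */
static void acpi_nvdimm_notify(acpi_handle handle, u32 event, void *data)
{
	struct acpi_device *adev = data;
	struct nfit_mem *nfit_mem;

	if (event != NFIT_NOTIFY_DIMM_HEALTH)
		return;

	nfit_mem = dev_get_drvdata(&adev->dev);
	if (nfit_mem && nfit_mem->flags_attr)
		sysfs_notify_dirent(nfit_mem->flags_attr);
}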
+167 -34
drivers/nvdimm/btt.c
···
 	return ret;
 }
 
-static int btt_log_read_pair(struct arena_info *arena, u32 lane,
-			struct log_entry *ent)
+static int btt_log_group_read(struct arena_info *arena, u32 lane,
+			struct log_group *log)
 {
 	return arena_read_bytes(arena,
-			arena->logoff + (2 * lane * LOG_ENT_SIZE), ent,
-			2 * LOG_ENT_SIZE, 0);
+			arena->logoff + (lane * LOG_GRP_SIZE), log,
+			LOG_GRP_SIZE, 0);
 }
 
 static struct dentry *debugfs_root;
···
 	debugfs_create_x64("logoff", S_IRUGO, d, &a->logoff);
 	debugfs_create_x64("info2off", S_IRUGO, d, &a->info2off);
 	debugfs_create_x32("flags", S_IRUGO, d, &a->flags);
+	debugfs_create_u32("log_index_0", S_IRUGO, d, &a->log_index[0]);
+	debugfs_create_u32("log_index_1", S_IRUGO, d, &a->log_index[1]);
 }
 
 static void btt_debugfs_init(struct btt *btt)
···
 	}
 }
 
+static u32 log_seq(struct log_group *log, int log_idx)
+{
+	return le32_to_cpu(log->ent[log_idx].seq);
+}
+
 /*
  * This function accepts two log entries, and uses the
  * sequence number to find the 'older' entry.
···
  *
  * TODO The logic feels a bit kludge-y. make it better..
  */
-static int btt_log_get_old(struct log_entry *ent)
+static int btt_log_get_old(struct arena_info *a, struct log_group *log)
 {
+	int idx0 = a->log_index[0];
+	int idx1 = a->log_index[1];
 	int old;
 
 	/*
···
 	 * the next time, the following logic works out to put this
 	 * (next) entry into [1]
 	 */
-	if (ent[0].seq == 0) {
-		ent[0].seq = cpu_to_le32(1);
+	if (log_seq(log, idx0) == 0) {
+		log->ent[idx0].seq = cpu_to_le32(1);
 		return 0;
 	}
 
-	if (ent[0].seq == ent[1].seq)
+	if (log_seq(log, idx0) == log_seq(log, idx1))
 		return -EINVAL;
-	if (le32_to_cpu(ent[0].seq) + le32_to_cpu(ent[1].seq) > 5)
+	if (log_seq(log, idx0) + log_seq(log, idx1) > 5)
 		return -EINVAL;
 
-	if (le32_to_cpu(ent[0].seq) < le32_to_cpu(ent[1].seq)) {
-		if (le32_to_cpu(ent[1].seq) - le32_to_cpu(ent[0].seq) == 1)
+	if (log_seq(log, idx0) < log_seq(log, idx1)) {
+		if ((log_seq(log, idx1) - log_seq(log, idx0)) == 1)
 			old = 0;
 		else
 			old = 1;
 	} else {
-		if (le32_to_cpu(ent[0].seq) - le32_to_cpu(ent[1].seq) == 1)
+		if ((log_seq(log, idx0) - log_seq(log, idx1)) == 1)
 			old = 1;
 		else
 			old = 0;
···
 {
 	int ret;
 	int old_ent, ret_ent;
-	struct log_entry log[2];
+	struct log_group log;
 
-	ret = btt_log_read_pair(arena, lane, log);
+	ret = btt_log_group_read(arena, lane, &log);
 	if (ret)
 		return -EIO;
 
-	old_ent = btt_log_get_old(log);
+	old_ent = btt_log_get_old(arena, &log);
 	if (old_ent < 0 || old_ent > 1) {
 		dev_err(to_dev(arena),
 			"log corruption (%d): lane %d seq [%d, %d]\n",
-			old_ent, lane, log[0].seq, log[1].seq);
+			old_ent, lane, log.ent[arena->log_index[0]].seq,
+			log.ent[arena->log_index[1]].seq);
 		/* TODO set error state? */
 		return -EIO;
 	}
···
 	ret_ent = (old_flag ? old_ent : (1 - old_ent));
 
 	if (ent != NULL)
-		memcpy(ent, &log[ret_ent], LOG_ENT_SIZE);
+		memcpy(ent, &log.ent[arena->log_index[ret_ent]], LOG_ENT_SIZE);
 
 	return ret_ent;
 }
···
 		u32 sub, struct log_entry *ent, unsigned long flags)
 {
 	int ret;
-	/*
-	 * Ignore the padding in log_entry for calculating log_half.
-	 * The entry is 'committed' when we write the sequence number,
-	 * and we want to ensure that that is the last thing written.
-	 * We don't bother writing the padding as that would be extra
-	 * media wear and write amplification
-	 */
-	unsigned int log_half = (LOG_ENT_SIZE - 2 * sizeof(u64)) / 2;
-	u64 ns_off = arena->logoff + (((2 * lane) + sub) * LOG_ENT_SIZE);
+	u32 group_slot = arena->log_index[sub];
+	unsigned int log_half = LOG_ENT_SIZE / 2;
 	void *src = ent;
+	u64 ns_off;
 
+	ns_off = arena->logoff + (lane * LOG_GRP_SIZE) +
+		(group_slot * LOG_ENT_SIZE);
 	/* split the 16B write into atomic, durable halves */
 	ret = arena_write_bytes(arena, ns_off, src, log_half, flags);
 	if (ret)
···
 {
 	size_t logsize = arena->info2off - arena->logoff;
 	size_t chunk_size = SZ_4K, offset = 0;
-	struct log_entry log;
+	struct log_entry ent;
 	void *zerobuf;
 	int ret;
 	u32 i;
···
 	}
 
 	for (i = 0; i < arena->nfree; i++) {
-		log.lba = cpu_to_le32(i);
-		log.old_map = cpu_to_le32(arena->external_nlba + i);
-		log.new_map = cpu_to_le32(arena->external_nlba + i);
-		log.seq = cpu_to_le32(LOG_SEQ_INIT);
-		ret = __btt_log_write(arena, i, 0, &log, 0);
+		ent.lba = cpu_to_le32(i);
+		ent.old_map = cpu_to_le32(arena->external_nlba + i);
+		ent.new_map = cpu_to_le32(arena->external_nlba + i);
+		ent.seq = cpu_to_le32(LOG_SEQ_INIT);
+		ret = __btt_log_write(arena, i, 0, &ent, 0);
 		if (ret)
 			goto free;
 	}
···
 	return 0;
 }
 
+static bool ent_is_padding(struct log_entry *ent)
+{
+	return (ent->lba == 0) && (ent->old_map == 0) && (ent->new_map == 0)
+		&& (ent->seq == 0);
+}
+
+/*
+ * Detecting valid log indices: We read a log group (see the comments in btt.h
+ * for a description of a 'log_group' and its 'slots'), and iterate over its
+ * four slots. We expect that a padding slot will be all-zeroes, and use this
+ * to detect a padding slot vs. an actual entry.
+ *
+ * If a log_group is in the initial state, i.e. hasn't been used since the
+ * creation of this BTT layout, it will have three of the four slots with
+ * zeroes. We skip over these log_groups for the detection of log_index. If
+ * all log_groups are in the initial state (i.e. the BTT has never been
+ * written to), it is safe to assume the 'new format' of log entries in slots
+ * (0, 1).
+ */
+static int log_set_indices(struct arena_info *arena)
+{
+	bool idx_set = false, initial_state = true;
+	int ret, log_index[2] = {-1, -1};
+	u32 i, j, next_idx = 0;
+	struct log_group log;
+	u32 pad_count = 0;
+
+	for (i = 0; i < arena->nfree; i++) {
+		ret = btt_log_group_read(arena, i, &log);
+		if (ret < 0)
+			return ret;
+
+		for (j = 0; j < 4; j++) {
+			if (!idx_set) {
+				if (ent_is_padding(&log.ent[j])) {
+					pad_count++;
+					continue;
+				} else {
+					/* Skip if index has been recorded */
+					if ((next_idx == 1) &&
+							(j == log_index[0]))
+						continue;
+					/* valid entry, record index */
+					log_index[next_idx] = j;
+					next_idx++;
+				}
+				if (next_idx == 2) {
+					/* two valid entries found */
+					idx_set = true;
+				} else if (next_idx > 2) {
+					/* too many valid indices */
+					return -ENXIO;
+				}
+			} else {
+				/*
+				 * once the indices have been set, just verify
+				 * that all subsequent log groups are either in
+				 * their initial state or follow the same
+				 * indices.
+				 */
+				if (j == log_index[0]) {
+					/* entry must be 'valid' */
+					if (ent_is_padding(&log.ent[j]))
+						return -ENXIO;
+				} else if (j == log_index[1]) {
+					;
+					/*
+					 * log_index[1] can be padding if the
+					 * lane never got used and it is still
+					 * in the initial state (three 'padding'
+					 * entries)
+					 */
+				} else {
+					/* entry must be invalid (padding) */
+					if (!ent_is_padding(&log.ent[j]))
+						return -ENXIO;
+				}
+			}
+		}
+		/*
+		 * If any of the log_groups have more than one valid,
+		 * non-padding entry, then we are no longer in the
+		 * initial_state
+		 */
+		if (pad_count < 3)
+			initial_state = false;
+		pad_count = 0;
+	}
+
+	if (!initial_state && !idx_set)
+		return -ENXIO;
+
+	/*
+	 * If all the entries in the log were in the initial state,
+	 * assume new padding scheme
+	 */
+	if (initial_state)
+		log_index[1] = 1;
+
+	/*
+	 * Only allow the known permutations of log/padding indices,
+	 * i.e. (0, 1), and (0, 2)
+	 */
+	if ((log_index[0] == 0) && ((log_index[1] == 1) || (log_index[1] == 2)))
+		; /* known index possibilities */
+	else {
+		dev_err(to_dev(arena), "Found an unknown padding scheme\n");
+		return -ENXIO;
+	}
+
+	arena->log_index[0] = log_index[0];
+	arena->log_index[1] = log_index[1];
+	dev_dbg(to_dev(arena), "log_index_0 = %d\n", log_index[0]);
+	dev_dbg(to_dev(arena), "log_index_1 = %d\n", log_index[1]);
+	return 0;
+}
+
 static int btt_rtt_init(struct arena_info *arena)
 {
 	arena->rtt = kcalloc(arena->nfree, sizeof(u32), GFP_KERNEL);
···
 	available -= 2 * BTT_PG_SIZE;
 
 	/* The log takes a fixed amount of space based on nfree */
-	logsize = roundup(2 * arena->nfree * sizeof(struct log_entry),
-			BTT_PG_SIZE);
+	logsize = roundup(arena->nfree * LOG_GRP_SIZE, BTT_PG_SIZE);
 	available -= logsize;
 
 	/* Calculate optimal split between map and data area */
···
 	arena->mapoff = arena->dataoff + datasize;
 	arena->logoff = arena->mapoff + mapsize;
 	arena->info2off = arena->logoff + logsize;
+
+	/* Default log indices are (0,1) */
+	arena->log_index[0] = 0;
+	arena->log_index[1] = 1;
 	return arena;
 }
···
 	arena->external_lba_start = cur_nlba;
 	parse_arena_meta(arena, super, cur_off);
+
+	ret = log_set_indices(arena);
+	if (ret) {
+		dev_err(to_dev(arena),
+			"Unable to deduce log/padding indices\n");
+		goto out;
+	}
 
 	mutex_init(&arena->err_lock);
 	ret = btt_freelist_init(arena);
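The old/new selection logic above is self-contained enough to model outside the kernel. Below is a standalone sketch of btt_log_get_old()'s arithmetic (host-endian values stand in for __le32, no media access): sequence numbers cycle 1 -> 2 -> 3 -> 1, so a valid pair differs by exactly one step, and two in-range values can never sum past 5.

/* Standalone model of btt_log_get_old()'s sequence arithmetic.
 * Returns the index of the "older" entry, or -1 for an impossible
 * pair. The kernel version additionally repairs the seq == 0 case. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static int get_old(uint32_t seq0, uint32_t seq1)
{
	if (seq0 == 0)
		return 0;	/* uninitialized lane: treat [0] as old */
	if (seq0 == seq1 || seq0 + seq1 > 5)
		return -1;	/* corrupt: equal or out-of-range seqs */
	if (seq0 < seq1)
		return (seq1 - seq0 == 1) ? 0 : 1;
	return (seq0 - seq1 == 1) ? 1 : 0;
}

int main(void)
{
	assert(get_old(1, 2) == 0);	/* 2 is one step newer */
	assert(get_old(3, 1) == 0);	/* wrap: 1 follows 3 in the cycle */
	assert(get_old(1, 3) == 1);	/* wrap the other way: 3 is older */
	assert(get_old(3, 2) == 1);	/* 2 is one step older */
	assert(get_old(2, 2) == -1);	/* equal seqs are corrupt */
	assert(get_old(2, 4) == -1);	/* 4 is out of range: sum > 5 */
	printf("sequence arithmetic behaves as expected\n");
	return 0;
}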
+46 -1
drivers/nvdimm/btt.h
···
 #define MAP_ERR_MASK (1 << MAP_ERR_SHIFT)
 #define MAP_LBA_MASK (~((1 << MAP_TRIM_SHIFT) | (1 << MAP_ERR_SHIFT)))
 #define MAP_ENT_NORMAL 0xC0000000
+#define LOG_GRP_SIZE sizeof(struct log_group)
 #define LOG_ENT_SIZE sizeof(struct log_entry)
 #define ARENA_MIN_SIZE (1UL << 24)	/* 16 MB */
 #define ARENA_MAX_SIZE (1ULL << 39)	/* 512 GB */
···
 	INIT_READY
 };
 
+/*
+ * A log group represents one log 'lane', and consists of four log entries.
+ * Two of the four entries are valid entries, and the remaining two are
+ * padding. Due to an old bug in the padding location, we need to perform a
+ * test to determine the padding scheme being used, and use that scheme
+ * thereafter.
+ *
+ * In kernels prior to 4.15, 'log group' would have actual log entries at
+ * indices (0, 2) and padding at indices (1, 3), whereas the correct/updated
+ * format has log entries at indices (0, 1) and padding at indices (2, 3).
+ *
+ * Old (pre 4.15) format:
+ * +-----------------+-----------------+
+ * |      ent[0]     |      ent[1]     |
+ * |       16B       |       16B       |
+ * | lba/old/new/seq |       pad       |
+ * +-----------------------------------+
+ * |      ent[2]     |      ent[3]     |
+ * |       16B       |       16B       |
+ * | lba/old/new/seq |       pad       |
+ * +-----------------+-----------------+
+ *
+ * New format:
+ * +-----------------+-----------------+
+ * |      ent[0]     |      ent[1]     |
+ * |       16B       |       16B       |
+ * | lba/old/new/seq | lba/old/new/seq |
+ * +-----------------------------------+
+ * |      ent[2]     |      ent[3]     |
+ * |       16B       |       16B       |
+ * |       pad       |       pad       |
+ * +-----------------+-----------------+
+ *
+ * We detect during start-up which format is in use, and set
+ * arena->log_index[(0, 1)] with the detected format.
+ */
+
 struct log_entry {
 	__le32 lba;
 	__le32 old_map;
 	__le32 new_map;
 	__le32 seq;
-	__le64 padding[2];
+};
+
+struct log_group {
+	struct log_entry ent[4];
 };
 
 struct btt_sb {
···
 * @list:		List head for list of arenas
 * @debugfs_dir:	Debugfs dentry
 * @flags:		Arena flags - may signify error states.
+ * @err_lock:		Mutex for synchronizing error clearing.
+ * @log_index:		Indices of the valid log entries in a log_group
 *
 * arena_info is a per-arena handle. Once an arena is narrowed down for an
 * IO, this struct is passed around for the duration of the IO.
···
 	/* Arena flags */
 	u32 flags;
 	struct mutex err_lock;
+	int log_index[2];
 };
 
 /**
···
 * @init_lock:		Mutex used for the BTT initialization
 * @init_state:		Flag describing the initialization state for the BTT
 * @num_arenas:		Number of arenas in the BTT instance
+ * @phys_bb:		Pointer to the namespace's badblocks structure
 */
 struct btt {
 	struct gendisk *btt_disk;
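Note that the struct change keeps the 64-byte per-lane on-media footprint; only which 16-byte slots carry data moves. A compile-time sketch of those invariants, with plain uint32_t standing in for __le32 so it builds in userspace:

/* Compile-time check of the layout described in the comment above:
 * entries are 16B, a group is 64B, and the two valid entries sit at
 * byte offsets (0, 16) in the new format vs (0, 32) in the old. */
#include <stddef.h>
#include <stdint.h>

struct log_entry {
	uint32_t lba;
	uint32_t old_map;
	uint32_t new_map;
	uint32_t seq;
};

struct log_group {
	struct log_entry ent[4];
};

_Static_assert(sizeof(struct log_entry) == 16, "entry must stay 16B");
_Static_assert(sizeof(struct log_group) == 64,
	       "group must match the old 2 * (16B payload + 16B pad) footprint");
/* new format: valid slots (0, 1) -> byte offsets 0 and 16 */
_Static_assert(offsetof(struct log_group, ent[1]) == 16, "new second slot");
/* old format: valid slots (0, 2) -> byte offsets 0 and 32 */
_Static_assert(offsetof(struct log_group, ent[2]) == 32, "old second slot");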
+15 -5
drivers/nvdimm/pfn_devs.c
···
 int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
 {
 	u64 checksum, offset;
-	unsigned long align;
 	enum nd_pfn_mode mode;
 	struct nd_namespace_io *nsio;
+	unsigned long align, start_pad;
 	struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
 	struct nd_namespace_common *ndns = nd_pfn->ndns;
 	const u8 *parent_uuid = nd_dev_to_uuid(&ndns->dev);
···
 	align = le32_to_cpu(pfn_sb->align);
 	offset = le64_to_cpu(pfn_sb->dataoff);
+	start_pad = le32_to_cpu(pfn_sb->start_pad);
 	if (align == 0)
 		align = 1UL << ilog2(offset);
 	mode = le32_to_cpu(pfn_sb->mode);
···
 		return -EBUSY;
 	}
 
-	if ((align && !IS_ALIGNED(offset, align))
+	if ((align && !IS_ALIGNED(nsio->res.start + offset + start_pad, align))
 			|| !IS_ALIGNED(offset, PAGE_SIZE)) {
 		dev_err(&nd_pfn->dev,
 				"bad offset: %#llx dax disabled align: %#lx\n",
···
 	return altmap;
 }
 
+static u64 phys_pmem_align_down(struct nd_pfn *nd_pfn, u64 phys)
+{
+	return min_t(u64, PHYS_SECTION_ALIGN_DOWN(phys),
+			ALIGN_DOWN(phys, nd_pfn->align));
+}
+
 static int nd_pfn_init(struct nd_pfn *nd_pfn)
 {
 	u32 dax_label_reserve = is_nd_dax(&nd_pfn->dev) ? SZ_128K : 0;
···
 	start = nsio->res.start;
 	size = PHYS_SECTION_ALIGN_UP(start + size) - start;
 	if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM,
-				IORES_DESC_NONE) == REGION_MIXED) {
+				IORES_DESC_NONE) == REGION_MIXED
+			|| !IS_ALIGNED(start + resource_size(&nsio->res),
+				nd_pfn->align)) {
 		size = resource_size(&nsio->res);
-		end_trunc = start + size - PHYS_SECTION_ALIGN_DOWN(start + size);
+		end_trunc = start + size - phys_pmem_align_down(nd_pfn,
+				start + size);
 	}
 
 	if (start_pad + end_trunc)
-		dev_info(&nd_pfn->dev, "%s section collision, truncate %d bytes\n",
+		dev_info(&nd_pfn->dev, "%s alignment collision, truncate %d bytes\n",
 			dev_name(&ndns->dev), start_pad + end_trunc);
 
 	/*
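The validate change matters because pfn_sb->dataoff is namespace-relative: it can be perfectly aligned even when the platform starts the physical range off a 1GB boundary. A worked sketch with illustrative numbers (the IS_ALIGNED macro mirrors the kernel's; the addresses are made up):

/* Illustrative: the old namespace-relative check passes while the new
 * absolute-address check (start + offset + start_pad) fails, which is
 * exactly the platform misalignment the fix detects. */
#include <stdint.h>
#include <stdio.h>

#define IS_ALIGNED(x, a)	(((x) & ((a) - 1)) == 0)

int main(void)
{
	uint64_t align = 1ULL << 30;			/* 1GB alignment */
	uint64_t res_start = (4ULL << 30) + (16ULL << 20); /* 4GB + 16MiB */
	uint64_t start_pad = 0;
	uint64_t offset = 1ULL << 30;			/* aligned, relative */

	printf("old check (offset only):      %s\n",
	       IS_ALIGNED(offset, align) ? "pass" : "fail");
	printf("new check (absolute address): %s\n",
	       IS_ALIGNED(res_start + offset + start_pad, align) ?
	       "pass" : "fail");
	return 0;
}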