Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge branch 'libnvdimm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm

Pull libnvdimm fixes from Dan Williams:
"These fixes are all tagged for -stable and have received a build
success notification from the kbuild robot.

- NVDIMM namespaces, configured to enforce 1GB alignment, fail to
initialize on platforms that mis-align the start or end of the
physical address range.

- The Linux implementation of the BTT (Block Translation Table) is
incompatible with the UEFI 2.7 definition of the BTT format. The
BTT layers a software atomic sector semantic on top of an NVDIMM
namespace. Linux needs to be compatible with the UEFI definition to
enable boot support or any pre-OS access of data on a BTT enabled
namespace.

- A fix for ACPI SMART notification events, this allows a userspace
monitor to register for health events rather than poll. This has
been broken since it was initially merged as the unit test
inadvertently worked around the problem. The urgency for fixing
this during the -rc series is driven by how expensive it is to poll
for this data (System Management Mode entry)"

* 'libnvdimm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm:
libnvdimm, btt: Fix an incompatibility in the log layout
libnvdimm, btt: add a couple of missing kernel-doc lines
libnvdimm, dax: fix 1GB-aligned namespaces vs physical misalignment
libnvdimm, pfn: fix start_pad handling for aligned namespaces
acpi, nfit: fix health event notification
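The SMART fix in this pull restores event-driven monitoring: when acpi_nvdimm_notify fires, the kernel signals the DIMM's sysfs "flags" attribute and any waiter sleeping in poll(2) wakes up, instead of paying a System Management Mode entry per sample. Below is a minimal userspace sketch of that pattern, assuming a health-flags attribute at /sys/bus/nd/devices/nmem0/nfit/flags (the device name is illustrative); sysfs notification semantics require a read before each poll to arm the next event.

/* Hypothetical monitor: sleep on nfit health events instead of polling.
 * The sysfs path is illustrative; substitute the target DIMM for nmem0. */
#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[64];
	int fd = open("/sys/bus/nd/devices/nmem0/nfit/flags", O_RDONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	for (;;) {
		struct pollfd pfd = { .fd = fd, .events = POLLPRI | POLLERR };

		/* sysfs: read to consume the current value, arming the next event */
		if (pread(fd, buf, sizeof(buf) - 1, 0) < 0)
			break;
		if (poll(&pfd, 1, -1) < 0)
			break;
		printf("health event: re-read SMART data now\n");
	}
	close(fd);
	return 0;
}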

+236 -41
+8 -1
drivers/acpi/nfit/core.c
···
 			dev_name(&adev_dimm->dev));
 		return -ENXIO;
 	}
+	/*
+	 * Record nfit_mem for the notification path to track back to
+	 * the nfit sysfs attributes for this dimm device object.
+	 */
+	dev_set_drvdata(&adev_dimm->dev, nfit_mem);
 
 	/*
 	 * Until standardization materializes we need to consider 4
···
 			sysfs_put(nfit_mem->flags_attr);
 			nfit_mem->flags_attr = NULL;
 		}
-		if (adev_dimm)
+		if (adev_dimm) {
 			acpi_remove_notify_handler(adev_dimm->handle,
 					ACPI_DEVICE_NOTIFY, acpi_nvdimm_notify);
+			dev_set_drvdata(&adev_dimm->dev, NULL);
+		}
 	}
 	mutex_unlock(&acpi_desc->init_mutex);
 }
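For context, the drvdata recorded above is what the notify side walks back through. The sketch below paraphrases that consumer rather than quoting the driver (locking and error paths elided; NFIT_NOTIFY_DIMM_HEALTH is the health-event code the driver checks for): the handler maps the ACPI dimm device back to its nfit_mem and signals the sysfs attribute, which wakes poll(2) waiters.

/* Paraphrased consumer of the drvdata set above (simplified sketch):
 * look up the nfit_mem recorded at init time and poke the per-dimm
 * sysfs "flags" node so userspace monitors wake up. */
static void acpi_nvdimm_notify(acpi_handle handle, u32 event, void *data)
{
	struct acpi_device *adev = data;
	struct nfit_mem *nfit_mem;

	if (event != NFIT_NOTIFY_DIMM_HEALTH)
		return;

	nfit_mem = dev_get_drvdata(&adev->dev);
	if (nfit_mem && nfit_mem->flags_attr)
		sysfs_notify_dirent(nfit_mem->flags_attr);
}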
+167 -34
drivers/nvdimm/btt.c
···
 	return ret;
 }
 
-static int btt_log_read_pair(struct arena_info *arena, u32 lane,
-			struct log_entry *ent)
+static int btt_log_group_read(struct arena_info *arena, u32 lane,
+			struct log_group *log)
 {
 	return arena_read_bytes(arena,
-			arena->logoff + (2 * lane * LOG_ENT_SIZE), ent,
-			2 * LOG_ENT_SIZE, 0);
+			arena->logoff + (lane * LOG_GRP_SIZE), log,
+			LOG_GRP_SIZE, 0);
 }
 
 static struct dentry *debugfs_root;
···
 	debugfs_create_x64("logoff", S_IRUGO, d, &a->logoff);
 	debugfs_create_x64("info2off", S_IRUGO, d, &a->info2off);
 	debugfs_create_x32("flags", S_IRUGO, d, &a->flags);
+	debugfs_create_u32("log_index_0", S_IRUGO, d, &a->log_index[0]);
+	debugfs_create_u32("log_index_1", S_IRUGO, d, &a->log_index[1]);
 }
 
 static void btt_debugfs_init(struct btt *btt)
···
 	}
 }
 
+static u32 log_seq(struct log_group *log, int log_idx)
+{
+	return le32_to_cpu(log->ent[log_idx].seq);
+}
+
 /*
  * This function accepts two log entries, and uses the
  * sequence number to find the 'older' entry.
···
  *
  * TODO The logic feels a bit kludge-y. make it better..
  */
-static int btt_log_get_old(struct log_entry *ent)
+static int btt_log_get_old(struct arena_info *a, struct log_group *log)
 {
+	int idx0 = a->log_index[0];
+	int idx1 = a->log_index[1];
 	int old;
 
 	/*
···
 	 * the next time, the following logic works out to put this
 	 * (next) entry into [1]
 	 */
-	if (ent[0].seq == 0) {
-		ent[0].seq = cpu_to_le32(1);
+	if (log_seq(log, idx0) == 0) {
+		log->ent[idx0].seq = cpu_to_le32(1);
 		return 0;
 	}
 
-	if (ent[0].seq == ent[1].seq)
+	if (log_seq(log, idx0) == log_seq(log, idx1))
 		return -EINVAL;
-	if (le32_to_cpu(ent[0].seq) + le32_to_cpu(ent[1].seq) > 5)
+	if (log_seq(log, idx0) + log_seq(log, idx1) > 5)
 		return -EINVAL;
 
-	if (le32_to_cpu(ent[0].seq) < le32_to_cpu(ent[1].seq)) {
-		if (le32_to_cpu(ent[1].seq) - le32_to_cpu(ent[0].seq) == 1)
+	if (log_seq(log, idx0) < log_seq(log, idx1)) {
+		if ((log_seq(log, idx1) - log_seq(log, idx0)) == 1)
 			old = 0;
 		else
 			old = 1;
 	} else {
-		if (le32_to_cpu(ent[0].seq) - le32_to_cpu(ent[1].seq) == 1)
+		if ((log_seq(log, idx0) - log_seq(log, idx1)) == 1)
 			old = 1;
 		else
 			old = 0;
···
 {
 	int ret;
 	int old_ent, ret_ent;
-	struct log_entry log[2];
+	struct log_group log;
 
-	ret = btt_log_read_pair(arena, lane, log);
+	ret = btt_log_group_read(arena, lane, &log);
 	if (ret)
 		return -EIO;
 
-	old_ent = btt_log_get_old(log);
+	old_ent = btt_log_get_old(arena, &log);
 	if (old_ent < 0 || old_ent > 1) {
 		dev_err(to_dev(arena),
 			"log corruption (%d): lane %d seq [%d, %d]\n",
-			old_ent, lane, log[0].seq, log[1].seq);
+			old_ent, lane, log.ent[arena->log_index[0]].seq,
+			log.ent[arena->log_index[1]].seq);
 		/* TODO set error state? */
 		return -EIO;
 	}
···
 	ret_ent = (old_flag ? old_ent : (1 - old_ent));
 
 	if (ent != NULL)
-		memcpy(ent, &log[ret_ent], LOG_ENT_SIZE);
+		memcpy(ent, &log.ent[arena->log_index[ret_ent]], LOG_ENT_SIZE);
 
 	return ret_ent;
 }
···
 		u32 sub, struct log_entry *ent, unsigned long flags)
 {
 	int ret;
-	/*
-	 * Ignore the padding in log_entry for calculating log_half.
-	 * The entry is 'committed' when we write the sequence number,
-	 * and we want to ensure that that is the last thing written.
-	 * We don't bother writing the padding as that would be extra
-	 * media wear and write amplification
-	 */
-	unsigned int log_half = (LOG_ENT_SIZE - 2 * sizeof(u64)) / 2;
-	u64 ns_off = arena->logoff + (((2 * lane) + sub) * LOG_ENT_SIZE);
+	u32 group_slot = arena->log_index[sub];
+	unsigned int log_half = LOG_ENT_SIZE / 2;
 	void *src = ent;
+	u64 ns_off;
 
+	ns_off = arena->logoff + (lane * LOG_GRP_SIZE) +
+		(group_slot * LOG_ENT_SIZE);
 	/* split the 16B write into atomic, durable halves */
 	ret = arena_write_bytes(arena, ns_off, src, log_half, flags);
 	if (ret)
···
 {
 	size_t logsize = arena->info2off - arena->logoff;
 	size_t chunk_size = SZ_4K, offset = 0;
-	struct log_entry log;
+	struct log_entry ent;
 	void *zerobuf;
 	int ret;
 	u32 i;
···
 	}
 
 	for (i = 0; i < arena->nfree; i++) {
-		log.lba = cpu_to_le32(i);
-		log.old_map = cpu_to_le32(arena->external_nlba + i);
-		log.new_map = cpu_to_le32(arena->external_nlba + i);
-		log.seq = cpu_to_le32(LOG_SEQ_INIT);
-		ret = __btt_log_write(arena, i, 0, &log, 0);
+		ent.lba = cpu_to_le32(i);
+		ent.old_map = cpu_to_le32(arena->external_nlba + i);
+		ent.new_map = cpu_to_le32(arena->external_nlba + i);
+		ent.seq = cpu_to_le32(LOG_SEQ_INIT);
+		ret = __btt_log_write(arena, i, 0, &ent, 0);
 		if (ret)
 			goto free;
 	}
···
 	return 0;
 }
 
+static bool ent_is_padding(struct log_entry *ent)
+{
+	return (ent->lba == 0) && (ent->old_map == 0) && (ent->new_map == 0)
+		&& (ent->seq == 0);
+}
+
+/*
+ * Detecting valid log indices: We read a log group (see the comments in btt.h
+ * for a description of a 'log_group' and its 'slots'), and iterate over its
+ * four slots. We expect that a padding slot will be all-zeroes, and use this
+ * to detect a padding slot vs. an actual entry.
+ *
+ * If a log_group is in the initial state, i.e. hasn't been used since the
+ * creation of this BTT layout, it will have three of the four slots with
+ * zeroes. We skip over these log_groups for the detection of log_index. If
+ * all log_groups are in the initial state (i.e. the BTT has never been
+ * written to), it is safe to assume the 'new format' of log entries in slots
+ * (0, 1).
+ */
+static int log_set_indices(struct arena_info *arena)
+{
+	bool idx_set = false, initial_state = true;
+	int ret, log_index[2] = {-1, -1};
+	u32 i, j, next_idx = 0;
+	struct log_group log;
+	u32 pad_count = 0;
+
+	for (i = 0; i < arena->nfree; i++) {
+		ret = btt_log_group_read(arena, i, &log);
+		if (ret < 0)
+			return ret;
+
+		for (j = 0; j < 4; j++) {
+			if (!idx_set) {
+				if (ent_is_padding(&log.ent[j])) {
+					pad_count++;
+					continue;
+				} else {
+					/* Skip if index has been recorded */
+					if ((next_idx == 1) &&
+							(j == log_index[0]))
+						continue;
+					/* valid entry, record index */
+					log_index[next_idx] = j;
+					next_idx++;
+				}
+				if (next_idx == 2) {
+					/* two valid entries found */
+					idx_set = true;
+				} else if (next_idx > 2) {
+					/* too many valid indices */
+					return -ENXIO;
+				}
+			} else {
+				/*
+				 * once the indices have been set, just verify
+				 * that all subsequent log groups are either in
+				 * their initial state or follow the same
+				 * indices.
+				 */
+				if (j == log_index[0]) {
+					/* entry must be 'valid' */
+					if (ent_is_padding(&log.ent[j]))
+						return -ENXIO;
+				} else if (j == log_index[1]) {
+					;
+					/*
+					 * log_index[1] can be padding if the
+					 * lane never got used and it is still
+					 * in the initial state (three 'padding'
+					 * entries)
+					 */
+				} else {
+					/* entry must be invalid (padding) */
+					if (!ent_is_padding(&log.ent[j]))
+						return -ENXIO;
+				}
+			}
+		}
+		/*
+		 * If any of the log_groups have more than one valid,
+		 * non-padding entry, then we are no longer in the
+		 * initial_state
+		 */
+		if (pad_count < 3)
+			initial_state = false;
+		pad_count = 0;
+	}
+
+	if (!initial_state && !idx_set)
+		return -ENXIO;
+
+	/*
+	 * If all the entries in the log were in the initial state,
+	 * assume new padding scheme
+	 */
+	if (initial_state)
+		log_index[1] = 1;
+
+	/*
+	 * Only allow the known permutations of log/padding indices,
+	 * i.e. (0, 1), and (0, 2)
+	 */
+	if ((log_index[0] == 0) && ((log_index[1] == 1) || (log_index[1] == 2)))
+		; /* known index possibilities */
+	else {
+		dev_err(to_dev(arena), "Found an unknown padding scheme\n");
+		return -ENXIO;
+	}
+
+	arena->log_index[0] = log_index[0];
+	arena->log_index[1] = log_index[1];
+	dev_dbg(to_dev(arena), "log_index_0 = %d\n", log_index[0]);
+	dev_dbg(to_dev(arena), "log_index_1 = %d\n", log_index[1]);
+	return 0;
+}
+
 static int btt_rtt_init(struct arena_info *arena)
 {
 	arena->rtt = kcalloc(arena->nfree, sizeof(u32), GFP_KERNEL);
···
 	available -= 2 * BTT_PG_SIZE;
 
 	/* The log takes a fixed amount of space based on nfree */
-	logsize = roundup(2 * arena->nfree * sizeof(struct log_entry),
-			BTT_PG_SIZE);
+	logsize = roundup(arena->nfree * LOG_GRP_SIZE, BTT_PG_SIZE);
 	available -= logsize;
 
 	/* Calculate optimal split between map and data area */
···
 	arena->mapoff = arena->dataoff + datasize;
 	arena->logoff = arena->mapoff + mapsize;
 	arena->info2off = arena->logoff + logsize;
+
+	/* Default log indices are (0,1) */
+	arena->log_index[0] = 0;
+	arena->log_index[1] = 1;
 	return arena;
 }
···
 	arena->external_lba_start = cur_nlba;
 	parse_arena_meta(arena, super, cur_off);
+
+	ret = log_set_indices(arena);
+	if (ret) {
+		dev_err(to_dev(arena),
+			"Unable to deduce log/padding indices\n");
+		goto out;
+	}
 
 	mutex_init(&arena->err_lock);
 	ret = btt_freelist_init(arena);
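The old/new selection logic above is self-contained enough to model outside the kernel. Below is a standalone sketch of btt_log_get_old()'s arithmetic (host-endian values stand in for __le32, no media access): sequence numbers cycle 1 -> 2 -> 3 -> 1, so a valid pair differs by exactly one step, and two in-range values can never sum past 5.

/* Standalone model of btt_log_get_old()'s sequence arithmetic.
 * Returns the index of the "older" entry, or -1 for an impossible
 * pair. The kernel version additionally repairs the seq == 0 case. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static int get_old(uint32_t seq0, uint32_t seq1)
{
	if (seq0 == 0)
		return 0;	/* uninitialized lane: treat [0] as old */
	if (seq0 == seq1 || seq0 + seq1 > 5)
		return -1;	/* corrupt: equal or out-of-range seqs */
	if (seq0 < seq1)
		return (seq1 - seq0 == 1) ? 0 : 1;
	return (seq0 - seq1 == 1) ? 1 : 0;
}

int main(void)
{
	assert(get_old(1, 2) == 0);	/* 2 is one step newer */
	assert(get_old(3, 1) == 0);	/* wrap: 1 follows 3 in the cycle */
	assert(get_old(1, 3) == 1);	/* wrap the other way: 3 is older */
	assert(get_old(3, 2) == 1);	/* 2 is one step older */
	assert(get_old(2, 2) == -1);	/* equal seqs are corrupt */
	assert(get_old(2, 4) == -1);	/* 4 is out of range: sum > 5 */
	printf("sequence arithmetic behaves as expected\n");
	return 0;
}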
+46 -1
drivers/nvdimm/btt.h
···
 #define MAP_ERR_MASK (1 << MAP_ERR_SHIFT)
 #define MAP_LBA_MASK (~((1 << MAP_TRIM_SHIFT) | (1 << MAP_ERR_SHIFT)))
 #define MAP_ENT_NORMAL 0xC0000000
+#define LOG_GRP_SIZE sizeof(struct log_group)
 #define LOG_ENT_SIZE sizeof(struct log_entry)
 #define ARENA_MIN_SIZE (1UL << 24)	/* 16 MB */
 #define ARENA_MAX_SIZE (1ULL << 39)	/* 512 GB */
···
 	INIT_READY
 };
 
+/*
+ * A log group represents one log 'lane', and consists of four log entries.
+ * Two of the four entries are valid entries, and the remaining two are
+ * padding. Due to an old bug in the padding location, we need to perform a
+ * test to determine the padding scheme being used, and use that scheme
+ * thereafter.
+ *
+ * In kernels prior to 4.15, 'log group' would have actual log entries at
+ * indices (0, 2) and padding at indices (1, 3), whereas the correct/updated
+ * format has log entries at indices (0, 1) and padding at indices (2, 3).
+ *
+ * Old (pre 4.15) format:
+ * +-----------------+-----------------+
+ * |      ent[0]     |      ent[1]     |
+ * |       16B       |       16B       |
+ * | lba/old/new/seq |       pad       |
+ * +-----------------------------------+
+ * |      ent[2]     |      ent[3]     |
+ * |       16B       |       16B       |
+ * | lba/old/new/seq |       pad       |
+ * +-----------------+-----------------+
+ *
+ * New format:
+ * +-----------------+-----------------+
+ * |      ent[0]     |      ent[1]     |
+ * |       16B       |       16B       |
+ * | lba/old/new/seq | lba/old/new/seq |
+ * +-----------------------------------+
+ * |      ent[2]     |      ent[3]     |
+ * |       16B       |       16B       |
+ * |       pad       |       pad       |
+ * +-----------------+-----------------+
+ *
+ * We detect during start-up which format is in use, and set
+ * arena->log_index[(0, 1)] with the detected format.
+ */
+
 struct log_entry {
 	__le32 lba;
 	__le32 old_map;
 	__le32 new_map;
 	__le32 seq;
-	__le64 padding[2];
+};
+
+struct log_group {
+	struct log_entry ent[4];
 };
 
 struct btt_sb {
···
 * @list:		List head for list of arenas
 * @debugfs_dir:	Debugfs dentry
 * @flags:		Arena flags - may signify error states.
+ * @err_lock:		Mutex for synchronizing error clearing.
+ * @log_index:		Indices of the valid log entries in a log_group
 *
 * arena_info is a per-arena handle. Once an arena is narrowed down for an
 * IO, this struct is passed around for the duration of the IO.
···
 	/* Arena flags */
 	u32 flags;
 	struct mutex err_lock;
+	int log_index[2];
 };
 
 /**
···
 * @init_lock:		Mutex used for the BTT initialization
 * @init_state:		Flag describing the initialization state for the BTT
 * @num_arenas:		Number of arenas in the BTT instance
+ * @phys_bb:		Pointer to the namespace's badblocks structure
 */
 struct btt {
 	struct gendisk *btt_disk;
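Note that the struct change keeps the 64-byte per-lane on-media footprint; only which 16-byte slots carry data moves. A compile-time sketch of those invariants, with plain uint32_t standing in for __le32 so it builds in userspace:

/* Compile-time check of the layout described in the comment above:
 * entries are 16B, a group is 64B, and the two valid entries sit at
 * byte offsets (0, 16) in the new format vs (0, 32) in the old. */
#include <stddef.h>
#include <stdint.h>

struct log_entry {
	uint32_t lba;
	uint32_t old_map;
	uint32_t new_map;
	uint32_t seq;
};

struct log_group {
	struct log_entry ent[4];
};

_Static_assert(sizeof(struct log_entry) == 16, "entry must stay 16B");
_Static_assert(sizeof(struct log_group) == 64,
	       "group must match the old 2 * (16B payload + 16B pad) footprint");
/* new format: valid slots (0, 1) -> byte offsets 0 and 16 */
_Static_assert(offsetof(struct log_group, ent[1]) == 16, "new second slot");
/* old format: valid slots (0, 2) -> byte offsets 0 and 32 */
_Static_assert(offsetof(struct log_group, ent[2]) == 32, "old second slot");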
+15 -5
drivers/nvdimm/pfn_devs.c
···
 int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
 {
 	u64 checksum, offset;
-	unsigned long align;
 	enum nd_pfn_mode mode;
 	struct nd_namespace_io *nsio;
+	unsigned long align, start_pad;
 	struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
 	struct nd_namespace_common *ndns = nd_pfn->ndns;
 	const u8 *parent_uuid = nd_dev_to_uuid(&ndns->dev);
···
 	align = le32_to_cpu(pfn_sb->align);
 	offset = le64_to_cpu(pfn_sb->dataoff);
+	start_pad = le32_to_cpu(pfn_sb->start_pad);
 	if (align == 0)
 		align = 1UL << ilog2(offset);
 	mode = le32_to_cpu(pfn_sb->mode);
···
 		return -EBUSY;
 	}
 
-	if ((align && !IS_ALIGNED(offset, align))
+	if ((align && !IS_ALIGNED(nsio->res.start + offset + start_pad, align))
 			|| !IS_ALIGNED(offset, PAGE_SIZE)) {
 		dev_err(&nd_pfn->dev,
 				"bad offset: %#llx dax disabled align: %#lx\n",
···
 	return altmap;
 }
 
+static u64 phys_pmem_align_down(struct nd_pfn *nd_pfn, u64 phys)
+{
+	return min_t(u64, PHYS_SECTION_ALIGN_DOWN(phys),
+			ALIGN_DOWN(phys, nd_pfn->align));
+}
+
 static int nd_pfn_init(struct nd_pfn *nd_pfn)
 {
 	u32 dax_label_reserve = is_nd_dax(&nd_pfn->dev) ? SZ_128K : 0;
···
 	start = nsio->res.start;
 	size = PHYS_SECTION_ALIGN_UP(start + size) - start;
 	if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM,
-				IORES_DESC_NONE) == REGION_MIXED) {
+				IORES_DESC_NONE) == REGION_MIXED
+			|| !IS_ALIGNED(start + resource_size(&nsio->res),
+				nd_pfn->align)) {
 		size = resource_size(&nsio->res);
-		end_trunc = start + size - PHYS_SECTION_ALIGN_DOWN(start + size);
+		end_trunc = start + size - phys_pmem_align_down(nd_pfn,
+				start + size);
 	}
 
 	if (start_pad + end_trunc)
-		dev_info(&nd_pfn->dev, "%s section collision, truncate %d bytes\n",
+		dev_info(&nd_pfn->dev, "%s alignment collision, truncate %d bytes\n",
 			dev_name(&ndns->dev), start_pad + end_trunc);
 
 	/*
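The validate change matters because pfn_sb->dataoff is namespace-relative: it can be perfectly aligned even when the platform starts the physical range off a 1GB boundary. A worked sketch with illustrative numbers (the IS_ALIGNED macro mirrors the kernel's; the addresses are made up):

/* Illustrative: the old namespace-relative check passes while the new
 * absolute-address check (start + offset + start_pad) fails, which is
 * exactly the platform misalignment the fix detects. */
#include <stdint.h>
#include <stdio.h>

#define IS_ALIGNED(x, a)	(((x) & ((a) - 1)) == 0)

int main(void)
{
	uint64_t align = 1ULL << 30;			/* 1GB alignment */
	uint64_t res_start = (4ULL << 30) + (16ULL << 20); /* 4GB + 16MiB */
	uint64_t start_pad = 0;
	uint64_t offset = 1ULL << 30;			/* aligned, relative */

	printf("old check (offset only):      %s\n",
	       IS_ALIGNED(offset, align) ? "pass" : "fail");
	printf("new check (absolute address): %s\n",
	       IS_ALIGNED(res_start + offset + start_pad, align) ?
	       "pass" : "fail");
	return 0;
}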