Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'attr-leaf-freemap-fixes-7.0_2026-01-25' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into xfs-7.0-merge

xfs: fix problems in the attr leaf freemap code [1/3]

Running generic/753 for hours revealed data corruption problems in the
attr leaf block space management code. Under certain circumstances,
freemap entries are left with zero size but a nonzero offset. If that
offset happens to be the same offset as the end of the entries array
during an attr set operation, the leaf entry table expansion will push
the freemap record offset upwards without checking for overlap with any
other freemap entries. If there happened to be a second freemap entry
overlapping with the newly allocated leaf entry space, then the next
attr set operation might find that space and overwrite the leaf entry,
thereby corrupting the leaf block.

Fix this by zeroing the freemap offset any time we set the size to zero.
If a subsequent attr set operation finds no space in the freemap, it
will compact the block and regenerate the freemaps.

With a bit of luck, this should all go splendidly.

Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
Signed-off-by: Carlos Maiolino <cem@kernel.org>

+155 -63
+123 -34
fs/xfs/libxfs/xfs_attr_leaf.c
··· 75 75 int move_count); 76 76 STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index); 77 77 78 + /* Compute the byte offset of the end of the leaf entry array. */ 79 + static inline int 80 + xfs_attr_leaf_entries_end( 81 + unsigned int hdrcount, 82 + const struct xfs_attr_leafblock *leaf) 83 + { 84 + return hdrcount * sizeof(struct xfs_attr_leaf_entry) + 85 + xfs_attr3_leaf_hdr_size(leaf); 86 + } 87 + 88 + static inline bool 89 + ichdr_freemaps_overlap( 90 + const struct xfs_attr3_icleaf_hdr *ichdr, 91 + unsigned int x, 92 + unsigned int y) 93 + { 94 + const unsigned int xend = 95 + ichdr->freemap[x].base + ichdr->freemap[x].size; 96 + const unsigned int yend = 97 + ichdr->freemap[y].base + ichdr->freemap[y].size; 98 + 99 + /* empty slots do not overlap */ 100 + if (!ichdr->freemap[x].size || !ichdr->freemap[y].size) 101 + return false; 102 + 103 + return ichdr->freemap[x].base < yend && xend > ichdr->freemap[y].base; 104 + } 105 + 106 + static inline xfs_failaddr_t 107 + xfs_attr_leaf_ichdr_freemaps_verify( 108 + const struct xfs_attr3_icleaf_hdr *ichdr, 109 + const struct xfs_attr_leafblock *leaf) 110 + { 111 + unsigned int entries_end = 112 + xfs_attr_leaf_entries_end(ichdr->count, leaf); 113 + int i; 114 + 115 + if (ichdr_freemaps_overlap(ichdr, 0, 1)) 116 + return __this_address; 117 + if (ichdr_freemaps_overlap(ichdr, 0, 2)) 118 + return __this_address; 119 + if (ichdr_freemaps_overlap(ichdr, 1, 2)) 120 + return __this_address; 121 + 122 + for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { 123 + if (ichdr->freemap[i].size > 0 && 124 + ichdr->freemap[i].base < entries_end) 125 + return __this_address; 126 + } 127 + 128 + return NULL; 129 + } 130 + 78 131 /* 79 132 * attr3 block 'firstused' conversion helpers. 
80 133 * ··· 271 218 hdr3->freemap[i].base = cpu_to_be16(from->freemap[i].base); 272 219 hdr3->freemap[i].size = cpu_to_be16(from->freemap[i].size); 273 220 } 221 + 222 + ASSERT(xfs_attr_leaf_ichdr_freemaps_verify(from, to) == NULL); 274 223 return; 275 224 } 276 225 to->hdr.info.forw = cpu_to_be32(from->forw); ··· 288 233 to->hdr.freemap[i].base = cpu_to_be16(from->freemap[i].base); 289 234 to->hdr.freemap[i].size = cpu_to_be16(from->freemap[i].size); 290 235 } 236 + 237 + ASSERT(xfs_attr_leaf_ichdr_freemaps_verify(from, to) == NULL); 291 238 } 292 239 293 240 static xfs_failaddr_t ··· 441 384 if (end > mp->m_attr_geo->blksize) 442 385 return __this_address; 443 386 } 387 + 388 + fa = xfs_attr_leaf_ichdr_freemaps_verify(&ichdr, leaf); 389 + if (fa) 390 + return fa; 444 391 445 392 return NULL; 446 393 } ··· 1470 1409 * Search through freemap for first-fit on new name length. 1471 1410 * (may need to figure in size of entry struct too) 1472 1411 */ 1473 - tablesize = (ichdr.count + 1) * sizeof(xfs_attr_leaf_entry_t) 1474 - + xfs_attr3_leaf_hdr_size(leaf); 1412 + tablesize = xfs_attr_leaf_entries_end(ichdr.count + 1, leaf); 1475 1413 for (sum = 0, i = XFS_ATTR_LEAF_MAPSIZE - 1; i >= 0; i--) { 1476 1414 if (tablesize > ichdr.firstused) { 1477 1415 sum += ichdr.freemap[i].size; ··· 1536 1476 struct xfs_attr_leaf_name_local *name_loc; 1537 1477 struct xfs_attr_leaf_name_remote *name_rmt; 1538 1478 struct xfs_mount *mp; 1479 + int old_end, new_end; 1539 1480 int tmp; 1540 1481 int i; 1541 1482 ··· 1629 1568 if (be16_to_cpu(entry->nameidx) < ichdr->firstused) 1630 1569 ichdr->firstused = be16_to_cpu(entry->nameidx); 1631 1570 1632 - ASSERT(ichdr->firstused >= ichdr->count * sizeof(xfs_attr_leaf_entry_t) 1633 - + xfs_attr3_leaf_hdr_size(leaf)); 1634 - tmp = (ichdr->count - 1) * sizeof(xfs_attr_leaf_entry_t) 1635 - + xfs_attr3_leaf_hdr_size(leaf); 1571 + new_end = xfs_attr_leaf_entries_end(ichdr->count, leaf); 1572 + old_end = new_end - sizeof(struct xfs_attr_leaf_entry); 
1573 + 1574 + ASSERT(ichdr->firstused >= new_end); 1636 1575 1637 1576 for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { 1638 - if (ichdr->freemap[i].base == tmp) { 1639 - ichdr->freemap[i].base += sizeof(xfs_attr_leaf_entry_t); 1577 + int diff = 0; 1578 + 1579 + if (ichdr->freemap[i].base == old_end) { 1580 + /* 1581 + * This freemap entry starts at the old end of the 1582 + * leaf entry array, so we need to adjust its base 1583 + * upward to accommodate the larger array. 1584 + */ 1585 + diff = sizeof(struct xfs_attr_leaf_entry); 1586 + } else if (ichdr->freemap[i].size > 0 && 1587 + ichdr->freemap[i].base < new_end) { 1588 + /* 1589 + * This freemap entry starts in the space claimed by 1590 + * the new leaf entry. Adjust its base upward to 1591 + * reflect that. 1592 + */ 1593 + diff = new_end - ichdr->freemap[i].base; 1594 + } 1595 + 1596 + if (diff) { 1597 + ichdr->freemap[i].base += diff; 1640 1598 ichdr->freemap[i].size -= 1641 - min_t(uint16_t, ichdr->freemap[i].size, 1642 - sizeof(xfs_attr_leaf_entry_t)); 1599 + min_t(uint16_t, ichdr->freemap[i].size, diff); 1600 + } 1601 + 1602 + /* 1603 + * Don't leave zero-length freemaps with nonzero base lying 1604 + * around, because we don't want the code in _remove that 1605 + * matches on base address to get confused and create 1606 + * overlapping freemaps. If we end up with no freemap entries 1607 + * then the next _add will compact the leaf block and 1608 + * regenerate the freemaps. 
1609 + */ 1610 + if (ichdr->freemap[i].size == 0 && ichdr->freemap[i].base > 0) { 1611 + ichdr->freemap[i].base = 0; 1612 + ichdr->holes = 1; 1643 1613 } 1644 1614 } 1645 1615 ichdr->usedbytes += xfs_attr_leaf_entsize(leaf, args->index); ··· 1715 1623 ichdr_dst->freemap[0].base = xfs_attr3_leaf_hdr_size(leaf_src); 1716 1624 ichdr_dst->freemap[0].size = ichdr_dst->firstused - 1717 1625 ichdr_dst->freemap[0].base; 1626 + ichdr_dst->freemap[1].base = 0; 1627 + ichdr_dst->freemap[2].base = 0; 1628 + ichdr_dst->freemap[1].size = 0; 1629 + ichdr_dst->freemap[2].size = 0; 1718 1630 1719 1631 /* write the header back to initialise the underlying buffer */ 1720 1632 xfs_attr3_leaf_hdr_to_disk(args->geo, leaf_dst, ichdr_dst); ··· 1870 1774 /* 1871 1775 * leaf2 is the destination, compact it if it looks tight. 1872 1776 */ 1873 - max = ichdr2.firstused - xfs_attr3_leaf_hdr_size(leaf1); 1874 - max -= ichdr2.count * sizeof(xfs_attr_leaf_entry_t); 1777 + max = ichdr2.firstused - 1778 + xfs_attr_leaf_entries_end(ichdr2.count, leaf1); 1875 1779 if (space > max) 1876 1780 xfs_attr3_leaf_compact(args, &ichdr2, blk2->bp); 1877 1781 ··· 1899 1803 /* 1900 1804 * leaf1 is the destination, compact it if it looks tight. 
1901 1805 */ 1902 - max = ichdr1.firstused - xfs_attr3_leaf_hdr_size(leaf1); 1903 - max -= ichdr1.count * sizeof(xfs_attr_leaf_entry_t); 1806 + max = ichdr1.firstused - 1807 + xfs_attr_leaf_entries_end(ichdr1.count, leaf1); 1904 1808 if (space > max) 1905 1809 xfs_attr3_leaf_compact(args, &ichdr1, blk1->bp); 1906 1810 ··· 2106 2010 blk = &state->path.blk[ state->path.active-1 ]; 2107 2011 leaf = blk->bp->b_addr; 2108 2012 xfs_attr3_leaf_hdr_from_disk(state->args->geo, &ichdr, leaf); 2109 - bytes = xfs_attr3_leaf_hdr_size(leaf) + 2110 - ichdr.count * sizeof(xfs_attr_leaf_entry_t) + 2111 - ichdr.usedbytes; 2013 + bytes = xfs_attr_leaf_entries_end(ichdr.count, leaf) + ichdr.usedbytes; 2112 2014 if (bytes > (state->args->geo->blksize >> 1)) { 2113 2015 *action = 0; /* blk over 50%, don't try to join */ 2114 2016 return 0; ··· 2164 2070 bytes = state->args->geo->blksize - 2165 2071 (state->args->geo->blksize >> 2) - 2166 2072 ichdr.usedbytes - ichdr2.usedbytes - 2167 - ((ichdr.count + ichdr2.count) * 2168 - sizeof(xfs_attr_leaf_entry_t)) - 2169 - xfs_attr3_leaf_hdr_size(leaf); 2073 + xfs_attr_leaf_entries_end(ichdr.count + ichdr2.count, 2074 + leaf); 2170 2075 2171 2076 xfs_trans_brelse(state->args->trans, bp); 2172 2077 if (bytes >= 0) ··· 2227 2134 2228 2135 ASSERT(ichdr.count > 0 && ichdr.count < args->geo->blksize / 8); 2229 2136 ASSERT(args->index >= 0 && args->index < ichdr.count); 2230 - ASSERT(ichdr.firstused >= ichdr.count * sizeof(*entry) + 2231 - xfs_attr3_leaf_hdr_size(leaf)); 2137 + ASSERT(ichdr.firstused >= xfs_attr_leaf_entries_end(ichdr.count, leaf)); 2232 2138 2233 2139 entry = &xfs_attr3_leaf_entryp(leaf)[args->index]; 2234 2140 ··· 2240 2148 * find smallest free region in case we need to replace it, 2241 2149 * adjust any map that borders the entry table, 2242 2150 */ 2243 - tablesize = ichdr.count * sizeof(xfs_attr_leaf_entry_t) 2244 - + xfs_attr3_leaf_hdr_size(leaf); 2151 + tablesize = xfs_attr_leaf_entries_end(ichdr.count, leaf); 2245 2152 tmp = 
ichdr.freemap[0].size; 2246 2153 before = after = -1; 2247 2154 smallest = XFS_ATTR_LEAF_MAPSIZE - 1; ··· 2347 2256 * Check if leaf is less than 50% full, caller may want to 2348 2257 * "join" the leaf with a sibling if so. 2349 2258 */ 2350 - tmp = ichdr.usedbytes + xfs_attr3_leaf_hdr_size(leaf) + 2351 - ichdr.count * sizeof(xfs_attr_leaf_entry_t); 2259 + tmp = ichdr.usedbytes + xfs_attr_leaf_entries_end(ichdr.count, leaf); 2352 2260 2353 2261 return tmp < args->geo->magicpct; /* leaf is < 37% full */ 2354 2262 } ··· 2670 2580 ichdr_s->magic == XFS_ATTR3_LEAF_MAGIC); 2671 2581 ASSERT(ichdr_s->magic == ichdr_d->magic); 2672 2582 ASSERT(ichdr_s->count > 0 && ichdr_s->count < args->geo->blksize / 8); 2673 - ASSERT(ichdr_s->firstused >= (ichdr_s->count * sizeof(*entry_s)) 2674 - + xfs_attr3_leaf_hdr_size(leaf_s)); 2583 + ASSERT(ichdr_s->firstused >= 2584 + xfs_attr_leaf_entries_end(ichdr_s->count, leaf_s)); 2675 2585 ASSERT(ichdr_d->count < args->geo->blksize / 8); 2676 - ASSERT(ichdr_d->firstused >= (ichdr_d->count * sizeof(*entry_d)) 2677 - + xfs_attr3_leaf_hdr_size(leaf_d)); 2586 + ASSERT(ichdr_d->firstused >= 2587 + xfs_attr_leaf_entries_end(ichdr_d->count, leaf_d)); 2678 2588 2679 2589 ASSERT(start_s < ichdr_s->count); 2680 2590 ASSERT(start_d <= ichdr_d->count); ··· 2734 2644 ichdr_d->usedbytes += tmp; 2735 2645 ichdr_s->count -= 1; 2736 2646 ichdr_d->count += 1; 2737 - tmp = ichdr_d->count * sizeof(xfs_attr_leaf_entry_t) 2738 - + xfs_attr3_leaf_hdr_size(leaf_d); 2647 + tmp = xfs_attr_leaf_entries_end(ichdr_d->count, leaf_d); 2739 2648 ASSERT(ichdr_d->firstused >= tmp); 2740 2649 #ifdef GROT 2741 2650 } ··· 2770 2681 /* 2771 2682 * Fill in the freemap information 2772 2683 */ 2773 - ichdr_d->freemap[0].base = xfs_attr3_leaf_hdr_size(leaf_d); 2774 - ichdr_d->freemap[0].base += ichdr_d->count * sizeof(xfs_attr_leaf_entry_t); 2684 + ichdr_d->freemap[0].base = 2685 + xfs_attr_leaf_entries_end(ichdr_d->count, leaf_d); 2775 2686 ichdr_d->freemap[0].size = 
ichdr_d->firstused - ichdr_d->freemap[0].base; 2776 2687 ichdr_d->freemap[1].base = 0; 2777 2688 ichdr_d->freemap[2].base = 0;
+1 -1
fs/xfs/libxfs/xfs_da_format.h
··· 746 746 #define XFS_ATTR_LEAF_NAME_ALIGN ((uint)sizeof(xfs_dablk_t)) 747 747 748 748 static inline int 749 - xfs_attr3_leaf_hdr_size(struct xfs_attr_leafblock *leafp) 749 + xfs_attr3_leaf_hdr_size(const struct xfs_attr_leafblock *leafp) 750 750 { 751 751 if (leafp->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC)) 752 752 return sizeof(struct xfs_attr3_leaf_hdr);
+31 -28
fs/xfs/scrub/attr.c
··· 288 288 } 289 289 290 290 /* 291 - * Check the leaf freemap from the usage bitmap. Returns false if the 292 - * attr freemap has problems or points to used space. 293 - */ 294 - STATIC bool 295 - xchk_xattr_check_freemap( 296 - struct xfs_scrub *sc, 297 - struct xfs_attr3_icleaf_hdr *leafhdr) 298 - { 299 - struct xchk_xattr_buf *ab = sc->buf; 300 - unsigned int mapsize = sc->mp->m_attr_geo->blksize; 301 - int i; 302 - 303 - /* Construct bitmap of freemap contents. */ 304 - bitmap_zero(ab->freemap, mapsize); 305 - for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { 306 - if (!xchk_xattr_set_map(sc, ab->freemap, 307 - leafhdr->freemap[i].base, 308 - leafhdr->freemap[i].size)) 309 - return false; 310 - } 311 - 312 - /* Look for bits that are set in freemap and are marked in use. */ 313 - return !bitmap_intersects(ab->freemap, ab->usedmap, mapsize); 314 - } 315 - 316 - /* 317 291 * Check this leaf entry's relations to everything else. 318 292 * Returns the number of bytes used for the name/value data. 319 293 */ ··· 338 364 rentry = xfs_attr3_leaf_name_remote(leaf, idx); 339 365 namesize = xfs_attr_leaf_entsize_remote(rentry->namelen); 340 366 name_end = (char *)rentry + namesize; 341 - if (rentry->namelen == 0 || rentry->valueblk == 0) 367 + if (rentry->namelen == 0) 368 + xchk_da_set_corrupt(ds, level); 369 + if (rentry->valueblk == 0 && 370 + !(ent->flags & XFS_ATTR_INCOMPLETE)) 342 371 xchk_da_set_corrupt(ds, level); 343 372 } 344 373 if (name_end > buf_end) ··· 380 403 381 404 *last_checked = blk->blkno; 382 405 bitmap_zero(ab->usedmap, mp->m_attr_geo->blksize); 406 + bitmap_zero(ab->freemap, mp->m_attr_geo->blksize); 383 407 384 408 /* Check all the padding. 
*/ 385 409 if (xfs_has_crc(ds->sc->mp)) { ··· 427 449 if ((char *)&entries[leafhdr.count] > (char *)leaf + leafhdr.firstused) 428 450 xchk_da_set_corrupt(ds, level); 429 451 452 + if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 453 + goto out; 454 + 430 455 buf_end = (char *)bp->b_addr + mp->m_attr_geo->blksize; 431 456 for (i = 0, ent = entries; i < leafhdr.count; ent++, i++) { 432 457 /* Mark the leaf entry itself. */ ··· 448 467 goto out; 449 468 } 450 469 451 - if (!xchk_xattr_check_freemap(ds->sc, &leafhdr)) 470 + /* Construct bitmap of freemap contents. */ 471 + for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { 472 + if (!xchk_xattr_set_map(ds->sc, ab->freemap, 473 + leafhdr.freemap[i].base, 474 + leafhdr.freemap[i].size)) 475 + xchk_da_set_corrupt(ds, level); 476 + 477 + /* 478 + * freemap entries with zero length and nonzero base can cause 479 + * problems with older kernels, so we mark these for preening 480 + * even though there's no inconsistency. 481 + */ 482 + if (leafhdr.freemap[i].size == 0 && 483 + leafhdr.freemap[i].base > 0) 484 + xchk_da_set_preen(ds, level); 485 + 486 + if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 487 + goto out; 488 + } 489 + 490 + /* Look for bits that are set in freemap and are marked in use. */ 491 + if (bitmap_intersects(ab->freemap, ab->usedmap, 492 + mp->m_attr_geo->blksize)) 452 493 xchk_da_set_corrupt(ds, level); 453 494 454 495 if (leafhdr.usedbytes != usedbytes)