Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

fs/ntfs3: allow readdir() to finish after directory mutations without rewinddir()

This patch introduces a per-directory version counter that increments on
each directory modification (indx_insert_entry() / indx_delete_entry()).
ntfs_readdir() uses this version to detect whether the directory has
changed since enumeration began. If readdir() reaches end-of-directory
but the version has changed, the walk restarts from the beginning of the
index tree instead of returning prematurely. This lets tools that remove
entries while enumerating them (a readdir()/unlinkat() loop) empty the
directory without having to call rewinddir().

Prior to this change, bonnie++ directory operations could fail due to
premature termination of readdir() during concurrent index updates.
With this patch applied, bonnie++ completes successfully with no errors.

Signed-off-by: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>

+76 -29
+73 -29
fs/ntfs3/dir.c
··· 392 392 * ntfs_readdir - file_operations::iterate_shared 393 393 * 394 394 * Use non sorted enumeration. 395 - * We have an example of broken volume where sorted enumeration 396 - * counts each name twice. 395 + * Sorted enumeration may result infinite loop if names tree contains loop. 397 396 */ 398 397 static int ntfs_readdir(struct file *file, struct dir_context *ctx) 399 398 { 400 399 const struct INDEX_ROOT *root; 401 - u64 vbo; 402 400 size_t bit; 403 - loff_t eod; 404 401 int err = 0; 405 402 struct inode *dir = file_inode(file); 406 403 struct ntfs_inode *ni = ntfs_i(dir); 407 404 struct super_block *sb = dir->i_sb; 408 405 struct ntfs_sb_info *sbi = sb->s_fs_info; 409 406 loff_t i_size = i_size_read(dir); 410 - u32 pos = ctx->pos; 407 + u64 pos = ctx->pos; 411 408 u8 *name = NULL; 412 409 struct indx_node *node = NULL; 413 410 u8 index_bits = ni->dir.index_bits; 411 + size_t max_bit = i_size >> ni->dir.index_bits; 412 + loff_t eod = i_size + sbi->record_size; 414 413 415 414 /* Name is a buffer of PATH_MAX length. */ 416 415 static_assert(NTFS_NAME_LEN * 4 < PATH_MAX); 417 416 418 - eod = i_size + sbi->record_size; 417 + if (!pos) { 418 + /* 419 + * ni->dir.version increments each directory change. 420 + * Save the initial value of ni->dir.version. 421 + */ 422 + file->private_data = (void *)ni->dir.version; 423 + } 419 424 420 - if (pos >= eod) 421 - return 0; 425 + if (pos >= eod) { 426 + if (file->private_data == (void *)ni->dir.version) { 427 + /* No changes since first readdir. */ 428 + return 0; 429 + } 430 + 431 + /* 432 + * Handle directories that changed after the initial readdir(). 
433 + * 434 + * Some user space code implements recursive removal like this instead 435 + * of calling rmdir(2) directly: 436 + * 437 + * fd = opendir(path); 438 + * while ((dent = readdir(fd))) 439 + * unlinkat(dirfd(fd), dent->d_name, 0); 440 + * closedir(fd); 441 + * 442 + * POSIX leaves unspecified what readdir() should return once the 443 + * directory has been modified after opendir()/rewinddir(), so this 444 + * pattern is not guaranteed to work on all filesystems or platforms. 445 + * 446 + * In ntfs3 the internal name tree may be reshaped while entries are 447 + * being removed, so there is no stable anchor for continuing a 448 + * single-pass walk based on the original readdir() order. 449 + * 450 + * In practice some widely used tools (for example certain rm(1) 451 + * implementations) have used this readdir()/unlink() loop, and some 452 + * filesystems behave in a way that effectively makes it work in the 453 + * common case. 454 + * 455 + * The code below follows that practice and tries to provide 456 + * "rmdir-like" behaviour for such callers on ntfs3, even though the 457 + * situation is not strictly defined by the APIs. 458 + * 459 + * Apple documents the same readdir()/unlink() issue and a workaround 460 + * for HFS file systems in: 461 + * https://web.archive.org/web/20220122122948/https:/support.apple.com/kb/TA21420?locale=en_US 462 + */ 463 + ctx->pos = pos = 3; 464 + file->private_data = (void *)ni->dir.version; 465 + } 422 466 423 467 if (!dir_emit_dots(file, ctx)) 424 468 return 0; ··· 498 454 if (pos >= sbi->record_size) { 499 455 bit = (pos - sbi->record_size) >> index_bits; 500 456 } else { 457 + /* 458 + * Add each name from root in 'ctx'. 
459 + */ 501 460 err = ntfs_read_hdr(sbi, ni, &root->ihdr, 0, pos, name, ctx); 502 461 if (err) 503 462 goto out; 504 463 bit = 0; 505 464 } 506 465 507 - if (!i_size) { 508 - ctx->pos = eod; 509 - goto out; 510 - } 511 - 512 - for (;;) { 513 - vbo = (u64)bit << index_bits; 514 - if (vbo >= i_size) { 515 - ctx->pos = eod; 516 - goto out; 517 - } 518 - 466 + /* 467 + * Enumerate indexes until the end of dir. 468 + */ 469 + for (; bit < max_bit; bit += 1) { 470 + /* Get the next used index. */ 519 471 err = indx_used_bit(&ni->dir, ni, &bit); 520 472 if (err) 521 473 goto out; 522 474 523 475 if (bit == MINUS_ONE_T) { 524 - ctx->pos = eod; 525 - goto out; 476 + /* no more used indexes. end of dir. */ 477 + break; 526 478 } 527 479 528 - vbo = (u64)bit << index_bits; 529 - if (vbo >= i_size) { 480 + if (bit >= max_bit) { 481 + /* Corrupted directory. */ 530 482 err = -EINVAL; 531 483 goto out; 532 484 } ··· 532 492 if (err) 533 493 goto out; 534 494 495 + /* 496 + * Add each name from index in 'ctx'. 497 + */ 535 498 err = ntfs_read_hdr(sbi, ni, &node->index->ihdr, 536 - vbo + sbi->record_size, pos, name, ctx); 499 + ((u64)bit << index_bits) + sbi->record_size, 500 + pos, name, ctx); 537 501 if (err) 538 502 goto out; 539 - 540 - bit += 1; 541 503 } 542 504 543 505 out: 544 - 545 506 __putname(name); 546 507 put_indx_node(node); 547 508 548 - if (err == 1) { 509 + if (!err) { 510 + /* End of directory. */ 511 + ctx->pos = eod; 512 + } else if (err == 1) { 549 513 /* 'ctx' is full. */ 550 514 err = 0; 551 515 } else if (err == -ENOENT) {
+2
fs/ntfs3/index.c
··· 2002 2002 fnd->level - 1, fnd); 2003 2003 } 2004 2004 2005 + indx->version += 1; 2005 2006 out: 2006 2007 fnd_put(fnd_a); 2007 2008 out1: ··· 2650 2649 mi->dirty = true; 2651 2650 } 2652 2651 2652 + indx->version += 1; 2653 2653 out: 2654 2654 fnd_put(fnd2); 2655 2655 out1:
+1
fs/ntfs3/ntfs_fs.h
··· 191 191 struct runs_tree alloc_run; 192 192 /* read/write access to 'bitmap_run'/'alloc_run' while ntfs_readdir */ 193 193 struct rw_semaphore run_lock; 194 + size_t version; /* increment each change */ 194 195 195 196 /*TODO: Remove 'cmp'. */ 196 197 NTFS_CMP_FUNC cmp;