Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'splice' of git://brick.kernel.dk/data/git/linux-2.6-block

* 'splice' of git://brick.kernel.dk/data/git/linux-2.6-block:
[PATCH] splice: fix page stealing LRU handling.
[PATCH] splice: page stealing needs to wait_on_page_writeback()
[PATCH] splice: export generic_splice_sendpage
[PATCH] splice: add a SPLICE_F_MORE flag
[PATCH] splice: add comments documenting more of the code
[PATCH] splice: improve writeback and clean up page stealing
[PATCH] splice: fix shadow[] filling logic

+147 -42
+3 -1
fs/pipe.c
··· 95 95 { 96 96 struct page *page = buf->page; 97 97 98 + buf->flags &= ~PIPE_BUF_FLAG_STOLEN; 99 + 98 100 /* 99 101 * If nobody else uses this page, and we don't already have a 100 102 * temporary page, let's keep track of it as a one-deep ··· 126 124 static int anon_pipe_buf_steal(struct pipe_inode_info *info, 127 125 struct pipe_buffer *buf) 128 126 { 129 - buf->stolen = 1; 127 + buf->flags |= PIPE_BUF_FLAG_STOLEN; 130 128 return 0; 131 129 } 132 130
+139 -40
fs/splice.c
··· 22 22 #include <linux/pipe_fs_i.h> 23 23 #include <linux/mm_inline.h> 24 24 #include <linux/swap.h> 25 + #include <linux/writeback.h> 26 + #include <linux/buffer_head.h> 25 27 #include <linux/module.h> 28 + #include <linux/syscalls.h> 26 29 27 30 /* 28 31 * Passed to the actors ··· 37 34 loff_t pos; /* file position */ 38 35 }; 39 36 37 + /* 38 + * Attempt to steal a page from a pipe buffer. This should perhaps go into 39 + * a vm helper function, it's already simplified quite a bit by the 40 + * addition of remove_mapping(). If success is returned, the caller may 41 + * attempt to reuse this page for another destination. 42 + */ 40 43 static int page_cache_pipe_buf_steal(struct pipe_inode_info *info, 41 44 struct pipe_buffer *buf) 42 45 { 43 46 struct page *page = buf->page; 47 + struct address_space *mapping = page_mapping(page); 44 48 45 49 WARN_ON(!PageLocked(page)); 46 50 WARN_ON(!PageUptodate(page)); 47 51 48 - if (!remove_mapping(page_mapping(page), page)) 52 + /* 53 + * At least for ext2 with nobh option, we need to wait on writeback 54 + * completing on this page, since we'll remove it from the pagecache. 55 + * Otherwise truncate wont wait on the page, allowing the disk 56 + * blocks to be reused by someone else before we actually wrote our 57 + * data to them. fs corruption ensues. 58 + */ 59 + wait_on_page_writeback(page); 60 + 61 + if (PagePrivate(page)) 62 + try_to_release_page(page, mapping_gfp_mask(mapping)); 63 + 64 + if (!remove_mapping(mapping, page)) 49 65 return 1; 50 66 51 - if (PageLRU(page)) { 52 - struct zone *zone = page_zone(page); 53 - 54 - spin_lock_irq(&zone->lru_lock); 55 - BUG_ON(!PageLRU(page)); 56 - __ClearPageLRU(page); 57 - del_page_from_lru(zone, page); 58 - spin_unlock_irq(&zone->lru_lock); 59 - } 60 - 61 - buf->stolen = 1; 67 + buf->flags |= PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU; 62 68 return 0; 63 69 } 64 70 ··· 76 64 { 77 65 page_cache_release(buf->page); 78 66 buf->page = NULL; 79 - buf->stolen = 0; 67 + buf->flags &= ~(PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU); 80 68 } 81 69 82 70 static void *page_cache_pipe_buf_map(struct file *file, ··· 103 91 static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info, 104 92 struct pipe_buffer *buf) 105 93 { 106 - if (!buf->stolen) 107 - unlock_page(buf->page); 94 + unlock_page(buf->page); 108 95 kunmap(buf->page); 109 96 } 110 97 ··· 115 104 .steal = page_cache_pipe_buf_steal, 116 105 }; 117 106 107 + /* 108 + * Pipe output worker. This sets up our pipe format with the page cache 109 + * pipe buffer operations. Otherwise very similar to the regular pipe_writev(). 110 + */ 118 111 static ssize_t move_to_pipe(struct inode *inode, struct page **pages, 119 112 int nr_pages, unsigned long offset, 120 113 unsigned long len, unsigned int flags) ··· 252 237 * fill shadow[] with pages at the right locations, so we only 253 238 * have to fill holes 254 239 */ 255 - memset(shadow, 0, i * sizeof(struct page *)); 256 - for (j = 0, pidx = index; j < i; pidx++, j++) 257 - shadow[pages[j]->index - pidx] = pages[j]; 240 + memset(shadow, 0, nr_pages * sizeof(struct page *)); 241 + for (j = 0; j < i; j++) 242 + shadow[pages[j]->index - index] = pages[j]; 258 243 259 244 /* 260 245 * now fill in the holes ··· 303 288 return move_to_pipe(pipe, pages, i, offset, len, flags); 304 289 } 305 290 291 + /** 292 + * generic_file_splice_read - splice data from file to a pipe 293 + * @in: file to splice from 294 + * @pipe: pipe to splice to 295 + * @len: number of bytes to splice 296 + * @flags: splice modifier flags 297 + * 298 + * Will read pages from given file and fill them into a pipe. 299 + * 300 + */ 306 301 ssize_t generic_file_splice_read(struct file *in, struct inode *pipe, 307 302 size_t len, unsigned int flags) 308 303 { ··· 343 318 return ret; 344 319 } 345 320 321 + EXPORT_SYMBOL(generic_file_splice_read); 322 + 346 323 /* 347 - * Send 'len' bytes to socket from 'file' at position 'pos' using sendpage(). 324 + * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos' 325 + * using sendpage(). 348 326 */ 349 327 static int pipe_to_sendpage(struct pipe_inode_info *info, 350 328 struct pipe_buffer *buf, struct splice_desc *sd) ··· 357 329 unsigned int offset; 358 330 ssize_t ret; 359 331 void *ptr; 332 + int more; 360 333 361 334 /* 362 335 * sub-optimal, but we are limited by the pipe ->map. we don't ··· 370 341 return PTR_ERR(ptr); 371 342 372 343 offset = pos & ~PAGE_CACHE_MASK; 344 + more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len; 373 345 374 - ret = file->f_op->sendpage(file, buf->page, offset, sd->len, &pos, 375 - sd->len < sd->total_len); 346 + ret = file->f_op->sendpage(file, buf->page, offset, sd->len, &pos,more); 376 347 377 348 buf->ops->unmap(info, buf); 378 349 if (ret == sd->len) ··· 394 365 * - Destination page does not exist, we can add the pipe page to 395 366 * the page cache and avoid the copy. 396 367 * 397 - * For now we just do the slower thing and always copy pages over, it's 398 - * easier than migrating pages from the pipe to the target file. For the 399 - * case of doing file | file splicing, the migrate approach had some LRU 400 - * nastiness... 368 + * If asked to move pages to the output file (SPLICE_F_MOVE is set in 369 + * sd->flags), we attempt to migrate pages from the pipe to the output 370 + * file address space page cache. This is possible if no one else has 371 + * the pipe page referenced outside of the pipe and page cache. If 372 + * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create 373 + * a new page in the output file page cache and fill/dirty that. 401 374 */ 402 375 static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, 403 376 struct splice_desc *sd) 404 377 { 405 378 struct file *file = sd->file; 406 379 struct address_space *mapping = file->f_mapping; 380 + gfp_t gfp_mask = mapping_gfp_mask(mapping); 407 381 unsigned int offset; 408 382 struct page *page; 409 383 pgoff_t index; ··· 427 395 * reuse buf page, if SPLICE_F_MOVE is set 428 396 */ 429 397 if (sd->flags & SPLICE_F_MOVE) { 398 + /* 399 + * If steal succeeds, buf->page is now pruned from the vm 400 + * side (LRU and page cache) and we can reuse it. 401 + */ 430 402 if (buf->ops->steal(info, buf)) 431 403 goto find_page; 432 404 433 405 page = buf->page; 434 - if (add_to_page_cache_lru(page, mapping, index, 435 - mapping_gfp_mask(mapping))) 406 + if (add_to_page_cache(page, mapping, index, gfp_mask)) 436 407 goto find_page; 408 + 409 + if (!(buf->flags & PIPE_BUF_FLAG_LRU)) 410 + lru_cache_add(page); 437 411 } else { 438 412 find_page: 439 413 ret = -ENOMEM; 440 - page = find_or_create_page(mapping, index, 441 - mapping_gfp_mask(mapping)); 414 + page = find_or_create_page(mapping, index, gfp_mask); 442 415 if (!page) 443 416 goto out; 444 417 ··· 480 443 } 481 444 482 445 ret = mapping->a_ops->prepare_write(file, page, 0, sd->len); 483 - if (ret) 446 + if (ret == AOP_TRUNCATED_PAGE) { 447 + page_cache_release(page); 448 + goto find_page; 449 + } else if (ret) 484 450 goto out; 485 451 486 - if (!buf->stolen) { 452 + if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) { 487 453 char *dst = kmap_atomic(page, KM_USER0); 488 454 489 455 memcpy(dst + offset, src + buf->offset, sd->len); ··· 495 455 } 496 456 497 457 ret = mapping->a_ops->commit_write(file, page, 0, sd->len); 498 - if (ret < 0) 458 + if (ret == AOP_TRUNCATED_PAGE) { 459 + page_cache_release(page); 460 + goto find_page; 461 + } else if (ret) 499 462 goto out; 500 463 501 - set_page_dirty(page); 502 - ret = write_one_page(page, 0); 464 + balance_dirty_pages_ratelimited(mapping); 503 465 out: 504 - if (ret < 0) 505 - unlock_page(page); 506 - if (!buf->stolen) 466 + if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) { 507 467 page_cache_release(page); 468 + unlock_page(page); 469 + } 508 470 buf->ops->unmap(info, buf); 509 471 return ret; 510 472 } ··· 514 472 typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *, 515 473 struct splice_desc *); 516 474 475 + /* 476 + * Pipe input worker. Most of this logic works like a regular pipe, the 477 + * key here is the 'actor' worker passed in that actually moves the data 478 + * to the wanted destination. See pipe_to_file/pipe_to_sendpage above. 479 + */ 517 480 static ssize_t move_from_pipe(struct inode *inode, struct file *out, 518 481 size_t len, unsigned int flags, 519 482 splice_actor *actor) ··· 620 573 621 574 } 622 575 576 + /** 577 + * generic_file_splice_write - splice data from a pipe to a file 578 + * @inode: pipe inode 579 + * @out: file to write to 580 + * @len: number of bytes to splice 581 + * @flags: splice modifier flags 582 + * 583 + * Will either move or copy pages (determined by @flags options) from 584 + * the given pipe inode to the given file. 585 + * 586 + */ 623 587 ssize_t generic_file_splice_write(struct inode *inode, struct file *out, 624 588 size_t len, unsigned int flags) 625 589 { 626 - return move_from_pipe(inode, out, len, flags, pipe_to_file); 590 + struct address_space *mapping = out->f_mapping; 591 + ssize_t ret = move_from_pipe(inode, out, len, flags, pipe_to_file); 592 + 593 + /* 594 + * if file or inode is SYNC and we actually wrote some data, sync it 595 + */ 596 + if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(mapping->host)) 597 + && ret > 0) { 598 + struct inode *inode = mapping->host; 599 + int err; 600 + 601 + mutex_lock(&inode->i_mutex); 602 + err = generic_osync_inode(mapping->host, mapping, 603 + OSYNC_METADATA|OSYNC_DATA); 604 + mutex_unlock(&inode->i_mutex); 605 + 606 + if (err) 607 + ret = err; 608 + } 609 + 610 + return ret; 627 611 } 628 612 613 + EXPORT_SYMBOL(generic_file_splice_write); 614 + 615 + /** 616 + * generic_splice_sendpage - splice data from a pipe to a socket 617 + * @inode: pipe inode 618 + * @out: socket to write to 619 + * @len: number of bytes to splice 620 + * @flags: splice modifier flags 621 + * 622 + * Will send @len bytes from the pipe to a network socket. No data copying 623 + * is involved. 624 + * 625 + */ 629 626 ssize_t generic_splice_sendpage(struct inode *inode, struct file *out, 630 627 size_t len, unsigned int flags) 631 628 { 632 629 return move_from_pipe(inode, out, len, flags, pipe_to_sendpage); 633 630 } 634 631 635 - EXPORT_SYMBOL(generic_file_splice_write); 636 - EXPORT_SYMBOL(generic_file_splice_read); 632 + EXPORT_SYMBOL(generic_splice_sendpage); 637 633 634 + /* 635 + * Attempt to initiate a splice from pipe to file. 636 + */ 638 637 static long do_splice_from(struct inode *pipe, struct file *out, size_t len, 639 638 unsigned int flags) 640 639 { ··· 701 608 return out->f_op->splice_write(pipe, out, len, flags); 702 609 } 703 610 611 + /* 612 + * Attempt to initiate a splice from a file to a pipe. 613 + */ 704 614 static long do_splice_to(struct file *in, struct inode *pipe, size_t len, 705 615 unsigned int flags) 706 616 { ··· 732 636 return in->f_op->splice_read(in, pipe, len, flags); 733 637 } 734 638 639 + /* 640 + * Determine where to splice to/from. 641 + */ 735 642 static long do_splice(struct file *in, struct file *out, size_t len, 736 643 unsigned int flags) 737 644 {
+5 -1
include/linux/pipe_fs_i.h
··· 5 5 6 6 #define PIPE_BUFFERS (16) 7 7 8 + #define PIPE_BUF_FLAG_STOLEN 0x01 9 + #define PIPE_BUF_FLAG_LRU 0x02 10 + 8 11 struct pipe_buffer { 9 12 struct page *page; 10 13 unsigned int offset, len; 11 14 struct pipe_buf_operations *ops; 12 - unsigned int stolen; 15 + unsigned int flags; 13 16 }; 14 17 15 18 struct pipe_buf_operations { ··· 66 63 #define SPLICE_F_NONBLOCK (0x02) /* don't block on the pipe splicing (but */ 67 64 /* we may still block on the fd we splice */ 68 65 /* from/to, of course */ 66 + #define SPLICE_F_MORE (0x04) /* expect more data */ 69 67 70 68 #endif