Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'vfs-6.15-rc1.pipe' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull vfs pipe updates from Christian Brauner:

- Introduce struct file_operations pipeanon_fops

- Don't update {a,c,m}time for anonymous pipes, to avoid the performance
costs associated with those timestamp updates

- Change pipe_write() to never add a zero-sized buffer

- Limit the slots in pipe_resize_ring()

- Use pipe_buf() to retrieve the pipe buffer everywhere

- Drop an always true check in anon_pipe_write()

- Cache 2 temporary pages per pipe instead of 1

- Avoid spurious calls to prepare_to_wait_event() in ___wait_event()

* tag 'vfs-6.15-rc1.pipe' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
fs/splice: Use pipe_buf() helper to retrieve pipe buffer
fs/pipe: Use pipe_buf() helper to retrieve pipe buffer
kernel/watch_queue: Use pipe_buf() to retrieve the pipe buffer
fs/pipe: Limit the slots in pipe_resize_ring()
wait: avoid spurious calls to prepare_to_wait_event() in ___wait_event()
pipe: cache 2 pages instead of 1
pipe: drop an always true check in anon_pipe_write()
pipe: change pipe_write() to never add a zero-sized buffer
pipe: don't update {a,c,m}time for anonymous pipes
pipe: introduce struct file_operations pipeanon_fops

+128 -113
+107 -82
fs/pipe.c
··· 112 112 pipe_lock(pipe2); 113 113 } 114 114 115 + static struct page *anon_pipe_get_page(struct pipe_inode_info *pipe) 116 + { 117 + for (int i = 0; i < ARRAY_SIZE(pipe->tmp_page); i++) { 118 + if (pipe->tmp_page[i]) { 119 + struct page *page = pipe->tmp_page[i]; 120 + pipe->tmp_page[i] = NULL; 121 + return page; 122 + } 123 + } 124 + 125 + return alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT); 126 + } 127 + 128 + static void anon_pipe_put_page(struct pipe_inode_info *pipe, 129 + struct page *page) 130 + { 131 + if (page_count(page) == 1) { 132 + for (int i = 0; i < ARRAY_SIZE(pipe->tmp_page); i++) { 133 + if (!pipe->tmp_page[i]) { 134 + pipe->tmp_page[i] = page; 135 + return; 136 + } 137 + } 138 + } 139 + 140 + put_page(page); 141 + } 142 + 115 143 static void anon_pipe_buf_release(struct pipe_inode_info *pipe, 116 144 struct pipe_buffer *buf) 117 145 { 118 146 struct page *page = buf->page; 119 147 120 - /* 121 - * If nobody else uses this page, and we don't already have a 122 - * temporary page, let's keep track of it as a one-deep 123 - * allocation cache. 
(Otherwise just release our reference to it) 124 - */ 125 - if (page_count(page) == 1 && !pipe->tmp_page) 126 - pipe->tmp_page = page; 127 - else 128 - put_page(page); 148 + anon_pipe_put_page(pipe, page); 129 149 } 130 150 131 151 static bool anon_pipe_buf_try_steal(struct pipe_inode_info *pipe, ··· 267 247 } 268 248 269 249 static ssize_t 270 - pipe_read(struct kiocb *iocb, struct iov_iter *to) 250 + anon_pipe_read(struct kiocb *iocb, struct iov_iter *to) 271 251 { 272 252 size_t total_len = iov_iter_count(to); 273 253 struct file *filp = iocb->ki_filp; ··· 294 274 /* Read ->head with a barrier vs post_one_notification() */ 295 275 unsigned int head = smp_load_acquire(&pipe->head); 296 276 unsigned int tail = pipe->tail; 297 - unsigned int mask = pipe->ring_size - 1; 298 277 299 278 #ifdef CONFIG_WATCH_QUEUE 300 279 if (pipe->note_loss) { ··· 320 301 #endif 321 302 322 303 if (!pipe_empty(head, tail)) { 323 - struct pipe_buffer *buf = &pipe->bufs[tail & mask]; 304 + struct pipe_buffer *buf = pipe_buf(pipe, tail); 324 305 size_t chars = buf->len; 325 306 size_t written; 326 307 int error; ··· 378 359 break; 379 360 } 380 361 mutex_unlock(&pipe->mutex); 381 - 382 362 /* 383 363 * We only get here if we didn't actually read anything. 384 364 * 385 - * However, we could have seen (and removed) a zero-sized 386 - * pipe buffer, and might have made space in the buffers 387 - * that way. 388 - * 389 - * You can't make zero-sized pipe buffers by doing an empty 390 - * write (not even in packet mode), but they can happen if 391 - * the writer gets an EFAULT when trying to fill a buffer 392 - * that already got allocated and inserted in the buffer 393 - * array. 394 - * 395 - * So we still need to wake up any pending writers in the 396 - * _very_ unlikely case that the pipe was full, but we got 397 - * no data. 
398 - */ 399 - if (unlikely(wake_writer)) 400 - wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM); 401 - kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); 402 - 403 - /* 404 365 * But because we didn't read anything, at this point we can 405 366 * just return directly with -ERESTARTSYS if we're interrupted, 406 367 * since we've done any required wakeups and there's no need ··· 389 390 if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0) 390 391 return -ERESTARTSYS; 391 392 392 - wake_writer = false; 393 393 wake_next_reader = true; 394 394 mutex_lock(&pipe->mutex); 395 395 } ··· 401 403 if (wake_next_reader) 402 404 wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM); 403 405 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); 406 + return ret; 407 + } 408 + 409 + static ssize_t 410 + fifo_pipe_read(struct kiocb *iocb, struct iov_iter *to) 411 + { 412 + int ret = anon_pipe_read(iocb, to); 404 413 if (ret > 0) 405 - file_accessed(filp); 414 + file_accessed(iocb->ki_filp); 406 415 return ret; 407 416 } 408 417 ··· 429 424 } 430 425 431 426 static ssize_t 432 - pipe_write(struct kiocb *iocb, struct iov_iter *from) 427 + anon_pipe_write(struct kiocb *iocb, struct iov_iter *from) 433 428 { 434 429 struct file *filp = iocb->ki_filp; 435 430 struct pipe_inode_info *pipe = filp->private_data; ··· 476 471 was_empty = pipe_empty(head, pipe->tail); 477 472 chars = total_len & (PAGE_SIZE-1); 478 473 if (chars && !was_empty) { 479 - unsigned int mask = pipe->ring_size - 1; 480 - struct pipe_buffer *buf = &pipe->bufs[(head - 1) & mask]; 474 + struct pipe_buffer *buf = pipe_buf(pipe, head - 1); 481 475 int offset = buf->offset + buf->len; 482 476 483 477 if ((buf->flags & PIPE_BUF_FLAG_CAN_MERGE) && ··· 507 503 508 504 head = pipe->head; 509 505 if (!pipe_full(head, pipe->tail, pipe->max_usage)) { 510 - unsigned int mask = pipe->ring_size - 1; 511 506 struct pipe_buffer *buf; 512 - struct page *page = 
pipe->tmp_page; 507 + struct page *page; 513 508 int copied; 514 509 515 - if (!page) { 516 - page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT); 517 - if (unlikely(!page)) { 518 - ret = ret ? : -ENOMEM; 519 - break; 520 - } 521 - pipe->tmp_page = page; 510 + page = anon_pipe_get_page(pipe); 511 + if (unlikely(!page)) { 512 + if (!ret) 513 + ret = -ENOMEM; 514 + break; 522 515 } 523 - 524 - /* Allocate a slot in the ring in advance and attach an 525 - * empty buffer. If we fault or otherwise fail to use 526 - * it, either the reader will consume it or it'll still 527 - * be there for the next write. 528 - */ 529 - pipe->head = head + 1; 530 - 531 - /* Insert it into the buffer array */ 532 - buf = &pipe->bufs[head & mask]; 533 - buf->page = page; 534 - buf->ops = &anon_pipe_buf_ops; 535 - buf->offset = 0; 536 - buf->len = 0; 537 - if (is_packetized(filp)) 538 - buf->flags = PIPE_BUF_FLAG_PACKET; 539 - else 540 - buf->flags = PIPE_BUF_FLAG_CAN_MERGE; 541 - pipe->tmp_page = NULL; 542 516 543 517 copied = copy_page_from_iter(page, 0, PAGE_SIZE, from); 544 518 if (unlikely(copied < PAGE_SIZE && iov_iter_count(from))) { 519 + anon_pipe_put_page(pipe, page); 545 520 if (!ret) 546 521 ret = -EFAULT; 547 522 break; 548 523 } 549 - ret += copied; 524 + 525 + pipe->head = head + 1; 526 + /* Insert it into the buffer array */ 527 + buf = pipe_buf(pipe, head); 528 + buf->page = page; 529 + buf->ops = &anon_pipe_buf_ops; 530 + buf->offset = 0; 531 + if (is_packetized(filp)) 532 + buf->flags = PIPE_BUF_FLAG_PACKET; 533 + else 534 + buf->flags = PIPE_BUF_FLAG_CAN_MERGE; 535 + 550 536 buf->len = copied; 537 + ret += copied; 551 538 552 539 if (!iov_iter_count(from)) 553 540 break; 554 - } 555 541 556 - if (!pipe_full(head, pipe->tail, pipe->max_usage)) 557 542 continue; 543 + } 558 544 559 545 /* Wait for buffer space to become available. 
*/ 560 546 if ((filp->f_flags & O_NONBLOCK) || ··· 596 602 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 597 603 if (wake_next_writer) 598 604 wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM); 599 - if (ret > 0 && sb_start_write_trylock(file_inode(filp)->i_sb)) { 600 - int err = file_update_time(filp); 601 - if (err) 602 - ret = err; 603 - sb_end_write(file_inode(filp)->i_sb); 605 + return ret; 606 + } 607 + 608 + static ssize_t 609 + fifo_pipe_write(struct kiocb *iocb, struct iov_iter *from) 610 + { 611 + int ret = anon_pipe_write(iocb, from); 612 + if (ret > 0) { 613 + struct file *filp = iocb->ki_filp; 614 + if (sb_start_write_trylock(file_inode(filp)->i_sb)) { 615 + int err = file_update_time(filp); 616 + if (err) 617 + ret = err; 618 + sb_end_write(file_inode(filp)->i_sb); 619 + } 604 620 } 605 621 return ret; 606 622 } ··· 857 853 if (pipe->watch_queue) 858 854 put_watch_queue(pipe->watch_queue); 859 855 #endif 860 - if (pipe->tmp_page) 861 - __free_page(pipe->tmp_page); 856 + for (i = 0; i < ARRAY_SIZE(pipe->tmp_page); i++) { 857 + if (pipe->tmp_page[i]) 858 + __free_page(pipe->tmp_page[i]); 859 + } 862 860 kfree(pipe->bufs); 863 861 kfree(pipe); 864 862 } ··· 880 874 .d_dname = pipefs_dname, 881 875 }; 882 876 877 + static const struct file_operations pipeanon_fops; 878 + 883 879 static struct inode * get_pipe_inode(void) 884 880 { 885 881 struct inode *inode = new_inode_pseudo(pipe_mnt->mnt_sb); ··· 899 891 inode->i_pipe = pipe; 900 892 pipe->files = 2; 901 893 pipe->readers = pipe->writers = 1; 902 - inode->i_fop = &pipefifo_fops; 894 + inode->i_fop = &pipeanon_fops; 903 895 904 896 /* 905 897 * Mark the inode dirty from the very beginning, ··· 942 934 943 935 f = alloc_file_pseudo(inode, pipe_mnt, "", 944 936 O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)), 945 - &pipefifo_fops); 937 + &pipeanon_fops); 946 938 if (IS_ERR(f)) { 947 939 free_pipe_info(inode->i_pipe); 948 940 iput(inode); ··· 953 945 f->f_pipe = 0; 954 946 955 947 
res[0] = alloc_file_clone(f, O_RDONLY | (flags & O_NONBLOCK), 956 - &pipefifo_fops); 948 + &pipeanon_fops); 957 949 if (IS_ERR(res[0])) { 958 950 put_pipe_info(inode, inode->i_pipe); 959 951 fput(f); ··· 1117 1109 1118 1110 static int fifo_open(struct inode *inode, struct file *filp) 1119 1111 { 1112 + bool is_pipe = inode->i_fop == &pipeanon_fops; 1120 1113 struct pipe_inode_info *pipe; 1121 - bool is_pipe = inode->i_sb->s_magic == PIPEFS_MAGIC; 1122 1114 int ret; 1123 1115 1124 1116 filp->f_pipe = 0; ··· 1242 1234 1243 1235 const struct file_operations pipefifo_fops = { 1244 1236 .open = fifo_open, 1245 - .read_iter = pipe_read, 1246 - .write_iter = pipe_write, 1237 + .read_iter = fifo_pipe_read, 1238 + .write_iter = fifo_pipe_write, 1239 + .poll = pipe_poll, 1240 + .unlocked_ioctl = pipe_ioctl, 1241 + .release = pipe_release, 1242 + .fasync = pipe_fasync, 1243 + .splice_write = iter_file_splice_write, 1244 + }; 1245 + 1246 + static const struct file_operations pipeanon_fops = { 1247 + .open = fifo_open, 1248 + .read_iter = anon_pipe_read, 1249 + .write_iter = anon_pipe_write, 1247 1250 .poll = pipe_poll, 1248 1251 .unlocked_ioctl = pipe_ioctl, 1249 1252 .release = pipe_release, ··· 1289 1270 { 1290 1271 struct pipe_buffer *bufs; 1291 1272 unsigned int head, tail, mask, n; 1273 + 1274 + /* nr_slots larger than limits of pipe->{head,tail} */ 1275 + if (unlikely(nr_slots > (pipe_index_t)-1u)) 1276 + return -EINVAL; 1292 1277 1293 1278 bufs = kcalloc(nr_slots, sizeof(*bufs), 1294 1279 GFP_KERNEL_ACCOUNT | __GFP_NOWARN); ··· 1413 1390 { 1414 1391 struct pipe_inode_info *pipe = file->private_data; 1415 1392 1416 - if (file->f_op != &pipefifo_fops || !pipe) 1393 + if (!pipe) 1394 + return NULL; 1395 + if (file->f_op != &pipefifo_fops && file->f_op != &pipeanon_fops) 1417 1396 return NULL; 1418 1397 if (for_splice && pipe_has_watch_queue(pipe)) 1419 1398 return NULL;
+14 -26
fs/splice.c
··· 200 200 unsigned int spd_pages = spd->nr_pages; 201 201 unsigned int tail = pipe->tail; 202 202 unsigned int head = pipe->head; 203 - unsigned int mask = pipe->ring_size - 1; 204 203 ssize_t ret = 0; 205 204 int page_nr = 0; 206 205 ··· 213 214 } 214 215 215 216 while (!pipe_full(head, tail, pipe->max_usage)) { 216 - struct pipe_buffer *buf = &pipe->bufs[head & mask]; 217 + struct pipe_buffer *buf = pipe_buf(pipe, head); 217 218 218 219 buf->page = spd->pages[page_nr]; 219 220 buf->offset = spd->partial[page_nr].offset; ··· 246 247 { 247 248 unsigned int head = pipe->head; 248 249 unsigned int tail = pipe->tail; 249 - unsigned int mask = pipe->ring_size - 1; 250 250 int ret; 251 251 252 252 if (unlikely(!pipe->readers)) { ··· 254 256 } else if (pipe_full(head, tail, pipe->max_usage)) { 255 257 ret = -EAGAIN; 256 258 } else { 257 - pipe->bufs[head & mask] = *buf; 259 + *pipe_buf(pipe, head) = *buf; 258 260 pipe->head = head + 1; 259 261 return buf->len; 260 262 } ··· 445 447 { 446 448 unsigned int head = pipe->head; 447 449 unsigned int tail = pipe->tail; 448 - unsigned int mask = pipe->ring_size - 1; 449 450 int ret; 450 451 451 452 while (!pipe_empty(head, tail)) { 452 - struct pipe_buffer *buf = &pipe->bufs[tail & mask]; 453 + struct pipe_buffer *buf = pipe_buf(pipe, tail); 453 454 454 455 sd->len = buf->len; 455 456 if (sd->len > sd->total_len) ··· 492 495 static inline bool eat_empty_buffer(struct pipe_inode_info *pipe) 493 496 { 494 497 unsigned int tail = pipe->tail; 495 - unsigned int mask = pipe->ring_size - 1; 496 - struct pipe_buffer *buf = &pipe->bufs[tail & mask]; 498 + struct pipe_buffer *buf = pipe_buf(pipe, tail); 497 499 498 500 if (unlikely(!buf->len)) { 499 501 pipe_buf_release(pipe, buf); ··· 686 690 while (sd.total_len) { 687 691 struct kiocb kiocb; 688 692 struct iov_iter from; 689 - unsigned int head, tail, mask; 693 + unsigned int head, tail; 690 694 size_t left; 691 695 int n; 692 696 ··· 707 711 708 712 head = pipe->head; 709 713 tail = 
pipe->tail; 710 - mask = pipe->ring_size - 1; 711 714 712 715 /* build the vector */ 713 716 left = sd.total_len; 714 717 for (n = 0; !pipe_empty(head, tail) && left && n < nbufs; tail++) { 715 - struct pipe_buffer *buf = &pipe->bufs[tail & mask]; 718 + struct pipe_buffer *buf = pipe_buf(pipe, tail); 716 719 size_t this_len = buf->len; 717 720 718 721 /* zero-length bvecs are not supported, skip them */ ··· 747 752 /* dismiss the fully eaten buffers, adjust the partial one */ 748 753 tail = pipe->tail; 749 754 while (ret) { 750 - struct pipe_buffer *buf = &pipe->bufs[tail & mask]; 755 + struct pipe_buffer *buf = pipe_buf(pipe, tail); 751 756 if (ret >= buf->len) { 752 757 ret -= buf->len; 753 758 buf->len = 0; ··· 804 809 pipe_lock(pipe); 805 810 806 811 while (len > 0) { 807 - unsigned int head, tail, mask, bc = 0; 812 + unsigned int head, tail, bc = 0; 808 813 size_t remain = len; 809 814 810 815 /* ··· 841 846 842 847 head = pipe->head; 843 848 tail = pipe->tail; 844 - mask = pipe->ring_size - 1; 845 849 846 850 while (!pipe_empty(head, tail)) { 847 - struct pipe_buffer *buf = &pipe->bufs[tail & mask]; 851 + struct pipe_buffer *buf = pipe_buf(pipe, tail); 848 852 size_t seg; 849 853 850 854 if (!buf->len) { ··· 888 894 len -= ret; 889 895 tail = pipe->tail; 890 896 while (ret > 0) { 891 - struct pipe_buffer *buf = &pipe->bufs[tail & mask]; 897 + struct pipe_buffer *buf = pipe_buf(pipe, tail); 892 898 size_t seg = min_t(size_t, ret, buf->len); 893 899 894 900 buf->offset += seg; ··· 1719 1725 struct pipe_buffer *ibuf, *obuf; 1720 1726 unsigned int i_head, o_head; 1721 1727 unsigned int i_tail, o_tail; 1722 - unsigned int i_mask, o_mask; 1723 1728 int ret = 0; 1724 1729 bool input_wakeup = false; 1725 1730 ··· 1740 1747 pipe_double_lock(ipipe, opipe); 1741 1748 1742 1749 i_tail = ipipe->tail; 1743 - i_mask = ipipe->ring_size - 1; 1744 1750 o_head = opipe->head; 1745 - o_mask = opipe->ring_size - 1; 1746 1751 1747 1752 do { 1748 1753 size_t o_len; ··· 1783 1792 
goto retry; 1784 1793 } 1785 1794 1786 - ibuf = &ipipe->bufs[i_tail & i_mask]; 1787 - obuf = &opipe->bufs[o_head & o_mask]; 1795 + ibuf = pipe_buf(ipipe, i_tail); 1796 + obuf = pipe_buf(opipe, o_head); 1788 1797 1789 1798 if (len >= ibuf->len) { 1790 1799 /* ··· 1853 1862 struct pipe_buffer *ibuf, *obuf; 1854 1863 unsigned int i_head, o_head; 1855 1864 unsigned int i_tail, o_tail; 1856 - unsigned int i_mask, o_mask; 1857 1865 ssize_t ret = 0; 1858 1866 1859 1867 /* ··· 1863 1873 pipe_double_lock(ipipe, opipe); 1864 1874 1865 1875 i_tail = ipipe->tail; 1866 - i_mask = ipipe->ring_size - 1; 1867 1876 o_head = opipe->head; 1868 - o_mask = opipe->ring_size - 1; 1869 1877 1870 1878 do { 1871 1879 if (!opipe->readers) { ··· 1884 1896 pipe_full(o_head, o_tail, opipe->max_usage)) 1885 1897 break; 1886 1898 1887 - ibuf = &ipipe->bufs[i_tail & i_mask]; 1888 - obuf = &opipe->bufs[o_head & o_mask]; 1899 + ibuf = pipe_buf(ipipe, i_tail); 1900 + obuf = pipe_buf(opipe, o_head); 1889 1901 1890 1902 /* 1891 1903 * Get a reference to this pipe buffer,
+1 -1
include/linux/pipe_fs_i.h
··· 108 108 #ifdef CONFIG_WATCH_QUEUE 109 109 bool note_loss; 110 110 #endif 111 - struct page *tmp_page; 111 + struct page *tmp_page[2]; 112 112 struct fasync_struct *fasync_readers; 113 113 struct fasync_struct *fasync_writers; 114 114 struct pipe_buffer *bufs;
+3
include/linux/wait.h
··· 316 316 } \ 317 317 \ 318 318 cmd; \ 319 + \ 320 + if (condition) \ 321 + break; \ 319 322 } \ 320 323 finish_wait(&wq_head, &__wq_entry); \ 321 324 __out: __ret; \
+3 -4
kernel/watch_queue.c
··· 101 101 struct pipe_inode_info *pipe = wqueue->pipe; 102 102 struct pipe_buffer *buf; 103 103 struct page *page; 104 - unsigned int head, tail, mask, note, offset, len; 104 + unsigned int head, tail, note, offset, len; 105 105 bool done = false; 106 106 107 107 spin_lock_irq(&pipe->rd_wait.lock); 108 108 109 - mask = pipe->ring_size - 1; 110 109 head = pipe->head; 111 110 tail = pipe->tail; 112 111 if (pipe_full(head, tail, pipe->ring_size)) ··· 123 124 memcpy(p + offset, n, len); 124 125 kunmap_atomic(p); 125 126 126 - buf = &pipe->bufs[head & mask]; 127 + buf = pipe_buf(pipe, head); 127 128 buf->page = page; 128 129 buf->private = (unsigned long)wqueue; 129 130 buf->ops = &watch_queue_pipe_buf_ops; ··· 146 147 return done; 147 148 148 149 lost: 149 - buf = &pipe->bufs[(head - 1) & mask]; 150 + buf = pipe_buf(pipe, head - 1); 150 151 buf->flags |= PIPE_BUF_FLAG_LOSS; 151 152 goto out; 152 153 }