Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client:
ceph: fix readdir EOVERFLOW on 32-bit archs
ceph: fix frag offset for non-leftmost frags
ceph: fix dangling pointer
ceph: explicitly specify page alignment in network messages
ceph: make page alignment explicit in osd interface
ceph: fix comment, remove extraneous args
ceph: fix update of ctime from MDS
ceph: fix version check on racing inode updates
ceph: fix uid/gid on resent mds requests
ceph: fix rdcache_gen usage and invalidate
ceph: re-request max_size if cap auth changes
ceph: only let auth caps update max_size
ceph: fix open for write on clustered mds
ceph: fix bad pointer dereference in ceph_fill_trace
ceph: fix small seq message skipping
Revert "ceph: update issue_seq on cap grant"

+129 -76
+3 -3
fs/ceph/addr.c
··· 204 204 err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout, 205 205 page->index << PAGE_CACHE_SHIFT, &len, 206 206 ci->i_truncate_seq, ci->i_truncate_size, 207 - &page, 1); 207 + &page, 1, 0); 208 208 if (err == -ENOENT) 209 209 err = 0; 210 210 if (err < 0) { ··· 287 287 rc = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout, 288 288 offset, &len, 289 289 ci->i_truncate_seq, ci->i_truncate_size, 290 - pages, nr_pages); 290 + pages, nr_pages, 0); 291 291 if (rc == -ENOENT) 292 292 rc = 0; 293 293 if (rc < 0) ··· 774 774 snapc, do_sync, 775 775 ci->i_truncate_seq, 776 776 ci->i_truncate_size, 777 - &inode->i_mtime, true, 1); 777 + &inode->i_mtime, true, 1, 0); 778 778 max_pages = req->r_num_pages; 779 779 780 780 alloc_page_vec(fsc, req);
+10 -7
fs/ceph/caps.c
··· 1430 1430 invalidating_gen == ci->i_rdcache_gen) { 1431 1431 /* success. */ 1432 1432 dout("try_nonblocking_invalidate %p success\n", inode); 1433 - ci->i_rdcache_gen = 0; 1434 - ci->i_rdcache_revoking = 0; 1433 + /* save any racing async invalidate some trouble */ 1434 + ci->i_rdcache_revoking = ci->i_rdcache_gen - 1; 1435 1435 return 0; 1436 1436 } 1437 1437 dout("try_nonblocking_invalidate %p failed\n", inode); ··· 2273 2273 { 2274 2274 struct ceph_inode_info *ci = ceph_inode(inode); 2275 2275 int mds = session->s_mds; 2276 - unsigned seq = le32_to_cpu(grant->seq); 2277 - unsigned issue_seq = le32_to_cpu(grant->issue_seq); 2276 + int seq = le32_to_cpu(grant->seq); 2278 2277 int newcaps = le32_to_cpu(grant->caps); 2279 2278 int issued, implemented, used, wanted, dirty; 2280 2279 u64 size = le64_to_cpu(grant->size); ··· 2285 2286 int revoked_rdcache = 0; 2286 2287 int queue_invalidate = 0; 2287 2288 2288 - dout("handle_cap_grant inode %p cap %p mds%d seq %u/%u %s\n", 2289 - inode, cap, mds, seq, issue_seq, ceph_cap_string(newcaps)); 2289 + dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n", 2290 + inode, cap, mds, seq, ceph_cap_string(newcaps)); 2290 2291 dout(" size %llu max_size %llu, i_size %llu\n", size, max_size, 2291 2292 inode->i_size); 2292 2293 ··· 2382 2383 } 2383 2384 2384 2385 cap->seq = seq; 2385 - cap->issue_seq = issue_seq; 2386 2386 2387 2387 /* file layout may have changed */ 2388 2388 ci->i_layout = grant->layout; ··· 2689 2691 NULL /* no caps context */); 2690 2692 try_flush_caps(inode, session, NULL); 2691 2693 up_read(&mdsc->snap_rwsem); 2694 + 2695 + /* make sure we re-request max_size, if necessary */ 2696 + spin_lock(&inode->i_lock); 2697 + ci->i_requested_max_size = 0; 2698 + spin_unlock(&inode->i_lock); 2692 2699 } 2693 2700 2694 2701 /*
+12 -4
fs/ceph/dir.c
··· 336 336 if (req->r_reply_info.dir_end) { 337 337 kfree(fi->last_name); 338 338 fi->last_name = NULL; 339 - fi->next_offset = 2; 339 + if (ceph_frag_is_rightmost(frag)) 340 + fi->next_offset = 2; 341 + else 342 + fi->next_offset = 0; 340 343 } else { 341 344 rinfo = &req->r_reply_info; 342 345 err = note_last_dentry(fi, ··· 358 355 u64 pos = ceph_make_fpos(frag, off); 359 356 struct ceph_mds_reply_inode *in = 360 357 rinfo->dir_in[off - fi->offset].in; 358 + struct ceph_vino vino; 359 + ino_t ino; 360 + 361 361 dout("readdir off %d (%d/%d) -> %lld '%.*s' %p\n", 362 362 off, off - fi->offset, rinfo->dir_nr, pos, 363 363 rinfo->dir_dname_len[off - fi->offset], 364 364 rinfo->dir_dname[off - fi->offset], in); 365 365 BUG_ON(!in); 366 366 ftype = le32_to_cpu(in->mode) >> 12; 367 + vino.ino = le64_to_cpu(in->ino); 368 + vino.snap = le64_to_cpu(in->snapid); 369 + ino = ceph_vino_to_ino(vino); 367 370 if (filldir(dirent, 368 371 rinfo->dir_dname[off - fi->offset], 369 372 rinfo->dir_dname_len[off - fi->offset], 370 - pos, 371 - le64_to_cpu(in->ino), 372 - ftype) < 0) { 373 + pos, ino, ftype) < 0) { 373 374 dout("filldir stopping us...\n"); 374 375 return 0; 375 376 } ··· 421 414 fi->last_readdir = NULL; 422 415 } 423 416 kfree(fi->last_name); 417 + fi->last_name = NULL; 424 418 fi->next_offset = 2; /* compensate for . and .. */ 425 419 if (fi->dentry) { 426 420 dput(fi->dentry);
+34 -18
fs/ceph/file.c
··· 154 154 } 155 155 156 156 /* 157 - * No need to block if we have any caps. Update wanted set 157 + * No need to block if we have caps on the auth MDS (for 158 + * write) or any MDS (for read). Update wanted set 158 159 * asynchronously. 159 160 */ 160 161 spin_lock(&inode->i_lock); 161 - if (__ceph_is_any_real_caps(ci)) { 162 + if (__ceph_is_any_real_caps(ci) && 163 + (((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) { 162 164 int mds_wanted = __ceph_caps_mds_wanted(ci); 163 165 int issued = __ceph_caps_issued(ci, NULL); 164 166 ··· 282 280 static int striped_read(struct inode *inode, 283 281 u64 off, u64 len, 284 282 struct page **pages, int num_pages, 285 - int *checkeof) 283 + int *checkeof, bool align_to_pages) 286 284 { 287 285 struct ceph_fs_client *fsc = ceph_inode_to_client(inode); 288 286 struct ceph_inode_info *ci = ceph_inode(inode); 289 287 u64 pos, this_len; 288 + int io_align, page_align; 290 289 int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */ 291 290 int left, pages_left; 292 291 int read; ··· 303 300 page_pos = pages; 304 301 pages_left = num_pages; 305 302 read = 0; 303 + io_align = off & ~PAGE_MASK; 306 304 307 305 more: 306 + if (align_to_pages) 307 + page_align = (pos - io_align) & ~PAGE_MASK; 308 + else 309 + page_align = pos & ~PAGE_MASK; 308 310 this_len = left; 309 311 ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode), 310 312 &ci->i_layout, pos, &this_len, 311 313 ci->i_truncate_seq, 312 314 ci->i_truncate_size, 313 - page_pos, pages_left); 315 + page_pos, pages_left, page_align); 314 316 hit_stripe = this_len < left; 315 317 was_short = ret >= 0 && ret < this_len; 316 318 if (ret == -ENOENT) ··· 382 374 dout("sync_read on file %p %llu~%u %s\n", file, off, len, 383 375 (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); 384 376 385 - if (file->f_flags & O_DIRECT) { 386 - pages = ceph_get_direct_page_vector(data, num_pages, off, len); 387 - 388 - /* 389 - * flush any page cache pages in this range. this 390 - * will make concurrent normal and O_DIRECT io slow, 391 - * but it will at least behave sensibly when they are 392 - * in sequence. 393 - */ 394 - } else { 377 + if (file->f_flags & O_DIRECT) 378 + pages = ceph_get_direct_page_vector(data, num_pages); 379 + else 395 380 pages = ceph_alloc_page_vector(num_pages, GFP_NOFS); 396 - } 397 381 if (IS_ERR(pages)) 398 382 return PTR_ERR(pages); 399 383 384 + /* 385 + * flush any page cache pages in this range. this 386 + * will make concurrent normal and sync io slow, 387 + * but it will at least behave sensibly when they are 388 + * in sequence. 389 + */ 400 390 ret = filemap_write_and_wait(inode->i_mapping); 401 391 if (ret < 0) 402 392 goto done; 403 393 404 - ret = striped_read(inode, off, len, pages, num_pages, checkeof); 394 + ret = striped_read(inode, off, len, pages, num_pages, checkeof, 395 + file->f_flags & O_DIRECT); 405 396 406 397 if (ret >= 0 && (file->f_flags & O_DIRECT) == 0) 407 398 ret = ceph_copy_page_vector_to_user(pages, data, off, ret); ··· 455 448 int flags; 456 449 int do_sync = 0; 457 450 int check_caps = 0; 451 + int page_align, io_align; 458 452 int ret; 459 453 struct timespec mtime = CURRENT_TIME; 460 454 ··· 469 461 pos = i_size_read(inode); 470 462 else 471 463 pos = *offset; 464 + 465 + io_align = pos & ~PAGE_MASK; 472 466 473 467 ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left); 474 468 if (ret < 0) ··· 496 486 */ 497 487 more: 498 488 len = left; 489 + if (file->f_flags & O_DIRECT) 490 + /* write from beginning of first page, regardless of 491 + io alignment */ 492 + page_align = (pos - io_align) & ~PAGE_MASK; 493 + else 494 + page_align = pos & ~PAGE_MASK; 499 495 req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, 500 496 ceph_vino(inode), pos, &len, 501 497 CEPH_OSD_OP_WRITE, flags, 502 498 ci->i_snap_realm->cached_context, 503 499 do_sync, 504 500 ci->i_truncate_seq, ci->i_truncate_size, 505 - &mtime, false, 2); 501 + &mtime, false, 2, page_align); 506 502 if (!req) 507 503 return -ENOMEM; 508 504 509 505 num_pages = calc_pages_for(pos, len); 510 506 511 507 if (file->f_flags & O_DIRECT) { 512 - pages = ceph_get_direct_page_vector(data, num_pages, pos, len); 508 + pages = ceph_get_direct_page_vector(data, num_pages); 513 509 if (IS_ERR(pages)) { 514 510 ret = PTR_ERR(pages); 515 511 goto out;
+31 -18
fs/ceph/inode.c
··· 470 470 471 471 if (issued & (CEPH_CAP_FILE_EXCL| 472 472 CEPH_CAP_FILE_WR| 473 - CEPH_CAP_FILE_BUFFER)) { 473 + CEPH_CAP_FILE_BUFFER| 474 + CEPH_CAP_AUTH_EXCL| 475 + CEPH_CAP_XATTR_EXCL)) { 474 476 if (timespec_compare(ctime, &inode->i_ctime) > 0) { 475 477 dout("ctime %ld.%09ld -> %ld.%09ld inc w/ cap\n", 476 478 inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, ··· 512 510 warn = 1; 513 511 } 514 512 } else { 515 - /* we have no write caps; whatever the MDS says is true */ 513 + /* we have no write|excl caps; whatever the MDS says is true */ 516 514 if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) >= 0) { 517 515 inode->i_ctime = *ctime; 518 516 inode->i_mtime = *mtime; ··· 568 566 569 567 /* 570 568 * provided version will be odd if inode value is projected, 571 - * even if stable. skip the update if we have a newer info 572 - * (e.g., due to inode info racing form multiple MDSs), or if 573 - * we are getting projected (unstable) inode info. 569 + * even if stable. skip the update if we have newer stable 570 + * info (ours>=theirs, e.g. due to racing mds replies), unless 571 + * we are getting projected (unstable) info (in which case the 572 + * version is odd, and we want ours>theirs). 573 + * us them 574 + * 2 2 skip 575 + * 3 2 skip 576 + * 3 3 update 574 577 */ 575 578 if (le64_to_cpu(info->version) > 0 && 576 - (ci->i_version & ~1) > le64_to_cpu(info->version)) 579 + (ci->i_version & ~1) >= le64_to_cpu(info->version)) 577 580 goto no_change; 578 581 579 582 issued = __ceph_caps_issued(ci, &implemented); ··· 612 605 le32_to_cpu(info->time_warp_seq), 613 606 &ctime, &mtime, &atime); 614 607 615 - ci->i_max_size = le64_to_cpu(info->max_size); 608 + /* only update max_size on auth cap */ 609 + if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) && 610 + ci->i_max_size != le64_to_cpu(info->max_size)) { 611 + dout("max_size %lld -> %llu\n", ci->i_max_size, 612 + le64_to_cpu(info->max_size)); 613 + ci->i_max_size = le64_to_cpu(info->max_size); 614 + } 615 + 616 616 ci->i_layout = info->layout; 617 617 inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; 618 618 ··· 1068 1054 ininfo = rinfo->targeti.in; 1069 1055 vino.ino = le64_to_cpu(ininfo->ino); 1070 1056 vino.snap = le64_to_cpu(ininfo->snapid); 1071 - if (!dn->d_inode) { 1057 + in = dn->d_inode; 1058 + if (!in) { 1072 1059 in = ceph_get_inode(sb, vino); 1073 1060 if (IS_ERR(in)) { 1074 1061 pr_err("fill_trace bad get_inode " ··· 1400 1385 spin_lock(&inode->i_lock); 1401 1386 dout("invalidate_pages %p gen %d revoking %d\n", inode, 1402 1387 ci->i_rdcache_gen, ci->i_rdcache_revoking); 1403 - if (ci->i_rdcache_gen == 0 || 1404 - ci->i_rdcache_revoking != ci->i_rdcache_gen) { 1405 - BUG_ON(ci->i_rdcache_revoking > ci->i_rdcache_gen); 1388 + if (ci->i_rdcache_revoking != ci->i_rdcache_gen) { 1406 1389 /* nevermind! */ 1407 - ci->i_rdcache_revoking = 0; 1408 1390 spin_unlock(&inode->i_lock); 1409 1391 goto out; 1410 1392 } ··· 1411 1399 ceph_invalidate_nondirty_pages(inode->i_mapping); 1412 1400 1413 1401 spin_lock(&inode->i_lock); 1414 - if (orig_gen == ci->i_rdcache_gen) { 1402 + if (orig_gen == ci->i_rdcache_gen && 1403 + orig_gen == ci->i_rdcache_revoking) { 1415 1404 dout("invalidate_pages %p gen %d successful\n", inode, 1416 1405 ci->i_rdcache_gen); 1417 - ci->i_rdcache_gen = 0; 1418 - ci->i_rdcache_revoking = 0; 1406 + ci->i_rdcache_revoking--; 1419 1407 check = 1; 1420 1408 } else { 1421 - dout("invalidate_pages %p gen %d raced, gen now %d\n", 1422 - inode, orig_gen, ci->i_rdcache_gen); 1409 + dout("invalidate_pages %p gen %d raced, now %d revoking %d\n", 1410 + inode, orig_gen, ci->i_rdcache_gen, 1411 + ci->i_rdcache_revoking); 1423 1412 } 1424 1413 spin_unlock(&inode->i_lock); 1425 1414 ··· 1751 1738 return 0; 1752 1739 } 1753 1740 1754 - dout("do_getattr inode %p mask %s\n", inode, ceph_cap_string(mask)); 1741 + dout("do_getattr inode %p mask %s mode 0%o\n", inode, ceph_cap_string(mask), inode->i_mode); 1755 1742 if (ceph_caps_issued_mask(ceph_inode(inode), mask, 1)) 1756 1743 return 0; 1757 1744
+5 -2
fs/ceph/mds_client.c
··· 528 528 ceph_mdsc_get_request(req); 529 529 __insert_request(mdsc, req); 530 530 531 + req->r_uid = current_fsuid(); 532 + req->r_gid = current_fsgid(); 533 + 531 534 if (dir) { 532 535 struct ceph_inode_info *ci = ceph_inode(dir); 533 536 ··· 1590 1587 1591 1588 head->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch); 1592 1589 head->op = cpu_to_le32(req->r_op); 1593 - head->caller_uid = cpu_to_le32(current_fsuid()); 1594 - head->caller_gid = cpu_to_le32(current_fsgid()); 1590 + head->caller_uid = cpu_to_le32(req->r_uid); 1591 + head->caller_gid = cpu_to_le32(req->r_gid); 1595 1592 head->args = req->r_args; 1596 1593 1597 1594 ceph_encode_filepath(&p, end, ino1, path1);
+2
fs/ceph/mds_client.h
··· 170 170 171 171 union ceph_mds_request_args r_args; 172 172 int r_fmode; /* file mode, if expecting cap */ 173 + uid_t r_uid; 174 + gid_t r_gid; 173 175 174 176 /* for choosing which mds to send this request to */ 175 177 int r_direct_mode;
+1 -3
fs/ceph/super.h
··· 293 293 int i_rd_ref, i_rdcache_ref, i_wr_ref; 294 294 int i_wrbuffer_ref, i_wrbuffer_ref_head; 295 295 u32 i_shared_gen; /* increment each time we get FILE_SHARED */ 296 - u32 i_rdcache_gen; /* we increment this each time we get 297 - FILE_CACHE. If it's non-zero, we 298 - _may_ have cached pages. */ 296 + u32 i_rdcache_gen; /* incremented each time we get FILE_CACHE. */ 299 297 u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */ 300 298 301 299 struct list_head i_unsafe_writes; /* uncommitted sync writes */
+1 -2
include/linux/ceph/libceph.h
··· 227 227 extern void ceph_release_page_vector(struct page **pages, int num_pages); 228 228 229 229 extern struct page **ceph_get_direct_page_vector(const char __user *data, 230 - int num_pages, 231 - loff_t off, size_t len); 230 + int num_pages); 232 231 extern void ceph_put_page_vector(struct page **pages, int num_pages); 233 232 extern void ceph_release_page_vector(struct page **pages, int num_pages); 234 233 extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags);
+1
include/linux/ceph/messenger.h
··· 82 82 struct ceph_buffer *middle; 83 83 struct page **pages; /* data payload. NOT OWNER. */ 84 84 unsigned nr_pages; /* size of page array */ 85 + unsigned page_alignment; /* io offset in first page */ 85 86 struct ceph_pagelist *pagelist; /* instead of pages */ 86 87 struct list_head list_head; 87 88 struct kref kref;
+5 -2
include/linux/ceph/osd_client.h
··· 79 79 struct ceph_file_layout r_file_layout; 80 80 struct ceph_snap_context *r_snapc; /* snap context for writes */ 81 81 unsigned r_num_pages; /* size of page array (follows) */ 82 + unsigned r_page_alignment; /* io offset in first page */ 82 83 struct page **r_pages; /* pages for data payload */ 83 84 int r_pages_from_pool; 84 85 int r_own_pages; /* if true, i own page list */ ··· 195 194 int do_sync, u32 truncate_seq, 196 195 u64 truncate_size, 197 196 struct timespec *mtime, 198 - bool use_mempool, int num_reply); 197 + bool use_mempool, int num_reply, 198 + int page_align); 199 199 200 200 static inline void ceph_osdc_get_request(struct ceph_osd_request *req) 201 201 { ··· 220 218 struct ceph_file_layout *layout, 221 219 u64 off, u64 *plen, 222 220 u32 truncate_seq, u64 truncate_size, 223 - struct page **pages, int nr_pages); 221 + struct page **pages, int nr_pages, 222 + int page_align); 224 223 225 224 extern int ceph_osdc_writepages(struct ceph_osd_client *osdc, 226 225 struct ceph_vino vino,
+6 -7
net/ceph/messenger.c
··· 540 540 /* initialize page iterator */ 541 541 con->out_msg_pos.page = 0; 542 542 if (m->pages) 543 - con->out_msg_pos.page_pos = 544 - le16_to_cpu(m->hdr.data_off) & ~PAGE_MASK; 543 + con->out_msg_pos.page_pos = m->page_alignment; 545 544 else 546 545 con->out_msg_pos.page_pos = 0; 547 546 con->out_msg_pos.data_pos = 0; ··· 1490 1491 struct ceph_msg *m = con->in_msg; 1491 1492 int ret; 1492 1493 int to, left; 1493 - unsigned front_len, middle_len, data_len, data_off; 1494 + unsigned front_len, middle_len, data_len; 1494 1495 int datacrc = con->msgr->nocrc; 1495 1496 int skip; 1496 1497 u64 seq; ··· 1526 1527 data_len = le32_to_cpu(con->in_hdr.data_len); 1527 1528 if (data_len > CEPH_MSG_MAX_DATA_LEN) 1528 1529 return -EIO; 1529 - data_off = le16_to_cpu(con->in_hdr.data_off); 1530 1530 1531 1531 /* verify seq# */ 1532 1532 seq = le64_to_cpu(con->in_hdr.seq); 1533 1533 if ((s64)seq - (s64)con->in_seq < 1) { 1534 - pr_info("skipping %s%lld %s seq %lld, expected %lld\n", 1534 + pr_info("skipping %s%lld %s seq %lld expected %lld\n", 1535 1535 ENTITY_NAME(con->peer_name), 1536 1536 ceph_pr_addr(&con->peer_addr.in_addr), 1537 1537 seq, con->in_seq + 1); 1538 1538 con->in_base_pos = -front_len - middle_len - data_len - 1539 1539 sizeof(m->footer); 1540 1540 con->in_tag = CEPH_MSGR_TAG_READY; 1541 - con->in_seq++; 1542 1541 return 0; 1543 1542 } else if ((s64)seq - (s64)con->in_seq > 1) { 1544 1543 pr_err("read_partial_message bad seq %lld expected %lld\n", ··· 1573 1576 1574 1577 con->in_msg_pos.page = 0; 1575 1578 if (m->pages) 1576 - con->in_msg_pos.page_pos = data_off & ~PAGE_MASK; 1579 + con->in_msg_pos.page_pos = m->page_alignment; 1577 1580 else 1578 1581 con->in_msg_pos.page_pos = 0; 1579 1582 con->in_msg_pos.data_pos = 0; ··· 2298 2301 2299 2302 /* data */ 2300 2303 m->nr_pages = 0; 2304 + m->page_alignment = 0; 2301 2305 m->pages = NULL; 2302 2306 m->pagelist = NULL; 2303 2307 m->bio = NULL; ··· 2368 2370 type, front_len); 2369 2371 return NULL; 2370 2372 } 2373 + msg->page_alignment = le16_to_cpu(hdr->data_off); 2371 2374 } 2372 2375 memcpy(&msg->hdr, &con->in_hdr, sizeof(con->in_hdr)); 2373 2376
+17 -8
net/ceph/osd_client.c
··· 71 71 op->extent.length = objlen; 72 72 } 73 73 req->r_num_pages = calc_pages_for(off, *plen); 74 + req->r_page_alignment = off & ~PAGE_MASK; 74 75 if (op->op == CEPH_OSD_OP_WRITE) 75 76 op->payload_len = *plen; 76 77 ··· 391 390 req->r_request->hdr.data_len = cpu_to_le32(data_len); 392 391 } 393 392 393 + req->r_request->page_alignment = req->r_page_alignment; 394 + 394 395 BUG_ON(p > msg->front.iov_base + msg->front.iov_len); 395 396 msg_size = p - msg->front.iov_base; 396 397 msg->front.iov_len = msg_size; ··· 422 419 u32 truncate_seq, 423 420 u64 truncate_size, 424 421 struct timespec *mtime, 425 - bool use_mempool, int num_reply) 422 + bool use_mempool, int num_reply, 423 + int page_align) 426 424 { 427 425 struct ceph_osd_req_op ops[3]; 428 426 struct ceph_osd_request *req; ··· 450 446 /* calculate max write size */ 451 447 calc_layout(osdc, vino, layout, off, plen, req, ops); 452 448 req->r_file_layout = *layout; /* keep a copy */ 449 + 450 + /* in case it differs from natural alignment that calc_layout 451 + filled in for us */ 452 + req->r_page_alignment = page_align; 453 453 454 454 ceph_osdc_build_request(req, off, plen, ops, 455 455 snapc, ··· 1497 1489 struct ceph_vino vino, struct ceph_file_layout *layout, 1498 1490 u64 off, u64 *plen, 1499 1491 u32 truncate_seq, u64 truncate_size, 1500 - struct page **pages, int num_pages) 1492 + struct page **pages, int num_pages, int page_align) 1501 1493 { 1502 1494 struct ceph_osd_request *req; 1503 1495 int rc = 0; ··· 1507 1499 req = ceph_osdc_new_request(osdc, layout, vino, off, plen, 1508 1500 CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, 1509 1501 NULL, 0, truncate_seq, truncate_size, NULL, 1510 - false, 1); 1502 + false, 1, page_align); 1511 1503 if (!req) 1512 1504 return -ENOMEM; 1513 1505 1514 1506 /* it may be a short read due to an object boundary */ 1515 1507 req->r_pages = pages; 1516 1508 1517 - dout("readpages final extent is %llu~%llu (%d pages)\n", 1518 - off, *plen, req->r_num_pages); 1509 + dout("readpages final extent is %llu~%llu (%d pages align %d)\n", 1510 + off, *plen, req->r_num_pages, page_align); 1519 1511 1520 1512 rc = ceph_osdc_start_request(osdc, req, false); 1521 1513 if (!rc) ··· 1541 1533 { 1542 1534 struct ceph_osd_request *req; 1543 1535 int rc = 0; 1536 + int page_align = off & ~PAGE_MASK; 1544 1537 1545 1538 BUG_ON(vino.snap != CEPH_NOSNAP); 1546 1539 req = ceph_osdc_new_request(osdc, layout, vino, off, &len, ··· 1550 1541 CEPH_OSD_FLAG_WRITE, 1551 1542 snapc, do_sync, 1552 1543 truncate_seq, truncate_size, mtime, 1553 - nofail, 1); 1544 + nofail, 1, page_align); 1554 1545 if (!req) 1555 1546 return -ENOMEM; 1556 1547 ··· 1647 1638 m = ceph_msg_get(req->r_reply); 1648 1639 1649 1640 if (data_len > 0) { 1650 - unsigned data_off = le16_to_cpu(hdr->data_off); 1651 - int want = calc_pages_for(data_off & ~PAGE_MASK, data_len); 1641 + int want = calc_pages_for(req->r_page_alignment, data_len); 1652 1642 1653 1643 if (unlikely(req->r_num_pages < want)) { 1654 1644 pr_warning("tid %lld reply %d > expected %d pages\n", ··· 1659 1651 } 1660 1652 m->pages = req->r_pages; 1661 1653 m->nr_pages = req->r_num_pages; 1654 + m->page_alignment = req->r_page_alignment; 1662 1655 #ifdef CONFIG_BLOCK 1663 1656 m->bio = req->r_bio; 1664 1657 #endif
+1 -2
net/ceph/pagevec.c
··· 13 13 * build a vector of user pages 14 14 */ 15 15 struct page **ceph_get_direct_page_vector(const char __user *data, 16 - int num_pages, 17 - loff_t off, size_t len) 16 + int num_pages) 18 17 { 19 18 struct page **pages; 20 19 int rc;