Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'ceph-for-4.11-rc2' of git://github.com/ceph/ceph-client

Pull ceph fixes from Ilya Dryomov:

- a fix for the recently discovered misdirected requests bug present in
jewel and later on the server side and all stable kernels

- a fixup for -rc1 CRUSH changes

- two usability enhancements: osd_request_timeout option and
supported_features bus attribute.

* tag 'ceph-for-4.11-rc2' of git://github.com/ceph/ceph-client:
libceph: osd_request_timeout option
rbd: supported_features bus attribute
libceph: don't set weight to IN when OSD is destroyed
libceph: fix crush_decode() for older maps

+66 -8
+12 -4
drivers/block/rbd.c
··· 120 120 121 121 /* Feature bits */ 122 122 123 - #define RBD_FEATURE_LAYERING (1<<0) 124 - #define RBD_FEATURE_STRIPINGV2 (1<<1) 125 - #define RBD_FEATURE_EXCLUSIVE_LOCK (1<<2) 126 - #define RBD_FEATURE_DATA_POOL (1<<7) 123 + #define RBD_FEATURE_LAYERING (1ULL<<0) 124 + #define RBD_FEATURE_STRIPINGV2 (1ULL<<1) 125 + #define RBD_FEATURE_EXCLUSIVE_LOCK (1ULL<<2) 126 + #define RBD_FEATURE_DATA_POOL (1ULL<<7) 127 + 127 128 #define RBD_FEATURES_ALL (RBD_FEATURE_LAYERING | \ 128 129 RBD_FEATURE_STRIPINGV2 | \ 129 130 RBD_FEATURE_EXCLUSIVE_LOCK | \ ··· 500 499 return is_lock_owner; 501 500 } 502 501 502 + static ssize_t rbd_supported_features_show(struct bus_type *bus, char *buf) 503 + { 504 + return sprintf(buf, "0x%llx\n", RBD_FEATURES_SUPPORTED); 505 + } 506 + 503 507 static BUS_ATTR(add, S_IWUSR, NULL, rbd_add); 504 508 static BUS_ATTR(remove, S_IWUSR, NULL, rbd_remove); 505 509 static BUS_ATTR(add_single_major, S_IWUSR, NULL, rbd_add_single_major); 506 510 static BUS_ATTR(remove_single_major, S_IWUSR, NULL, rbd_remove_single_major); 511 + static BUS_ATTR(supported_features, S_IRUGO, rbd_supported_features_show, NULL); 507 512 508 513 static struct attribute *rbd_bus_attrs[] = { 509 514 &bus_attr_add.attr, 510 515 &bus_attr_remove.attr, 511 516 &bus_attr_add_single_major.attr, 512 517 &bus_attr_remove_single_major.attr, 518 + &bus_attr_supported_features.attr, 513 519 NULL, 514 520 }; 515 521
+2
include/linux/ceph/libceph.h
··· 48 48 unsigned long mount_timeout; /* jiffies */ 49 49 unsigned long osd_idle_ttl; /* jiffies */ 50 50 unsigned long osd_keepalive_timeout; /* jiffies */ 51 + unsigned long osd_request_timeout; /* jiffies */ 51 52 52 53 /* 53 54 * any type that can't be simply compared or doesn't need ··· 69 68 #define CEPH_MOUNT_TIMEOUT_DEFAULT msecs_to_jiffies(60 * 1000) 70 69 #define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000) 71 70 #define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000) 71 + #define CEPH_OSD_REQUEST_TIMEOUT_DEFAULT 0 /* no timeout */ 72 72 73 73 #define CEPH_MONC_HUNT_INTERVAL msecs_to_jiffies(3 * 1000) 74 74 #define CEPH_MONC_PING_INTERVAL msecs_to_jiffies(10 * 1000)
+1
include/linux/ceph/osd_client.h
··· 189 189 190 190 /* internal */ 191 191 unsigned long r_stamp; /* jiffies, send or check time */ 192 + unsigned long r_start_stamp; /* jiffies */ 192 193 int r_attempts; 193 194 struct ceph_eversion r_replay_version; /* aka reassert_version */ 194 195 u32 r_last_force_resend;
+15
net/ceph/ceph_common.c
··· 230 230 Opt_osdkeepalivetimeout, 231 231 Opt_mount_timeout, 232 232 Opt_osd_idle_ttl, 233 + Opt_osd_request_timeout, 233 234 Opt_last_int, 234 235 /* int args above */ 235 236 Opt_fsid, ··· 257 256 {Opt_osdkeepalivetimeout, "osdkeepalive=%d"}, 258 257 {Opt_mount_timeout, "mount_timeout=%d"}, 259 258 {Opt_osd_idle_ttl, "osd_idle_ttl=%d"}, 259 + {Opt_osd_request_timeout, "osd_request_timeout=%d"}, 260 260 /* int args above */ 261 261 {Opt_fsid, "fsid=%s"}, 262 262 {Opt_name, "name=%s"}, ··· 363 361 opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; 364 362 opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; 365 363 opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; 364 + opt->osd_request_timeout = CEPH_OSD_REQUEST_TIMEOUT_DEFAULT; 366 365 367 366 /* get mon ip(s) */ 368 367 /* ip1[:port1][,ip2[:port2]...] */ ··· 476 473 } 477 474 opt->mount_timeout = msecs_to_jiffies(intval * 1000); 478 475 break; 476 + case Opt_osd_request_timeout: 477 + /* 0 is "wait forever" (i.e. infinite timeout) */ 478 + if (intval < 0 || intval > INT_MAX / 1000) { 479 + pr_err("osd_request_timeout out of range\n"); 480 + err = -EINVAL; 481 + goto out; 482 + } 483 + opt->osd_request_timeout = msecs_to_jiffies(intval * 1000); 484 + break; 479 485 480 486 case Opt_share: 481 487 opt->flags &= ~CEPH_OPT_NOSHARE; ··· 569 557 if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT) 570 558 seq_printf(m, "osdkeepalivetimeout=%d,", 571 559 jiffies_to_msecs(opt->osd_keepalive_timeout) / 1000); 560 + if (opt->osd_request_timeout != CEPH_OSD_REQUEST_TIMEOUT_DEFAULT) 561 + seq_printf(m, "osd_request_timeout=%d,", 562 + jiffies_to_msecs(opt->osd_request_timeout) / 1000); 572 563 573 564 /* drop redundant comma */ 574 565 if (m->count != pos)
+35 -1
net/ceph/osd_client.c
··· 1709 1709 1710 1710 req->r_flags |= CEPH_OSD_FLAG_ONDISK; 1711 1711 atomic_inc(&req->r_osdc->num_requests); 1712 + 1713 + req->r_start_stamp = jiffies; 1712 1714 } 1713 1715 1714 1716 static void submit_request(struct ceph_osd_request *req, bool wrlocked) ··· 1789 1787 finish_request(req); 1790 1788 complete_all(&req->r_completion); 1791 1789 ceph_osdc_put_request(req); 1790 + } 1791 + 1792 + static void abort_request(struct ceph_osd_request *req, int err) 1793 + { 1794 + dout("%s req %p tid %llu err %d\n", __func__, req, req->r_tid, err); 1795 + 1796 + cancel_map_check(req); 1797 + complete_request(req, err); 1792 1798 } 1793 1799 1794 1800 static void check_pool_dne(struct ceph_osd_request *req) ··· 2497 2487 container_of(work, struct ceph_osd_client, timeout_work.work); 2498 2488 struct ceph_options *opts = osdc->client->options; 2499 2489 unsigned long cutoff = jiffies - opts->osd_keepalive_timeout; 2490 + unsigned long expiry_cutoff = jiffies - opts->osd_request_timeout; 2500 2491 LIST_HEAD(slow_osds); 2501 2492 struct rb_node *n, *p; 2502 2493 ··· 2513 2502 struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node); 2514 2503 bool found = false; 2515 2504 2516 - for (p = rb_first(&osd->o_requests); p; p = rb_next(p)) { 2505 + for (p = rb_first(&osd->o_requests); p; ) { 2517 2506 struct ceph_osd_request *req = 2518 2507 rb_entry(p, struct ceph_osd_request, r_node); 2508 + 2509 + p = rb_next(p); /* abort_request() */ 2519 2510 2520 2511 if (time_before(req->r_stamp, cutoff)) { 2521 2512 dout(" req %p tid %llu on osd%d is laggy\n", 2522 2513 req, req->r_tid, osd->o_osd); 2523 2514 found = true; 2515 + } 2516 + if (opts->osd_request_timeout && 2517 + time_before(req->r_start_stamp, expiry_cutoff)) { 2518 + pr_err_ratelimited("tid %llu on osd%d timeout\n", 2519 + req->r_tid, osd->o_osd); 2520 + abort_request(req, -ETIMEDOUT); 2524 2521 } 2525 2522 } 2526 2523 for (p = rb_first(&osd->o_linger_requests); p; p = rb_next(p)) { ··· 2547 2528 2548 2529 if (found) 
2549 2530 list_move_tail(&osd->o_keepalive_item, &slow_osds); 2531 + } 2532 + 2533 + if (opts->osd_request_timeout) { 2534 + for (p = rb_first(&osdc->homeless_osd.o_requests); p; ) { 2535 + struct ceph_osd_request *req = 2536 + rb_entry(p, struct ceph_osd_request, r_node); 2537 + 2538 + p = rb_next(p); /* abort_request() */ 2539 + 2540 + if (time_before(req->r_start_stamp, expiry_cutoff)) { 2541 + pr_err_ratelimited("tid %llu on osd%d timeout\n", 2542 + req->r_tid, osdc->homeless_osd.o_osd); 2543 + abort_request(req, -ETIMEDOUT); 2544 + } 2545 + } 2550 2546 } 2551 2547 2552 2548 if (atomic_read(&osdc->num_homeless) || !list_empty(&slow_osds))
+1 -3
net/ceph/osdmap.c
··· 390 390 dout("crush decode tunable chooseleaf_stable = %d\n", 391 391 c->chooseleaf_stable); 392 392 393 - crush_finalize(c); 394 - 395 393 done: 394 + crush_finalize(c); 396 395 dout("crush_decode success\n"); 397 396 return c; 398 397 ··· 1379 1380 if ((map->osd_state[osd] & CEPH_OSD_EXISTS) && 1380 1381 (xorstate & CEPH_OSD_EXISTS)) { 1381 1382 pr_info("osd%d does not exist\n", osd); 1382 - map->osd_weight[osd] = CEPH_OSD_IN; 1383 1383 ret = set_primary_affinity(map, osd, 1384 1384 CEPH_OSD_DEFAULT_PRIMARY_AFFINITY); 1385 1385 if (ret)