Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client

Pull ceph fixes from Sage Weil:
"These fix several bugs with RBD from 3.11 that didn't get tested in
time for the merge window: some error handling, a use-after-free, and
a sequencing issue when unmapping an image races with a notify
operation.

There is also a patch fixing a problem with the new ceph + fscache
code that just went in"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client:
fscache: check consistency does not decrement refcount
rbd: fix error handling from rbd_snap_name()
rbd: ignore unmapped snapshots that no longer exist
rbd: fix use-after free of rbd_dev->disk
rbd: make rbd_obj_notify_ack() synchronous
rbd: complete notifies before cleaning up osd_client and rbd_dev
libceph: add function to ensure notifies are complete

+74 -19
+59 -18
drivers/block/rbd.c
··· 931 931 u64 snap_id) 932 932 { 933 933 u32 which; 934 + const char *snap_name; 934 935 935 936 which = rbd_dev_snap_index(rbd_dev, snap_id); 936 937 if (which == BAD_SNAP_INDEX) 937 - return NULL; 938 + return ERR_PTR(-ENOENT); 938 939 939 - return _rbd_dev_v1_snap_name(rbd_dev, which); 940 + snap_name = _rbd_dev_v1_snap_name(rbd_dev, which); 941 + return snap_name ? snap_name : ERR_PTR(-ENOMEM); 940 942 } 941 943 942 944 static const char *rbd_snap_name(struct rbd_device *rbd_dev, u64 snap_id) ··· 2814 2812 obj_request_done_set(obj_request); 2815 2813 } 2816 2814 2817 - static int rbd_obj_notify_ack(struct rbd_device *rbd_dev, u64 notify_id) 2815 + static int rbd_obj_notify_ack_sync(struct rbd_device *rbd_dev, u64 notify_id) 2818 2816 { 2819 2817 struct rbd_obj_request *obj_request; 2820 2818 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; ··· 2829 2827 obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, obj_request); 2830 2828 if (!obj_request->osd_req) 2831 2829 goto out; 2832 - obj_request->callback = rbd_obj_request_put; 2833 2830 2834 2831 osd_req_op_watch_init(obj_request->osd_req, 0, CEPH_OSD_OP_NOTIFY_ACK, 2835 2832 notify_id, 0, 0); 2836 2833 rbd_osd_req_format_read(obj_request); 2837 2834 2838 2835 ret = rbd_obj_request_submit(osdc, obj_request); 2839 - out: 2840 2836 if (ret) 2841 - rbd_obj_request_put(obj_request); 2837 + goto out; 2838 + ret = rbd_obj_request_wait(obj_request); 2839 + out: 2840 + rbd_obj_request_put(obj_request); 2842 2841 2843 2842 return ret; 2844 2843 } ··· 2859 2856 if (ret) 2860 2857 rbd_warn(rbd_dev, "header refresh error (%d)\n", ret); 2861 2858 2862 - rbd_obj_notify_ack(rbd_dev, notify_id); 2859 + rbd_obj_notify_ack_sync(rbd_dev, notify_id); 2863 2860 } 2864 2861 2865 2862 /* ··· 3331 3328 clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); 3332 3329 } 3333 3330 3331 + static void rbd_dev_update_size(struct rbd_device *rbd_dev) 3332 + { 3333 + sector_t size; 3334 + bool removing; 3335 + 3336 + /* 3337 
+ * Don't hold the lock while doing disk operations, 3338 + * or lock ordering will conflict with the bdev mutex via: 3339 + * rbd_add() -> blkdev_get() -> rbd_open() 3340 + */ 3341 + spin_lock_irq(&rbd_dev->lock); 3342 + removing = test_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags); 3343 + spin_unlock_irq(&rbd_dev->lock); 3344 + /* 3345 + * If the device is being removed, rbd_dev->disk has 3346 + * been destroyed, so don't try to update its size 3347 + */ 3348 + if (!removing) { 3349 + size = (sector_t)rbd_dev->mapping.size / SECTOR_SIZE; 3350 + dout("setting size to %llu sectors", (unsigned long long)size); 3351 + set_capacity(rbd_dev->disk, size); 3352 + revalidate_disk(rbd_dev->disk); 3353 + } 3354 + } 3355 + 3334 3356 static int rbd_dev_refresh(struct rbd_device *rbd_dev) 3335 3357 { 3336 3358 u64 mapping_size; ··· 3375 3347 up_write(&rbd_dev->header_rwsem); 3376 3348 3377 3349 if (mapping_size != rbd_dev->mapping.size) { 3378 - sector_t size; 3379 - 3380 - size = (sector_t)rbd_dev->mapping.size / SECTOR_SIZE; 3381 - dout("setting size to %llu sectors", (unsigned long long)size); 3382 - set_capacity(rbd_dev->disk, size); 3383 - revalidate_disk(rbd_dev->disk); 3350 + rbd_dev_update_size(rbd_dev); 3384 3351 } 3385 3352 3386 3353 return ret; ··· 4084 4061 4085 4062 snap_id = snapc->snaps[which]; 4086 4063 snap_name = rbd_dev_v2_snap_name(rbd_dev, snap_id); 4087 - if (IS_ERR(snap_name)) 4088 - break; 4064 + if (IS_ERR(snap_name)) { 4065 + /* ignore no-longer existing snapshots */ 4066 + if (PTR_ERR(snap_name) == -ENOENT) 4067 + continue; 4068 + else 4069 + break; 4070 + } 4089 4071 found = !strcmp(name, snap_name); 4090 4072 kfree(snap_name); 4091 4073 } ··· 4169 4141 /* Look up the snapshot name, and make a copy */ 4170 4142 4171 4143 snap_name = rbd_snap_name(rbd_dev, spec->snap_id); 4172 - if (!snap_name) { 4173 - ret = -ENOMEM; 4144 + if (IS_ERR(snap_name)) { 4145 + ret = PTR_ERR(snap_name); 4174 4146 goto out_err; 4175 4147 } 4176 4148 ··· 5191 5163 if (ret < 0 
|| already) 5192 5164 return ret; 5193 5165 5194 - rbd_bus_del_dev(rbd_dev); 5195 5166 ret = rbd_dev_header_watch_sync(rbd_dev, false); 5196 5167 if (ret) 5197 5168 rbd_warn(rbd_dev, "failed to cancel watch event (%d)\n", ret); 5169 + 5170 + /* 5171 + * flush remaining watch callbacks - these must be complete 5172 + * before the osd_client is shutdown 5173 + */ 5174 + dout("%s: flushing notifies", __func__); 5175 + ceph_osdc_flush_notifies(&rbd_dev->rbd_client->client->osdc); 5176 + /* 5177 + * Don't free anything from rbd_dev->disk until after all 5178 + * notifies are completely processed. Otherwise 5179 + * rbd_bus_del_dev() will race with rbd_watch_cb(), resulting 5180 + * in a potential use after free of rbd_dev->disk or rbd_dev. 5181 + */ 5182 + rbd_bus_del_dev(rbd_dev); 5198 5183 rbd_dev_image_release(rbd_dev); 5199 5184 module_put(THIS_MODULE); 5200 5185
+2 -1
fs/fscache/cookie.c
··· 586 586 587 587 fscache_operation_init(op, NULL, NULL); 588 588 op->flags = FSCACHE_OP_MYTHREAD | 589 - (1 << FSCACHE_OP_WAITING); 589 + (1 << FSCACHE_OP_WAITING) | 590 + (1 << FSCACHE_OP_UNUSE_COOKIE); 590 591 591 592 spin_lock(&cookie->lock); 592 593
+2
include/linux/ceph/osd_client.h
··· 335 335 struct ceph_osd_request *req); 336 336 extern void ceph_osdc_sync(struct ceph_osd_client *osdc); 337 337 338 + extern void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc); 339 + 338 340 extern int ceph_osdc_readpages(struct ceph_osd_client *osdc, 339 341 struct ceph_vino vino, 340 342 struct ceph_file_layout *layout,
+11
net/ceph/osd_client.c
··· 2216 2216 EXPORT_SYMBOL(ceph_osdc_sync); 2217 2217 2218 2218 /* 2219 + * Call all pending notify callbacks - for use after a watch is 2220 + * unregistered, to make sure no more callbacks for it will be invoked 2221 + */ 2222 + extern void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc) 2223 + { 2224 + flush_workqueue(osdc->notify_wq); 2225 + } 2226 + EXPORT_SYMBOL(ceph_osdc_flush_notifies); 2227 + 2228 + 2229 + /* 2219 2230 * init, shutdown 2220 2231 */ 2221 2232 int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)