Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'ceph-for-5.18-rc8' of https://github.com/ceph/ceph-client

Pull ceph fix from Ilya Dryomov:
 "A fix for a nasty use-after-free, marked for stable"

* tag 'ceph-for-5.18-rc8' of https://github.com/ceph/ceph-client:
  libceph: fix misleading ceph_osdc_cancel_request() comment
  libceph: fix potential use-after-free on linger ping and resends

+133 -189
+3
include/linux/ceph/osd_client.h
··· 287 287 rados_watcherrcb_t errcb; 288 288 void *data; 289 289 290 + struct ceph_pagelist *request_pl; 291 + struct page **notify_id_pages; 292 + 290 293 struct page ***preply_pages; 291 294 size_t *preply_len; 292 295 };
+130 -189
net/ceph/osd_client.c
··· 537 537 target_init(&req->r_t); 538 538 } 539 539 540 - /* 541 - * This is ugly, but it allows us to reuse linger registration and ping 542 - * requests, keeping the structure of the code around send_linger{_ping}() 543 - * reasonable. Setting up a min_nr=2 mempool for each linger request 544 - * and dealing with copying ops (this blasts req only, watch op remains 545 - * intact) isn't any better. 546 - */ 547 - static void request_reinit(struct ceph_osd_request *req) 548 - { 549 - struct ceph_osd_client *osdc = req->r_osdc; 550 - bool mempool = req->r_mempool; 551 - unsigned int num_ops = req->r_num_ops; 552 - u64 snapid = req->r_snapid; 553 - struct ceph_snap_context *snapc = req->r_snapc; 554 - bool linger = req->r_linger; 555 - struct ceph_msg *request_msg = req->r_request; 556 - struct ceph_msg *reply_msg = req->r_reply; 557 - 558 - dout("%s req %p\n", __func__, req); 559 - WARN_ON(kref_read(&req->r_kref) != 1); 560 - request_release_checks(req); 561 - 562 - WARN_ON(kref_read(&request_msg->kref) != 1); 563 - WARN_ON(kref_read(&reply_msg->kref) != 1); 564 - target_destroy(&req->r_t); 565 - 566 - request_init(req); 567 - req->r_osdc = osdc; 568 - req->r_mempool = mempool; 569 - req->r_num_ops = num_ops; 570 - req->r_snapid = snapid; 571 - req->r_snapc = snapc; 572 - req->r_linger = linger; 573 - req->r_request = request_msg; 574 - req->r_reply = reply_msg; 575 - } 576 - 577 540 struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, 578 541 struct ceph_snap_context *snapc, 579 542 unsigned int num_ops, ··· 881 918 * @watch_opcode: CEPH_OSD_WATCH_OP_* 882 919 */ 883 920 static void osd_req_op_watch_init(struct ceph_osd_request *req, int which, 884 - u64 cookie, u8 watch_opcode) 921 + u8 watch_opcode, u64 cookie, u32 gen) 885 922 { 886 923 struct ceph_osd_req_op *op; 887 924 888 925 op = osd_req_op_init(req, which, CEPH_OSD_OP_WATCH, 0); 889 926 op->watch.cookie = cookie; 890 927 op->watch.op = watch_opcode; 891 - op->watch.gen = 0; 928 
+ op->watch.gen = gen; 929 + } 930 + 931 + /* 932 + * prot_ver, timeout and notify payload (may be empty) should already be 933 + * encoded in @request_pl 934 + */ 935 + static void osd_req_op_notify_init(struct ceph_osd_request *req, int which, 936 + u64 cookie, struct ceph_pagelist *request_pl) 937 + { 938 + struct ceph_osd_req_op *op; 939 + 940 + op = osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY, 0); 941 + op->notify.cookie = cookie; 942 + 943 + ceph_osd_data_pagelist_init(&op->notify.request_data, request_pl); 944 + op->indata_len = request_pl->length; 892 945 } 893 946 894 947 /* ··· 2710 2731 WARN_ON(!list_empty(&lreq->pending_lworks)); 2711 2732 WARN_ON(lreq->osd); 2712 2733 2713 - if (lreq->reg_req) 2714 - ceph_osdc_put_request(lreq->reg_req); 2715 - if (lreq->ping_req) 2716 - ceph_osdc_put_request(lreq->ping_req); 2734 + if (lreq->request_pl) 2735 + ceph_pagelist_release(lreq->request_pl); 2736 + if (lreq->notify_id_pages) 2737 + ceph_release_page_vector(lreq->notify_id_pages, 1); 2738 + 2739 + ceph_osdc_put_request(lreq->reg_req); 2740 + ceph_osdc_put_request(lreq->ping_req); 2717 2741 target_destroy(&lreq->t); 2718 2742 kfree(lreq); 2719 2743 } ··· 2985 3003 struct ceph_osd_linger_request *lreq = req->r_priv; 2986 3004 2987 3005 mutex_lock(&lreq->lock); 3006 + if (req != lreq->reg_req) { 3007 + dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n", 3008 + __func__, lreq, lreq->linger_id, req, lreq->reg_req); 3009 + goto out; 3010 + } 3011 + 2988 3012 dout("%s lreq %p linger_id %llu result %d\n", __func__, lreq, 2989 3013 lreq->linger_id, req->r_result); 2990 3014 linger_reg_commit_complete(lreq, req->r_result); ··· 3014 3026 } 3015 3027 } 3016 3028 3029 + out: 3017 3030 mutex_unlock(&lreq->lock); 3018 3031 linger_put(lreq); 3019 3032 } ··· 3037 3048 struct ceph_osd_linger_request *lreq = req->r_priv; 3038 3049 3039 3050 mutex_lock(&lreq->lock); 3051 + if (req != lreq->reg_req) { 3052 + dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n", 3053 
+ __func__, lreq, lreq->linger_id, req, lreq->reg_req); 3054 + goto out; 3055 + } 3056 + 3040 3057 dout("%s lreq %p linger_id %llu result %d last_error %d\n", __func__, 3041 3058 lreq, lreq->linger_id, req->r_result, lreq->last_error); 3042 3059 if (req->r_result < 0) { ··· 3052 3057 } 3053 3058 } 3054 3059 3060 + out: 3055 3061 mutex_unlock(&lreq->lock); 3056 3062 linger_put(lreq); 3057 3063 } 3058 3064 3059 3065 static void send_linger(struct ceph_osd_linger_request *lreq) 3060 3066 { 3061 - struct ceph_osd_request *req = lreq->reg_req; 3062 - struct ceph_osd_req_op *op = &req->r_ops[0]; 3067 + struct ceph_osd_client *osdc = lreq->osdc; 3068 + struct ceph_osd_request *req; 3069 + int ret; 3063 3070 3064 - verify_osdc_wrlocked(req->r_osdc); 3071 + verify_osdc_wrlocked(osdc); 3072 + mutex_lock(&lreq->lock); 3065 3073 dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id); 3066 3074 3067 - if (req->r_osd) 3068 - cancel_linger_request(req); 3075 + if (lreq->reg_req) { 3076 + if (lreq->reg_req->r_osd) 3077 + cancel_linger_request(lreq->reg_req); 3078 + ceph_osdc_put_request(lreq->reg_req); 3079 + } 3069 3080 3070 - request_reinit(req); 3081 + req = ceph_osdc_alloc_request(osdc, NULL, 1, true, GFP_NOIO); 3082 + BUG_ON(!req); 3083 + 3071 3084 target_copy(&req->r_t, &lreq->t); 3072 3085 req->r_mtime = lreq->mtime; 3073 3086 3074 - mutex_lock(&lreq->lock); 3075 3087 if (lreq->is_watch && lreq->committed) { 3076 - WARN_ON(op->op != CEPH_OSD_OP_WATCH || 3077 - op->watch.cookie != lreq->linger_id); 3078 - op->watch.op = CEPH_OSD_WATCH_OP_RECONNECT; 3079 - op->watch.gen = ++lreq->register_gen; 3088 + osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_RECONNECT, 3089 + lreq->linger_id, ++lreq->register_gen); 3080 3090 dout("lreq %p reconnect register_gen %u\n", lreq, 3081 - op->watch.gen); 3091 + req->r_ops[0].watch.gen); 3082 3092 req->r_callback = linger_reconnect_cb; 3083 3093 } else { 3084 - if (!lreq->is_watch) 3094 + if (lreq->is_watch) { 3095 + 
osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_WATCH, 3096 + lreq->linger_id, 0); 3097 + } else { 3085 3098 lreq->notify_id = 0; 3086 - else 3087 - WARN_ON(op->watch.op != CEPH_OSD_WATCH_OP_WATCH); 3099 + 3100 + refcount_inc(&lreq->request_pl->refcnt); 3101 + osd_req_op_notify_init(req, 0, lreq->linger_id, 3102 + lreq->request_pl); 3103 + ceph_osd_data_pages_init( 3104 + osd_req_op_data(req, 0, notify, response_data), 3105 + lreq->notify_id_pages, PAGE_SIZE, 0, false, false); 3106 + } 3088 3107 dout("lreq %p register\n", lreq); 3089 3108 req->r_callback = linger_commit_cb; 3090 3109 } 3091 - mutex_unlock(&lreq->lock); 3110 + 3111 + ret = ceph_osdc_alloc_messages(req, GFP_NOIO); 3112 + BUG_ON(ret); 3092 3113 3093 3114 req->r_priv = linger_get(lreq); 3094 3115 req->r_linger = true; 3116 + lreq->reg_req = req; 3117 + mutex_unlock(&lreq->lock); 3095 3118 3096 3119 submit_request(req, true); 3097 3120 } ··· 3119 3106 struct ceph_osd_linger_request *lreq = req->r_priv; 3120 3107 3121 3108 mutex_lock(&lreq->lock); 3109 + if (req != lreq->ping_req) { 3110 + dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n", 3111 + __func__, lreq, lreq->linger_id, req, lreq->ping_req); 3112 + goto out; 3113 + } 3114 + 3122 3115 dout("%s lreq %p linger_id %llu result %d ping_sent %lu last_error %d\n", 3123 3116 __func__, lreq, lreq->linger_id, req->r_result, lreq->ping_sent, 3124 3117 lreq->last_error); ··· 3140 3121 lreq->register_gen, req->r_ops[0].watch.gen); 3141 3122 } 3142 3123 3124 + out: 3143 3125 mutex_unlock(&lreq->lock); 3144 3126 linger_put(lreq); 3145 3127 } ··· 3148 3128 static void send_linger_ping(struct ceph_osd_linger_request *lreq) 3149 3129 { 3150 3130 struct ceph_osd_client *osdc = lreq->osdc; 3151 - struct ceph_osd_request *req = lreq->ping_req; 3152 - struct ceph_osd_req_op *op = &req->r_ops[0]; 3131 + struct ceph_osd_request *req; 3132 + int ret; 3153 3133 3154 3134 if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSERD)) { 3155 3135 dout("%s PAUSERD\n", __func__); 
··· 3161 3141 __func__, lreq, lreq->linger_id, lreq->ping_sent, 3162 3142 lreq->register_gen); 3163 3143 3164 - if (req->r_osd) 3165 - cancel_linger_request(req); 3144 + if (lreq->ping_req) { 3145 + if (lreq->ping_req->r_osd) 3146 + cancel_linger_request(lreq->ping_req); 3147 + ceph_osdc_put_request(lreq->ping_req); 3148 + } 3166 3149 3167 - request_reinit(req); 3150 + req = ceph_osdc_alloc_request(osdc, NULL, 1, true, GFP_NOIO); 3151 + BUG_ON(!req); 3152 + 3168 3153 target_copy(&req->r_t, &lreq->t); 3169 - 3170 - WARN_ON(op->op != CEPH_OSD_OP_WATCH || 3171 - op->watch.cookie != lreq->linger_id || 3172 - op->watch.op != CEPH_OSD_WATCH_OP_PING); 3173 - op->watch.gen = lreq->register_gen; 3154 + osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_PING, lreq->linger_id, 3155 + lreq->register_gen); 3174 3156 req->r_callback = linger_ping_cb; 3157 + 3158 + ret = ceph_osdc_alloc_messages(req, GFP_NOIO); 3159 + BUG_ON(ret); 3160 + 3175 3161 req->r_priv = linger_get(lreq); 3176 3162 req->r_linger = true; 3163 + lreq->ping_req = req; 3177 3164 3178 3165 ceph_osdc_get_request(req); 3179 3166 account_request(req); ··· 3196 3169 3197 3170 down_write(&osdc->lock); 3198 3171 linger_register(lreq); 3199 - if (lreq->is_watch) { 3200 - lreq->reg_req->r_ops[0].watch.cookie = lreq->linger_id; 3201 - lreq->ping_req->r_ops[0].watch.cookie = lreq->linger_id; 3202 - } else { 3203 - lreq->reg_req->r_ops[0].notify.cookie = lreq->linger_id; 3204 - } 3205 3172 3206 3173 calc_target(osdc, &lreq->t, false); 3207 3174 osd = lookup_create_osd(osdc, lreq->t.osd, true); ··· 3227 3206 */ 3228 3207 static void __linger_cancel(struct ceph_osd_linger_request *lreq) 3229 3208 { 3230 - if (lreq->is_watch && lreq->ping_req->r_osd) 3209 + if (lreq->ping_req && lreq->ping_req->r_osd) 3231 3210 cancel_linger_request(lreq->ping_req); 3232 - if (lreq->reg_req->r_osd) 3211 + if (lreq->reg_req && lreq->reg_req->r_osd) 3233 3212 cancel_linger_request(lreq->reg_req); 3234 3213 cancel_linger_map_check(lreq); 3235 3214 
unlink_linger(lreq->osd, lreq); ··· 4591 4570 EXPORT_SYMBOL(ceph_osdc_start_request); 4592 4571 4593 4572 /* 4594 - * Unregister a registered request. The request is not completed: 4595 - * ->r_result isn't set and __complete_request() isn't called. 4573 + * Unregister request. If @req was registered, it isn't completed: 4574 + * r_result isn't set and __complete_request() isn't invoked. 4575 + * 4576 + * If @req wasn't registered, this call may have raced with 4577 + * handle_reply(), in which case r_result would already be set and 4578 + * __complete_request() would be getting invoked, possibly even 4579 + * concurrently with this call. 4596 4580 */ 4597 4581 void ceph_osdc_cancel_request(struct ceph_osd_request *req) 4598 4582 { ··· 4683 4657 } 4684 4658 EXPORT_SYMBOL(ceph_osdc_sync); 4685 4659 4686 - static struct ceph_osd_request * 4687 - alloc_linger_request(struct ceph_osd_linger_request *lreq) 4688 - { 4689 - struct ceph_osd_request *req; 4690 - 4691 - req = ceph_osdc_alloc_request(lreq->osdc, NULL, 1, false, GFP_NOIO); 4692 - if (!req) 4693 - return NULL; 4694 - 4695 - ceph_oid_copy(&req->r_base_oid, &lreq->t.base_oid); 4696 - ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc); 4697 - return req; 4698 - } 4699 - 4700 - static struct ceph_osd_request * 4701 - alloc_watch_request(struct ceph_osd_linger_request *lreq, u8 watch_opcode) 4702 - { 4703 - struct ceph_osd_request *req; 4704 - 4705 - req = alloc_linger_request(lreq); 4706 - if (!req) 4707 - return NULL; 4708 - 4709 - /* 4710 - * Pass 0 for cookie because we don't know it yet, it will be 4711 - * filled in by linger_submit(). 4712 - */ 4713 - osd_req_op_watch_init(req, 0, 0, watch_opcode); 4714 - 4715 - if (ceph_osdc_alloc_messages(req, GFP_NOIO)) { 4716 - ceph_osdc_put_request(req); 4717 - return NULL; 4718 - } 4719 - 4720 - return req; 4721 - } 4722 - 4723 4660 /* 4724 4661 * Returns a handle, caller owns a ref. 
4725 4662 */ ··· 4711 4722 ceph_oloc_copy(&lreq->t.base_oloc, oloc); 4712 4723 lreq->t.flags = CEPH_OSD_FLAG_WRITE; 4713 4724 ktime_get_real_ts64(&lreq->mtime); 4714 - 4715 - lreq->reg_req = alloc_watch_request(lreq, CEPH_OSD_WATCH_OP_WATCH); 4716 - if (!lreq->reg_req) { 4717 - ret = -ENOMEM; 4718 - goto err_put_lreq; 4719 - } 4720 - 4721 - lreq->ping_req = alloc_watch_request(lreq, CEPH_OSD_WATCH_OP_PING); 4722 - if (!lreq->ping_req) { 4723 - ret = -ENOMEM; 4724 - goto err_put_lreq; 4725 - } 4726 4725 4727 4726 linger_submit(lreq); 4728 4727 ret = linger_reg_commit_wait(lreq); ··· 4749 4772 ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc); 4750 4773 req->r_flags = CEPH_OSD_FLAG_WRITE; 4751 4774 ktime_get_real_ts64(&req->r_mtime); 4752 - osd_req_op_watch_init(req, 0, lreq->linger_id, 4753 - CEPH_OSD_WATCH_OP_UNWATCH); 4775 + osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_UNWATCH, 4776 + lreq->linger_id, 0); 4754 4777 4755 4778 ret = ceph_osdc_alloc_messages(req, GFP_NOIO); 4756 4779 if (ret) ··· 4836 4859 } 4837 4860 EXPORT_SYMBOL(ceph_osdc_notify_ack); 4838 4861 4839 - static int osd_req_op_notify_init(struct ceph_osd_request *req, int which, 4840 - u64 cookie, u32 prot_ver, u32 timeout, 4841 - void *payload, u32 payload_len) 4842 - { 4843 - struct ceph_osd_req_op *op; 4844 - struct ceph_pagelist *pl; 4845 - int ret; 4846 - 4847 - op = osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY, 0); 4848 - op->notify.cookie = cookie; 4849 - 4850 - pl = ceph_pagelist_alloc(GFP_NOIO); 4851 - if (!pl) 4852 - return -ENOMEM; 4853 - 4854 - ret = ceph_pagelist_encode_32(pl, 1); /* prot_ver */ 4855 - ret |= ceph_pagelist_encode_32(pl, timeout); 4856 - ret |= ceph_pagelist_encode_32(pl, payload_len); 4857 - ret |= ceph_pagelist_append(pl, payload, payload_len); 4858 - if (ret) { 4859 - ceph_pagelist_release(pl); 4860 - return -ENOMEM; 4861 - } 4862 - 4863 - ceph_osd_data_pagelist_init(&op->notify.request_data, pl); 4864 - op->indata_len = pl->length; 4865 - return 0; 4866 - } 
4867 - 4868 4862 /* 4869 4863 * @timeout: in seconds 4870 4864 * ··· 4854 4906 size_t *preply_len) 4855 4907 { 4856 4908 struct ceph_osd_linger_request *lreq; 4857 - struct page **pages; 4858 4909 int ret; 4859 4910 4860 4911 WARN_ON(!timeout); ··· 4866 4919 if (!lreq) 4867 4920 return -ENOMEM; 4868 4921 4922 + lreq->request_pl = ceph_pagelist_alloc(GFP_NOIO); 4923 + if (!lreq->request_pl) { 4924 + ret = -ENOMEM; 4925 + goto out_put_lreq; 4926 + } 4927 + 4928 + ret = ceph_pagelist_encode_32(lreq->request_pl, 1); /* prot_ver */ 4929 + ret |= ceph_pagelist_encode_32(lreq->request_pl, timeout); 4930 + ret |= ceph_pagelist_encode_32(lreq->request_pl, payload_len); 4931 + ret |= ceph_pagelist_append(lreq->request_pl, payload, payload_len); 4932 + if (ret) { 4933 + ret = -ENOMEM; 4934 + goto out_put_lreq; 4935 + } 4936 + 4937 + /* for notify_id */ 4938 + lreq->notify_id_pages = ceph_alloc_page_vector(1, GFP_NOIO); 4939 + if (IS_ERR(lreq->notify_id_pages)) { 4940 + ret = PTR_ERR(lreq->notify_id_pages); 4941 + lreq->notify_id_pages = NULL; 4942 + goto out_put_lreq; 4943 + } 4944 + 4869 4945 lreq->preply_pages = preply_pages; 4870 4946 lreq->preply_len = preply_len; 4871 4947 4872 4948 ceph_oid_copy(&lreq->t.base_oid, oid); 4873 4949 ceph_oloc_copy(&lreq->t.base_oloc, oloc); 4874 4950 lreq->t.flags = CEPH_OSD_FLAG_READ; 4875 - 4876 - lreq->reg_req = alloc_linger_request(lreq); 4877 - if (!lreq->reg_req) { 4878 - ret = -ENOMEM; 4879 - goto out_put_lreq; 4880 - } 4881 - 4882 - /* 4883 - * Pass 0 for cookie because we don't know it yet, it will be 4884 - * filled in by linger_submit(). 
4885 - */ 4886 - ret = osd_req_op_notify_init(lreq->reg_req, 0, 0, 1, timeout, 4887 - payload, payload_len); 4888 - if (ret) 4889 - goto out_put_lreq; 4890 - 4891 - /* for notify_id */ 4892 - pages = ceph_alloc_page_vector(1, GFP_NOIO); 4893 - if (IS_ERR(pages)) { 4894 - ret = PTR_ERR(pages); 4895 - goto out_put_lreq; 4896 - } 4897 - ceph_osd_data_pages_init(osd_req_op_data(lreq->reg_req, 0, notify, 4898 - response_data), 4899 - pages, PAGE_SIZE, 0, false, true); 4900 - 4901 - ret = ceph_osdc_alloc_messages(lreq->reg_req, GFP_NOIO); 4902 - if (ret) 4903 - goto out_put_lreq; 4904 4951 4905 4952 linger_submit(lreq); 4906 4953 ret = linger_reg_commit_wait(lreq);