Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

RDMA/hns: Optimize the base address table config for MTR

The base address table is allocated by the DMA allocator, and its size is
always aligned to PAGE_SIZE. If a fixed size is used to allocate the
table, the number of base address entries stored in the table will be
smaller than the number that can actually be stored.

Link: https://lore.kernel.org/r/1621589395-2435-2-git-send-email-liweihang@huawei.com
Signed-off-by: Xi Wang <wangxi11@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>

authored by

Xi Wang and committed by
Jason Gunthorpe
7b0006db 9ecf6ac1

+67 -53
+20 -29
drivers/infiniband/hw/hns/hns_roce_alloc.c
··· 208 208 209 209 /* Calc the trunk size and num by required size and page_shift */ 210 210 if (flags & HNS_ROCE_BUF_DIRECT) { 211 - buf->trunk_shift = ilog2(ALIGN(size, PAGE_SIZE)); 211 + buf->trunk_shift = order_base_2(ALIGN(size, PAGE_SIZE)); 212 212 ntrunk = 1; 213 213 } else { 214 - buf->trunk_shift = ilog2(ALIGN(page_size, PAGE_SIZE)); 214 + buf->trunk_shift = order_base_2(ALIGN(page_size, PAGE_SIZE)); 215 215 ntrunk = DIV_ROUND_UP(size, 1 << buf->trunk_shift); 216 216 } 217 217 ··· 252 252 } 253 253 254 254 int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, 255 - int buf_cnt, int start, struct hns_roce_buf *buf) 255 + int buf_cnt, struct hns_roce_buf *buf, 256 + unsigned int page_shift) 256 257 { 257 - int i, end; 258 - int total; 258 + unsigned int offset, max_size; 259 + int total = 0; 260 + int i; 259 261 260 - end = start + buf_cnt; 261 - if (end > buf->npages) { 262 - dev_err(hr_dev->dev, 263 - "failed to check kmem bufs, end %d + %d total %u!\n", 264 - start, buf_cnt, buf->npages); 262 + if (page_shift > buf->trunk_shift) { 263 + dev_err(hr_dev->dev, "failed to check kmem buf shift %u > %u\n", 264 + page_shift, buf->trunk_shift); 265 265 return -EINVAL; 266 266 } 267 267 268 - total = 0; 269 - for (i = start; i < end; i++) 270 - bufs[total++] = hns_roce_buf_page(buf, i); 268 + offset = 0; 269 + max_size = buf->ntrunks << buf->trunk_shift; 270 + for (i = 0; i < buf_cnt && offset < max_size; i++) { 271 + bufs[total++] = hns_roce_buf_dma_addr(buf, offset); 272 + offset += (1 << page_shift); 273 + } 271 274 272 275 return total; 273 276 } 274 277 275 278 int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, 276 - int buf_cnt, int start, struct ib_umem *umem, 279 + int buf_cnt, struct ib_umem *umem, 277 280 unsigned int page_shift) 278 281 { 279 282 struct ib_block_iter biter; 280 283 int total = 0; 281 - int idx = 0; 282 - u64 addr; 283 - 284 - if (page_shift < HNS_HW_PAGE_SHIFT) { 285 - dev_err(hr_dev->dev, 
"failed to check umem page shift %u!\n", 286 - page_shift); 287 - return -EINVAL; 288 - } 289 284 290 285 /* convert system page cnt to hw page cnt */ 291 286 rdma_umem_for_each_dma_block(umem, &biter, 1 << page_shift) { 292 - addr = rdma_block_iter_dma_address(&biter); 293 - if (idx >= start) { 294 - bufs[total++] = addr; 295 - if (total >= buf_cnt) 296 - goto done; 297 - } 298 - idx++; 287 + bufs[total++] = rdma_block_iter_dma_address(&biter); 288 + if (total >= buf_cnt) 289 + goto done; 299 290 } 300 291 301 292 done:
+2 -2
drivers/infiniband/hw/hns/hns_roce_cq.c
··· 202 202 struct hns_roce_buf_attr buf_attr = {}; 203 203 int ret; 204 204 205 - buf_attr.page_shift = hr_dev->caps.cqe_buf_pg_sz + HNS_HW_PAGE_SHIFT; 205 + buf_attr.page_shift = hr_dev->caps.cqe_buf_pg_sz + PAGE_SHIFT; 206 206 buf_attr.region[0].size = hr_cq->cq_depth * hr_cq->cqe_size; 207 207 buf_attr.region[0].hopnum = hr_dev->caps.cqe_hop_num; 208 208 buf_attr.region_count = 1; 209 209 210 210 ret = hns_roce_mtr_create(hr_dev, &hr_cq->mtr, &buf_attr, 211 - hr_dev->caps.cqe_ba_pg_sz + HNS_HW_PAGE_SHIFT, 211 + hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT, 212 212 udata, addr); 213 213 if (ret) 214 214 ibdev_err(ibdev, "failed to alloc CQ mtr, ret = %d.\n", ret);
+10 -5
drivers/infiniband/hw/hns/hns_roce_device.h
··· 1060 1060 (offset & ((1 << buf->trunk_shift) - 1)); 1061 1061 } 1062 1062 1063 - static inline dma_addr_t hns_roce_buf_page(struct hns_roce_buf *buf, u32 idx) 1063 + static inline dma_addr_t hns_roce_buf_dma_addr(struct hns_roce_buf *buf, 1064 + unsigned int offset) 1064 1065 { 1065 - unsigned int offset = idx << buf->page_shift; 1066 - 1067 1066 return buf->trunk_list[offset >> buf->trunk_shift].map + 1068 1067 (offset & ((1 << buf->trunk_shift) - 1)); 1068 + } 1069 + 1070 + static inline dma_addr_t hns_roce_buf_page(struct hns_roce_buf *buf, u32 idx) 1071 + { 1072 + return hns_roce_buf_dma_addr(buf, idx << buf->page_shift); 1069 1073 } 1070 1074 1071 1075 #define hr_hw_page_align(x) ALIGN(x, 1 << HNS_HW_PAGE_SHIFT) ··· 1208 1204 u32 page_shift, u32 flags); 1209 1205 1210 1206 int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, 1211 - int buf_cnt, int start, struct hns_roce_buf *buf); 1207 + int buf_cnt, struct hns_roce_buf *buf, 1208 + unsigned int page_shift); 1212 1209 int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, 1213 - int buf_cnt, int start, struct ib_umem *umem, 1210 + int buf_cnt, struct ib_umem *umem, 1214 1211 unsigned int page_shift); 1215 1212 1216 1213 int hns_roce_create_srq(struct ib_srq *srq,
+22 -4
drivers/infiniband/hw/hns/hns_roce_hw_v2.c
··· 2018 2018 caps->llm_buf_pg_sz = 0; 2019 2019 2020 2020 /* MR */ 2021 + caps->mpt_ba_pg_sz = 0; 2022 + caps->mpt_buf_pg_sz = 0; 2021 2023 caps->pbl_ba_pg_sz = HNS_ROCE_BA_PG_SZ_SUPPORTED_16K; 2022 2024 caps->pbl_buf_pg_sz = 0; 2023 2025 calc_pg_sz(caps->num_mtpts, caps->mtpt_entry_sz, caps->mpt_hop_num, ··· 2027 2025 HEM_TYPE_MTPT); 2028 2026 2029 2027 /* QP */ 2030 - caps->qpc_timer_ba_pg_sz = 0; 2028 + caps->qpc_ba_pg_sz = 0; 2029 + caps->qpc_buf_pg_sz = 0; 2030 + caps->qpc_timer_ba_pg_sz = 0; 2031 2031 caps->qpc_timer_buf_pg_sz = 0; 2032 + caps->sccc_ba_pg_sz = 0; 2033 + caps->sccc_buf_pg_sz = 0; 2032 2034 caps->mtt_ba_pg_sz = 0; 2033 2035 caps->mtt_buf_pg_sz = 0; 2034 2036 calc_pg_sz(caps->num_qps, caps->qpc_sz, caps->qpc_hop_num, ··· 2045 2039 &caps->sccc_ba_pg_sz, HEM_TYPE_SCCC); 2046 2040 2047 2041 /* CQ */ 2042 + caps->cqc_ba_pg_sz = 0; 2043 + caps->cqc_buf_pg_sz = 0; 2044 + caps->cqc_timer_ba_pg_sz = 0; 2045 + caps->cqc_timer_buf_pg_sz = 0; 2046 + caps->cqe_ba_pg_sz = HNS_ROCE_BA_PG_SZ_SUPPORTED_256K; 2047 + caps->cqe_buf_pg_sz = 0; 2048 2048 calc_pg_sz(caps->num_cqs, caps->cqc_entry_sz, caps->cqc_hop_num, 2049 2049 caps->cqc_bt_num, &caps->cqc_buf_pg_sz, &caps->cqc_ba_pg_sz, 2050 2050 HEM_TYPE_CQC); ··· 2065 2053 2066 2054 /* SRQ */ 2067 2055 if (caps->flags & HNS_ROCE_CAP_FLAG_SRQ) { 2056 + caps->srqc_ba_pg_sz = 0; 2057 + caps->srqc_buf_pg_sz = 0; 2058 + caps->srqwqe_ba_pg_sz = 0; 2059 + caps->srqwqe_buf_pg_sz = 0; 2060 + caps->idx_ba_pg_sz = 0; 2061 + caps->idx_buf_pg_sz = 0; 2068 2062 calc_pg_sz(caps->num_srqs, caps->srqc_entry_sz, 2069 2063 caps->srqc_hop_num, caps->srqc_bt_num, 2070 2064 &caps->srqc_buf_pg_sz, &caps->srqc_ba_pg_sz, ··· 6179 6161 else 6180 6162 eq->hop_num = hr_dev->caps.eqe_hop_num; 6181 6163 6182 - buf_attr.page_shift = hr_dev->caps.eqe_buf_pg_sz + HNS_HW_PAGE_SHIFT; 6164 + buf_attr.page_shift = hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT; 6183 6165 buf_attr.region[0].size = eq->entries * eq->eqe_size; 6184 6166 
buf_attr.region[0].hopnum = eq->hop_num; 6185 6167 buf_attr.region_count = 1; 6186 6168 6187 6169 err = hns_roce_mtr_create(hr_dev, &eq->mtr, &buf_attr, 6188 - hr_dev->caps.eqe_ba_pg_sz + 6189 - HNS_HW_PAGE_SHIFT, NULL, 0); 6170 + hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT, NULL, 6171 + 0); 6190 6172 if (err) 6191 6173 dev_err(hr_dev->dev, "Failed to alloc EQE mtr, err %d\n", err); 6192 6174
+7 -7
drivers/infiniband/hw/hns/hns_roce_mr.c
··· 122 122 buf_attr.mtt_only = is_fast; 123 123 124 124 err = hns_roce_mtr_create(hr_dev, &mr->pbl_mtr, &buf_attr, 125 - hr_dev->caps.pbl_ba_pg_sz + HNS_HW_PAGE_SHIFT, 125 + hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT, 126 126 udata, start); 127 127 if (err) 128 128 ibdev_err(ibdev, "failed to alloc pbl mtr, ret = %d.\n", err); ··· 737 737 return -ENOMEM; 738 738 739 739 if (mtr->umem) 740 - npage = hns_roce_get_umem_bufs(hr_dev, pages, page_count, 0, 740 + npage = hns_roce_get_umem_bufs(hr_dev, pages, page_count, 741 741 mtr->umem, page_shift); 742 742 else 743 - npage = hns_roce_get_kmem_bufs(hr_dev, pages, page_count, 0, 744 - mtr->kmem); 743 + npage = hns_roce_get_kmem_bufs(hr_dev, pages, page_count, 744 + mtr->kmem, page_shift); 745 745 746 746 if (npage != page_count) { 747 747 ibdev_err(ibdev, "failed to get mtr page %d != %d.\n", npage, ··· 753 753 if (mtr->hem_cfg.is_direct && npage > 1) { 754 754 ret = mtr_check_direct_pages(pages, npage, page_shift); 755 755 if (ret) { 756 - ibdev_err(ibdev, "failed to check %s mtr, idx = %d.\n", 757 - mtr->umem ? "user" : "kernel", ret); 756 + ibdev_err(ibdev, "failed to check %s page: %d / %d.\n", 757 + mtr->umem ? "umtr" : "kmtr", ret, npage); 758 758 ret = -ENOBUFS; 759 759 goto err_alloc_list; 760 760 } ··· 799 799 if (r->offset + r->count > page_cnt) { 800 800 ret = -EINVAL; 801 801 ibdev_err(ibdev, 802 - "failed to check mtr%u end %u + %u, max %u.\n", 802 + "failed to check mtr%u count %u + %u > %u.\n", 803 803 i, r->offset, r->count, page_cnt); 804 804 return ret; 805 805 }
+1 -1
drivers/infiniband/hw/hns/hns_roce_qp.c
··· 761 761 goto err_inline; 762 762 } 763 763 ret = hns_roce_mtr_create(hr_dev, &hr_qp->mtr, &buf_attr, 764 - HNS_HW_PAGE_SHIFT + hr_dev->caps.mtt_ba_pg_sz, 764 + PAGE_SHIFT + hr_dev->caps.mtt_ba_pg_sz, 765 765 udata, addr); 766 766 if (ret) { 767 767 ibdev_err(ibdev, "failed to create WQE mtr, ret = %d.\n", ret);
+5 -5
drivers/infiniband/hw/hns/hns_roce_srq.c
··· 167 167 168 168 srq->idx_que.entry_shift = ilog2(HNS_ROCE_IDX_QUE_ENTRY_SZ); 169 169 170 - buf_attr.page_shift = hr_dev->caps.idx_buf_pg_sz + HNS_HW_PAGE_SHIFT; 170 + buf_attr.page_shift = hr_dev->caps.idx_buf_pg_sz + PAGE_SHIFT; 171 171 buf_attr.region[0].size = to_hr_hem_entries_size(srq->wqe_cnt, 172 172 srq->idx_que.entry_shift); 173 173 buf_attr.region[0].hopnum = hr_dev->caps.idx_hop_num; 174 174 buf_attr.region_count = 1; 175 175 176 176 ret = hns_roce_mtr_create(hr_dev, &idx_que->mtr, &buf_attr, 177 - hr_dev->caps.idx_ba_pg_sz + HNS_HW_PAGE_SHIFT, 177 + hr_dev->caps.idx_ba_pg_sz + PAGE_SHIFT, 178 178 udata, addr); 179 179 if (ret) { 180 180 ibdev_err(ibdev, ··· 222 222 HNS_ROCE_SGE_SIZE * 223 223 srq->max_gs))); 224 224 225 - buf_attr.page_shift = hr_dev->caps.srqwqe_buf_pg_sz + HNS_HW_PAGE_SHIFT; 225 + buf_attr.page_shift = hr_dev->caps.srqwqe_buf_pg_sz + PAGE_SHIFT; 226 226 buf_attr.region[0].size = to_hr_hem_entries_size(srq->wqe_cnt, 227 227 srq->wqe_shift); 228 228 buf_attr.region[0].hopnum = hr_dev->caps.srqwqe_hop_num; 229 229 buf_attr.region_count = 1; 230 230 231 231 ret = hns_roce_mtr_create(hr_dev, &srq->buf_mtr, &buf_attr, 232 - hr_dev->caps.srqwqe_ba_pg_sz + 233 - HNS_HW_PAGE_SHIFT, udata, addr); 232 + hr_dev->caps.srqwqe_ba_pg_sz + PAGE_SHIFT, 233 + udata, addr); 234 234 if (ret) 235 235 ibdev_err(ibdev, 236 236 "failed to alloc SRQ buf mtr, ret = %d.\n", ret);