Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

scsi: target: tcmu: scatter_/gather_data_area() rework

scatter_data_area() and gather_data_area() are not easy to understand since
data is copied in nested loops over sg_list and tcmu dbi list. Since sg
list can contain only partly filled pages, the loop has to be prepared to
handle sg pages not matching dbi pages one by one.

Existing implementation uses kmap_atomic()/kunmap_atomic() due to
performance reasons. But instead of using these calls strictly nested for
sg and dpi pages, the code holds the mappings in an overlapping way, which
indeed is a bug that would trigger on archs using highmem.

The scatterlist lib contains the sg_miter_start/_next/_stop functions which
can be used to simplify such complicated loops.

The new code now processes the dbi list in the outer loop, while sg list is
handled by the inner one. That way the code can take advantage of the
sg_miter_* family calls.

Calling sg_miter_stop() after the end of the inner loop enforces strict
nesting of atomic kmaps.

Since the nested loops in scatter_/gather_data_area were very similar, I
replaced them by the new helper function tcmu_copy_data().

Link: https://lore.kernel.org/r/20201019115118.11949-1-bostroesser@gmail.com
Acked-by: Mike Christie <michael.christie@oracle.com>
Signed-off-by: Bodo Stroesser <bostroesser@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>

authored by

Bodo Stroesser and committed by
Martin K. Petersen
c8ed1ff8 b9dd44fd

+78 -92
+78 -92
drivers/target/target_core_user.c
··· 586 586 } 587 587 588 588 static int new_block_to_iov(struct tcmu_dev *udev, struct tcmu_cmd *cmd, 589 - struct iovec **iov, int prev_dbi, int *remain) 589 + struct iovec **iov, int prev_dbi, int len) 590 590 { 591 591 /* Get the next dbi */ 592 592 int dbi = tcmu_cmd_get_dbi(cmd); 593 - /* Do not add more than DATA_BLOCK_SIZE to iov */ 594 - int len = min_t(int, DATA_BLOCK_SIZE, *remain); 595 593 596 - *remain -= len; 594 + /* Do not add more than DATA_BLOCK_SIZE to iov */ 595 + if (len > DATA_BLOCK_SIZE) 596 + len = DATA_BLOCK_SIZE; 597 + 597 598 /* 598 599 * The following code will gather and map the blocks to the same iovec 599 600 * when the blocks are all next to each other. ··· 619 618 int dbi = -2; 620 619 621 620 /* We prepare the IOVs for DMA_FROM_DEVICE transfer direction */ 622 - while (data_length > 0) 623 - dbi = new_block_to_iov(udev, cmd, iov, dbi, &data_length); 621 + for (; data_length > 0; data_length -= DATA_BLOCK_SIZE) 622 + dbi = new_block_to_iov(udev, cmd, iov, dbi, data_length); 624 623 } 625 624 626 625 static struct tcmu_cmd *tcmu_alloc_cmd(struct se_cmd *se_cmd) ··· 689 688 690 689 #define UPDATE_HEAD(head, used, size) smp_store_release(&head, ((head % size) + used) % size) 691 690 691 + #define TCMU_SG_TO_DATA_AREA 1 692 + #define TCMU_DATA_AREA_TO_SG 2 693 + 694 + static inline void tcmu_copy_data(struct tcmu_dev *udev, 695 + struct tcmu_cmd *tcmu_cmd, uint32_t direction, 696 + struct scatterlist *sg, unsigned int sg_nents, 697 + struct iovec **iov, size_t data_len) 698 + { 699 + /* start value of dbi + 1 must not be a valid dbi */ 700 + int dbi = -2; 701 + size_t block_remaining, cp_len; 702 + struct sg_mapping_iter sg_iter; 703 + unsigned int sg_flags; 704 + struct page *page; 705 + void *data_page_start, *data_addr; 706 + 707 + if (direction == TCMU_SG_TO_DATA_AREA) 708 + sg_flags = SG_MITER_ATOMIC | SG_MITER_FROM_SG; 709 + else 710 + sg_flags = SG_MITER_ATOMIC | SG_MITER_TO_SG; 711 + sg_miter_start(&sg_iter, sg, sg_nents, sg_flags); 712 + 713 + while (data_len) { 714 + if (direction == TCMU_SG_TO_DATA_AREA) 715 + dbi = new_block_to_iov(udev, tcmu_cmd, iov, dbi, 716 + data_len); 717 + else 718 + dbi = tcmu_cmd_get_dbi(tcmu_cmd); 719 + page = tcmu_get_block_page(udev, dbi); 720 + if (direction == TCMU_DATA_AREA_TO_SG) 721 + flush_dcache_page(page); 722 + data_page_start = kmap_atomic(page); 723 + block_remaining = DATA_BLOCK_SIZE; 724 + 725 + while (block_remaining && data_len) { 726 + if (!sg_miter_next(&sg_iter)) { 727 + /* set length to 0 to abort outer loop */ 728 + data_len = 0; 729 + pr_debug("tcmu_move_data: aborting data copy due to exhausted sg_list\n"); 730 + break; 731 + } 732 + cp_len = min3(sg_iter.length, block_remaining, data_len); 733 + 734 + data_addr = data_page_start + 735 + DATA_BLOCK_SIZE - block_remaining; 736 + if (direction == TCMU_SG_TO_DATA_AREA) 737 + memcpy(data_addr, sg_iter.addr, cp_len); 738 + else 739 + memcpy(sg_iter.addr, data_addr, cp_len); 740 + 741 + data_len -= cp_len; 742 + block_remaining -= cp_len; 743 + sg_iter.consumed = cp_len; 744 + } 745 + sg_miter_stop(&sg_iter); 746 + 747 + kunmap_atomic(data_page_start); 748 + if (direction == TCMU_SG_TO_DATA_AREA) 749 + flush_dcache_page(page); 750 + } 751 + } 752 + 692 753 static void scatter_data_area(struct tcmu_dev *udev, struct tcmu_cmd *tcmu_cmd, 693 754 struct iovec **iov) 694 755 { 695 756 struct se_cmd *se_cmd = tcmu_cmd->se_cmd; 696 - /* start value of dbi + 1 must not be a valid dbi */ 697 - int i, dbi = -2; 698 - int block_remaining = 0; 699 - int data_len = se_cmd->data_length; 700 - void *from, *to = NULL; 701 - size_t copy_bytes, offset; 702 - struct scatterlist *sg; 703 - struct page *page = NULL; 704 757 705 - for_each_sg(se_cmd->t_data_sg, sg, se_cmd->t_data_nents, i) { 706 - int sg_remaining = sg->length; 707 - from = kmap_atomic(sg_page(sg)) + sg->offset; 708 - while (sg_remaining > 0) { 709 - if (block_remaining == 0) { 710 - if (to) { 711 - flush_dcache_page(page); 712 - kunmap_atomic(to); 713 - } 714 - 715 - /* get next dbi and add to IOVs */ 716 - dbi = new_block_to_iov(udev, tcmu_cmd, iov, dbi, 717 - &data_len); 718 - page = tcmu_get_block_page(udev, dbi); 719 - to = kmap_atomic(page); 720 - block_remaining = DATA_BLOCK_SIZE; 721 - } 722 - 723 - copy_bytes = min_t(size_t, sg_remaining, 724 - block_remaining); 725 - offset = DATA_BLOCK_SIZE - block_remaining; 726 - memcpy(to + offset, from + sg->length - sg_remaining, 727 - copy_bytes); 728 - 729 - sg_remaining -= copy_bytes; 730 - block_remaining -= copy_bytes; 731 - } 732 - kunmap_atomic(from - sg->offset); 733 - } 734 - 735 - if (to) { 736 - flush_dcache_page(page); 737 - kunmap_atomic(to); 738 - } 758 + tcmu_copy_data(udev, tcmu_cmd, TCMU_SG_TO_DATA_AREA, se_cmd->t_data_sg, 759 + se_cmd->t_data_nents, iov, se_cmd->data_length); 739 760 } 740 761 741 - static void gather_data_area(struct tcmu_dev *udev, struct tcmu_cmd *cmd, 762 + static void gather_data_area(struct tcmu_dev *udev, struct tcmu_cmd *tcmu_cmd, 742 763 bool bidi, uint32_t read_len) 743 764 { 744 - struct se_cmd *se_cmd = cmd->se_cmd; 745 - int i, dbi; 746 - int block_remaining = 0; 747 - void *from = NULL, *to; 748 - size_t copy_bytes, offset; 749 - struct scatterlist *sg, *data_sg; 750 - struct page *page; 765 + struct se_cmd *se_cmd = tcmu_cmd->se_cmd; 766 + struct scatterlist *data_sg; 751 767 unsigned int data_nents; 752 - uint32_t count = 0; 753 768 754 769 if (!bidi) { 755 770 data_sg = se_cmd->t_data_sg; ··· 776 759 * buffer blocks, and before gathering the Data-In buffer 777 760 * the Data-Out buffer blocks should be skipped. 778 761 */ 779 - count = cmd->dbi_cnt - cmd->dbi_bidi_cnt; 762 + tcmu_cmd_set_dbi_cur(tcmu_cmd, 763 + tcmu_cmd->dbi_cnt - tcmu_cmd->dbi_bidi_cnt); 780 764 781 765 data_sg = se_cmd->t_bidi_data_sg; 782 766 data_nents = se_cmd->t_bidi_data_nents; 783 767 } 784 768 785 - tcmu_cmd_set_dbi_cur(cmd, count); 786 - 787 - for_each_sg(data_sg, sg, data_nents, i) { 788 - int sg_remaining = sg->length; 789 - to = kmap_atomic(sg_page(sg)) + sg->offset; 790 - while (sg_remaining > 0 && read_len > 0) { 791 - if (block_remaining == 0) { 792 - if (from) 793 - kunmap_atomic(from); 794 - 795 - block_remaining = DATA_BLOCK_SIZE; 796 - dbi = tcmu_cmd_get_dbi(cmd); 797 - page = tcmu_get_block_page(udev, dbi); 798 - from = kmap_atomic(page); 799 - flush_dcache_page(page); 800 - } 801 - copy_bytes = min_t(size_t, sg_remaining, 802 - block_remaining); 803 - if (read_len < copy_bytes) 804 - copy_bytes = read_len; 805 - offset = DATA_BLOCK_SIZE - block_remaining; 806 - memcpy(to + sg->length - sg_remaining, from + offset, 807 - copy_bytes); 808 - 809 - sg_remaining -= copy_bytes; 810 - block_remaining -= copy_bytes; 811 - read_len -= copy_bytes; 812 - } 813 - kunmap_atomic(to - sg->offset); 814 - if (read_len == 0) 815 - break; 816 - } 817 - if (from) 818 - kunmap_atomic(from); 769 + tcmu_copy_data(udev, tcmu_cmd, TCMU_DATA_AREA_TO_SG, data_sg, 770 + data_nents, NULL, read_len); 819 771 } 820 772 821 773 static inline size_t spc_bitmap_free(unsigned long *bitmap, uint32_t thresh)