Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

NFSv4/flexfiles: Read path updates for striped layouts

Update the read path to calculate dss_id and use it to direct I/O to the
appropriate stripe DS.

Signed-off-by: Jonathan Curley <jcurley@purestorage.com>
Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>

Authored by Jonathan Curley and committed by Anna Schumaker
4934ccbe a1491919

+98 -24
+98 -24
fs/nfs/flexfilelayout/flexfilelayout.c
··· 770 770 static struct nfs4_pnfs_ds * 771 771 ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg, 772 772 u32 start_idx, u32 *best_idx, 773 + u32 offset, u32 *dss_id, 773 774 bool check_device) 774 775 { 775 776 struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg); ··· 781 780 /* mirrors are initially sorted by efficiency */ 782 781 for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) { 783 782 mirror = FF_LAYOUT_COMP(lseg, idx); 784 - ds = nfs4_ff_layout_prepare_ds(lseg, mirror, 0, false); 783 + *dss_id = nfs4_ff_layout_calc_dss_id( 784 + fls->stripe_unit, 785 + fls->mirror_array[idx]->dss_count, 786 + offset); 787 + ds = nfs4_ff_layout_prepare_ds(lseg, mirror, *dss_id, false); 785 788 if (IS_ERR(ds)) 786 789 continue; 787 790 788 791 if (check_device && 789 - nfs4_test_deviceid_unavailable(&mirror->dss[0].mirror_ds->id_node)) { 792 + nfs4_test_deviceid_unavailable(&mirror->dss[*dss_id].mirror_ds->id_node)) { 790 793 // reinitialize the error state in case if this is the last iteration 791 794 ds = ERR_PTR(-EINVAL); 792 795 continue; ··· 805 800 806 801 static struct nfs4_pnfs_ds * 807 802 ff_layout_choose_any_ds_for_read(struct pnfs_layout_segment *lseg, 808 - u32 start_idx, u32 *best_idx) 803 + u32 start_idx, u32 *best_idx, 804 + u32 offset, u32 *dss_id) 809 805 { 810 - return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx, false); 806 + return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx, 807 + offset, dss_id, false); 811 808 } 812 809 813 810 static struct nfs4_pnfs_ds * 814 811 ff_layout_choose_valid_ds_for_read(struct pnfs_layout_segment *lseg, 815 - u32 start_idx, u32 *best_idx) 812 + u32 start_idx, u32 *best_idx, 813 + u32 offset, u32 *dss_id) 816 814 { 817 - return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx, true); 815 + return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx, 816 + offset, dss_id, true); 818 817 } 819 818 820 819 static struct nfs4_pnfs_ds * 821 820 
ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg, 822 - u32 start_idx, u32 *best_idx) 821 + u32 start_idx, u32 *best_idx, 822 + u32 offset, u32 *dss_id) 823 823 { 824 824 struct nfs4_pnfs_ds *ds; 825 825 826 - ds = ff_layout_choose_valid_ds_for_read(lseg, start_idx, best_idx); 826 + ds = ff_layout_choose_valid_ds_for_read(lseg, start_idx, best_idx, 827 + offset, dss_id); 827 828 if (!IS_ERR(ds)) 828 829 return ds; 829 - return ff_layout_choose_any_ds_for_read(lseg, start_idx, best_idx); 830 + return ff_layout_choose_any_ds_for_read(lseg, start_idx, best_idx, 831 + offset, dss_id); 830 832 } 831 833 832 834 static struct nfs4_pnfs_ds * 833 835 ff_layout_get_ds_for_read(struct nfs_pageio_descriptor *pgio, 834 - u32 *best_idx) 836 + u32 *best_idx, 837 + u32 offset, 838 + u32 *dss_id) 835 839 { 836 840 struct pnfs_layout_segment *lseg = pgio->pg_lseg; 837 841 struct nfs4_pnfs_ds *ds; 838 842 839 843 ds = ff_layout_choose_best_ds_for_read(lseg, pgio->pg_mirror_idx, 840 - best_idx); 844 + best_idx, offset, dss_id); 841 845 if (!IS_ERR(ds) || !pgio->pg_mirror_idx) 842 846 return ds; 843 - return ff_layout_choose_best_ds_for_read(lseg, 0, best_idx); 847 + return ff_layout_choose_best_ds_for_read(lseg, 0, best_idx, 848 + offset, dss_id); 844 849 } 845 850 846 851 static void ··· 869 854 } 870 855 } 871 856 857 + static bool 858 + ff_layout_lseg_is_striped(const struct nfs4_ff_layout_segment *fls) 859 + { 860 + return fls->mirror_array[0]->dss_count > 1; 861 + } 862 + 863 + /* 864 + * ff_layout_pg_test(). Called by nfs_can_coalesce_requests() 865 + * 866 + * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number 867 + * of bytes (maximum @req->wb_bytes) that can be coalesced. 
868 + */ 869 + static size_t 870 + ff_layout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, 871 + struct nfs_page *req) 872 + { 873 + unsigned int size; 874 + u64 p_stripe, r_stripe; 875 + u32 stripe_offset; 876 + u64 segment_offset = pgio->pg_lseg->pls_range.offset; 877 + u32 stripe_unit = FF_LAYOUT_LSEG(pgio->pg_lseg)->stripe_unit; 878 + 879 + /* calls nfs_generic_pg_test */ 880 + size = pnfs_generic_pg_test(pgio, prev, req); 881 + if (!size) 882 + return 0; 883 + else if (!ff_layout_lseg_is_striped(FF_LAYOUT_LSEG(pgio->pg_lseg))) 884 + return size; 885 + 886 + /* see if req and prev are in the same stripe */ 887 + if (prev) { 888 + p_stripe = (u64)req_offset(prev) - segment_offset; 889 + r_stripe = (u64)req_offset(req) - segment_offset; 890 + do_div(p_stripe, stripe_unit); 891 + do_div(r_stripe, stripe_unit); 892 + 893 + if (p_stripe != r_stripe) 894 + return 0; 895 + } 896 + 897 + /* calculate remaining bytes in the current stripe */ 898 + div_u64_rem((u64)req_offset(req) - segment_offset, 899 + stripe_unit, 900 + &stripe_offset); 901 + WARN_ON_ONCE(stripe_offset > stripe_unit); 902 + if (stripe_offset >= stripe_unit) 903 + return 0; 904 + return min(stripe_unit - (unsigned int)stripe_offset, size); 905 + } 906 + 872 907 static void 873 908 ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio, 874 909 struct nfs_page *req) ··· 926 861 struct nfs_pgio_mirror *pgm; 927 862 struct nfs4_ff_layout_mirror *mirror; 928 863 struct nfs4_pnfs_ds *ds; 929 - u32 ds_idx; 864 + u32 ds_idx, dss_id; 930 865 931 866 if (NFS_SERVER(pgio->pg_inode)->flags & 932 867 (NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR)) ··· 947 882 /* Reset wb_nio, since getting layout segment was successful */ 948 883 req->wb_nio = 0; 949 884 950 - ds = ff_layout_get_ds_for_read(pgio, &ds_idx); 885 + ds = ff_layout_get_ds_for_read(pgio, &ds_idx, 886 + req_offset(req), &dss_id); 951 887 if (IS_ERR(ds)) { 952 888 if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg)) 953 889 goto out_mds; ··· 960 
894 961 895 mirror = FF_LAYOUT_COMP(pgio->pg_lseg, ds_idx); 962 896 pgm = &pgio->pg_mirrors[0]; 963 - pgm->pg_bsize = mirror->dss[0].mirror_ds->ds_versions[0].rsize; 897 + pgm->pg_bsize = mirror->dss[dss_id].mirror_ds->ds_versions[0].rsize; 964 898 965 899 pgio->pg_mirror_idx = ds_idx; 966 900 return; ··· 1098 1032 1099 1033 static const struct nfs_pageio_ops ff_layout_pg_read_ops = { 1100 1034 .pg_init = ff_layout_pg_init_read, 1101 - .pg_test = pnfs_generic_pg_test, 1035 + .pg_test = ff_layout_pg_test, 1102 1036 .pg_doio = pnfs_generic_pg_readpages, 1103 1037 .pg_cleanup = pnfs_generic_pg_cleanup, 1104 1038 }; ··· 1153 1087 { 1154 1088 u32 idx = hdr->pgio_mirror_idx + 1; 1155 1089 u32 new_idx = 0; 1090 + u32 dss_id = 0; 1156 1091 struct nfs4_pnfs_ds *ds; 1157 1092 1158 - ds = ff_layout_choose_any_ds_for_read(hdr->lseg, idx, &new_idx); 1093 + ds = ff_layout_choose_any_ds_for_read(hdr->lseg, idx, &new_idx, 1094 + hdr->args.offset, &dss_id); 1159 1095 if (IS_ERR(ds)) 1160 1096 pnfs_error_mark_layout_for_return(hdr->inode, hdr->lseg); 1161 1097 else ··· 1952 1884 u32 idx = hdr->pgio_mirror_idx; 1953 1885 int vers; 1954 1886 struct nfs_fh *fh; 1887 + u32 dss_id; 1955 1888 bool ds_fatal_error = false; 1956 1889 1957 1890 dprintk("--> %s ino %lu pgbase %u req %zu@%llu\n", ··· 1960 1891 hdr->args.pgbase, (size_t)hdr->args.count, offset); 1961 1892 1962 1893 mirror = FF_LAYOUT_COMP(lseg, idx); 1963 - ds = nfs4_ff_layout_prepare_ds(lseg, mirror, 0, false); 1894 + dss_id = nfs4_ff_layout_calc_dss_id( 1895 + FF_LAYOUT_LSEG(lseg)->stripe_unit, 1896 + mirror->dss_count, 1897 + offset); 1898 + ds = nfs4_ff_layout_prepare_ds(lseg, mirror, dss_id, false); 1964 1899 if (IS_ERR(ds)) { 1965 1900 ds_fatal_error = nfs_error_is_fatal(PTR_ERR(ds)); 1966 1901 goto out_failed; 1967 1902 } 1968 1903 1969 1904 ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp, 1970 - hdr->inode, 0); 1905 + hdr->inode, dss_id); 1971 1906 if (IS_ERR(ds_clnt)) 1972 1907 goto out_failed; 1973 1908 
1974 - ds_cred = ff_layout_get_ds_cred(mirror, &lseg->pls_range, hdr->cred, 0); 1909 + ds_cred = ff_layout_get_ds_cred(mirror, &lseg->pls_range, hdr->cred, dss_id); 1975 1910 if (!ds_cred) 1976 1911 goto out_failed; 1977 1912 1978 - vers = nfs4_ff_layout_ds_version(mirror, 0); 1913 + vers = nfs4_ff_layout_ds_version(mirror, dss_id); 1979 1914 1980 1915 dprintk("%s USE DS: %s cl_count %d vers %d\n", __func__, 1981 1916 ds->ds_remotestr, refcount_read(&ds->ds_clp->cl_count), vers); ··· 1987 1914 hdr->pgio_done_cb = ff_layout_read_done_cb; 1988 1915 refcount_inc(&ds->ds_clp->cl_count); 1989 1916 hdr->ds_clp = ds->ds_clp; 1990 - fh = nfs4_ff_layout_select_ds_fh(mirror, 0); 1917 + fh = nfs4_ff_layout_select_ds_fh(mirror, dss_id); 1991 1918 if (fh) 1992 1919 hdr->args.fh = fh; 1993 1920 1994 - nfs4_ff_layout_select_ds_stateid(mirror, 0, &hdr->args.stateid); 1921 + nfs4_ff_layout_select_ds_stateid(mirror, dss_id, &hdr->args.stateid); 1995 1922 1996 1923 /* 1997 1924 * Note that if we ever decide to split across DSes, ··· 2001 1928 hdr->mds_offset = offset; 2002 1929 2003 1930 /* Start IO accounting for local read */ 2004 - localio = ff_local_open_fh(lseg, idx, 0, ds->ds_clp, ds_cred, fh, FMODE_READ); 1931 + localio = ff_local_open_fh(lseg, idx, dss_id, ds->ds_clp, ds_cred, fh, 1932 + FMODE_READ); 2005 1933 if (localio) { 2006 1934 hdr->task.tk_start = ktime_get(); 2007 1935 ff_layout_read_record_layoutstats_start(&hdr->task, hdr);