Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'for-linus' of git://git.open-osd.org/linux-open-osd

* 'for-linus' of git://git.open-osd.org/linux-open-osd:
ore: Make ore its own module
exofs: Rename raid engine from exofs/ios.c => ore
exofs: ios: Move to a per inode components & device-table
exofs: Move exofs specific osd operations out of ios.c
exofs: Add offset/length to exofs_get_io_state
exofs: Fix truncate for the raid-groups case
exofs: Small cleanup of exofs_fill_super
exofs: BUG: Avoid sbi realloc
exofs: Remove pnfs-osd private definitions
nfs_xdr: Move nfs4_string definition out of #ifdef CONFIG_NFS_V4

+618 -505
+4 -1
fs/exofs/Kbuild
··· 12 12 # Kbuild - Gets included from the Kernels Makefile and build system 13 13 # 14 14 15 - exofs-y := ios.o inode.o file.o symlink.o namei.o dir.o super.o 15 + # ore module library 16 + obj-$(CONFIG_ORE) += ore.o 17 + 18 + exofs-y := inode.o file.o symlink.o namei.o dir.o super.o 16 19 obj-$(CONFIG_EXOFS_FS) += exofs.o
+4
fs/exofs/Kconfig
··· 1 + config ORE 2 + tristate 3 + 1 4 config EXOFS_FS 2 5 tristate "exofs: OSD based file system support" 3 6 depends on SCSI_OSD_ULD 7 + select ORE 4 8 help 5 9 EXOFS is a file system that uses an OSD storage device, 6 10 as its backing storage.
+40 -119
fs/exofs/exofs.h
··· 36 36 #include <linux/fs.h> 37 37 #include <linux/time.h> 38 38 #include <linux/backing-dev.h> 39 - #include "common.h" 39 + #include <scsi/osd_ore.h> 40 40 41 - /* FIXME: Remove once pnfs hits mainline 42 - * #include <linux/exportfs/pnfs_osd_xdr.h> 43 - */ 44 - #include "pnfs.h" 41 + #include "common.h" 45 42 46 43 #define EXOFS_ERR(fmt, a...) printk(KERN_ERR "exofs: " fmt, ##a) 47 44 ··· 53 56 /* u64 has problems with printk this will cast it to unsigned long long */ 54 57 #define _LLU(x) (unsigned long long)(x) 55 58 56 - struct exofs_layout { 57 - osd_id s_pid; /* partition ID of file system*/ 58 - 59 - /* Our way of looking at the data_map */ 60 - unsigned stripe_unit; 61 - unsigned mirrors_p1; 62 - 63 - unsigned group_width; 64 - u64 group_depth; 65 - unsigned group_count; 66 - 67 - enum exofs_inode_layout_gen_functions lay_func; 68 - 69 - unsigned s_numdevs; /* Num of devices in array */ 70 - struct osd_dev *s_ods[0]; /* Variable length */ 71 - }; 72 - 73 59 /* 74 60 * our extension to the in-memory superblock 75 61 */ 76 62 struct exofs_sb_info { 63 + struct backing_dev_info bdi; /* register our bdi with VFS */ 77 64 struct exofs_sb_stats s_ess; /* Written often, pre-allocate*/ 78 65 int s_timeout; /* timeout for OSD operations */ 79 66 uint64_t s_nextid; /* highest object ID used */ ··· 65 84 spinlock_t s_next_gen_lock; /* spinlock for gen # update */ 66 85 u32 s_next_generation; /* next gen # to use */ 67 86 atomic_t s_curr_pending; /* number of pending commands */ 68 - uint8_t s_cred[OSD_CAP_LEN]; /* credential for the fscb */ 69 - struct backing_dev_info bdi; /* register our bdi with VFS */ 70 87 71 88 struct pnfs_osd_data_map data_map; /* Default raid to use 72 89 * FIXME: Needed ? 73 90 */ 74 - /* struct exofs_layout dir_layout;*/ /* Default dir layout */ 75 - struct exofs_layout layout; /* Default files layout, 76 - * contains the variable osd_dev 77 - * array. Keep last */ 91 + struct ore_layout layout; /* Default files layout */ 92 + struct ore_comp one_comp; /* id & cred of partition id=0*/ 93 + struct ore_components comps; /* comps for the partition */ 78 94 struct osd_dev *_min_one_dev[1]; /* Place holder for one dev */ 79 95 }; 80 96 ··· 85 107 uint32_t i_data[EXOFS_IDATA];/*short symlink names and device #s*/ 86 108 uint32_t i_dir_start_lookup; /* which page to start lookup */ 87 109 uint64_t i_commit_size; /* the object's written length */ 88 - uint8_t i_cred[OSD_CAP_LEN];/* all-powerful credential */ 110 + struct ore_comp one_comp; /* same component for all devices */ 111 + struct ore_components comps; /* inode view of the device table */ 89 112 }; 90 113 91 114 static inline osd_id exofs_oi_objno(struct exofs_i_info *oi) 92 115 { 93 116 return oi->vfs_inode.i_ino + EXOFS_OBJ_OFF; 94 - } 95 - 96 - struct exofs_io_state; 97 - typedef void (*exofs_io_done_fn)(struct exofs_io_state *or, void *private); 98 - 99 - struct exofs_io_state { 100 - struct kref kref; 101 - 102 - void *private; 103 - exofs_io_done_fn done; 104 - 105 - struct exofs_layout *layout; 106 - struct osd_obj_id obj; 107 - u8 *cred; 108 - 109 - /* Global read/write IO*/ 110 - loff_t offset; 111 - unsigned long length; 112 - void *kern_buff; 113 - 114 - struct page **pages; 115 - unsigned nr_pages; 116 - unsigned pgbase; 117 - unsigned pages_consumed; 118 - 119 - /* Attributes */ 120 - unsigned in_attr_len; 121 - struct osd_attr *in_attr; 122 - unsigned out_attr_len; 123 - struct osd_attr *out_attr; 124 - 125 - /* Variable array of size numdevs */ 126 - unsigned numdevs; 127 - struct exofs_per_dev_state { 128 - struct osd_request *or; 129 - struct bio *bio; 130 - loff_t offset; 131 - unsigned length; 132 - unsigned dev; 133 - } per_dev[]; 134 - }; 135 - 136 - static inline unsigned exofs_io_state_size(unsigned numdevs) 137 - { 138 - return sizeof(struct exofs_io_state) + 139 - sizeof(struct exofs_per_dev_state) * numdevs; 140 117 } 141 118 142 119 /* ··· 138 205 } 139 206 140 207 /* 141 - * Given a layout, object_number and stripe_index return the associated global 142 - * dev_index 143 - */ 144 - unsigned exofs_layout_od_id(struct exofs_layout *layout, 145 - osd_id obj_no, unsigned layout_index); 146 - /* 147 208 * Maximum count of links to a file 148 209 */ 149 210 #define EXOFS_LINK_MAX 32000 ··· 146 219 * function declarations * 147 220 *************************/ 148 221 149 - /* ios.c */ 150 - void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], 151 - const struct osd_obj_id *obj); 152 - int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj, 153 - u64 offset, void *p, unsigned length); 154 - 155 - int exofs_get_io_state(struct exofs_layout *layout, 156 - struct exofs_io_state **ios); 157 - void exofs_put_io_state(struct exofs_io_state *ios); 158 - 159 - int exofs_check_io(struct exofs_io_state *ios, u64 *resid); 160 - 161 - int exofs_sbi_create(struct exofs_io_state *ios); 162 - int exofs_sbi_remove(struct exofs_io_state *ios); 163 - int exofs_sbi_write(struct exofs_io_state *ios); 164 - int exofs_sbi_read(struct exofs_io_state *ios); 165 - 166 - int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr); 167 - 168 - int exofs_oi_truncate(struct exofs_i_info *oi, u64 new_len); 169 - static inline int exofs_oi_write(struct exofs_i_info *oi, 170 - struct exofs_io_state *ios) 171 - { 172 - ios->obj.id = exofs_oi_objno(oi); 173 - ios->cred = oi->i_cred; 174 - return exofs_sbi_write(ios); 175 - } 176 - 177 - static inline int exofs_oi_read(struct exofs_i_info *oi, 178 - struct exofs_io_state *ios) 179 - { 180 - ios->obj.id = exofs_oi_objno(oi); 181 - ios->cred = oi->i_cred; 182 - return exofs_sbi_read(ios); 183 - } 184 - 185 222 /* inode.c */ 186 - unsigned exofs_max_io_pages(struct exofs_layout *layout, 223 + unsigned exofs_max_io_pages(struct ore_layout *layout, 187 224 unsigned expected_pages); 188 225 int exofs_setattr(struct dentry *, struct iattr *); 189 226 int exofs_write_begin(struct file *file, struct address_space *mapping, ··· 172 281 struct inode *); 173 282 174 283 /* super.c */ 284 + void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], 285 + const struct osd_obj_id *obj); 175 286 int exofs_sbi_write_stats(struct exofs_sb_info *sbi); 176 287 177 288 /********************* ··· 188 295 189 296 /* inode.c */ 190 297 extern const struct address_space_operations exofs_aops; 191 - extern const struct osd_attr g_attr_logical_length; 192 298 193 299 /* namei.c */ 194 300 extern const struct inode_operations exofs_dir_inode_operations; ··· 196 304 /* symlink.c */ 197 305 extern const struct inode_operations exofs_symlink_inode_operations; 198 306 extern const struct inode_operations exofs_fast_symlink_inode_operations; 307 + 308 + /* exofs_init_comps will initialize an ore_components device array 309 + * pointing to a single ore_comp struct, and a round-robin view 310 + * of the device table. 311 + * The first device of each inode is the [inode->ino % num_devices] 312 + * and the rest of the devices sequentially following where the 313 + * first device is after the last device. 314 + * It is assumed that the global device array at @sbi is twice 315 + * bigger and that the device table repeats twice. 316 + * See: exofs_read_lookup_dev_table() 317 + */ 318 + static inline void exofs_init_comps(struct ore_components *comps, 319 + struct ore_comp *one_comp, 320 + struct exofs_sb_info *sbi, osd_id oid) 321 + { 322 + unsigned dev_mod = (unsigned)oid, first_dev; 323 + 324 + one_comp->obj.partition = sbi->one_comp.obj.partition; 325 + one_comp->obj.id = oid; 326 + exofs_make_credential(one_comp->cred, &one_comp->obj); 327 + 328 + comps->numdevs = sbi->comps.numdevs; 329 + comps->single_comp = EC_SINGLE_COMP; 330 + comps->comps = one_comp; 331 + 332 + /* Round robin device view of the table */ 333 + first_dev = (dev_mod * sbi->layout.mirrors_p1) % sbi->comps.numdevs; 334 + comps->ods = sbi->comps.ods + first_dev; 335 + } 199 336 200 337 #endif
+76 -76
fs/exofs/inode.c
··· 43 43 PAGE_SIZE / sizeof(struct page *), 44 44 }; 45 45 46 - unsigned exofs_max_io_pages(struct exofs_layout *layout, 46 + unsigned exofs_max_io_pages(struct ore_layout *layout, 47 47 unsigned expected_pages) 48 48 { 49 49 unsigned pages = min_t(unsigned, expected_pages, MAX_PAGES_KMALLOC); ··· 58 58 struct exofs_sb_info *sbi; 59 59 struct inode *inode; 60 60 unsigned expected_pages; 61 - struct exofs_io_state *ios; 61 + struct ore_io_state *ios; 62 62 63 63 struct page **pages; 64 64 unsigned alloc_pages; ··· 110 110 { 111 111 unsigned pages; 112 112 113 - if (!pcol->ios) { /* First time allocate io_state */ 114 - int ret = exofs_get_io_state(&pcol->sbi->layout, &pcol->ios); 115 - 116 - if (ret) 117 - return ret; 118 - } 119 - 120 113 /* TODO: easily support bio chaining */ 121 114 pages = exofs_max_io_pages(&pcol->sbi->layout, pcol->expected_pages); 122 115 ··· 133 140 pcol->pages = NULL; 134 141 135 142 if (pcol->ios) { 136 - exofs_put_io_state(pcol->ios); 143 + ore_put_io_state(pcol->ios); 137 144 pcol->ios = NULL; 138 145 } 139 146 } ··· 193 200 u64 resid; 194 201 u64 good_bytes; 195 202 u64 length = 0; 196 - int ret = exofs_check_io(pcol->ios, &resid); 203 + int ret = ore_check_io(pcol->ios, &resid); 197 204 198 205 if (likely(!ret)) 199 206 good_bytes = pcol->length; ··· 234 241 } 235 242 236 243 /* callback of async reads */ 237 - static void readpages_done(struct exofs_io_state *ios, void *p) 244 + static void readpages_done(struct ore_io_state *ios, void *p) 238 245 { 239 246 struct page_collect *pcol = p; 240 247 ··· 262 269 static int read_exec(struct page_collect *pcol) 263 270 { 264 271 struct exofs_i_info *oi = exofs_i(pcol->inode); 265 - struct exofs_io_state *ios = pcol->ios; 272 + struct ore_io_state *ios; 266 273 struct page_collect *pcol_copy = NULL; 267 274 int ret; 268 275 269 276 if (!pcol->pages) 270 277 return 0; 271 278 279 + if (!pcol->ios) { 280 + int ret = ore_get_rw_state(&pcol->sbi->layout, &oi->comps, true, 281 + pcol->pg_first << PAGE_CACHE_SHIFT, 282 + pcol->length, &pcol->ios); 283 + 284 + if (ret) 285 + return ret; 286 + } 287 + 288 + ios = pcol->ios; 272 289 ios->pages = pcol->pages; 273 290 ios->nr_pages = pcol->nr_pages; 274 - ios->length = pcol->length; 275 - ios->offset = pcol->pg_first << PAGE_CACHE_SHIFT; 276 291 277 292 if (pcol->read_4_write) { 278 - exofs_oi_read(oi, pcol->ios); 293 + ore_read(pcol->ios); 279 294 return __readpages_done(pcol); 280 295 } 281 296 ··· 296 295 *pcol_copy = *pcol; 297 296 ios->done = readpages_done; 298 297 ios->private = pcol_copy; 299 - ret = exofs_oi_read(oi, ios); 298 + ret = ore_read(ios); 300 299 if (unlikely(ret)) 301 300 goto err; 302 301 303 302 atomic_inc(&pcol->sbi->s_curr_pending); 304 303 305 304 EXOFS_DBGMSG2("read_exec obj=0x%llx start=0x%llx length=0x%lx\n", 306 - ios->obj.id, _LLU(ios->offset), pcol->length); 305 + oi->one_comp.obj.id, _LLU(ios->offset), pcol->length); 307 306 308 307 /* pages ownership was passed to pcol_copy */ 309 308 _pcol_reset(pcol); ··· 458 457 } 459 458 460 459 /* Callback for osd_write. All writes are asynchronous */ 461 - static void writepages_done(struct exofs_io_state *ios, void *p) 460 + static void writepages_done(struct ore_io_state *ios, void *p) 462 461 { 463 462 struct page_collect *pcol = p; 464 463 int i; 465 464 u64 resid; 466 465 u64 good_bytes; 467 466 u64 length = 0; 468 - int ret = exofs_check_io(ios, &resid); 467 + int ret = ore_check_io(ios, &resid); 469 468 470 469 atomic_dec(&pcol->sbi->s_curr_pending); 471 470 ··· 508 507 static int write_exec(struct page_collect *pcol) 509 508 { 510 509 struct exofs_i_info *oi = exofs_i(pcol->inode); 511 - struct exofs_io_state *ios = pcol->ios; 510 + struct ore_io_state *ios; 512 511 struct page_collect *pcol_copy = NULL; 513 512 int ret; 514 513 515 514 if (!pcol->pages) 516 515 return 0; 516 + 517 + BUG_ON(pcol->ios); 518 + ret = ore_get_rw_state(&pcol->sbi->layout, &oi->comps, false, 519 + pcol->pg_first << PAGE_CACHE_SHIFT, 520 + pcol->length, &pcol->ios); 521 + 522 + if (unlikely(ret)) 523 + goto err; 517 524 518 525 pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); 519 526 if (!pcol_copy) { ··· 532 523 533 524 *pcol_copy = *pcol; 534 525 526 + ios = pcol->ios; 535 527 ios->pages = pcol_copy->pages; 536 528 ios->nr_pages = pcol_copy->nr_pages; 537 - ios->offset = pcol_copy->pg_first << PAGE_CACHE_SHIFT; 538 - ios->length = pcol_copy->length; 539 529 ios->done = writepages_done; 540 530 ios->private = pcol_copy; 541 531 542 - ret = exofs_oi_write(oi, ios); 532 + ret = ore_write(ios); 543 533 if (unlikely(ret)) { 544 - EXOFS_ERR("write_exec: exofs_oi_write() Failed\n"); 534 + EXOFS_ERR("write_exec: ore_write() Failed\n"); 545 535 goto err; 546 536 } 547 537 ··· 852 844 return S_ISLNK(inode->i_mode) && (oi->i_data[0] != 0); 853 845 } 854 846 855 - const struct osd_attr g_attr_logical_length = ATTR_DEF( 856 - OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8); 857 - 858 847 static int _do_truncate(struct inode *inode, loff_t newsize) 859 848 { 860 849 struct exofs_i_info *oi = exofs_i(inode); 850 + struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; 861 851 int ret; 862 852 863 853 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 864 854 865 - ret = exofs_oi_truncate(oi, (u64)newsize); 855 + ret = ore_truncate(&sbi->layout, &oi->comps, (u64)newsize); 866 856 if (likely(!ret)) 867 857 truncate_setsize(inode, newsize); 868 858 ··· 923 917 [1] = g_attr_inode_file_layout, 924 918 [2] = g_attr_inode_dir_layout, 925 919 }; 926 - struct exofs_io_state *ios; 920 + struct ore_io_state *ios; 927 921 struct exofs_on_disk_inode_layout *layout; 928 922 int ret; 929 923 930 - ret = exofs_get_io_state(&sbi->layout, &ios); 924 + ret = ore_get_io_state(&sbi->layout, &oi->comps, &ios); 931 925 if (unlikely(ret)) { 932 - EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); 926 + EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__); 933 927 return ret; 934 928 } 935 929 936 - ios->obj.id = exofs_oi_objno(oi); 937 - exofs_make_credential(oi->i_cred, &ios->obj); 938 - ios->cred = oi->i_cred; 939 - 940 - attrs[1].len = exofs_on_disk_inode_layout_size(sbi->layout.s_numdevs); 941 - attrs[2].len = exofs_on_disk_inode_layout_size(sbi->layout.s_numdevs); 930 + attrs[1].len = exofs_on_disk_inode_layout_size(sbi->comps.numdevs); 931 + attrs[2].len = exofs_on_disk_inode_layout_size(sbi->comps.numdevs); 942 932 943 933 ios->in_attr = attrs; 944 934 ios->in_attr_len = ARRAY_SIZE(attrs); 945 935 946 - ret = exofs_sbi_read(ios); 936 + ret = ore_read(ios); 947 937 if (unlikely(ret)) { 948 938 EXOFS_ERR("object(0x%llx) corrupted, return empty file=>%d\n", 949 - _LLU(ios->obj.id), ret); 939 + _LLU(oi->one_comp.obj.id), ret); 950 940 memset(inode, 0, sizeof(*inode)); 951 941 inode->i_mode = 0040000 | (0777 & ~022); 952 942 /* If object is lost on target we might as well enable it's ··· 992 990 } 993 991 994 992 out: 995 - exofs_put_io_state(ios); 993 + ore_put_io_state(ios); 996 994 return ret; 997 995 } 998 996 ··· 1018 1016 return inode; 1019 1017 oi = exofs_i(inode); 1020 1018 __oi_init(oi); 1019 + exofs_init_comps(&oi->comps, &oi->one_comp, sb->s_fs_info, 1020 + exofs_oi_objno(oi)); 1021 1021 1022 1022 /* read the inode from the osd */ 1023 1023 ret = exofs_get_inode(sb, oi, &fcb); ··· 1111 1107 * set the obj_created flag so that other methods know that the object exists on 1112 1108 * the OSD. 1113 1109 */ 1114 - static void create_done(struct exofs_io_state *ios, void *p) 1110 + static void create_done(struct ore_io_state *ios, void *p) 1115 1111 { 1116 1112 struct inode *inode = p; 1117 1113 struct exofs_i_info *oi = exofs_i(inode); 1118 1114 struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; 1119 1115 int ret; 1120 1116 1121 - ret = exofs_check_io(ios, NULL); 1122 - exofs_put_io_state(ios); 1117 + ret = ore_check_io(ios, NULL); 1118 + ore_put_io_state(ios); 1123 1119 1124 1120 atomic_dec(&sbi->s_curr_pending); 1125 1121 1126 1122 if (unlikely(ret)) { 1127 1123 EXOFS_ERR("object=0x%llx creation failed in pid=0x%llx", 1128 - _LLU(exofs_oi_objno(oi)), _LLU(sbi->layout.s_pid)); 1124 + _LLU(exofs_oi_objno(oi)), 1125 + _LLU(oi->one_comp.obj.partition)); 1129 1126 /*TODO: When FS is corrupted creation can fail, object already 1130 1127 * exist. Get rid of this asynchronous creation, if exist 1131 1128 * increment the obj counter and try the next object. Until we ··· 1145 1140 */ 1146 1141 struct inode *exofs_new_inode(struct inode *dir, int mode) 1147 1142 { 1148 - struct super_block *sb; 1143 + struct super_block *sb = dir->i_sb; 1144 + struct exofs_sb_info *sbi = sb->s_fs_info; 1149 1145 struct inode *inode; 1150 1146 struct exofs_i_info *oi; 1151 - struct exofs_sb_info *sbi; 1152 - struct exofs_io_state *ios; 1147 + struct ore_io_state *ios; 1153 1148 int ret; 1154 1149 1155 - sb = dir->i_sb; 1156 1150 inode = new_inode(sb); 1157 1151 if (!inode) 1158 1152 return ERR_PTR(-ENOMEM); ··· 1160 1156 __oi_init(oi); 1161 1157 1162 1158 set_obj_2bcreated(oi); 1163 - 1164 - sbi = sb->s_fs_info; 1165 1159 1166 1160 inode->i_mapping->backing_dev_info = sb->s_bdi; 1167 1161 inode_init_owner(inode, dir, mode); ··· 1172 1170 spin_unlock(&sbi->s_next_gen_lock); 1173 1171 insert_inode_hash(inode); 1174 1172 1173 + exofs_init_comps(&oi->comps, &oi->one_comp, sb->s_fs_info, 1174 + exofs_oi_objno(oi)); 1175 1175 exofs_sbi_write_stats(sbi); /* Make sure new sbi->s_nextid is on disk */ 1176 1176 1177 1177 mark_inode_dirty(inode); 1178 1178 1179 - ret = exofs_get_io_state(&sbi->layout, &ios); 1179 + ret = ore_get_io_state(&sbi->layout, &oi->comps, &ios); 1180 1180 if (unlikely(ret)) { 1181 - EXOFS_ERR("exofs_new_inode: exofs_get_io_state failed\n"); 1181 + EXOFS_ERR("exofs_new_inode: ore_get_io_state failed\n"); 1182 1182 return ERR_PTR(ret); 1183 1183 } 1184 1184 1185 - ios->obj.id = exofs_oi_objno(oi); 1186 - exofs_make_credential(oi->i_cred, &ios->obj); 1187 - 1188 1185 ios->done = create_done; 1189 1186 ios->private = inode; 1190 - ios->cred = oi->i_cred; 1191 - ret = exofs_sbi_create(ios); 1187 + 1188 + ret = ore_create(ios); 1192 1189 if (ret) { 1193 - exofs_put_io_state(ios); 1190 + ore_put_io_state(ios); 1194 1191 return ERR_PTR(ret); 1195 1192 } 1196 1193 atomic_inc(&sbi->s_curr_pending); ··· 1208 1207 /* 1209 1208 * Callback function from exofs_update_inode(). 1210 1209 */ 1211 - static void updatei_done(struct exofs_io_state *ios, void *p) 1210 + static void updatei_done(struct ore_io_state *ios, void *p) 1212 1211 { 1213 1212 struct updatei_args *args = p; 1214 1213 1215 - exofs_put_io_state(ios); 1214 + ore_put_io_state(ios); 1216 1215 1217 1216 atomic_dec(&args->sbi->s_curr_pending); 1218 1217 ··· 1228 1227 struct exofs_i_info *oi = exofs_i(inode); 1229 1228 struct super_block *sb = inode->i_sb; 1230 1229 struct exofs_sb_info *sbi = sb->s_fs_info; 1231 - struct exofs_io_state *ios; 1230 + struct ore_io_state *ios; 1232 1231 struct osd_attr attr; 1233 1232 struct exofs_fcb *fcb; 1234 1233 struct updatei_args *args; ··· 1267 1266 } else 1268 1267 memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data)); 1269 1268 1270 - ret = exofs_get_io_state(&sbi->layout, &ios); 1269 + ret = ore_get_io_state(&sbi->layout, &oi->comps, &ios); 1271 1270 if (unlikely(ret)) { 1272 - EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); 1271 + EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__); 1273 1272 goto free_args; 1274 1273 } 1275 1274 ··· 1286 1285 ios->private = args; 1287 1286 } 1288 1287 1289 - ret = exofs_oi_write(oi, ios); 1288 + ret = ore_write(ios); 1290 1289 if (!do_sync && !ret) { 1291 1290 atomic_inc(&sbi->s_curr_pending); 1292 1291 goto out; /* deallocation in updatei_done */ 1293 1292 } 1294 1293 1295 - exofs_put_io_state(ios); 1294 + ore_put_io_state(ios); 1296 1295 free_args: 1297 1296 kfree(args); 1298 1297 out: ··· 1311 1310 * Callback function from exofs_delete_inode() - don't have much cleaning up to 1312 1311 * do. 1313 1312 */ 1314 - static void delete_done(struct exofs_io_state *ios, void *p) 1313 + static void delete_done(struct ore_io_state *ios, void *p) 1315 1314 { 1316 1315 struct exofs_sb_info *sbi = p; 1317 1316 1318 - exofs_put_io_state(ios); 1317 + ore_put_io_state(ios); 1319 1318 1320 1319 atomic_dec(&sbi->s_curr_pending); 1321 1320 } ··· 1330 1329 struct exofs_i_info *oi = exofs_i(inode); 1331 1330 struct super_block *sb = inode->i_sb; 1332 1331 struct exofs_sb_info *sbi = sb->s_fs_info; 1333 - struct exofs_io_state *ios; 1332 + struct ore_io_state *ios; 1334 1333 int ret; 1335 1334 1336 1335 truncate_inode_pages(&inode->i_data, 0); ··· 1350 1349 /* ignore the error, attempt a remove anyway */ 1351 1350 1352 1351 /* Now Remove the OSD objects */ 1353 - ret = exofs_get_io_state(&sbi->layout, &ios); 1352 + ret = ore_get_io_state(&sbi->layout, &oi->comps, &ios); 1354 1353 if (unlikely(ret)) { 1355 - EXOFS_ERR("%s: exofs_get_io_state failed\n", __func__); 1354 + EXOFS_ERR("%s: ore_get_io_state failed\n", __func__); 1356 1355 return; 1357 1356 } 1358 1357 1359 - ios->obj.id = exofs_oi_objno(oi); 1360 1358 ios->done = delete_done; 1361 1359 ios->private = sbi; 1362 - ios->cred = oi->i_cred; 1363 - ret = exofs_sbi_remove(ios); 1360 + 1361 + ret = ore_remove(ios); 1364 1362 if (ret) { 1365 - EXOFS_ERR("%s: exofs_sbi_remove failed\n", __func__); 1366 - exofs_put_io_state(ios); 1363 + EXOFS_ERR("%s: ore_remove failed\n", __func__); 1364 + ore_put_io_state(ios); 1367 1365 return; 1368 1366 } 1369 1367 atomic_inc(&sbi->s_curr_pending);
+202 -170
fs/exofs/ios.c fs/exofs/ore.c
··· 23 23 */ 24 24 25 25 #include <linux/slab.h> 26 - #include <scsi/scsi_device.h> 27 26 #include <asm/div64.h> 28 27 29 - #include "exofs.h" 28 + #include <scsi/osd_ore.h> 30 29 31 - #define EXOFS_DBGMSG2(M...) do {} while (0) 32 - /* #define EXOFS_DBGMSG2 EXOFS_DBGMSG */ 30 + #define ORE_ERR(fmt, a...) printk(KERN_ERR "ore: " fmt, ##a) 33 31 34 - void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj) 32 + #ifdef CONFIG_EXOFS_DEBUG 33 + #define ORE_DBGMSG(fmt, a...) \ 34 + printk(KERN_NOTICE "ore @%s:%d: " fmt, __func__, __LINE__, ##a) 35 + #else 36 + #define ORE_DBGMSG(fmt, a...) \ 37 + do { if (0) printk(fmt, ##a); } while (0) 38 + #endif 39 + 40 + /* u64 has problems with printk this will cast it to unsigned long long */ 41 + #define _LLU(x) (unsigned long long)(x) 42 + 43 + #define ORE_DBGMSG2(M...) do {} while (0) 44 + /* #define ORE_DBGMSG2 ORE_DBGMSG */ 45 + 46 + MODULE_AUTHOR("Boaz Harrosh <bharrosh@panasas.com>"); 47 + MODULE_DESCRIPTION("Objects Raid Engine ore.ko"); 48 + MODULE_LICENSE("GPL"); 49 + 50 + static u8 *_ios_cred(struct ore_io_state *ios, unsigned index) 35 51 { 36 - osd_sec_init_nosec_doall_caps(cred_a, obj, false, true); 52 + return ios->comps->comps[index & ios->comps->single_comp].cred; 37 53 } 38 54 39 - int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj, 40 - u64 offset, void *p, unsigned length) 55 + static struct osd_obj_id *_ios_obj(struct ore_io_state *ios, unsigned index) 41 56 { 42 - struct osd_request *or = osd_start_request(od, GFP_KERNEL); 43 - /* struct osd_sense_info osi = {.key = 0};*/ 44 - int ret; 45 - 46 - if (unlikely(!or)) { 47 - EXOFS_DBGMSG("%s: osd_start_request failed.\n", __func__); 48 - return -ENOMEM; 49 - } 50 - ret = osd_req_read_kern(or, obj, offset, p, length); 51 - if (unlikely(ret)) { 52 - EXOFS_DBGMSG("%s: osd_req_read_kern failed.\n", __func__); 53 - goto out; 54 - } 55 - 56 - ret = osd_finalize_request(or, 0, cred, NULL); 57 - if (unlikely(ret)) { 58 - EXOFS_DBGMSG("Failed to osd_finalize_request() => %d\n", ret); 59 - goto out; 60 - } 61 - 62 - ret = osd_execute_request(or); 63 - if (unlikely(ret)) 64 - EXOFS_DBGMSG("osd_execute_request() => %d\n", ret); 65 - /* osd_req_decode_sense(or, ret); */ 66 - 67 - out: 68 - osd_end_request(or); 69 - return ret; 57 + return &ios->comps->comps[index & ios->comps->single_comp].obj; 70 58 } 71 59 72 - int exofs_get_io_state(struct exofs_layout *layout, 73 - struct exofs_io_state **pios) 60 + static struct osd_dev *_ios_od(struct ore_io_state *ios, unsigned index) 74 61 { 75 - struct exofs_io_state *ios; 62 + return ios->comps->ods[index]; 63 + } 64 + 65 + int ore_get_rw_state(struct ore_layout *layout, struct ore_components *comps, 66 + bool is_reading, u64 offset, u64 length, 67 + struct ore_io_state **pios) 68 + { 69 + struct ore_io_state *ios; 76 70 77 71 /*TODO: Maybe use kmem_cach per sbi of size 78 72 * exofs_io_state_size(layout->s_numdevs) 79 73 */ 80 - ios = kzalloc(exofs_io_state_size(layout->s_numdevs), GFP_KERNEL); 74 + ios = kzalloc(ore_io_state_size(comps->numdevs), GFP_KERNEL); 81 75 if (unlikely(!ios)) { 82 - EXOFS_DBGMSG("Failed kzalloc bytes=%d\n", 83 - exofs_io_state_size(layout->s_numdevs)); 76 + ORE_DBGMSG("Failed kzalloc bytes=%d\n", 77 + ore_io_state_size(comps->numdevs)); 84 78 *pios = NULL; 85 79 return -ENOMEM; 86 80 } 87 81 88 82 ios->layout = layout; 89 - ios->obj.partition = layout->s_pid; 83 + ios->comps = comps; 84 + ios->offset = offset; 85 + ios->length = length; 86 + ios->reading = is_reading; 87 + 90 88 *pios = ios; 91 89 return 0; 92 90 } 91 + EXPORT_SYMBOL(ore_get_rw_state); 93 92 94 - void exofs_put_io_state(struct exofs_io_state *ios) 93 + int ore_get_io_state(struct ore_layout *layout, struct ore_components *comps, 94 + struct ore_io_state **ios) 95 + { 96 + return ore_get_rw_state(layout, comps, true, 0, 0, ios); 97 + } 98 + EXPORT_SYMBOL(ore_get_io_state); 99 + 100 + void ore_put_io_state(struct ore_io_state *ios) 95 101 { 96 102 if (ios) { 97 103 unsigned i; 98 104 99 105 for (i = 0; i < ios->numdevs; i++) { 100 - struct exofs_per_dev_state *per_dev = &ios->per_dev[i]; 106 + struct ore_per_dev_state *per_dev = &ios->per_dev[i]; 101 107 102 108 if (per_dev->or) 103 109 osd_end_request(per_dev->or); ··· 114 108 kfree(ios); 115 109 } 116 110 } 111 + EXPORT_SYMBOL(ore_put_io_state); 117 112 118 - unsigned exofs_layout_od_id(struct exofs_layout *layout, 119 - osd_id obj_no, unsigned layout_index) 120 - { 121 - /* switch (layout->lay_func) { 122 - case LAYOUT_MOVING_WINDOW: 123 - {*/ 124 - unsigned dev_mod = obj_no; 125 - 126 - return (layout_index + dev_mod * layout->mirrors_p1) % 127 - layout->s_numdevs; 128 - /* } 129 - case LAYOUT_FUNC_IMPLICT: 130 - return layout->devs[layout_index]; 131 - }*/ 132 - } 133 - 134 - static inline struct osd_dev *exofs_ios_od(struct exofs_io_state *ios, 135 - unsigned layout_index) 136 - { 137 - return ios->layout->s_ods[ 138 - exofs_layout_od_id(ios->layout, ios->obj.id, layout_index)]; 139 - } 140 - 141 - static void _sync_done(struct exofs_io_state *ios, void *p) 113 + static void _sync_done(struct ore_io_state *ios, void *p) 142 114 { 143 115 struct completion *waiting = p; 144 116 ··· 125 141 126 142 static void _last_io(struct kref *kref) 127 143 { 128 - struct exofs_io_state *ios = container_of( 129 - kref, struct exofs_io_state, kref); 144 + struct ore_io_state *ios = container_of( 145 + kref, struct ore_io_state, kref); 130 146 131 147 ios->done(ios, ios->private); 132 148 } 133 149 134 150 static void _done_io(struct osd_request *or, void *p) 135 151 { 136 - struct exofs_io_state *ios = p; 152 + struct ore_io_state *ios = p; 137 153 138 154 kref_put(&ios->kref, _last_io); 139 155 } 140 156 141 - static int exofs_io_execute(struct exofs_io_state *ios) 157 + static int ore_io_execute(struct ore_io_state *ios) 142 158 { 143 159 DECLARE_COMPLETION_ONSTACK(wait); 144 160 bool sync = (ios->done == NULL); ··· 154 170 if (unlikely(!or)) 155 171 continue; 156 172 157 - ret = osd_finalize_request(or, 0, ios->cred, NULL); 173 + ret = osd_finalize_request(or, 0, _ios_cred(ios, i), NULL); 158 174 if (unlikely(ret)) { 159 - EXOFS_DBGMSG("Failed to osd_finalize_request() => %d\n", 175 + ORE_DBGMSG("Failed to osd_finalize_request() => %d\n", 160 176 ret); 161 177 return ret; 162 178 } ··· 178 194 179 195 if (sync) { 180 196 wait_for_completion(&wait); 181 - ret = exofs_check_io(ios, NULL); 197 + ret = ore_check_io(ios, NULL); 182 198 } 183 199 return ret; 184 200 } ··· 198 214 } 199 215 } 200 216 201 - int exofs_check_io(struct exofs_io_state *ios, u64 *resid) 217 + int ore_check_io(struct ore_io_state *ios, u64 *resid) 202 218 { 203 219 enum osd_err_priority acumulated_osd_err = 0; 204 220 int acumulated_lin_err = 0; ··· 219 235 if (OSD_ERR_PRI_CLEAR_PAGES == osi.osd_err_pri) { 220 236 /* start read offset passed endof file */ 221 237 _clear_bio(ios->per_dev[i].bio); 222 - EXOFS_DBGMSG("start read offset passed end of file " 238 + ORE_DBGMSG("start read offset passed end of file " 223 239 "offset=0x%llx, length=0x%llx\n", 224 240 _LLU(ios->per_dev[i].offset), 225 241 _LLU(ios->per_dev[i].length)); ··· 243 259 244 260 return acumulated_lin_err; 245 261 } 262 + EXPORT_SYMBOL(ore_check_io); 246 263 247 264 /* 248 265 * L - logical offset into the file ··· 290 305 struct _striping_info { 291 306 u64 obj_offset; 292 307 u64 group_length; 308 + u64 M; /* for truncate */ 293 309 unsigned dev; 294 310 unsigned unit_off; 295 311 }; 296 312 297 - static void _calc_stripe_info(struct exofs_io_state *ios, u64 file_offset, 313 + static void _calc_stripe_info(struct ore_layout *layout, u64 file_offset, 298 314 struct _striping_info *si) 299 315 { 300 - u32 stripe_unit = ios->layout->stripe_unit; 301 - u32 group_width = ios->layout->group_width; 302 - u64 group_depth = ios->layout->group_depth; 316 + u32 stripe_unit = layout->stripe_unit; 317 + u32 group_width = layout->group_width; 318 + u64 group_depth = layout->group_depth; 303 319 304 320 u32 U = stripe_unit * group_width; 305 321 u64 T = U * group_depth; 306 - u64 S = T * ios->layout->group_count; 322 + u64 S = T * layout->group_count; 307 323 u64 M = div64_u64(file_offset, S); 308 324 309 325 /* ··· 319 333 320 334 /* "H - (N * U)" is just "H % U" so it's bound to u32 */ 321 335 si->dev = (u32)(H - (N * U)) / stripe_unit + G * group_width; 322 - si->dev *= ios->layout->mirrors_p1; 336 + si->dev *= layout->mirrors_p1; 323 337 324 338 div_u64_rem(file_offset, stripe_unit, &si->unit_off); 325 339 ··· 327 341 (M * group_depth * stripe_unit); 328 342 329 343 si->group_length = T - H; 344 + si->M = M; 330 345 } 331 346 332 - static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_pg, 333 - unsigned pgbase, struct exofs_per_dev_state *per_dev, 347 + static int _add_stripe_unit(struct ore_io_state *ios, unsigned *cur_pg, 348 + unsigned pgbase, struct ore_per_dev_state *per_dev, 334 349 int cur_len) 335 350 { 336 351 unsigned pg = *cur_pg; 337 352 struct request_queue *q = 338 - osd_request_queue(exofs_ios_od(ios, per_dev->dev)); 353 + osd_request_queue(_ios_od(ios, per_dev->dev)); 339 354 340 355 per_dev->length += cur_len; 341 356 ··· 348 361 349 362 per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size); 350 363 if (unlikely(!per_dev->bio)) { 351 - EXOFS_DBGMSG("Failed to allocate BIO size=%u\n", 364 + ORE_DBGMSG("Failed to allocate BIO size=%u\n", 352 365 bio_size); 353 366 return -ENOMEM; 354 367 } ··· 374 387 return 0; 375 388 } 376 389 377 - static int _prepare_one_group(struct exofs_io_state *ios, u64 length, 390 + static int _prepare_one_group(struct ore_io_state *ios, u64 length, 378 391 struct _striping_info *si) 379 392 { 380 393 unsigned stripe_unit = ios->layout->stripe_unit; ··· 387 400 int ret = 0; 388 401 389 402 while (length) { 390 - struct exofs_per_dev_state *per_dev = &ios->per_dev[dev]; 403 + struct ore_per_dev_state *per_dev = &ios->per_dev[dev]; 391 404 unsigned cur_len, page_off = 0; 392 405 393 406 if (!per_dev->length) { ··· 430 443 return ret; 431 444 } 432 445 433 - static int _prepare_for_striping(struct exofs_io_state *ios) 446 + static int _prepare_for_striping(struct ore_io_state *ios) 434 447 { 435 448 u64 length = ios->length; 436 449 u64 offset = ios->offset; ··· 439 452 440 453 if (!ios->pages) { 441 454 if (ios->kern_buff) { 442 - struct exofs_per_dev_state *per_dev = &ios->per_dev[0]; 455 + struct ore_per_dev_state *per_dev = &ios->per_dev[0]; 443 456 444 - _calc_stripe_info(ios, ios->offset, &si); 457 + _calc_stripe_info(ios->layout, ios->offset, &si); 445 458 per_dev->offset = si.obj_offset; 446 459 per_dev->dev = si.dev; 447 460 ··· 455 468 } 456 469 457 470 while (length) { 458 - _calc_stripe_info(ios, offset, &si); 471 + _calc_stripe_info(ios->layout, offset, &si); 459 472 460 473 if (length < si.group_length) 461 474 si.group_length = length; ··· 472 485 return ret; 473 486 } 474 487 475 - int exofs_sbi_create(struct exofs_io_state *ios) 488 + int ore_create(struct ore_io_state *ios) 476 489 { 477 490 int i, ret; 478 491 479 - for (i = 0; i < ios->layout->s_numdevs; i++) { 492 + for (i = 0; i < ios->comps->numdevs; i++) { 480 493 struct osd_request *or; 481 494 482 - or = osd_start_request(exofs_ios_od(ios, i), GFP_KERNEL); 495 + or = osd_start_request(_ios_od(ios, i), GFP_KERNEL); 483 496 if (unlikely(!or)) { 484 - EXOFS_ERR("%s: osd_start_request failed\n", __func__); 497 + ORE_ERR("%s: osd_start_request failed\n", __func__); 485 498 ret = -ENOMEM; 486 499 goto out; 487 500 } 488 501 ios->per_dev[i].or = or; 489 502 ios->numdevs++; 490 503 491 - osd_req_create_object(or, &ios->obj); 504 + osd_req_create_object(or, _ios_obj(ios, i)); 492 505 } 493 - ret = exofs_io_execute(ios); 506 + ret = ore_io_execute(ios); 494 507 495 508 out: 496 509 return ret; 497 510 } 511 + EXPORT_SYMBOL(ore_create); 498 512 499 - int exofs_sbi_remove(struct exofs_io_state *ios) 513 + int ore_remove(struct ore_io_state *ios) 500 514 { 501 515 int i, ret; 502 516 503 - for (i = 0; i < ios->layout->s_numdevs; i++) { 517 + for (i = 0; i < ios->comps->numdevs; i++) { 504 518 struct osd_request *or; 505 519 506 - or = osd_start_request(exofs_ios_od(ios, i), GFP_KERNEL); 520 + or = osd_start_request(_ios_od(ios, i), GFP_KERNEL); 507 521 if (unlikely(!or)) { 508 - EXOFS_ERR("%s: osd_start_request failed\n", __func__); 522 + ORE_ERR("%s: osd_start_request failed\n", __func__); 509 523 ret = -ENOMEM; 510 524 goto out; 511 525 } 512 526 ios->per_dev[i].or = or; 513 527 ios->numdevs++; 514 528 515 - osd_req_remove_object(or, &ios->obj); 529 + osd_req_remove_object(or, _ios_obj(ios, i)); 516 530 } 517 - ret = exofs_io_execute(ios); 531 + ret = ore_io_execute(ios); 518 532 519 533 out: 520 534 return ret; 521 535 } 536 + EXPORT_SYMBOL(ore_remove); 522 537 523 - static int _sbi_write_mirror(struct exofs_io_state *ios, int cur_comp) 538 + static int _write_mirror(struct ore_io_state *ios, int cur_comp) 524 539 { 525 - struct exofs_per_dev_state *master_dev = &ios->per_dev[cur_comp]; 540 + struct ore_per_dev_state *master_dev = &ios->per_dev[cur_comp]; 526 541 unsigned dev = ios->per_dev[cur_comp].dev; 527 542 unsigned last_comp = cur_comp + ios->layout->mirrors_p1; 528 543 int ret = 0; ··· 533 544 return 0; /* Just an empty slot */ 534 545 535 546 for (; cur_comp < last_comp; ++cur_comp, ++dev) { 536 - struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp]; 547 + struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp]; 537 548 struct osd_request *or; 538 549 539 - or = osd_start_request(exofs_ios_od(ios, dev), GFP_KERNEL); 550 + or = osd_start_request(_ios_od(ios, dev), GFP_KERNEL); 540 551 if (unlikely(!or)) { 541 - EXOFS_ERR("%s: osd_start_request failed\n", __func__); 552 + ORE_ERR("%s: osd_start_request failed\n", __func__); 542 553 ret = -ENOMEM; 543 554 goto out; 544 555 } ··· 552 563 bio = bio_kmalloc(GFP_KERNEL, 553 564 master_dev->bio->bi_max_vecs); 554 565 if (unlikely(!bio)) { 555 - EXOFS_DBGMSG( 566 + ORE_DBGMSG( 556 567 "Failed to allocate BIO size=%u\n", 557 568 master_dev->bio->bi_max_vecs); 558 569 ret = -ENOMEM; ··· 571 582 bio->bi_rw |= REQ_WRITE; 572 583 } 573 584 574 - osd_req_write(or, &ios->obj, per_dev->offset, bio, 575 - per_dev->length); 576 - EXOFS_DBGMSG("write(0x%llx) offset=0x%llx " 585 + osd_req_write(or, _ios_obj(ios, dev), per_dev->offset, 586 + bio, per_dev->length); 587 + ORE_DBGMSG("write(0x%llx) offset=0x%llx " 577 588 "length=0x%llx dev=%d\n", 578 - _LLU(ios->obj.id), _LLU(per_dev->offset), 589 + _LLU(_ios_obj(ios, dev)->id), 590 + _LLU(per_dev->offset), 579 591 _LLU(per_dev->length), dev); 580 592 } else if (ios->kern_buff) { 581 - ret = osd_req_write_kern(or, &ios->obj, per_dev->offset, 582 - ios->kern_buff, ios->length); 593 + ret = osd_req_write_kern(or, _ios_obj(ios, dev), 594 + per_dev->offset, 595 + ios->kern_buff, ios->length); 583 596 if (unlikely(ret)) 584 597 goto out; 585 - EXOFS_DBGMSG2("write_kern(0x%llx) offset=0x%llx " 598 + ORE_DBGMSG2("write_kern(0x%llx) offset=0x%llx " 586 599 "length=0x%llx dev=%d\n", 587 - _LLU(ios->obj.id), _LLU(per_dev->offset), 600 + _LLU(_ios_obj(ios, dev)->id), 601 + _LLU(per_dev->offset), 588 602 _LLU(ios->length), dev); 589 603 } else { 590 - osd_req_set_attributes(or, &ios->obj); 591 - EXOFS_DBGMSG2("obj(0x%llx) set_attributes=%d dev=%d\n", 592 - _LLU(ios->obj.id), ios->out_attr_len, dev); 604 + osd_req_set_attributes(or, _ios_obj(ios, dev)); 605 + ORE_DBGMSG2("obj(0x%llx) set_attributes=%d dev=%d\n", 606 + _LLU(_ios_obj(ios, dev)->id), 607 + ios->out_attr_len, dev); 593 608 } 594 609 595 610 if (ios->out_attr) ··· 609 616 return ret; 610 617 } 611 618 612 - int exofs_sbi_write(struct exofs_io_state *ios) 619 + int ore_write(struct ore_io_state *ios) 613 620 { 614 621 int i; 615 622 int ret; ··· 619 626 return ret; 620 627 621 628 for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) { 622 - ret = _sbi_write_mirror(ios, i); 629 + ret = _write_mirror(ios, i); 623 630 if (unlikely(ret)) 624 631 return ret; 625 632 } 626 633 627 - ret = exofs_io_execute(ios); 634 + ret = ore_io_execute(ios); 628 635 return ret; 629 636 } 637 + EXPORT_SYMBOL(ore_write); 630 638 631 - static int _sbi_read_mirror(struct exofs_io_state *ios, unsigned cur_comp) 639 + static int _read_mirror(struct ore_io_state *ios, unsigned cur_comp) 632 640 { 633 641 struct osd_request *or; 634 - struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp]; 635 - unsigned first_dev = (unsigned)ios->obj.id; 642 + struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp]; 643 + struct osd_obj_id *obj = _ios_obj(ios, cur_comp); 644 + unsigned first_dev = (unsigned)obj->id; 636 645 637 646 if (ios->pages && !per_dev->length) 638 647 return 0; /* Just an empty slot */ 639 648 640 649 first_dev = per_dev->dev + first_dev % ios->layout->mirrors_p1; 641 - or = osd_start_request(exofs_ios_od(ios, first_dev), GFP_KERNEL); 650 + or = osd_start_request(_ios_od(ios, first_dev), GFP_KERNEL); 642 651 if (unlikely(!or)) { 643 - EXOFS_ERR("%s: osd_start_request failed\n", __func__); 652 + ORE_ERR("%s: osd_start_request failed\n", __func__); 644 653 return -ENOMEM; 645 654 } 646 655 per_dev->or = or; 647 656 648 657 if (ios->pages) { 649 - osd_req_read(or, &ios->obj, per_dev->offset, 658 + osd_req_read(or, obj, per_dev->offset, 650 659 per_dev->bio, per_dev->length); 651 - EXOFS_DBGMSG("read(0x%llx) offset=0x%llx length=0x%llx" 652 - " dev=%d\n", _LLU(ios->obj.id), 660 + ORE_DBGMSG("read(0x%llx) offset=0x%llx length=0x%llx" 661 + " dev=%d\n", _LLU(obj->id), 653 662 _LLU(per_dev->offset), _LLU(per_dev->length), 654 663 first_dev); 655 664 } else if (ios->kern_buff) { 656 - int ret = osd_req_read_kern(or, &ios->obj, per_dev->offset, 665 + int ret = osd_req_read_kern(or, obj, per_dev->offset, 657 666 ios->kern_buff, ios->length); 658 - EXOFS_DBGMSG2("read_kern(0x%llx) offset=0x%llx " 667 + ORE_DBGMSG2("read_kern(0x%llx) offset=0x%llx " 659 668 "length=0x%llx dev=%d ret=>%d\n", 660 - _LLU(ios->obj.id), _LLU(per_dev->offset), 669 + _LLU(obj->id), _LLU(per_dev->offset), 661 670 _LLU(ios->length), first_dev, ret); 662 671 if (unlikely(ret)) 663 672 return ret; 664 673 } else { 665 - osd_req_get_attributes(or, &ios->obj); 666 - EXOFS_DBGMSG2("obj(0x%llx) get_attributes=%d dev=%d\n", 667 - _LLU(ios->obj.id), ios->in_attr_len, first_dev); 674 + osd_req_get_attributes(or, obj); 675 + ORE_DBGMSG2("obj(0x%llx) get_attributes=%d dev=%d\n", 676 + _LLU(obj->id), 677 + ios->in_attr_len, first_dev); 668 678 } 669 679 if (ios->out_attr) 670 680 osd_req_add_set_attr_list(or, ios->out_attr, ios->out_attr_len); ··· 678 682 return 0; 679 683 } 680 684 681 - int exofs_sbi_read(struct exofs_io_state *ios) 685 + int ore_read(struct ore_io_state *ios) 682 686 { 683 687 int i; 684 688 int ret; ··· 688 692 return ret; 689 693 690 694 for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) { 691 - ret = _sbi_read_mirror(ios, i); 695 + ret = _read_mirror(ios, i); 692 696 if (unlikely(ret)) 693 697 return ret; 694 698 } 695 699 696 - ret = exofs_io_execute(ios); 700 + ret = ore_io_execute(ios); 697 701 return ret; 698 702 } 703 + EXPORT_SYMBOL(ore_read); 699 704 700 - int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr) 705 + int extract_attr_from_ios(struct ore_io_state *ios, struct osd_attr *attr) 701 706 { 702 707 struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */ 703 708 void *iter = NULL; ··· 718 721 719 722 return -EIO; 720 723 } 724 + EXPORT_SYMBOL(extract_attr_from_ios); 721 725 722 - static int _truncate_mirrors(struct exofs_io_state *ios, unsigned cur_comp, 726 + static int _truncate_mirrors(struct ore_io_state *ios, unsigned cur_comp, 723 727 struct osd_attr *attr) 724 728 { 725 729 int last_comp = cur_comp + ios->layout->mirrors_p1; 726 730 727 731 for (; cur_comp < last_comp; ++cur_comp) { 728 - struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp]; 732 + struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp]; 729 733 struct osd_request *or; 730 734 731 - or = osd_start_request(exofs_ios_od(ios, cur_comp), GFP_KERNEL); 735 + or = osd_start_request(_ios_od(ios, cur_comp), GFP_KERNEL); 732 736 if (unlikely(!or)) { 733 - EXOFS_ERR("%s: osd_start_request failed\n", __func__); 737 + ORE_ERR("%s: osd_start_request failed\n", __func__); 734 738 return -ENOMEM; 735 739 } 736 740 per_dev->or = or; 737 741 738 - osd_req_set_attributes(or, &ios->obj); 742 + osd_req_set_attributes(or, _ios_obj(ios, cur_comp)); 739 743 osd_req_add_set_attr_list(or, attr, 1); 740 744 } 741 745 742 746 return 0; 743 747 } 744 748 745 - int exofs_oi_truncate(struct exofs_i_info *oi, u64 size) 749 + struct _trunc_info { 750 + struct _striping_info si; 751 + u64 prev_group_obj_off; 752 + u64 next_group_obj_off; 753 + 754 + unsigned first_group_dev; 755 + unsigned nex_group_dev; 756 + unsigned max_devs; 757 + }; 758 + 759 + void _calc_trunk_info(struct ore_layout *layout, u64 file_offset, 760 + struct _trunc_info *ti) 746 761 { 747 - struct exofs_sb_info *sbi = oi->vfs_inode.i_sb->s_fs_info; 748 - struct exofs_io_state *ios; 762 + unsigned stripe_unit = layout->stripe_unit; 763 + 764 + _calc_stripe_info(layout, file_offset, &ti->si); 765 + 766 + ti->prev_group_obj_off = ti->si.M * stripe_unit; 767 + ti->next_group_obj_off = ti->si.M ? (ti->si.M - 1) * stripe_unit : 0; 768 + 769 + ti->first_group_dev = ti->si.dev - (ti->si.dev % layout->group_width); 770 + ti->nex_group_dev = ti->first_group_dev + layout->group_width; 771 + ti->max_devs = layout->group_width * layout->group_count; 772 + } 773 + 774 + int ore_truncate(struct ore_layout *layout, struct ore_components *comps, 775 + u64 size) 776 + { 777 + struct ore_io_state *ios; 749 778 struct exofs_trunc_attr { 750 779 struct osd_attr attr; 751 780 __be64 newsize; 752 781 } *size_attrs; 753 - struct _striping_info si; 782 + struct _trunc_info ti; 754 783 int i, ret; 755 784 756 - ret = exofs_get_io_state(&sbi->layout, &ios); 785 + ret = ore_get_io_state(layout, comps, &ios); 757 786 if (unlikely(ret)) 758 787 return ret; 759 788 760 - size_attrs = kcalloc(ios->layout->group_width, sizeof(*size_attrs), 789 + _calc_trunk_info(ios->layout, size, &ti); 790 + 791 + size_attrs = kcalloc(ti.max_devs, sizeof(*size_attrs), 761 792 GFP_KERNEL); 762 793 if (unlikely(!size_attrs)) { 763 794 ret = -ENOMEM; 764 795 goto out; 765 796 } 766 797 767 - ios->obj.id = exofs_oi_objno(oi); 768 - ios->cred = oi->i_cred; 798 + ios->numdevs = ios->comps->numdevs; 769 799 770 - ios->numdevs = ios->layout->s_numdevs; 771 - _calc_stripe_info(ios, size, &si); 772 - 773 - for (i = 0; i < ios->layout->group_width; ++i) { 800 + for (i = 0; i < ti.max_devs; ++i) { 774 801 struct exofs_trunc_attr *size_attr = &size_attrs[i]; 775 802 u64 obj_size; 776 803 777 - if (i < si.dev) 778 - obj_size = si.obj_offset + 779 - ios->layout->stripe_unit - si.unit_off; 780 - else if (i == si.dev) 781 - obj_size = si.obj_offset; 782 - else /* i > si.dev */ 783 - obj_size = si.obj_offset - si.unit_off; 804 + if (i < ti.first_group_dev) 805 + obj_size = ti.prev_group_obj_off; 806 + else if (i >= ti.nex_group_dev) 807 + obj_size = ti.next_group_obj_off; 808 + else if (i < ti.si.dev) /* dev within this group */ 809 + obj_size = ti.si.obj_offset + 810 + ios->layout->stripe_unit - ti.si.unit_off; 811 + else if (i == ti.si.dev) 812 + obj_size = ti.si.obj_offset; 813 + else /* i > ti.dev */ 814 + obj_size = ti.si.obj_offset - ti.si.unit_off; 784 815 785 816 size_attr->newsize = cpu_to_be64(obj_size); 786 817 size_attr->attr = g_attr_logical_length; 787 818 size_attr->attr.val_ptr = &size_attr->newsize; 788 819 820 + ORE_DBGMSG("trunc(0x%llx) obj_offset=0x%llx dev=%d\n", 821 + _LLU(comps->comps->obj.id), _LLU(obj_size), i); 789 822 ret = _truncate_mirrors(ios, i * ios->layout->mirrors_p1, 790 823 &size_attr->attr); 791 824 if (unlikely(ret)) 792 825 goto out; 793 826 } 794 - ret = exofs_io_execute(ios); 827 + ret = ore_io_execute(ios); 795 828 796 829 out: 797 830 kfree(size_attrs); 798 - exofs_put_io_state(ios); 831 + ore_put_io_state(ios); 799 832 return ret; 800 833 } 834 + EXPORT_SYMBOL(ore_truncate); 835 + 836 + const struct osd_attr g_attr_logical_length = ATTR_DEF( 837 + OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8); 838 + EXPORT_SYMBOL(g_attr_logical_length);
-45
fs/exofs/pnfs.h
··· 1 - /* 2 - * Copyright (C) 2008, 2009 3 - * Boaz Harrosh <bharrosh@panasas.com> 4 - * 5 - * This file is part of exofs. 6 - * 7 - * exofs is free software; you can redistribute it and/or modify it under the 8 - * terms of the GNU General Public License version 2 as published by the Free 9 - * Software Foundation. 10 - * 11 - */ 12 - 13 - /* FIXME: Remove this file once pnfs hits mainline */ 14 - 15 - #ifndef __EXOFS_PNFS_H__ 16 - #define __EXOFS_PNFS_H__ 17 - 18 - #if ! defined(__PNFS_OSD_XDR_H__) 19 - 20 - enum pnfs_iomode { 21 - IOMODE_READ = 1, 22 - IOMODE_RW = 2, 23 - IOMODE_ANY = 3, 24 - }; 25 - 26 - /* Layout Structure */ 27 - enum pnfs_osd_raid_algorithm4 { 28 - PNFS_OSD_RAID_0 = 1, 29 - PNFS_OSD_RAID_4 = 2, 30 - PNFS_OSD_RAID_5 = 3, 31 - PNFS_OSD_RAID_PQ = 4 /* Reed-Solomon P+Q */ 32 - }; 33 - 34 - struct pnfs_osd_data_map { 35 - u32 odm_num_comps; 36 - u64 odm_stripe_unit; 37 - u32 odm_group_width; 38 - u32 odm_group_depth; 39 - u32 odm_mirror_cnt; 40 - u32 odm_raid_algorithm; 41 - }; 42 - 43 - #endif /* ! defined(__PNFS_OSD_XDR_H__) */ 44 - 45 - #endif /* __EXOFS_PNFS_H__ */
+162 -89
fs/exofs/super.c
··· 40 40 41 41 #include "exofs.h" 42 42 43 + #define EXOFS_DBGMSG2(M...) do {} while (0) 44 + 43 45 /****************************************************************************** 44 46 * MOUNT OPTIONS 45 47 *****************************************************************************/ ··· 210 208 } 211 209 212 210 /****************************************************************************** 213 - * SUPERBLOCK FUNCTIONS 211 + * Some osd helpers 214 212 *****************************************************************************/ 215 - static const struct super_operations exofs_sops; 216 - static const struct export_operations exofs_export_ops; 213 + void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj) 214 + { 215 + osd_sec_init_nosec_doall_caps(cred_a, obj, false, true); 216 + } 217 + 218 + static int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj, 219 + u64 offset, void *p, unsigned length) 220 + { 221 + struct osd_request *or = osd_start_request(od, GFP_KERNEL); 222 + /* struct osd_sense_info osi = {.key = 0};*/ 223 + int ret; 224 + 225 + if (unlikely(!or)) { 226 + EXOFS_DBGMSG("%s: osd_start_request failed.\n", __func__); 227 + return -ENOMEM; 228 + } 229 + ret = osd_req_read_kern(or, obj, offset, p, length); 230 + if (unlikely(ret)) { 231 + EXOFS_DBGMSG("%s: osd_req_read_kern failed.\n", __func__); 232 + goto out; 233 + } 234 + 235 + ret = osd_finalize_request(or, 0, cred, NULL); 236 + if (unlikely(ret)) { 237 + EXOFS_DBGMSG("Failed to osd_finalize_request() => %d\n", ret); 238 + goto out; 239 + } 240 + 241 + ret = osd_execute_request(or); 242 + if (unlikely(ret)) 243 + EXOFS_DBGMSG("osd_execute_request() => %d\n", ret); 244 + /* osd_req_decode_sense(or, ret); */ 245 + 246 + out: 247 + osd_end_request(or); 248 + EXOFS_DBGMSG2("read_kern(0x%llx) offset=0x%llx " 249 + "length=0x%llx dev=%p ret=>%d\n", 250 + _LLU(obj->id), _LLU(offset), _LLU(length), od, ret); 251 + return ret; 252 + } 217 253 218 254 static const struct osd_attr g_attr_sb_stats = ATTR_DEF( 219 255 EXOFS_APAGE_SB_DATA, ··· 263 223 struct osd_attr attrs[] = { 264 224 [0] = g_attr_sb_stats, 265 225 }; 266 - struct exofs_io_state *ios; 226 + struct ore_io_state *ios; 267 227 int ret; 268 228 269 - ret = exofs_get_io_state(&sbi->layout, &ios); 229 + ret = ore_get_io_state(&sbi->layout, &sbi->comps, &ios); 270 230 if (unlikely(ret)) { 271 - EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); 231 + EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__); 272 232 return ret; 273 233 } 274 - 275 - ios->cred = sbi->s_cred; 276 234 277 235 ios->in_attr = attrs; 278 236 ios->in_attr_len = ARRAY_SIZE(attrs); 279 237 280 - ret = exofs_sbi_read(ios); 238 + ret = ore_read(ios); 281 239 if (unlikely(ret)) { 282 240 EXOFS_ERR("Error reading super_block stats => %d\n", ret); 283 241 goto out; ··· 302 264 } 303 265 304 266 out: 305 - exofs_put_io_state(ios); 267 + ore_put_io_state(ios); 306 268 return ret; 307 269 } 308 270 309 - static void stats_done(struct exofs_io_state *ios, void *p) 271 + static void stats_done(struct ore_io_state *ios, void *p) 310 272 { 311 - exofs_put_io_state(ios); 273 + ore_put_io_state(ios); 312 274 /* Good thanks nothing to do anymore */ 313 275 } 314 276 ··· 318 280 struct osd_attr attrs[] = { 319 281 [0] = g_attr_sb_stats, 320 282 }; 321 - struct exofs_io_state *ios; 283 + struct ore_io_state *ios; 322 284 int ret; 323 285 324 - ret = exofs_get_io_state(&sbi->layout, &ios); 286 + ret = ore_get_io_state(&sbi->layout, &sbi->comps, &ios); 325 287 if (unlikely(ret)) { 326 - EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); 288 + EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__); 327 289 return ret; 328 290 } 329 291 ··· 331 293 sbi->s_ess.s_numfiles = cpu_to_le64(sbi->s_numfiles); 332 294 attrs[0].val_ptr = &sbi->s_ess; 333 295 334 - ios->cred = sbi->s_cred; 296 + 335 297 ios->done = stats_done; 336 298 ios->private = sbi; 337 299 ios->out_attr = attrs; 338 300 ios->out_attr_len = ARRAY_SIZE(attrs); 339 301 340 - ret = exofs_sbi_write(ios); 302 + ret = ore_write(ios); 341 303 if (unlikely(ret)) { 342 - EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__); 343 - exofs_put_io_state(ios); 304 + EXOFS_ERR("%s: ore_write failed.\n", __func__); 305 + ore_put_io_state(ios); 344 306 } 345 307 346 308 return ret; 347 309 } 310 + 311 + /****************************************************************************** 312 + * SUPERBLOCK FUNCTIONS 313 + *****************************************************************************/ 314 + static const struct super_operations exofs_sops; 315 + static const struct export_operations exofs_export_ops; 348 316 349 317 /* 350 318 * Write the superblock to the OSD ··· 359 315 { 360 316 struct exofs_sb_info *sbi; 361 317 struct exofs_fscb *fscb; 362 - struct exofs_io_state *ios; 318 + struct ore_comp one_comp; 319 + struct ore_components comps; 320 + struct ore_io_state *ios; 363 321 int ret = -ENOMEM; 364 322 365 323 fscb = kmalloc(sizeof(*fscb), GFP_KERNEL); ··· 377 331 * version). Otherwise the exofs_fscb is read-only from mkfs time. All 378 332 * the writeable info is set in exofs_sbi_write_stats() above. 379 333 */ 380 - ret = exofs_get_io_state(&sbi->layout, &ios); 334 + 335 + exofs_init_comps(&comps, &one_comp, sbi, EXOFS_SUPER_ID); 336 + 337 + ret = ore_get_io_state(&sbi->layout, &comps, &ios); 381 338 if (unlikely(ret)) 382 339 goto out; 383 340 ··· 394 345 fscb->s_newfs = 0; 395 346 fscb->s_version = EXOFS_FSCB_VER; 396 347 397 - ios->obj.id = EXOFS_SUPER_ID; 398 348 ios->offset = 0; 399 349 ios->kern_buff = fscb; 400 - ios->cred = sbi->s_cred; 401 350 402 - ret = exofs_sbi_write(ios); 351 + ret = ore_write(ios); 403 352 if (unlikely(ret)) 404 - EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__); 353 + EXOFS_ERR("%s: ore_write failed.\n", __func__); 405 354 else 406 355 sb->s_dirt = 0; 407 356 ··· 407 360 unlock_super(sb); 408 361 out: 409 362 EXOFS_DBGMSG("s_nextid=0x%llx ret=%d\n", _LLU(sbi->s_nextid), ret); 410 - exofs_put_io_state(ios); 363 + ore_put_io_state(ios); 411 364 kfree(fscb); 412 365 return ret; 413 366 } ··· 431 384 432 385 void exofs_free_sbi(struct exofs_sb_info *sbi) 433 386 { 434 - while (sbi->layout.s_numdevs) { 435 - int i = --sbi->layout.s_numdevs; 436 - struct osd_dev *od = sbi->layout.s_ods[i]; 387 + while (sbi->comps.numdevs) { 388 + int i = --sbi->comps.numdevs; 389 + struct osd_dev *od = sbi->comps.ods[i]; 437 390 438 391 if (od) { 439 - sbi->layout.s_ods[i] = NULL; 392 + sbi->comps.ods[i] = NULL; 440 393 osduld_put_device(od); 441 394 } 442 395 } 396 + if (sbi->comps.ods != sbi->_min_one_dev) 397 + kfree(sbi->comps.ods); 443 398 kfree(sbi); 444 399 } 445 400 ··· 468 419 msecs_to_jiffies(100)); 469 420 } 470 421 471 - _exofs_print_device("Unmounting", NULL, sbi->layout.s_ods[0], 472 - sbi->layout.s_pid); 422 + _exofs_print_device("Unmounting", NULL, sbi->comps.ods[0], 423 + sbi->one_comp.obj.partition); 473 424 474 425 bdi_destroy(&sbi->bdi); 475 426 exofs_free_sbi(sbi); ··· 550 501 return -EINVAL; 551 502 } 552 503 504 + EXOFS_DBGMSG("exofs: layout: " 505 + "num_comps=%u stripe_unit=0x%x group_width=%u " 506 + "group_depth=0x%llx mirrors_p1=%u raid_algorithm=%u\n", 507 + numdevs, 508 + sbi->layout.stripe_unit, 509 + sbi->layout.group_width, 510 + _LLU(sbi->layout.group_depth), 511 + sbi->layout.mirrors_p1, 512 + sbi->data_map.odm_raid_algorithm); 553 513 return 0; 554 514 } 555 515 556 - static unsigned __ra_pages(struct exofs_layout *layout) 516 + static unsigned __ra_pages(struct ore_layout *layout) 557 517 { 558 518 const unsigned _MIN_RA = 32; /* min 128K read-ahead */ 559 519 unsigned ra_pages = layout->group_width * layout->stripe_unit / ··· 605 547 return !(odi->systemid_len || odi->osdname_len); 606 548 } 607 549 608 - static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi, 550 + static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi, 551 + struct osd_dev *fscb_od, 609 552 unsigned table_count) 610 553 { 611 - struct exofs_sb_info *sbi = *psbi; 612 - struct osd_dev *fscb_od; 613 - struct osd_obj_id obj = {.partition = sbi->layout.s_pid, 614 - .id = EXOFS_DEVTABLE_ID}; 554 + struct ore_comp comp; 615 555 struct exofs_device_table *dt; 616 556 unsigned table_bytes = table_count * sizeof(dt->dt_dev_table[0]) + 617 557 sizeof(*dt); ··· 623 567 return -ENOMEM; 624 568 } 625 569 626 - fscb_od = sbi->layout.s_ods[0]; 627 - sbi->layout.s_ods[0] = NULL; 628 - sbi->layout.s_numdevs = 0; 629 - ret = exofs_read_kern(fscb_od, sbi->s_cred, &obj, 0, dt, table_bytes); 570 + sbi->comps.numdevs = 0; 571 + 572 + comp.obj.partition = sbi->one_comp.obj.partition; 573 + comp.obj.id = EXOFS_DEVTABLE_ID; 574 + exofs_make_credential(comp.cred, &comp.obj); 575 + 576 + ret = exofs_read_kern(fscb_od, comp.cred, &comp.obj, 0, dt, 577 + table_bytes); 630 578 if (unlikely(ret)) { 631 579 EXOFS_ERR("ERROR: reading device table\n"); 632 580 goto out; ··· 648 588 goto out; 649 589 650 590 if (likely(numdevs > 1)) { 651 - unsigned size = numdevs * sizeof(sbi->layout.s_ods[0]); 591 + unsigned size = numdevs * sizeof(sbi->comps.ods[0]); 652 592 653 - sbi = krealloc(sbi, sizeof(*sbi) + size, GFP_KERNEL); 654 - if (unlikely(!sbi)) { 593 + /* Twice bigger table: See exofs_init_comps() and below 594 + * comment 595 + */ 596 + sbi->comps.ods = kzalloc(size + size - 1, GFP_KERNEL); 597 + if (unlikely(!sbi->comps.ods)) { 598 + EXOFS_ERR("ERROR: faild allocating Device array[%d]\n", 599 + numdevs); 655 600 ret = -ENOMEM; 656 601 goto out; 657 602 } 658 - memset(&sbi->layout.s_ods[1], 0, 659 - size - sizeof(sbi->layout.s_ods[0])); 660 - *psbi = sbi; 661 603 } 662 604 663 605 for (i = 0; i < numdevs; i++) { ··· 681 619 * line. We always keep them in device-table order. 682 620 */ 683 621 if (fscb_od && osduld_device_same(fscb_od, &odi)) { 684 - sbi->layout.s_ods[i] = fscb_od; 685 - ++sbi->layout.s_numdevs; 622 + sbi->comps.ods[i] = fscb_od; 623 + ++sbi->comps.numdevs; 686 624 fscb_od = NULL; 687 625 continue; 688 626 } ··· 695 633 goto out; 696 634 } 697 635 698 - sbi->layout.s_ods[i] = od; 699 - ++sbi->layout.s_numdevs; 636 + sbi->comps.ods[i] = od; 637 + ++sbi->comps.numdevs; 700 638 701 639 /* Read the fscb of the other devices to make sure the FS 702 640 * partition is there. 703 641 */ 704 - ret = exofs_read_kern(od, sbi->s_cred, &obj, 0, &fscb, 642 + ret = exofs_read_kern(od, comp.cred, &comp.obj, 0, &fscb, 705 643 sizeof(fscb)); 706 644 if (unlikely(ret)) { 707 645 EXOFS_ERR("ERROR: Malformed participating device " ··· 718 656 719 657 out: 720 658 kfree(dt); 721 - if (unlikely(!ret && fscb_od)) { 722 - EXOFS_ERR( 723 - "ERROR: Bad device-table container device not present\n"); 724 - osduld_put_device(fscb_od); 725 - ret = -EINVAL; 726 - } 659 + if (likely(!ret)) { 660 + unsigned numdevs = sbi->comps.numdevs; 727 661 662 + if (unlikely(fscb_od)) { 663 + EXOFS_ERR("ERROR: Bad device-table container device not present\n"); 664 + osduld_put_device(fscb_od); 665 + return -EINVAL; 666 + } 667 + /* exofs round-robins the device table view according to inode 668 + * number. We hold a: twice bigger table hence inodes can point 669 + * to any device and have a sequential view of the table 670 + * starting at this device. See exofs_init_comps() 671 + */ 672 + for (i = 0; i < numdevs - 1; ++i) 673 + sbi->comps.ods[i + numdevs] = sbi->comps.ods[i]; 674 + } 728 675 return ret; 729 676 } 730 677 ··· 747 676 struct exofs_sb_info *sbi; /*extended info */ 748 677 struct osd_dev *od; /* Master device */ 749 678 struct exofs_fscb fscb; /*on-disk superblock info */ 750 - struct osd_obj_id obj; 679 + struct ore_comp comp; 751 680 unsigned table_count; 752 681 int ret; 753 682 754 683 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 755 684 if (!sbi) 756 685 return -ENOMEM; 757 - 758 - ret = bdi_setup_and_register(&sbi->bdi, "exofs", BDI_CAP_MAP_COPY); 759 - if (ret) 760 - goto free_bdi; 761 686 762 687 /* use mount options to fill superblock */ 763 688 if (opts->is_osdname) { ··· 762 695 odi.osdname_len = strlen(opts->dev_name); 763 696 odi.osdname = (u8 *)opts->dev_name; 764 697 od = osduld_info_lookup(&odi); 698 + kfree(opts->dev_name); 699 + opts->dev_name = NULL; 765 700 } else { 766 701 od = osduld_path_lookup(opts->dev_name); 767 702 } ··· 778 709 sbi->layout.group_width = 1; 779 710 sbi->layout.group_depth = -1; 780 711 sbi->layout.group_count = 1; 781 - sbi->layout.s_ods[0] = od; 782 - sbi->layout.s_numdevs = 1; 783 - sbi->layout.s_pid = opts->pid; 784 712 sbi->s_timeout = opts->timeout; 713 + 714 + sbi->one_comp.obj.partition = opts->pid; 715 + sbi->one_comp.obj.id = 0; 716 + exofs_make_credential(sbi->one_comp.cred, &sbi->one_comp.obj); 717 + sbi->comps.numdevs = 1; 718 + sbi->comps.single_comp = EC_SINGLE_COMP; 719 + sbi->comps.comps = &sbi->one_comp; 720 + sbi->comps.ods = sbi->_min_one_dev; 785 721 786 722 /* fill in some other data by hand */ 787 723 memset(sb->s_id, 0, sizeof(sb->s_id)); ··· 798 724 sb->s_bdev = NULL; 799 725 sb->s_dev = 0; 800 726 801 - obj.partition = sbi->layout.s_pid; 802 - obj.id = EXOFS_SUPER_ID; 803 - exofs_make_credential(sbi->s_cred, &obj); 727 + comp.obj.partition = sbi->one_comp.obj.partition; 728 + comp.obj.id = EXOFS_SUPER_ID; 729 + exofs_make_credential(comp.cred, &comp.obj); 804 730 805 - ret = exofs_read_kern(od, sbi->s_cred, &obj, 0, &fscb, sizeof(fscb)); 731 + ret = exofs_read_kern(od, comp.cred, &comp.obj, 0, &fscb, sizeof(fscb)); 806 732 if (unlikely(ret)) 807 733 goto free_sbi; 808 734 ··· 831 757 832 758 table_count = le64_to_cpu(fscb.s_dev_table_count); 833 759 if (table_count) { 834 - ret = exofs_read_lookup_dev_table(&sbi, table_count); 760 + ret = exofs_read_lookup_dev_table(sbi, od, table_count); 835 761 if (unlikely(ret)) 836 762 goto free_sbi; 763 + } else { 764 + sbi->comps.ods[0] = od; 837 765 } 838 766 839 767 __sbi_read_stats(sbi); ··· 869 793 goto free_sbi; 870 794 } 871 795 872 - _exofs_print_device("Mounting", opts->dev_name, sbi->layout.s_ods[0], 873 - sbi->layout.s_pid); 874 - if (opts->is_osdname) 875 - kfree(opts->dev_name); 796 + ret = bdi_setup_and_register(&sbi->bdi, "exofs", BDI_CAP_MAP_COPY); 797 + if (ret) { 798 + EXOFS_DBGMSG("Failed to bdi_setup_and_register\n"); 799 + goto free_sbi; 800 + } 801 + 802 + _exofs_print_device("Mounting", opts->dev_name, sbi->comps.ods[0], 803 + sbi->one_comp.obj.partition); 876 804 return 0; 877 805 878 806 free_sbi: 879 - bdi_destroy(&sbi->bdi); 880 - free_bdi: 881 807 EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n", 882 - opts->dev_name, sbi->layout.s_pid, ret); 808 + opts->dev_name, sbi->one_comp.obj.partition, ret); 883 809 exofs_free_sbi(sbi); 884 - if (opts->is_osdname) 885 - kfree(opts->dev_name); 886 810 return ret; 887 811 } 888 812 ··· 913 837 { 914 838 struct super_block *sb = dentry->d_sb; 915 839 struct exofs_sb_info *sbi = sb->s_fs_info; 916 - struct exofs_io_state *ios; 840 + struct ore_io_state *ios; 917 841 struct osd_attr attrs[] = { 918 842 ATTR_DEF(OSD_APAGE_PARTITION_QUOTAS, 919 843 OSD_ATTR_PQ_CAPACITY_QUOTA, sizeof(__be64)), ··· 922 846 }; 923 847 uint64_t capacity = ULLONG_MAX; 924 848 uint64_t used = ULLONG_MAX; 925 - uint8_t cred_a[OSD_CAP_LEN]; 926 849 int ret; 927 850 928 - ret = exofs_get_io_state(&sbi->layout, &ios); 851 + ret = ore_get_io_state(&sbi->layout, &sbi->comps, &ios); 929 852 if (ret) { 930 - EXOFS_DBGMSG("exofs_get_io_state failed.\n"); 853 + EXOFS_DBGMSG("ore_get_io_state failed.\n"); 931 854 return ret; 932 855 } 933 856 934 - exofs_make_credential(cred_a, &ios->obj); 935 - ios->cred = sbi->s_cred; 936 857 ios->in_attr = attrs; 937 858 ios->in_attr_len = ARRAY_SIZE(attrs); 938 859 939 - ret = exofs_sbi_read(ios); 860 + ret = ore_read(ios); 940 861 if (unlikely(ret)) 941 862 goto out; 942 863 ··· 962 889 buf->f_namelen = EXOFS_NAME_LEN; 963 890 964 891 out: 965 - exofs_put_io_state(ios); 892 + ore_put_io_state(ios); 966 893 return ret; 967 894 } 968 895
+5 -5
include/linux/nfs_xdr.h
··· 773 773 struct posix_acl * acl_default; 774 774 }; 775 775 776 + struct nfs4_string { 777 + unsigned int len; 778 + char *data; 779 + }; 780 + 776 781 #ifdef CONFIG_NFS_V4 777 782 778 783 typedef u64 clientid4; ··· 966 961 u32 has_links; 967 962 u32 has_symlinks; 968 963 struct nfs4_sequence_res seq_res; 969 - }; 970 - 971 - struct nfs4_string { 972 - unsigned int len; 973 - char *data; 974 964 }; 975 965 976 966 #define NFS4_PATHNAME_MAXCOMPONENTS 512
+125
include/scsi/osd_ore.h
··· 1 + /* 2 + * Copyright (C) 2011 3 + * Boaz Harrosh <bharrosh@panasas.com> 4 + * 5 + * Public Declarations of the ORE API 6 + * 7 + * This file is part of the ORE (Object Raid Engine) library. 8 + * 9 + * ORE is free software; you can redistribute it and/or modify 10 + * it under the terms of the GNU General Public License version 2 as published 11 + * by the Free Software Foundation. (GPL v2) 12 + * 13 + * ORE is distributed in the hope that it will be useful, 14 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 + * GNU General Public License for more details. 17 + * 18 + * You should have received a copy of the GNU General Public License 19 + * along with the ORE; if not, write to the Free Software 20 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 21 + */ 22 + #ifndef __ORE_H__ 23 + #define __ORE_H__ 24 + 25 + #include <scsi/osd_initiator.h> 26 + #include <scsi/osd_attributes.h> 27 + #include <scsi/osd_sec.h> 28 + #include <linux/pnfs_osd_xdr.h> 29 + 30 + struct ore_comp { 31 + struct osd_obj_id obj; 32 + u8 cred[OSD_CAP_LEN]; 33 + }; 34 + 35 + struct ore_layout { 36 + /* Our way of looking at the data_map */ 37 + unsigned stripe_unit; 38 + unsigned mirrors_p1; 39 + 40 + unsigned group_width; 41 + u64 group_depth; 42 + unsigned group_count; 43 + }; 44 + 45 + struct ore_components { 46 + unsigned numdevs; /* Num of devices in array */ 47 + /* If @single_comp == EC_SINGLE_COMP, @comps points to a single 48 + * component. else there are @numdevs components 49 + */ 50 + enum EC_COMP_USAGE { 51 + EC_SINGLE_COMP = 0, EC_MULTPLE_COMPS = 0xffffffff 52 + } single_comp; 53 + struct ore_comp *comps; 54 + struct osd_dev **ods; /* osd_dev array */ 55 + }; 56 + 57 + struct ore_io_state; 58 + typedef void (*ore_io_done_fn)(struct ore_io_state *ios, void *private); 59 + 60 + struct ore_io_state { 61 + struct kref kref; 62 + 63 + void *private; 64 + ore_io_done_fn done; 65 + 66 + struct ore_layout *layout; 67 + struct ore_components *comps; 68 + 69 + /* Global read/write IO*/ 70 + loff_t offset; 71 + unsigned long length; 72 + void *kern_buff; 73 + 74 + struct page **pages; 75 + unsigned nr_pages; 76 + unsigned pgbase; 77 + unsigned pages_consumed; 78 + 79 + /* Attributes */ 80 + unsigned in_attr_len; 81 + struct osd_attr *in_attr; 82 + unsigned out_attr_len; 83 + struct osd_attr *out_attr; 84 + 85 + bool reading; 86 + 87 + /* Variable array of size numdevs */ 88 + unsigned numdevs; 89 + struct ore_per_dev_state { 90 + struct osd_request *or; 91 + struct bio *bio; 92 + loff_t offset; 93 + unsigned length; 94 + unsigned dev; 95 + } per_dev[]; 96 + }; 97 + 98 + static inline unsigned ore_io_state_size(unsigned numdevs) 99 + { 100 + return sizeof(struct ore_io_state) + 101 + sizeof(struct ore_per_dev_state) * numdevs; 102 + } 103 + 104 + /* ore.c */ 105 + int ore_get_rw_state(struct ore_layout *layout, struct ore_components *comps, 106 + bool is_reading, u64 offset, u64 length, 107 + struct ore_io_state **ios); 108 + int ore_get_io_state(struct ore_layout *layout, struct ore_components *comps, 109 + struct ore_io_state **ios); 110 + void ore_put_io_state(struct ore_io_state *ios); 111 + 112 + int ore_check_io(struct ore_io_state *ios, u64 *resid); 113 + 114 + int ore_create(struct ore_io_state *ios); 115 + int ore_remove(struct ore_io_state *ios); 116 + int ore_write(struct ore_io_state *ios); 117 + int ore_read(struct ore_io_state *ios); 118 + int ore_truncate(struct ore_layout *layout, struct ore_components *comps, 119 + u64 size); 120 + 121 + int extract_attr_from_ios(struct ore_io_state *ios, struct osd_attr *attr); 122 + 123 + extern const struct osd_attr g_attr_logical_length; 124 + 125 + #endif