Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'readdir' (readdir speedup and sanity checking)

This makes getdents() and getdents64() do sanity checking on the
pathname that it gives to user space. And to mitigate the performance
impact of that, it first cleans up the way it does the user copying, so
that the code avoids doing the SMAP/PAN updates between each part of the
dirent structure write.

I really wanted to do this during the merge window, but didn't have
time. The conversion of filldir to unsafe_put_user() is something I've
had around for years now in a private branch, but the extra pathname
checking finally made me clean it up to the point where it is mergable.

It's worth noting that the filename validity checking really should be a
bit smarter: it would be much better to delay the error reporting until
the end of the readdir, so that non-corrupted filenames are still
returned. But that involves bigger changes, so let's see if anybody
actually hits the corrupt directory entry case before worrying about it
further.

* branch 'readdir':
Make filldir[64]() verify the directory entry filename is valid
Convert filldir[64]() from __put_user() to unsafe_put_user()

+133 -35
+133 -35
fs/readdir.c
··· 20 20 #include <linux/syscalls.h> 21 21 #include <linux/unistd.h> 22 22 #include <linux/compat.h> 23 - 24 23 #include <linux/uaccess.h> 24 + 25 + #include <asm/unaligned.h> 26 + 27 + /* 28 + * Note the "unsafe_put_user() semantics: we goto a 29 + * label for errors. 30 + * 31 + * Also note how we use a "while()" loop here, even though 32 + * only the biggest size needs to loop. The compiler (well, 33 + * at least gcc) is smart enough to turn the smaller sizes 34 + * into just if-statements, and this way we don't need to 35 + * care whether 'u64' or 'u32' is the biggest size. 36 + */ 37 + #define unsafe_copy_loop(dst, src, len, type, label) \ 38 + while (len >= sizeof(type)) { \ 39 + unsafe_put_user(get_unaligned((type *)src), \ 40 + (type __user *)dst, label); \ 41 + dst += sizeof(type); \ 42 + src += sizeof(type); \ 43 + len -= sizeof(type); \ 44 + } 45 + 46 + /* 47 + * We avoid doing 64-bit copies on 32-bit architectures. They 48 + * might be better, but the component names are mostly small, 49 + * and the 64-bit cases can end up being much more complex and 50 + * put much more register pressure on the code, so it's likely 51 + * not worth the pain of unaligned accesses etc. 52 + * 53 + * So limit the copies to "unsigned long" size. I did verify 54 + * that at least the x86-32 case is ok without this limiting, 55 + * but I worry about random other legacy 32-bit cases that 56 + * might not do as well. 57 + */ 58 + #define unsafe_copy_type(dst, src, len, type, label) do { \ 59 + if (sizeof(type) <= sizeof(unsigned long)) \ 60 + unsafe_copy_loop(dst, src, len, type, label); \ 61 + } while (0) 62 + 63 + /* 64 + * Copy the dirent name to user space, and NUL-terminate 65 + * it. This should not be a function call, since we're doing 66 + * the copy inside a "user_access_begin/end()" section. 67 + */ 68 + #define unsafe_copy_dirent_name(_dst, _src, _len, label) do { \ 69 + char __user *dst = (_dst); \ 70 + const char *src = (_src); \ 71 + size_t len = (_len); \ 72 + unsafe_copy_type(dst, src, len, u64, label); \ 73 + unsafe_copy_type(dst, src, len, u32, label); \ 74 + unsafe_copy_type(dst, src, len, u16, label); \ 75 + unsafe_copy_type(dst, src, len, u8, label); \ 76 + unsafe_put_user(0, dst, label); \ 77 + } while (0) 78 + 25 79 26 80 int iterate_dir(struct file *file, struct dir_context *ctx) 27 81 { ··· 117 63 return res; 118 64 } 119 65 EXPORT_SYMBOL(iterate_dir); 66 + 67 + /* 68 + * POSIX says that a dirent name cannot contain NULL or a '/'. 69 + * 70 + * It's not 100% clear what we should really do in this case. 71 + * The filesystem is clearly corrupted, but returning a hard 72 + * error means that you now don't see any of the other names 73 + * either, so that isn't a perfect alternative. 74 + * 75 + * And if you return an error, what error do you use? Several 76 + * filesystems seem to have decided on EUCLEAN being the error 77 + * code for EFSCORRUPTED, and that may be the error to use. Or 78 + * just EIO, which is perhaps more obvious to users. 79 + * 80 + * In order to see the other file names in the directory, the 81 + * caller might want to make this a "soft" error: skip the 82 + * entry, and return the error at the end instead. 83 + * 84 + * Note that this should likely do a "memchr(name, 0, len)" 85 + * check too, since that would be filesystem corruption as 86 + * well. However, that case can't actually confuse user space, 87 + * which has to do a strlen() on the name anyway to find the 88 + * filename length, and the above "soft error" worry means 89 + * that it's probably better left alone until we have that 90 + * issue clarified. 91 + */ 92 + static int verify_dirent_name(const char *name, int len) 93 + { 94 + if (WARN_ON_ONCE(!len)) 95 + return -EIO; 96 + if (WARN_ON_ONCE(memchr(name, '/', len))) 97 + return -EIO; 98 + return 0; 99 + } 120 100 121 101 /* 122 102 * Traditional linux readdir() handling.. ··· 261 173 int reclen = ALIGN(offsetof(struct linux_dirent, d_name) + namlen + 2, 262 174 sizeof(long)); 263 175 176 + buf->error = verify_dirent_name(name, namlen); 177 + if (unlikely(buf->error)) 178 + return buf->error; 264 179 buf->error = -EINVAL; /* only used if we fail.. */ 265 180 if (reclen > buf->count) 266 181 return -EINVAL; ··· 273 182 return -EOVERFLOW; 274 183 } 275 184 dirent = buf->previous; 276 - if (dirent) { 277 - if (signal_pending(current)) 278 - return -EINTR; 279 - if (__put_user(offset, &dirent->d_off)) 280 - goto efault; 281 - } 185 + if (dirent && signal_pending(current)) 186 + return -EINTR; 187 + 188 + /* 189 + * Note! This range-checks 'previous' (which may be NULL). 190 + * The real range was checked in getdents 191 + */ 192 + if (!user_access_begin(dirent, sizeof(*dirent))) 193 + goto efault; 194 + if (dirent) 195 + unsafe_put_user(offset, &dirent->d_off, efault_end); 282 196 dirent = buf->current_dir; 283 - if (__put_user(d_ino, &dirent->d_ino)) 284 - goto efault; 285 - if (__put_user(reclen, &dirent->d_reclen)) 286 - goto efault; 287 - if (copy_to_user(dirent->d_name, name, namlen)) 288 - goto efault; 289 - if (__put_user(0, dirent->d_name + namlen)) 290 - goto efault; 291 - if (__put_user(d_type, (char __user *) dirent + reclen - 1)) 292 - goto efault; 197 + unsafe_put_user(d_ino, &dirent->d_ino, efault_end); 198 + unsafe_put_user(reclen, &dirent->d_reclen, efault_end); 199 + unsafe_put_user(d_type, (char __user *) dirent + reclen - 1, efault_end); 200 + unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end); 201 + user_access_end(); 202 + 293 203 buf->previous = dirent; 294 204 dirent = (void __user *)dirent + reclen; 295 205 buf->current_dir = dirent; 296 206 buf->count -= reclen; 297 207 return 0; 208 + efault_end: 209 + user_access_end(); 298 210 efault: 299 211 buf->error = -EFAULT; 300 212 return -EFAULT; ··· 353 259 int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1, 354 260 sizeof(u64)); 355 261 262 + buf->error = verify_dirent_name(name, namlen); 263 + if (unlikely(buf->error)) 264 + return buf->error; 356 265 buf->error = -EINVAL; /* only used if we fail.. */ 357 266 if (reclen > buf->count) 358 267 return -EINVAL; 359 268 dirent = buf->previous; 360 - if (dirent) { 361 - if (signal_pending(current)) 362 - return -EINTR; 363 - if (__put_user(offset, &dirent->d_off)) 364 - goto efault; 365 - } 269 + if (dirent && signal_pending(current)) 270 + return -EINTR; 271 + 272 + /* 273 + * Note! This range-checks 'previous' (which may be NULL). 274 + * The real range was checked in getdents 275 + */ 276 + if (!user_access_begin(dirent, sizeof(*dirent))) 277 + goto efault; 278 + if (dirent) 279 + unsafe_put_user(offset, &dirent->d_off, efault_end); 366 280 dirent = buf->current_dir; 367 - if (__put_user(ino, &dirent->d_ino)) 368 - goto efault; 369 - if (__put_user(0, &dirent->d_off)) 370 - goto efault; 371 - if (__put_user(reclen, &dirent->d_reclen)) 372 - goto efault; 373 - if (__put_user(d_type, &dirent->d_type)) 374 - goto efault; 375 - if (copy_to_user(dirent->d_name, name, namlen)) 376 - goto efault; 377 - if (__put_user(0, dirent->d_name + namlen)) 378 - goto efault; 281 + unsafe_put_user(ino, &dirent->d_ino, efault_end); 282 + unsafe_put_user(reclen, &dirent->d_reclen, efault_end); 283 + unsafe_put_user(d_type, &dirent->d_type, efault_end); 284 + unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end); 285 + user_access_end(); 286 + 379 287 buf->previous = dirent; 380 288 dirent = (void __user *)dirent + reclen; 381 289 buf->current_dir = dirent; 382 290 buf->count -= reclen; 383 291 return 0; 292 + efault_end: 293 + user_access_end(); 384 294 efault: 385 295 buf->error = -EFAULT; 386 296 return -EFAULT;