Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

module: prepare to handle ROX allocations for text

In order to support ROX allocations for module text, it is necessary to
handle modifications to the code, such as relocations and alternatives
patching, without write access to that memory.

One option is to use text patching, but this would make module loading
extremely slow and would expose executable code that is not yet in its
final form.

A better way is to have memory allocated with ROX permissions contain
invalid instructions and keep a writable, but not executable copy of the
module text. The relocations and alternative patches would be done on the
writable copy using the addresses of the ROX memory. Once the module is
completely ready, the updated text will be copied to ROX memory using text
patching in one go and the writable copy will be freed.

Add support for that to module initialization code and provide necessary
interfaces in execmem.

Link: https://lkml.kernel.org/r/20241023162711.2579610-5-rppt@kernel.org
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Tested-by: kdevops <kdevops@lists.linux.dev>
Cc: Andreas Larsson <andreas@gaisler.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Borislav Petkov (AMD) <bp@alien8.de>
Cc: Brian Cain <bcain@quicinc.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dinh Nguyen <dinguyen@kernel.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Guo Ren <guoren@kernel.org>
Cc: Helge Deller <deller@gmx.de>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Liam R. Howlett <Liam.Howlett@Oracle.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Richard Weinberger <richard@nod.at>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Song Liu <song@kernel.org>
Cc: Stafford Horne <shorne@gmail.com>
Cc: Steven Rostedt (Google) <rostedt@goodmis.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Uladzislau Rezki (Sony) <urezki@gmail.com>
Cc: Vineet Gupta <vgupta@kernel.org>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

Mike Rapoport (Microsoft) and committed by
Andrew Morton
0c133b1e 0c3beacf

+126 -8
+23
include/linux/execmem.h
··· 46 46 /** 47 47 * enum execmem_range_flags - options for executable memory allocations 48 48 * @EXECMEM_KASAN_SHADOW: allocate kasan shadow 49 + * @EXECMEM_ROX_CACHE: allocations should use ROX cache of huge pages 49 50 */ 50 51 enum execmem_range_flags { 51 52 EXECMEM_KASAN_SHADOW = (1 << 0), 53 + EXECMEM_ROX_CACHE = (1 << 1), 52 54 }; 53 55 54 56 /** ··· 124 122 * @ptr: pointer to the memory that should be freed 125 123 */ 126 124 void execmem_free(void *ptr); 125 + 126 + /** 127 + * execmem_update_copy - copy an update to executable memory 128 + * @dst: destination address to update 129 + * @src: source address containing the data 130 + * @size: how many bytes of memory should be copied 131 + * 132 + * Copy @size bytes from @src to @dst using text poking if the memory at 133 + * @dst is read-only. 134 + * 135 + * Return: a pointer to @dst or NULL on error 136 + */ 137 + void *execmem_update_copy(void *dst, const void *src, size_t size); 138 + 139 + /** 140 + * execmem_is_rox - check if execmem is read-only 141 + * @type: the execmem type to check 142 + * 143 + * Return: %true if the @type is read-only, %false if it's writable 144 + */ 145 + bool execmem_is_rox(enum execmem_type type); 127 146 128 147 #if defined(CONFIG_EXECMEM) && !defined(CONFIG_ARCH_WANTS_EXECMEM_LATE) 129 148 void execmem_init(void);
+16
include/linux/module.h
··· 367 367 368 368 struct module_memory { 369 369 void *base; 370 + void *rw_copy; 371 + bool is_rox; 370 372 unsigned int size; 371 373 372 374 #ifdef CONFIG_MODULES_TREE_LOOKUP ··· 769 767 770 768 void set_module_sig_enforced(void); 771 769 770 + void *__module_writable_address(struct module *mod, void *loc); 771 + 772 + static inline void *module_writable_address(struct module *mod, void *loc) 773 + { 774 + if (!IS_ENABLED(CONFIG_ARCH_HAS_EXECMEM_ROX) || !mod) 775 + return loc; 776 + return __module_writable_address(mod, loc); 777 + } 778 + 772 779 #else /* !CONFIG_MODULES... */ 773 780 774 781 static inline struct module *__module_address(unsigned long addr) ··· 884 873 static inline bool module_is_coming(struct module *mod) 885 874 { 886 875 return false; 876 + } 877 + 878 + static inline void *module_writable_address(struct module *mod, void *loc) 879 + { 880 + return loc; 887 881 } 888 882 #endif /* CONFIG_MODULES */ 889 883
+4
include/linux/moduleloader.h
··· 108 108 const Elf_Shdr *sechdrs, 109 109 struct module *mod); 110 110 111 + int module_post_finalize(const Elf_Ehdr *hdr, 112 + const Elf_Shdr *sechdrs, 113 + struct module *mod); 114 + 111 115 #ifdef CONFIG_MODULES 112 116 void flush_module_init_free_work(void); 113 117 #else
+2 -1
kernel/module/debug_kmemleak.c
··· 14 14 { 15 15 /* only scan writable, non-executable sections */ 16 16 for_each_mod_mem_type(type) { 17 - if (type != MOD_DATA && type != MOD_INIT_DATA) 17 + if (type != MOD_DATA && type != MOD_INIT_DATA && 18 + !mod->mem[type].is_rox) 18 19 kmemleak_no_scan(mod->mem[type].base); 19 20 } 20 21 }
+67 -7
kernel/module/main.c
··· 1189 1189 { 1190 1190 } 1191 1191 1192 + void *__module_writable_address(struct module *mod, void *loc) 1193 + { 1194 + for_class_mod_mem_type(type, text) { 1195 + struct module_memory *mem = &mod->mem[type]; 1196 + 1197 + if (loc >= mem->base && loc < mem->base + mem->size) 1198 + return loc + (mem->rw_copy - mem->base); 1199 + } 1200 + 1201 + return loc; 1202 + } 1203 + 1192 1204 static int module_memory_alloc(struct module *mod, enum mod_mem_type type) 1193 1205 { 1194 1206 unsigned int size = PAGE_ALIGN(mod->mem[type].size); ··· 1218 1206 if (!ptr) 1219 1207 return -ENOMEM; 1220 1208 1209 + mod->mem[type].base = ptr; 1210 + 1211 + if (execmem_is_rox(execmem_type)) { 1212 + ptr = vzalloc(size); 1213 + 1214 + if (!ptr) { 1215 + execmem_free(mod->mem[type].base); 1216 + return -ENOMEM; 1217 + } 1218 + 1219 + mod->mem[type].rw_copy = ptr; 1220 + mod->mem[type].is_rox = true; 1221 + } else { 1222 + mod->mem[type].rw_copy = mod->mem[type].base; 1223 + memset(mod->mem[type].base, 0, size); 1224 + } 1225 + 1221 1226 /* 1222 1227 * The pointer to these blocks of memory are stored on the module 1223 1228 * structure and we keep that around so long as the module is ··· 1248 1219 */ 1249 1220 kmemleak_not_leak(ptr); 1250 1221 1251 - memset(ptr, 0, size); 1252 - mod->mem[type].base = ptr; 1253 - 1254 1222 return 0; 1255 1223 } 1256 1224 1257 1225 static void module_memory_free(struct module *mod, enum mod_mem_type type, 1258 1226 bool unload_codetags) 1259 1227 { 1260 - void *ptr = mod->mem[type].base; 1228 + struct module_memory *mem = &mod->mem[type]; 1229 + void *ptr = mem->base; 1230 + 1231 + if (mem->is_rox) 1232 + vfree(mem->rw_copy); 1261 1233 1262 1234 if (!unload_codetags && mod_mem_type_is_core_data(type)) 1263 1235 return; ··· 2281 2251 for_each_mod_mem_type(type) { 2282 2252 if (!mod->mem[type].size) { 2283 2253 mod->mem[type].base = NULL; 2254 + mod->mem[type].rw_copy = NULL; 2284 2255 continue; 2285 2256 } 2286 2257 ··· 2298 2267 void *dest; 2299 2268 
Elf_Shdr *shdr = &info->sechdrs[i]; 2300 2269 enum mod_mem_type type = shdr->sh_entsize >> SH_ENTSIZE_TYPE_SHIFT; 2270 + unsigned long offset = shdr->sh_entsize & SH_ENTSIZE_OFFSET_MASK; 2271 + unsigned long addr; 2301 2272 2302 2273 if (!(shdr->sh_flags & SHF_ALLOC)) 2303 2274 continue; 2304 2275 2305 - dest = mod->mem[type].base + (shdr->sh_entsize & SH_ENTSIZE_OFFSET_MASK); 2276 + addr = (unsigned long)mod->mem[type].base + offset; 2277 + dest = mod->mem[type].rw_copy + offset; 2306 2278 2307 2279 if (shdr->sh_type != SHT_NOBITS) { 2308 2280 /* ··· 2327 2293 * users of info can keep taking advantage and using the newly 2328 2294 * minted official memory area. 2329 2295 */ 2330 - shdr->sh_addr = (unsigned long)dest; 2296 + shdr->sh_addr = addr; 2331 2297 pr_debug("\t0x%lx 0x%.8lx %s\n", (long)shdr->sh_addr, 2332 2298 (long)shdr->sh_size, info->secstrings + shdr->sh_name); 2333 2299 } ··· 2475 2441 return 0; 2476 2442 } 2477 2443 2444 + int __weak module_post_finalize(const Elf_Ehdr *hdr, 2445 + const Elf_Shdr *sechdrs, 2446 + struct module *me) 2447 + { 2448 + return 0; 2449 + } 2450 + 2478 2451 static int post_relocation(struct module *mod, const struct load_info *info) 2479 2452 { 2453 + int ret; 2454 + 2480 2455 /* Sort exception table now relocations are done. */ 2481 2456 sort_extable(mod->extable, mod->extable + mod->num_exentries); 2482 2457 ··· 2497 2454 add_kallsyms(mod, info); 2498 2455 2499 2456 /* Arch-specific module finalizing. 
*/ 2500 - return module_finalize(info->hdr, info->sechdrs, mod); 2457 + ret = module_finalize(info->hdr, info->sechdrs, mod); 2458 + if (ret) 2459 + return ret; 2460 + 2461 + for_each_mod_mem_type(type) { 2462 + struct module_memory *mem = &mod->mem[type]; 2463 + 2464 + if (mem->is_rox) { 2465 + if (!execmem_update_copy(mem->base, mem->rw_copy, 2466 + mem->size)) 2467 + return -ENOMEM; 2468 + 2469 + vfree(mem->rw_copy); 2470 + mem->rw_copy = NULL; 2471 + } 2472 + } 2473 + 2474 + return module_post_finalize(info->hdr, info->sechdrs, mod); 2501 2475 } 2502 2476 2503 2477 /* Call module constructors. */
+3
kernel/module/strict_rwx.c
··· 34 34 for_class_mod_mem_type(type, text) { 35 35 int ret; 36 36 37 + if (mod->mem[type].is_rox) 38 + continue; 39 + 37 40 if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX)) 38 41 ret = module_set_memory(mod, type, set_memory_rox); 39 42 else
+11
mm/execmem.c
··· 10 10 #include <linux/vmalloc.h> 11 11 #include <linux/execmem.h> 12 12 #include <linux/moduleloader.h> 13 + #include <linux/text-patching.h> 13 14 14 15 static struct execmem_info *execmem_info __ro_after_init; 15 16 static struct execmem_info default_execmem_info __ro_after_init; ··· 68 67 */ 69 68 WARN_ON(in_interrupt()); 70 69 vfree(ptr); 70 + } 71 + 72 + void *execmem_update_copy(void *dst, const void *src, size_t size) 73 + { 74 + return text_poke_copy(dst, src, size); 75 + } 76 + 77 + bool execmem_is_rox(enum execmem_type type) 78 + { 79 + return !!(execmem_info->ranges[type].flags & EXECMEM_ROX_CACHE); 71 80 } 72 81 73 82 static bool execmem_validate(struct execmem_info *info)