Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rmk/linux

Pull ARM updates from Russell King:

- fix a race condition handling PG_dcache_clean

- further cleanups for the fault handling, allowing RT to be enabled

- fix nzones validation in the adfs filesystem driver

- fix for module unwinding

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rmk/linux:
ARM: 9463/1: Allow to enable RT
ARM: 9472/1: fix race condition on PG_dcache_clean in __sync_icache_dcache()
ARM: 9471/1: module: fix unwind section relocation out of range error
fs/adfs: validate nzones in adfs_validate_bblk()
ARM: provide individual is_translation_fault() and is_permission_fault()
ARM: move FSR fault status definitions before fsr_fs()
ARM: use BIT() and GENMASK() for fault status register fields
ARM: move is_permission_fault() and is_translation_fault() to fault.h
ARM: move vmalloc() lazy-page table population
ARM: ensure interrupts are enabled in __do_user_fault()

+128 -92
+1
arch/arm/Kconfig
··· 42 42 select ARCH_SUPPORTS_CFI 43 43 select ARCH_SUPPORTS_HUGETLBFS if ARM_LPAE 44 44 select ARCH_SUPPORTS_PER_VMA_LOCK 45 + select ARCH_SUPPORTS_RT 45 46 select ARCH_USE_BUILTIN_BSWAP 46 47 select ARCH_USE_CMPXCHG_LOCKREF 47 48 select ARCH_USE_MEMTEST
+12
arch/arm/kernel/module-plts.c
··· 225 225 mod->arch.init.plt = s; 226 226 else if (s->sh_type == SHT_SYMTAB) 227 227 syms = (Elf32_Sym *)s->sh_addr; 228 + #if defined(CONFIG_ARM_UNWIND) && !defined(CONFIG_VMSPLIT_3G) 229 + else if (s->sh_type == ELF_SECTION_UNWIND || 230 + (strncmp(".ARM.extab", secstrings + s->sh_name, 10) == 0)) { 231 + /* 232 + * To avoid the possible relocation out of range issue for 233 + * R_ARM_PREL31, mark unwind section .ARM.extab and .ARM.exidx as 234 + * executable so they will be allocated along with .text section to 235 + * meet +/-1GB range requirement of the R_ARM_PREL31 relocation 236 + */ 237 + s->sh_flags |= SHF_EXECINSTR; 238 + } 239 + #endif 228 240 } 229 241 230 242 if (!mod->arch.core.plt || !mod->arch.init.plt) {
+73 -85
arch/arm/mm/fault.c
··· 115 115 return (fsr & FSR_WRITE) && !(fsr & FSR_CM); 116 116 } 117 117 118 - static inline bool is_translation_fault(unsigned int fsr) 119 - { 120 - int fs = fsr_fs(fsr); 121 - #ifdef CONFIG_ARM_LPAE 122 - if ((fs & FS_MMU_NOLL_MASK) == FS_TRANS_NOLL) 123 - return true; 124 - #else 125 - if (fs == FS_L1_TRANS || fs == FS_L2_TRANS) 126 - return true; 127 - #endif 128 - return false; 129 - } 130 - 131 - static inline bool is_permission_fault(unsigned int fsr) 132 - { 133 - int fs = fsr_fs(fsr); 134 - #ifdef CONFIG_ARM_LPAE 135 - if ((fs & FS_MMU_NOLL_MASK) == FS_PERM_NOLL) 136 - return true; 137 - #else 138 - if (fs == FS_L1_PERM || fs == FS_L2_PERM) 139 - return true; 140 - #endif 141 - return false; 142 - } 143 - 144 118 static void die_kernel_fault(const char *msg, struct mm_struct *mm, 145 119 unsigned long addr, unsigned int fsr, 146 120 struct pt_regs *regs) ··· 164 190 165 191 /* 166 192 * Something tried to access memory that isn't in our memory map.. 167 - * User mode accesses just cause a SIGSEGV 193 + * User mode accesses just cause a SIGSEGV. Ensure interrupts are enabled 194 + * for preempt RT. 168 195 */ 169 196 static void 170 197 __do_user_fault(unsigned long addr, unsigned int fsr, unsigned int sig, 171 198 int code, struct pt_regs *regs) 172 199 { 173 200 struct task_struct *tsk = current; 201 + 202 + local_irq_enable(); 174 203 175 204 #ifdef CONFIG_DEBUG_USER 176 205 if (((user_debug & UDBG_SEGV) && (sig == SIGSEGV)) || ··· 235 258 } 236 259 #endif 237 260 261 + /* 262 + * Handle a vmalloc fault, copying the non-leaf page table entries from 263 + * init_mm.pgd. Any kernel context can trigger this, so we must not sleep 264 + * or enable interrupts. Having two CPUs execute this for the same page is 265 + * no problem, we'll just copy the same data twice. 266 + * 267 + * Returns false on failure. 
268 + */ 269 + static bool __kprobes __maybe_unused vmalloc_fault(unsigned long addr) 270 + { 271 + unsigned int index; 272 + pgd_t *pgd, *pgd_k; 273 + p4d_t *p4d, *p4d_k; 274 + pud_t *pud, *pud_k; 275 + pmd_t *pmd, *pmd_k; 276 + 277 + index = pgd_index(addr); 278 + 279 + pgd = cpu_get_pgd() + index; 280 + pgd_k = init_mm.pgd + index; 281 + 282 + p4d = p4d_offset(pgd, addr); 283 + p4d_k = p4d_offset(pgd_k, addr); 284 + 285 + if (p4d_none(*p4d_k)) 286 + return false; 287 + if (!p4d_present(*p4d)) 288 + set_p4d(p4d, *p4d_k); 289 + 290 + pud = pud_offset(p4d, addr); 291 + pud_k = pud_offset(p4d_k, addr); 292 + 293 + if (pud_none(*pud_k)) 294 + return false; 295 + if (!pud_present(*pud)) 296 + set_pud(pud, *pud_k); 297 + 298 + pmd = pmd_offset(pud, addr); 299 + pmd_k = pmd_offset(pud_k, addr); 300 + 301 + #ifdef CONFIG_ARM_LPAE 302 + /* 303 + * Only one hardware entry per PMD with LPAE. 304 + */ 305 + index = 0; 306 + #else 307 + /* 308 + * On ARM one Linux PGD entry contains two hardware entries (see page 309 + * tables layout in pgtable.h). We normally guarantee that we always 310 + * fill both L1 entries. But create_mapping() doesn't follow the rule. 311 + * It can create inidividual L1 entries, so here we have to call 312 + * pmd_none() check for the entry really corresponded to address, not 313 + * for the first of pair. 314 + */ 315 + index = (addr >> SECTION_SHIFT) & 1; 316 + #endif 317 + if (pmd_none(pmd_k[index])) 318 + return false; 319 + 320 + copy_pmd(pmd, pmd_k); 321 + 322 + return true; 323 + } 324 + 238 325 static int __kprobes 239 326 do_kernel_address_page_fault(struct mm_struct *mm, unsigned long addr, 240 327 unsigned int fsr, struct pt_regs *regs) ··· 309 268 * should not be faulting in kernel space, which includes the 310 269 * vector/khelper page. Handle the branch predictor hardening 311 270 * while interrupts are still disabled, then send a SIGSEGV. 271 + * Note that __do_user_fault() will enable interrupts. 
312 272 */ 313 273 harden_branch_predictor(); 314 274 __do_user_fault(addr, fsr, SIGSEGV, SEGV_MAPERR, regs); ··· 534 492 * directly to do_kernel_address_page_fault() to handle. 535 493 * 536 494 * Otherwise, we're probably faulting in the vmalloc() area, so try to fix 537 - * that up. Note that we must not take any locks or enable interrupts in 538 - * this case. 495 + * that up via vmalloc_fault(). 539 496 * 540 - * If vmalloc() fixup fails, that means the non-leaf page tables did not 497 + * If vmalloc_fault() fails, that means the non-leaf page tables did not 541 498 * contain an entry for this address, so handle this via 542 499 * do_kernel_address_page_fault(). 543 500 */ ··· 545 504 do_translation_fault(unsigned long addr, unsigned int fsr, 546 505 struct pt_regs *regs) 547 506 { 548 - unsigned int index; 549 - pgd_t *pgd, *pgd_k; 550 - p4d_t *p4d, *p4d_k; 551 - pud_t *pud, *pud_k; 552 - pmd_t *pmd, *pmd_k; 553 - 554 507 if (addr < TASK_SIZE) 555 508 return do_page_fault(addr, fsr, regs); 556 509 557 - if (user_mode(regs)) 558 - goto bad_area; 510 + if (!user_mode(regs) && vmalloc_fault(addr)) 511 + return 0; 559 512 560 - index = pgd_index(addr); 561 - 562 - pgd = cpu_get_pgd() + index; 563 - pgd_k = init_mm.pgd + index; 564 - 565 - p4d = p4d_offset(pgd, addr); 566 - p4d_k = p4d_offset(pgd_k, addr); 567 - 568 - if (p4d_none(*p4d_k)) 569 - goto bad_area; 570 - if (!p4d_present(*p4d)) 571 - set_p4d(p4d, *p4d_k); 572 - 573 - pud = pud_offset(p4d, addr); 574 - pud_k = pud_offset(p4d_k, addr); 575 - 576 - if (pud_none(*pud_k)) 577 - goto bad_area; 578 - if (!pud_present(*pud)) 579 - set_pud(pud, *pud_k); 580 - 581 - pmd = pmd_offset(pud, addr); 582 - pmd_k = pmd_offset(pud_k, addr); 583 - 584 - #ifdef CONFIG_ARM_LPAE 585 - /* 586 - * Only one hardware entry per PMD with LPAE. 587 - */ 588 - index = 0; 589 - #else 590 - /* 591 - * On ARM one Linux PGD entry contains two hardware entries (see page 592 - * tables layout in pgtable.h). 
We normally guarantee that we always 593 - * fill both L1 entries. But create_mapping() doesn't follow the rule. 594 - * It can create inidividual L1 entries, so here we have to call 595 - * pmd_none() check for the entry really corresponded to address, not 596 - * for the first of pair. 597 - */ 598 - index = (addr >> SECTION_SHIFT) & 1; 599 - #endif 600 - if (pmd_none(pmd_k[index])) 601 - goto bad_area; 602 - 603 - copy_pmd(pmd, pmd_k); 604 - return 0; 605 - 606 - bad_area: 607 513 do_kernel_address_page_fault(current->mm, addr, fsr, regs); 608 514 609 515 return 0;
+36 -6
arch/arm/mm/fault.h
··· 5 5 /* 6 6 * Fault status register encodings. We steal bit 31 for our own purposes. 7 7 */ 8 - #define FSR_LNX_PF (1 << 31) 9 - #define FSR_CM (1 << 13) 10 - #define FSR_WRITE (1 << 11) 11 - #define FSR_FS4 (1 << 10) 12 - #define FSR_FS3_0 (15) 13 - #define FSR_FS5_0 (0x3f) 8 + #define FSR_LNX_PF BIT(31) 9 + #define FSR_CM BIT(13) 10 + #define FSR_WRITE BIT(11) 14 11 15 12 #ifdef CONFIG_ARM_LPAE 16 13 #define FSR_FS_AEA 17 ··· 15 18 #define FS_PERM_NOLL 0xC 16 19 #define FS_MMU_NOLL_MASK 0x3C 17 20 21 + #define FSR_FS5_0 GENMASK(5, 0) 22 + 18 23 static inline int fsr_fs(unsigned int fsr) 19 24 { 20 25 return fsr & FSR_FS5_0; 26 + } 27 + 28 + static inline bool is_translation_fault(unsigned int fsr) 29 + { 30 + int fs = fsr_fs(fsr); 31 + 32 + return (fs & FS_MMU_NOLL_MASK) == FS_TRANS_NOLL; 33 + } 34 + 35 + static inline bool is_permission_fault(unsigned int fsr) 36 + { 37 + int fs = fsr_fs(fsr); 38 + 39 + return (fs & FS_MMU_NOLL_MASK) == FS_PERM_NOLL; 21 40 } 22 41 #else 23 42 #define FSR_FS_AEA 22 ··· 42 29 #define FS_L1_PERM 0xD 43 30 #define FS_L2_PERM 0xF 44 31 32 + #define FSR_FS4 BIT(10) 33 + #define FSR_FS3_0 GENMASK(3, 0) 34 + 45 35 static inline int fsr_fs(unsigned int fsr) 46 36 { 47 37 return (fsr & FSR_FS3_0) | (fsr & FSR_FS4) >> 6; 38 + } 39 + 40 + static inline bool is_translation_fault(unsigned int fsr) 41 + { 42 + int fs = fsr_fs(fsr); 43 + 44 + return fs == FS_L1_TRANS || fs == FS_L2_TRANS; 45 + } 46 + 47 + static inline bool is_permission_fault(unsigned int fsr) 48 + { 49 + int fs = fsr_fs(fsr); 50 + 51 + return fs == FS_L1_PERM || fs == FS_L2_PERM; 48 52 } 49 53 #endif 50 54
+3 -1
arch/arm/mm/flush.c
··· 304 304 else 305 305 mapping = NULL; 306 306 307 - if (!test_and_set_bit(PG_dcache_clean, &folio->flags.f)) 307 + if (!test_bit(PG_dcache_clean, &folio->flags.f)) { 308 308 __flush_dcache_folio(mapping, folio); 309 + set_bit(PG_dcache_clean, &folio->flags.f); 310 + } 309 311 310 312 if (pte_exec(pteval)) 311 313 __flush_icache_all();
+3
fs/adfs/super.c
··· 317 317 if (adfs_checkdiscrecord(dr)) 318 318 return -EILSEQ; 319 319 320 + if ((dr->nzones | dr->nzones_high << 8) == 0) 321 + return -EILSEQ; 322 + 320 323 *drp = dr; 321 324 return 0; 322 325 }