Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux

Pull arm64 fixes from Will Deacon:
"Here are some arm64 fixes for -rc5.

The only non-trivial change (in terms of the diffstat) is fixing our
SVE ptrace API for big-endian machines, but the majority of this is
actually the addition of much-needed comments and updates to the
documentation to try to avoid this mess biting us again in future.

There are still a couple of small things on the horizon, but nothing
major at this point.

Summary:

- Fix broken SVE ptrace API when running in a big-endian configuration

- Fix performance regression due to off-by-one in TLBI range checking

- Fix build regression when using Clang"

* tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux:
arm64/sve: Fix missing SVE/FPSIMD endianness conversions
arm64: tlbflush: Ensure start/end of address range are aligned to stride
arm64: Don't unconditionally add -Wno-psabi to KBUILD_CFLAGS

+78 -10
+16
Documentation/arm64/sve.txt
··· 56 56 is to connect to a target process first and then attempt a 57 57 ptrace(PTRACE_GETREGSET, pid, NT_ARM_SVE, &iov). 58 58 59 + * Whenever SVE scalable register values (Zn, Pn, FFR) are exchanged in memory 60 + between userspace and the kernel, the register value is encoded in memory in 61 + an endianness-invariant layout, with bits [(8 * i + 7) : (8 * i)] encoded at 62 + byte offset i from the start of the memory representation. This affects for 63 + example the signal frame (struct sve_context) and ptrace interface 64 + (struct user_sve_header) and associated data. 65 + 66 + Beware that on big-endian systems this results in a different byte order than 67 + for the FPSIMD V-registers, which are stored as single host-endian 128-bit 68 + values, with bits [(127 - 8 * i) : (120 - 8 * i)] of the register encoded at 69 + byte offset i. (struct fpsimd_context, struct user_fpsimd_state). 70 + 59 71 60 72 2. Vector length terminology 61 73 ----------------------------- ··· 135 123 * If the registers are present, the remainder of the record has a vl-dependent 136 124 size and layout. Macros SVE_SIG_* are defined [1] to facilitate access to 137 125 the members. 126 + 127 + * Each scalable register (Zn, Pn, FFR) is stored in an endianness-invariant 128 + layout, with bits [(8 * i + 7) : (8 * i)] stored at byte offset i from the 129 + start of the register's representation in memory. 138 130 139 131 * If the SVE context is too big to fit in sigcontext.__reserved[], then extra 140 132 space is allocated on the stack, an extra_context record is written in
+1 -1
arch/arm64/Makefile
··· 51 51 52 52 KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr) $(brokengasinst) 53 53 KBUILD_CFLAGS += -fno-asynchronous-unwind-tables 54 - KBUILD_CFLAGS += -Wno-psabi 54 + KBUILD_CFLAGS += $(call cc-disable-warning, psabi) 55 55 KBUILD_AFLAGS += $(lseinstr) $(brokengasinst) 56 56 57 57 KBUILD_CFLAGS += $(call cc-option,-mabi=lp64)
+3
arch/arm64/include/asm/tlbflush.h
··· 195 195 unsigned long asid = ASID(vma->vm_mm); 196 196 unsigned long addr; 197 197 198 + start = round_down(start, stride); 199 + end = round_up(end, stride); 200 + 198 201 if ((end - start) >= (MAX_TLBI_OPS * stride)) { 199 202 flush_tlb_mm(vma->vm_mm); 200 203 return;
+7
arch/arm64/include/uapi/asm/kvm.h
··· 260 260 KVM_REG_SIZE_U256 | \ 261 261 ((i) & (KVM_ARM64_SVE_MAX_SLICES - 1))) 262 262 263 + /* 264 + * Register values for KVM_REG_ARM64_SVE_ZREG(), KVM_REG_ARM64_SVE_PREG() and 265 + * KVM_REG_ARM64_SVE_FFR() are represented in memory in an endianness- 266 + * invariant layout which differs from the layout used for the FPSIMD 267 + * V-registers on big-endian systems: see sigcontext.h for more explanation. 268 + */ 269 + 263 270 #define KVM_ARM64_SVE_VQ_MIN __SVE_VQ_MIN 264 271 #define KVM_ARM64_SVE_VQ_MAX __SVE_VQ_MAX 265 272
+4
arch/arm64/include/uapi/asm/ptrace.h
··· 176 176 * FPCR uint32_t FPCR 177 177 * 178 178 * Additional data might be appended in the future. 179 + * 180 + * The Z-, P- and FFR registers are represented in memory in an endianness- 181 + * invariant layout which differs from the layout used for the FPSIMD 182 + * V-registers on big-endian systems: see sigcontext.h for more explanation. 179 183 */ 180 184 181 185 #define SVE_PT_SVE_ZREG_SIZE(vq) __SVE_ZREG_SIZE(vq)
+14
arch/arm64/include/uapi/asm/sigcontext.h
··· 77 77 __uint128_t vregs[32]; 78 78 }; 79 79 80 + /* 81 + * Note: similarly to all other integer fields, each V-register is stored in an 82 + * endianness-dependent format, with the byte at offset i from the start of the 83 + * in-memory representation of the register value containing 84 + * 85 + * bits [(7 + 8 * i) : (8 * i)] of the register on little-endian hosts; or 86 + * bits [(127 - 8 * i) : (120 - 8 * i)] on big-endian hosts. 87 + */ 88 + 80 89 /* ESR_EL1 context */ 81 90 #define ESR_MAGIC 0x45535201 82 91 ··· 213 204 * FFR uint16_t[vq] first-fault status register 214 205 * 215 206 * Additional data might be appended in the future. 207 + * 208 + * Unlike vregs[] in fpsimd_context, each SVE scalable register (Z-, P- or FFR) 209 + * is encoded in memory in an endianness-invariant format, with the byte at 210 + * offset i from the start of the in-memory representation containing bits 211 + * [(7 + 8 * i) : (8 * i)] of the register value. 216 212 */ 217 213 218 214 #define SVE_SIG_ZREG_SIZE(vq) __SVE_ZREG_SIZE(vq)
+33 -9
arch/arm64/kernel/fpsimd.c
··· 39 39 #include <linux/slab.h> 40 40 #include <linux/stddef.h> 41 41 #include <linux/sysctl.h> 42 + #include <linux/swab.h> 42 43 43 44 #include <asm/esr.h> 44 45 #include <asm/fpsimd.h> ··· 353 352 #define ZREG(sve_state, vq, n) ((char *)(sve_state) + \ 354 353 (SVE_SIG_ZREG_OFFSET(vq, n) - SVE_SIG_REGS_OFFSET)) 355 354 355 + #ifdef CONFIG_CPU_BIG_ENDIAN 356 + static __uint128_t arm64_cpu_to_le128(__uint128_t x) 357 + { 358 + u64 a = swab64(x); 359 + u64 b = swab64(x >> 64); 360 + 361 + return ((__uint128_t)a << 64) | b; 362 + } 363 + #else 364 + static __uint128_t arm64_cpu_to_le128(__uint128_t x) 365 + { 366 + return x; 367 + } 368 + #endif 369 + 370 + #define arm64_le128_to_cpu(x) arm64_cpu_to_le128(x) 371 + 356 372 /* 357 373 * Transfer the FPSIMD state in task->thread.uw.fpsimd_state to 358 374 * task->thread.sve_state. ··· 387 369 void *sst = task->thread.sve_state; 388 370 struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state; 389 371 unsigned int i; 372 + __uint128_t *p; 390 373 391 374 if (!system_supports_sve()) 392 375 return; 393 376 394 377 vq = sve_vq_from_vl(task->thread.sve_vl); 395 - for (i = 0; i < 32; ++i) 396 - memcpy(ZREG(sst, vq, i), &fst->vregs[i], 397 - sizeof(fst->vregs[i])); 378 + for (i = 0; i < 32; ++i) { 379 + p = (__uint128_t *)ZREG(sst, vq, i); 380 + *p = arm64_cpu_to_le128(fst->vregs[i]); 381 + } 398 382 } 399 383 400 384 /* ··· 415 395 void const *sst = task->thread.sve_state; 416 396 struct user_fpsimd_state *fst = &task->thread.uw.fpsimd_state; 417 397 unsigned int i; 398 + __uint128_t const *p; 418 399 419 400 if (!system_supports_sve()) 420 401 return; 421 402 422 403 vq = sve_vq_from_vl(task->thread.sve_vl); 423 - for (i = 0; i < 32; ++i) 424 - memcpy(&fst->vregs[i], ZREG(sst, vq, i), 425 - sizeof(fst->vregs[i])); 404 + for (i = 0; i < 32; ++i) { 405 + p = (__uint128_t const *)ZREG(sst, vq, i); 406 + fst->vregs[i] = arm64_le128_to_cpu(*p); 407 + } 426 408 } 427 409 428 410 #ifdef CONFIG_ARM64_SVE ··· 513 491 void *sst = task->thread.sve_state; 514 492 struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state; 515 493 unsigned int i; 494 + __uint128_t *p; 516 495 517 496 if (!test_tsk_thread_flag(task, TIF_SVE)) 518 497 return; ··· 522 499 523 500 memset(sst, 0, SVE_SIG_REGS_SIZE(vq)); 524 501 525 - for (i = 0; i < 32; ++i) 526 - memcpy(ZREG(sst, vq, i), &fst->vregs[i], 527 - sizeof(fst->vregs[i])); 502 + for (i = 0; i < 32; ++i) { 503 + p = (__uint128_t *)ZREG(sst, vq, i); 504 + *p = arm64_cpu_to_le128(fst->vregs[i]); 505 + } 528 506 } 529 507 530 508 int sve_set_vector_length(struct task_struct *task,