From b992f01e66150fc5e90be4a96f5eb8e634c8249e Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Thu, 6 Jan 2022 17:15:05 +0530 Subject: bpf: Guard against accessing NULL pt_regs in bpf_get_task_stack() task_pt_regs() can return NULL on powerpc for kernel threads. This is then used in __bpf_get_stack() to check for user mode, resulting in a kernel oops. Guard against this by checking return value of task_pt_regs() before trying to obtain the call chain. Fixes: fa28dcb82a38f8 ("bpf: Introduce helper bpf_get_task_stack()") Cc: stable@vger.kernel.org # v5.9+ Signed-off-by: Naveen N. Rao Acked-by: Daniel Borkmann Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/d5ef83c361cc255494afd15ff1b4fb02a36e1dcf.1641468127.git.naveen.n.rao@linux.vnet.ibm.com --- kernel/bpf/stackmap.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 49e567209c6b..22c8ae94e4c1 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -472,13 +472,14 @@ BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf, u32, size, u64, flags) { struct pt_regs *regs; - long res; + long res = -EINVAL; if (!try_get_task_stack(task)) return -EFAULT; regs = task_pt_regs(task); - res = __bpf_get_stack(regs, task, NULL, buf, size, flags); + if (regs) + res = __bpf_get_stack(regs, task, NULL, buf, size, flags); put_task_stack(task); return res; -- cgit v1.2.3 From fab07611fb2e6a15fac05c4583045ca5582fd826 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Thu, 6 Jan 2022 17:15:06 +0530 Subject: powerpc32/bpf: Fix codegen for bpf-to-bpf calls Pad instructions emitted for BPF_CALL so that the number of instructions generated does not change for different function addresses. This is especially important for calls to other bpf functions, whose address will only be known during extra pass. Fixes: 51c66ad849a703 ("powerpc/bpf: Implement extended BPF on PPC32") Cc: stable@vger.kernel.org # v5.13+ Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/52d8fe51f7620a6f27f377791564d79d75463576.1641468127.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/net/bpf_jit_comp32.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index faaebd446cad..c20b49bf8f5b 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -191,6 +191,9 @@ void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 fun if (image && rel < 0x2000000 && rel >= -0x2000000) { PPC_BL_ABS(func); + EMIT(PPC_RAW_NOP()); + EMIT(PPC_RAW_NOP()); + EMIT(PPC_RAW_NOP()); } else { /* Load function address into r0 */ EMIT(PPC_RAW_LIS(_R0, IMM_H(func))); -- cgit v1.2.3 From f9320c49993ca3c0ec0f9a7026b313735306bb8b Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Thu, 6 Jan 2022 17:15:07 +0530 Subject: powerpc/bpf: Update ldimm64 instructions during extra pass These instructions are updated after the initial JIT, so redo codegen during the extra pass. Rename bpf_jit_fixup_subprog_calls() to clarify that this is more than just subprog calls. Fixes: 69c087ba6225b5 ("bpf: Add bpf_for_each_map_elem() helper") Cc: stable@vger.kernel.org # v5.15 Signed-off-by: Naveen N. Rao Tested-by: Jiri Olsa Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/7cc162af77ba918eb3ecd26ec9e7824bc44b1fae.1641468127.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/net/bpf_jit_comp.c | 29 +++++++++++++++++++++++------ arch/powerpc/net/bpf_jit_comp32.c | 6 ++++++ arch/powerpc/net/bpf_jit_comp64.c | 7 ++++++- 3 files changed, 35 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index d6ffdd0f2309..56dd1f4e3e44 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -23,15 +23,15 @@ static void bpf_jit_fill_ill_insns(void *area, unsigned int size) memset32(area, BREAKPOINT_INSTRUCTION, size / 4); } -/* Fix the branch target addresses for subprog calls */ -static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image, - struct codegen_context *ctx, u32 *addrs) +/* Fix updated addresses (for subprog calls, ldimm64, et al) during extra pass */ +static int bpf_jit_fixup_addresses(struct bpf_prog *fp, u32 *image, + struct codegen_context *ctx, u32 *addrs) { const struct bpf_insn *insn = fp->insnsi; bool func_addr_fixed; u64 func_addr; u32 tmp_idx; - int i, ret; + int i, j, ret; for (i = 0; i < fp->len; i++) { /* @@ -66,6 +66,23 @@ static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image, * of the JITed sequence remains unchanged. */ ctx->idx = tmp_idx; + } else if (insn[i].code == (BPF_LD | BPF_IMM | BPF_DW)) { + tmp_idx = ctx->idx; + ctx->idx = addrs[i] / 4; +#ifdef CONFIG_PPC32 + PPC_LI32(ctx->b2p[insn[i].dst_reg] - 1, (u32)insn[i + 1].imm); + PPC_LI32(ctx->b2p[insn[i].dst_reg], (u32)insn[i].imm); + for (j = ctx->idx - addrs[i] / 4; j < 4; j++) + EMIT(PPC_RAW_NOP()); +#else + func_addr = ((u64)(u32)insn[i].imm) | (((u64)(u32)insn[i + 1].imm) << 32); + PPC_LI64(b2p[insn[i].dst_reg], func_addr); + /* overwrite rest with nops */ + for (j = ctx->idx - addrs[i] / 4; j < 5; j++) + EMIT(PPC_RAW_NOP()); +#endif + ctx->idx = tmp_idx; + i++; } } @@ -200,13 +217,13 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) /* * Do not touch the prologue and epilogue as they will remain * unchanged. Only fix the branch target address for subprog - * calls in the body. + * calls in the body, and ldimm64 instructions. * * This does not change the offsets and lengths of the subprog * call instruction sequences and hence, the size of the JITed * image as well. */ - bpf_jit_fixup_subprog_calls(fp, code_base, &cgctx, addrs); + bpf_jit_fixup_addresses(fp, code_base, &cgctx, addrs); /* There is no need to perform the usual passes. */ goto skip_codegen_passes; diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index c20b49bf8f5b..cf8dd8aea386 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -293,6 +293,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * bool func_addr_fixed; u64 func_addr; u32 true_cond; + u32 tmp_idx; + int j; /* * addrs[] maps a BPF bytecode address into a real offset from @@ -908,8 +910,12 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * * 16 byte instruction that uses two 'struct bpf_insn' */ case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */ + tmp_idx = ctx->idx; PPC_LI32(dst_reg_h, (u32)insn[i + 1].imm); PPC_LI32(dst_reg, (u32)insn[i].imm); + /* padding to allow full 4 instructions for later patching */ + for (j = ctx->idx - tmp_idx; j < 4; j++) + EMIT(PPC_RAW_NOP()); /* Adjust for two bpf instructions */ addrs[++i] = ctx->idx * 4; break; diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 9eae8d8ed340..bfdd417c6b81 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -319,6 +319,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * u64 imm64; u32 true_cond; u32 tmp_idx; + int j; /* * addrs[] maps a BPF bytecode address into a real offset from @@ -848,9 +849,13 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */ imm64 = ((u64)(u32) insn[i].imm) | (((u64)(u32) insn[i+1].imm) << 32); + tmp_idx = ctx->idx; + PPC_LI64(dst_reg, imm64); + /* padding to allow full 5 instructions for later patching */ + for (j = ctx->idx - tmp_idx; j < 5; j++) + EMIT(PPC_RAW_NOP()); /* Adjust for two bpf instructions */ addrs[++i] = ctx->idx * 4; - PPC_LI64(dst_reg, imm64); break; /* -- cgit v1.2.3 From 88a71086c48ae98e93c0208044827621e9717f7e Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Thu, 6 Jan 2022 17:15:08 +0530 Subject: tools/bpf: Rename 'struct event' to avoid naming conflict On ppc64le, trying to build bpf seltests throws the below warning: In file included from runqslower.bpf.c:5: ./runqslower.h:7:8: error: redefinition of 'event' struct event { ^ /home/naveen/linux/tools/testing/selftests/bpf/tools/build/runqslower/vmlinux.h:156602:8: note: previous definition is here struct event { ^ This happens since 'struct event' is defined in drivers/net/ethernet/alteon/acenic.h . Rename the one in runqslower to a more appropriate 'runq_event' to avoid the naming conflict. Signed-off-by: Naveen N. Rao Acked-by: Daniel Borkmann Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c13cb3767d26257ca4387b8296b632b433a58db6.1641468127.git.naveen.n.rao@linux.vnet.ibm.com --- tools/bpf/runqslower/runqslower.bpf.c | 2 +- tools/bpf/runqslower/runqslower.c | 2 +- tools/bpf/runqslower/runqslower.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/bpf/runqslower/runqslower.bpf.c b/tools/bpf/runqslower/runqslower.bpf.c index ab9353f2fd46..9a5c1f008fe6 100644 --- a/tools/bpf/runqslower/runqslower.bpf.c +++ b/tools/bpf/runqslower/runqslower.bpf.c @@ -68,7 +68,7 @@ int handle__sched_switch(u64 *ctx) */ struct task_struct *prev = (struct task_struct *)ctx[1]; struct task_struct *next = (struct task_struct *)ctx[2]; - struct event event = {}; + struct runq_event event = {}; u64 *tsp, delta_us; long state; u32 pid; diff --git a/tools/bpf/runqslower/runqslower.c b/tools/bpf/runqslower/runqslower.c index 2414cc764461..d78f4148597f 100644 --- a/tools/bpf/runqslower/runqslower.c +++ b/tools/bpf/runqslower/runqslower.c @@ -100,7 +100,7 @@ static int bump_memlock_rlimit(void) void handle_event(void *ctx, int cpu, void *data, __u32 data_sz) { - const struct event *e = data; + const struct runq_event *e = data; struct tm *tm; char ts[32]; time_t t; diff --git a/tools/bpf/runqslower/runqslower.h b/tools/bpf/runqslower/runqslower.h index 9db225425e5f..4f70f07200c2 100644 --- a/tools/bpf/runqslower/runqslower.h +++ b/tools/bpf/runqslower/runqslower.h @@ -4,7 +4,7 @@ #define TASK_COMM_LEN 16 -struct event { +struct runq_event { char task[TASK_COMM_LEN]; __u64 delta_us; pid_t pid; -- cgit v1.2.3 From 3f5f766d5f7f95a69a630da3544a1a0cee1cdddf Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Thu, 6 Jan 2022 17:15:12 +0530 Subject: powerpc64/bpf: Limit 'ldbrx' to processors compliant with ISA v2.06 Johan reported the below crash with test_bpf on ppc64 e5500: test_bpf: #296 ALU_END_FROM_LE 64: 0x0123456789abcdef -> 0x67452301 jited:1 Oops: Exception in kernel mode, sig: 4 [#1] BE PAGE_SIZE=4K SMP NR_CPUS=24 QEMU e500 Modules linked in: test_bpf(+) CPU: 0 PID: 76 Comm: insmod Not tainted 5.14.0-03771-g98c2059e008a-dirty #1 NIP: 8000000000061c3c LR: 80000000006dea64 CTR: 8000000000061c18 REGS: c0000000032d3420 TRAP: 0700 Not tainted (5.14.0-03771-g98c2059e008a-dirty) MSR: 0000000080089000 CR: 88002822 XER: 20000000 IRQMASK: 0 <...> NIP [8000000000061c3c] 0x8000000000061c3c LR [80000000006dea64] .__run_one+0x104/0x17c [test_bpf] Call Trace: .__run_one+0x60/0x17c [test_bpf] (unreliable) .test_bpf_init+0x6a8/0xdc8 [test_bpf] .do_one_initcall+0x6c/0x28c .do_init_module+0x68/0x28c .load_module+0x2460/0x2abc .__do_sys_init_module+0x120/0x18c .system_call_exception+0x110/0x1b8 system_call_common+0xf0/0x210 --- interrupt: c00 at 0x101d0acc <...> ---[ end trace 47b2bf19090bb3d0 ]--- Illegal instruction The illegal instruction turned out to be 'ldbrx' emitted for BPF_FROM_[L|B]E, which was only introduced in ISA v2.06. Guard use of the same and implement an alternative approach for older processors. Fixes: 156d0e290e969c ("powerpc/ebpf/jit: Implement JIT compiler for extended BPF") Reported-by: Johan Almbladh Signed-off-by: Naveen N. Rao Tested-by: Johan Almbladh Acked-by: Johan Almbladh Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/d1e51c6fdf572062cf3009a751c3406bda01b832.1641468127.git.naveen.n.rao@linux.vnet.ibm.com --- arch/powerpc/include/asm/ppc-opcode.h | 1 + arch/powerpc/net/bpf_jit_comp64.c | 22 +++++++++++++--------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index efad07081cc0..9675303b724e 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -500,6 +500,7 @@ #define PPC_RAW_LDX(r, base, b) (0x7c00002a | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b)) #define PPC_RAW_LHZ(r, base, i) (0xa0000000 | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) #define PPC_RAW_LHBRX(r, base, b) (0x7c00062c | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b)) +#define PPC_RAW_LWBRX(r, base, b) (0x7c00042c | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b)) #define PPC_RAW_LDBRX(r, base, b) (0x7c000428 | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b)) #define PPC_RAW_STWCX(s, a, b) (0x7c00012d | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_RAW_CMPWI(a, i) (0x2c000000 | ___PPC_RA(a) | IMM_L(i)) diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index bfdd417c6b81..e1e8c934308a 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -634,17 +634,21 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * EMIT(PPC_RAW_MR(dst_reg, b2p[TMP_REG_1])); break; case 64: - /* - * Way easier and faster(?) to store the value - * into stack and then use ldbrx - * - * ctx->seen will be reliable in pass2, but - * the instructions generated will remain the - * same across all passes - */ + /* Store the value to stack and then use byte-reverse loads */ PPC_BPF_STL(dst_reg, 1, bpf_jit_stack_local(ctx)); EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx))); - EMIT(PPC_RAW_LDBRX(dst_reg, 0, b2p[TMP_REG_1])); + if (cpu_has_feature(CPU_FTR_ARCH_206)) { + EMIT(PPC_RAW_LDBRX(dst_reg, 0, b2p[TMP_REG_1])); + } else { + EMIT(PPC_RAW_LWBRX(dst_reg, 0, b2p[TMP_REG_1])); + if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN)) + EMIT(PPC_RAW_SLDI(dst_reg, dst_reg, 32)); + EMIT(PPC_RAW_LI(b2p[TMP_REG_2], 4)); + EMIT(PPC_RAW_LWBRX(b2p[TMP_REG_2], b2p[TMP_REG_2], b2p[TMP_REG_1])); + if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) + EMIT(PPC_RAW_SLDI(b2p[TMP_REG_2], b2p[TMP_REG_2], 32)); + EMIT(PPC_RAW_OR(dst_reg, dst_reg, b2p[TMP_REG_2])); + } break; } break; -- cgit v1.2.3 From 252745240ba0ae774d2f80c5e185ed59fbc4fb41 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 14 Jan 2022 11:26:25 +0000 Subject: powerpc/audit: Fix syscall_get_arch() Commit 770cec16cdc9 ("powerpc/audit: Simplify syscall_get_arch()") and commit 898a1ef06ad4 ("powerpc/audit: Avoid unneccessary #ifdef in syscall_get_arguments()") replaced test_tsk_thread_flag(task, TIF_32BIT)) by is_32bit_task(). But is_32bit_task() applies on current task while be want the test done on task 'task' So add a new macro is_tsk_32bit_task() to check any task. Fixes: 770cec16cdc9 ("powerpc/audit: Simplify syscall_get_arch()") Fixes: 898a1ef06ad4 ("powerpc/audit: Avoid unneccessary #ifdef in syscall_get_arguments()") Cc: stable@vger.kernel.org Reported-by: Dmitry V. Levin Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c55cddb8f65713bf5859ed675d75a50cb37d5995.1642159570.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/syscall.h | 4 ++-- arch/powerpc/include/asm/thread_info.h | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h index 52d05b465e3e..25fc8ad9a27a 100644 --- a/arch/powerpc/include/asm/syscall.h +++ b/arch/powerpc/include/asm/syscall.h @@ -90,7 +90,7 @@ static inline void syscall_get_arguments(struct task_struct *task, unsigned long val, mask = -1UL; unsigned int n = 6; - if (is_32bit_task()) + if (is_tsk_32bit_task(task)) mask = 0xffffffff; while (n--) { @@ -105,7 +105,7 @@ static inline void syscall_get_arguments(struct task_struct *task, static inline int syscall_get_arch(struct task_struct *task) { - if (is_32bit_task()) + if (is_tsk_32bit_task(task)) return AUDIT_ARCH_PPC; else if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN)) return AUDIT_ARCH_PPC64LE; diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 5725029aaa29..d6e649b3c70b 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -168,8 +168,10 @@ static inline bool test_thread_local_flags(unsigned int flags) #ifdef CONFIG_COMPAT #define is_32bit_task() (test_thread_flag(TIF_32BIT)) +#define is_tsk_32bit_task(tsk) (test_tsk_thread_flag(tsk, TIF_32BIT)) #else #define is_32bit_task() (IS_ENABLED(CONFIG_PPC32)) +#define is_tsk_32bit_task(tsk) (IS_ENABLED(CONFIG_PPC32)) #endif #if defined(CONFIG_PPC64) -- cgit v1.2.3 From 87b9d74fb0be80054c729e8d6a119ca0955cedf3 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 10 Jan 2022 15:29:53 +0000 Subject: powerpc/time: Fix build failure due to do_hard_irq_enable() on PPC32 CC arch/powerpc/kernel/time.o In file included from : ./arch/powerpc/include/asm/hw_irq.h: In function 'do_hard_irq_enable': ././include/linux/compiler_types.h:335:45: error: call to '__compiletime_assert_35' declared with attribute error: BUILD_BUG failed 335 | _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) | ^ ././include/linux/compiler_types.h:316:25: note: in definition of macro '__compiletime_assert' 316 | prefix ## suffix(); \ | ^~~~~~ ././include/linux/compiler_types.h:335:9: note: in expansion of macro '_compiletime_assert' 335 | _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) | ^~~~~~~~~~~~~~~~~~~ ./include/linux/build_bug.h:39:37: note: in expansion of macro 'compiletime_assert' 39 | #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg) | ^~~~~~~~~~~~~~~~~~ ./include/linux/build_bug.h:59:21: note: in expansion of macro 'BUILD_BUG_ON_MSG' 59 | #define BUILD_BUG() BUILD_BUG_ON_MSG(1, "BUILD_BUG failed") | ^~~~~~~~~~~~~~~~ ./arch/powerpc/include/asm/hw_irq.h:483:9: note: in expansion of macro 'BUILD_BUG' 483 | BUILD_BUG(); | ^~~~~~~~~ should_hard_irq_enable() returns false on PPC32 so this BUILD_BUG() shouldn't trigger. Force inlining of should_hard_irq_enable() Fixes: 0faf20a1ad16 ("powerpc/64s/interrupt: Don't enable MSR[EE] in irq handlers unless perf is in use") Signed-off-by: Christophe Leroy Acked-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/247e01e0e10f4dbc59b5ff89e81702eb1ee7641e.1641828571.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/hw_irq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index a58fb4aa6c81..674e5aaafcbd 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -473,7 +473,7 @@ static inline bool arch_irq_disabled_regs(struct pt_regs *regs) return !(regs->msr & MSR_EE); } -static inline bool should_hard_irq_enable(void) +static __always_inline bool should_hard_irq_enable(void) { return false; } -- cgit v1.2.3 From d37823c3528e5e0705fc7746bcbc2afffb619259 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 10 Jan 2022 15:29:25 +0000 Subject: powerpc/32s: Fix kasan_init_region() for KASAN It has been reported some configuration where the kernel doesn't boot with KASAN enabled. This is due to wrong BAT allocation for the KASAN area: ---[ Data Block Address Translation ]--- 0: 0xc0000000-0xcfffffff 0x00000000 256M Kernel rw m 1: 0xd0000000-0xdfffffff 0x10000000 256M Kernel rw m 2: 0xe0000000-0xefffffff 0x20000000 256M Kernel rw m 3: 0xf8000000-0xf9ffffff 0x2a000000 32M Kernel rw m 4: 0xfa000000-0xfdffffff 0x2c000000 64M Kernel rw m A BAT must have both virtual and physical addresses alignment matching the size of the BAT. This is not the case for BAT 4 above. Fix kasan_init_region() by using block_size() function that is in book3s32/mmu.c. To be able to reuse it here, make it non static and change its name to bat_block_size() in order to avoid name conflict with block_size() defined in Also reuse find_free_bat() to avoid an error message from setbat() when no BAT is available. And allocate memory outside of linear memory mapping to avoid wasting that precious space. With this change we get correct alignment for BATs and KASAN shadow memory is allocated outside the linear memory space. ---[ Data Block Address Translation ]--- 0: 0xc0000000-0xcfffffff 0x00000000 256M Kernel rw 1: 0xd0000000-0xdfffffff 0x10000000 256M Kernel rw 2: 0xe0000000-0xefffffff 0x20000000 256M Kernel rw 3: 0xf8000000-0xfbffffff 0x7c000000 64M Kernel rw 4: 0xfc000000-0xfdffffff 0x7a000000 32M Kernel rw Fixes: 7974c4732642 ("powerpc/32s: Implement dedicated kasan_init_region()") Cc: stable@vger.kernel.org Reported-by: Maxime Bizon Signed-off-by: Christophe Leroy Tested-by: Maxime Bizon Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/7a50ef902494d1325227d47d33dada01e52e5518.1641818726.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/mmu-hash.h | 2 + arch/powerpc/mm/book3s32/mmu.c | 10 ++--- arch/powerpc/mm/kasan/book3s_32.c | 59 ++++++++++++++------------- 3 files changed, 38 insertions(+), 33 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/mmu-hash.h b/arch/powerpc/include/asm/book3s/32/mmu-hash.h index 7be27862329f..78c6a5fde1d6 100644 --- a/arch/powerpc/include/asm/book3s/32/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/32/mmu-hash.h @@ -223,6 +223,8 @@ static __always_inline void update_user_segments(u32 val) update_user_segment(15, val); } +int __init find_free_bat(void); +unsigned int bat_block_size(unsigned long base, unsigned long top); #endif /* !__ASSEMBLY__ */ /* We happily ignore the smaller BATs on 601, we don't actually use diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 94045b265b6b..203735caf691 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -76,7 +76,7 @@ unsigned long p_block_mapped(phys_addr_t pa) return 0; } -static int __init find_free_bat(void) +int __init find_free_bat(void) { int b; int n = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4; @@ -100,7 +100,7 @@ static int __init find_free_bat(void) * - block size has to be a power of two. This is calculated by finding the * highest bit set to 1. */ -static unsigned int block_size(unsigned long base, unsigned long top) +unsigned int bat_block_size(unsigned long base, unsigned long top) { unsigned int max_size = SZ_256M; unsigned int base_shift = (ffs(base) - 1) & 31; @@ -145,7 +145,7 @@ static unsigned long __init __mmu_mapin_ram(unsigned long base, unsigned long to int idx; while ((idx = find_free_bat()) != -1 && base != top) { - unsigned int size = block_size(base, top); + unsigned int size = bat_block_size(base, top); if (size < 128 << 10) break; @@ -201,12 +201,12 @@ void mmu_mark_initmem_nx(void) unsigned long size; for (i = 0; i < nb - 1 && base < top;) { - size = block_size(base, top); + size = bat_block_size(base, top); setibat(i++, PAGE_OFFSET + base, base, size, PAGE_KERNEL_TEXT); base += size; } if (base < top) { - size = block_size(base, top); + size = bat_block_size(base, top); if ((top - base) > size) { size <<= 1; if (strict_kernel_rwx_enabled() && base + size > border) diff --git a/arch/powerpc/mm/kasan/book3s_32.c b/arch/powerpc/mm/kasan/book3s_32.c index 35b287b0a8da..450a67ef0bbe 100644 --- a/arch/powerpc/mm/kasan/book3s_32.c +++ b/arch/powerpc/mm/kasan/book3s_32.c @@ -10,48 +10,51 @@ int __init kasan_init_region(void *start, size_t size) { unsigned long k_start = (unsigned long)kasan_mem_to_shadow(start); unsigned long k_end = (unsigned long)kasan_mem_to_shadow(start + size); - unsigned long k_cur = k_start; - int k_size = k_end - k_start; - int k_size_base = 1 << (ffs(k_size) - 1); + unsigned long k_nobat = k_start; + unsigned long k_cur; + phys_addr_t phys; int ret; - void *block; - block = memblock_alloc(k_size, k_size_base); - - if (block && k_size_base >= SZ_128K && k_start == ALIGN(k_start, k_size_base)) { - int shift = ffs(k_size - k_size_base); - int k_size_more = shift ? 1 << (shift - 1) : 0; - - setbat(-1, k_start, __pa(block), k_size_base, PAGE_KERNEL); - if (k_size_more >= SZ_128K) - setbat(-1, k_start + k_size_base, __pa(block) + k_size_base, - k_size_more, PAGE_KERNEL); - if (v_block_mapped(k_start)) - k_cur = k_start + k_size_base; - if (v_block_mapped(k_start + k_size_base)) - k_cur = k_start + k_size_base + k_size_more; - - update_bats(); + while (k_nobat < k_end) { + unsigned int k_size = bat_block_size(k_nobat, k_end); + int idx = find_free_bat(); + + if (idx == -1) + break; + if (k_size < SZ_128K) + break; + phys = memblock_phys_alloc_range(k_size, k_size, 0, + MEMBLOCK_ALLOC_ANYWHERE); + if (!phys) + break; + + setbat(idx, k_nobat, phys, k_size, PAGE_KERNEL); + k_nobat += k_size; } + if (k_nobat != k_start) + update_bats(); - if (!block) - block = memblock_alloc(k_size, PAGE_SIZE); - if (!block) - return -ENOMEM; + if (k_nobat < k_end) { + phys = memblock_phys_alloc_range(k_end - k_nobat, PAGE_SIZE, 0, + MEMBLOCK_ALLOC_ANYWHERE); + if (!phys) + return -ENOMEM; + } ret = kasan_init_shadow_page_tables(k_start, k_end); if (ret) return ret; - kasan_update_early_region(k_start, k_cur, __pte(0)); + kasan_update_early_region(k_start, k_nobat, __pte(0)); - for (; k_cur < k_end; k_cur += PAGE_SIZE) { + for (k_cur = k_nobat; k_cur < k_end; k_cur += PAGE_SIZE) { pmd_t *pmd = pmd_off_k(k_cur); - void *va = block + k_cur - k_start; - pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL); + pte_t pte = pfn_pte(PHYS_PFN(phys + k_cur - k_nobat), PAGE_KERNEL); __set_pte_at(&init_mm, k_cur, pte_offset_kernel(pmd, k_cur), pte, 0); } flush_tlb_kernel_range(k_start, k_end); + memset(kasan_mem_to_shadow(start), 0, k_end - k_start); + return 0; } -- cgit v1.2.3 From 429a64f6e91fbfe4912d17247c27d0d66767b1c2 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Fri, 14 Jan 2022 08:43:55 +0530 Subject: powerpc/perf: Only define power_pmu_wants_prompt_pmi() for CONFIG_PPC64 power_pmu_wants_prompt_pmi() is used to decide if PMIs should be taken promptly. This is valid only for ppc64 and is used only if CONFIG_PPC_BOOK3S_64=y. Hence include the function under config check for PPC64. Fixes warning for 32-bit compilation: arch/powerpc/perf/core-book3s.c:2455:6: warning: no previous prototype for 'power_pmu_wants_prompt_pmi' 2455 | bool power_pmu_wants_prompt_pmi(void) | ^~~~~~~~~~~~~~~~~~~~~~~~~~ Fixes: 5a7745b96f43 ("powerpc/64s/perf: add power_pmu_wants_prompt_pmi to say whether perf wants PMIs to be soft-NMI") Reported-by: kernel test robot Signed-off-by: Athira Rajeev Reviewed-by: Nicholas Piggin [mpe: Move inside existing CONFIG_PPC64 ifdef block] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220114031355.87480-1-atrajeev@linux.vnet.ibm.com --- arch/powerpc/perf/core-book3s.c | 58 ++++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 30 deletions(-) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index a684901b6965..32b98b7a1f86 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -776,6 +776,34 @@ static void pmao_restore_workaround(bool ebb) mtspr(SPRN_PMC6, pmcs[5]); } +/* + * If the perf subsystem wants performance monitor interrupts as soon as + * possible (e.g., to sample the instruction address and stack chain), + * this should return true. The IRQ masking code can then enable MSR[EE] + * in some places (e.g., interrupt handlers) that allows PMI interrupts + * through to improve accuracy of profiles, at the cost of some performance. + * + * The PMU counters can be enabled by other means (e.g., sysfs raw SPR + * access), but in that case there is no need for prompt PMI handling. + * + * This currently returns true if any perf counter is being used. It + * could possibly return false if only events are being counted rather than + * samples being taken, but for now this is good enough. + */ +bool power_pmu_wants_prompt_pmi(void) +{ + struct cpu_hw_events *cpuhw; + + /* + * This could simply test local_paca->pmcregs_in_use if that were not + * under ifdef KVM. + */ + if (!ppmu) + return false; + + cpuhw = this_cpu_ptr(&cpu_hw_events); + return cpuhw->n_events; +} #endif /* CONFIG_PPC64 */ static void perf_event_interrupt(struct pt_regs *regs); @@ -2438,36 +2466,6 @@ static void perf_event_interrupt(struct pt_regs *regs) perf_sample_event_took(sched_clock() - start_clock); } -/* - * If the perf subsystem wants performance monitor interrupts as soon as - * possible (e.g., to sample the instruction address and stack chain), - * this should return true. The IRQ masking code can then enable MSR[EE] - * in some places (e.g., interrupt handlers) that allows PMI interrupts - * though to improve accuracy of profiles, at the cost of some performance. - * - * The PMU counters can be enabled by other means (e.g., sysfs raw SPR - * access), but in that case there is no need for prompt PMI handling. - * - * This currently returns true if any perf counter is being used. It - * could possibly return false if only events are being counted rather than - * samples being taken, but for now this is good enough. - */ -bool power_pmu_wants_prompt_pmi(void) -{ - struct cpu_hw_events *cpuhw; - - /* - * This could simply test local_paca->pmcregs_in_use if that were not - * under ifdef KVM. - */ - - if (!ppmu) - return false; - - cpuhw = this_cpu_ptr(&cpu_hw_events); - return cpuhw->n_events; -} - static int power_pmu_prepare_cpu(unsigned int cpu) { struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); -- cgit v1.2.3 From aee101d7b95a03078945681dd7f7ea5e4a1e7686 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 17 Jan 2022 23:44:03 +1000 Subject: powerpc/64s: Mask SRR0 before checking against the masked NIP Commit 314f6c23dd8d ("powerpc/64s: Mask NIP before checking against SRR0") masked off the low 2 bits of the NIP value in the interrupt stack frame in case they are non-zero and mis-compare against a SRR0 register value of a CPU which always reads back 0 from the 2 low bits which are reserved. This now causes the opposite problem that an implementation which does implement those bits in SRR0 will mis-compare against the masked NIP value in which they have been cleared. QEMU is one such implementation, and this is allowed by the architecture. This can be triggered by sigfuz by setting low bits of PT_NIP in the signal context. Fix this for now by masking the SRR0 bits as well. Cleaner is probably to sanitise these values before putting them in registers or stack, but this is the quick and backportable fix. Fixes: 314f6c23dd8d ("powerpc/64s: Mask NIP before checking against SRR0") Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220117134403.2995059-1-npiggin@gmail.com --- arch/powerpc/kernel/interrupt_64.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S index 92088f848266..7bab2d7de372 100644 --- a/arch/powerpc/kernel/interrupt_64.S +++ b/arch/powerpc/kernel/interrupt_64.S @@ -30,6 +30,7 @@ COMPAT_SYS_CALL_TABLE: .ifc \srr,srr mfspr r11,SPRN_SRR0 ld r12,_NIP(r1) + clrrdi r11,r11,2 clrrdi r12,r12,2 100: tdne r11,r12 EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) @@ -40,6 +41,7 @@ COMPAT_SYS_CALL_TABLE: .else mfspr r11,SPRN_HSRR0 ld r12,_NIP(r1) + clrrdi r11,r11,2 clrrdi r12,r12,2 100: tdne r11,r12 EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) -- cgit v1.2.3