From a6fb72f1e9e9f1905746646fd1e40932336291da Mon Sep 17 00:00:00 2001 From: Andrea Gelmini Date: Wed, 11 Aug 2010 21:07:34 +0200 Subject: arch: tile: kernel/proc.c Removed duplicated #include Signed-off-by: Andrea Gelmini Signed-off-by: Chris Metcalf --- arch/tile/kernel/proc.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/proc.c b/arch/tile/kernel/proc.c index 92ef925d2f8d..2e02c41ddf3b 100644 --- a/arch/tile/kernel/proc.c +++ b/arch/tile/kernel/proc.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From bc63de7c5bcc44b0098d09931f69a19e93d8a7ba Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 13 Aug 2010 08:23:07 -0400 Subject: arch/tile: correct a bug in freeing bootmem by VA for the optional second initrd. Signed-off-by: Chris Metcalf --- arch/tile/kernel/setup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index 4dd21c1e6d5e..e7d54c73d5c1 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -953,7 +953,7 @@ static void __init load_hv_initrd(void) if (rc != stat.size) { pr_err("Error reading %d bytes from hvfs file '%s': %d\n", stat.size, initramfs_file, rc); - free_bootmem((unsigned long) initrd, stat.size); + free_initrd_mem((unsigned long) initrd, stat.size); return; } initrd_start = (unsigned long) initrd; @@ -962,7 +962,7 @@ static void __init load_hv_initrd(void) void __init free_initrd_mem(unsigned long begin, unsigned long end) { - free_bootmem(begin, end - begin); + free_bootmem(__pa(begin), end - begin); } static void __init validate_hv(void) -- cgit v1.2.3 From 749dc6f252b57d5cb9c1f4c1c4aafe4c92a28207 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 13 Aug 2010 08:24:22 -0400 Subject: arch/tile: Use separate, better minsec values for clocksource and sched_clock. We were using the same 5-sec minsec for the clocksource and sched_clock that we were using for the clock_event_device. For the clock_event_device that's exactly right since it has a short maximum countdown time. But for sched_clock we want to avoid wraparound when converting from ticks to nsec over a much longer window, so we force a shift of 10. And for clocksource it seems dodgy to use a 5-sec minsec as well, so we copy some other platforms and force a shift of 22. Signed-off-by: Chris Metcalf --- arch/tile/kernel/time.c | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c index b9ab25a889b5..6bed820e1421 100644 --- a/arch/tile/kernel/time.c +++ b/arch/tile/kernel/time.c @@ -36,16 +36,6 @@ /* How many cycles per second we are running at. */ static cycles_t cycles_per_sec __write_once; -/* - * We set up shift and multiply values with a minsec of five seconds, - * since our timer counter counts down 31 bits at a frequency of - * no less than 500 MHz. See @minsec for clocks_calc_mult_shift(). - * We could use a different value for the 64-bit free-running - * cycle counter, but we use the same one for consistency, and since - * we will be reasonably precise with this value anyway. - */ -#define TILE_MINSEC 5 - cycles_t get_clock_rate(void) { return cycles_per_sec; @@ -68,6 +58,14 @@ cycles_t get_cycles(void) } #endif +/* + * We use a relatively small shift value so that sched_clock() + * won't wrap around very often. + */ +#define SCHED_CLOCK_SHIFT 10 + +static unsigned long sched_clock_mult __write_once; + static cycles_t clocksource_get_cycles(struct clocksource *cs) { return get_cycles(); @@ -78,6 +76,7 @@ static struct clocksource cycle_counter_cs = { .rating = 300, .read = clocksource_get_cycles, .mask = CLOCKSOURCE_MASK(64), + .shift = 22, /* typical value, e.g. x86 tsc uses this */ .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -88,8 +87,10 @@ static struct clocksource cycle_counter_cs = { void __init setup_clock(void) { cycles_per_sec = hv_sysconf(HV_SYSCONF_CPU_SPEED); - clocksource_calc_mult_shift(&cycle_counter_cs, cycles_per_sec, - TILE_MINSEC); + sched_clock_mult = + clocksource_hz2mult(cycles_per_sec, SCHED_CLOCK_SHIFT); + cycle_counter_cs.mult = + clocksource_hz2mult(cycles_per_sec, cycle_counter_cs.shift); } void __init calibrate_delay(void) @@ -117,9 +118,14 @@ void __init time_init(void) * counter, plus bit 31, which signifies that the counter has wrapped * from zero to (2**31) - 1. The INT_TILE_TIMER interrupt will be * raised as long as bit 31 is set. + * + * The TILE_MINSEC value represents the largest range of real-time + * we can possibly cover with the timer, based on MAX_TICK combined + * with the slowest reasonable clock rate we might run at. */ #define MAX_TICK 0x7fffffff /* we have 31 bits of countdown timer */ +#define TILE_MINSEC 5 /* timer covers no more than 5 seconds */ static int tile_timer_set_next_event(unsigned long ticks, struct clock_event_device *evt) @@ -211,8 +217,7 @@ void do_timer_interrupt(struct pt_regs *regs, int fault_num) unsigned long long sched_clock(void) { return clocksource_cyc2ns(get_cycles(), - cycle_counter_cs.mult, - cycle_counter_cs.shift); + sched_clock_mult, SCHED_CLOCK_SHIFT); } int setup_profiling_timer(unsigned int multiplier) -- cgit v1.2.3 From 32020effaf713c0c669864301bcd5dac6b9bb9e0 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 13 Aug 2010 08:32:21 -0400 Subject: arch/tile: Fix a couple of issues with the COMPAT code for TILE-Gx. First, the siginfo preamble wasn't quite right; we need to indicate that we are padding up to 4 ints of preamble for 64-bit code, and then for compat mode we need to pad differently, using only 3 ints. Second, the C ABI requires a save area of two registers, not two pointers, since in compat mode we have 64-bit registers all of which we need to save, even though we only have 32-bit VAs. Signed-off-by: Chris Metcalf --- arch/tile/kernel/compat_signal.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/compat_signal.c b/arch/tile/kernel/compat_signal.c index d5efb215dd5f..9c710db43f13 100644 --- a/arch/tile/kernel/compat_signal.c +++ b/arch/tile/kernel/compat_signal.c @@ -56,13 +56,15 @@ struct compat_ucontext { sigset_t uc_sigmask; /* mask last for extensibility */ }; +#define COMPAT_SI_PAD_SIZE ((SI_MAX_SIZE - 3 * sizeof(int)) / sizeof(int)) + struct compat_siginfo { int si_signo; int si_errno; int si_code; union { - int _pad[SI_PAD_SIZE]; + int _pad[COMPAT_SI_PAD_SIZE]; /* kill() */ struct { -- cgit v1.2.3 From 1fcbe027b5d29ec9cd0eeb753c14fb366ae852ac Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 13 Aug 2010 08:40:57 -0400 Subject: arch/tile: support backtracing on TILE-Gx This functionality was stubbed out until recently. Now we support our normal backtracing API on TILE-Gx as well as on TILE64/TILEPro. This change includes a tweak to the instruction encoding caused by adding addxli for compat mode. Signed-off-by: Chris Metcalf --- arch/tile/kernel/backtrace.c | 137 +++++++++++++++++++++++++++++++------------ arch/tile/kernel/stack.c | 8 --- arch/tile/kernel/traps.c | 4 +- 3 files changed, 102 insertions(+), 47 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/backtrace.c b/arch/tile/kernel/backtrace.c index 77265f3b58d6..d3c41c1ff6bd 100644 --- a/arch/tile/kernel/backtrace.c +++ b/arch/tile/kernel/backtrace.c @@ -19,9 +19,6 @@ #include -#if TILE_CHIP < 10 - - #include @@ -29,6 +26,27 @@ #define TREG_LR 55 +#if TILE_CHIP >= 10 +#define tile_bundle_bits tilegx_bundle_bits +#define TILE_MAX_INSTRUCTIONS_PER_BUNDLE TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE +#define TILE_BUNDLE_ALIGNMENT_IN_BYTES TILEGX_BUNDLE_ALIGNMENT_IN_BYTES +#define tile_decoded_instruction tilegx_decoded_instruction +#define tile_mnemonic tilegx_mnemonic +#define parse_insn_tile parse_insn_tilegx +#define TILE_OPC_IRET TILEGX_OPC_IRET +#define TILE_OPC_ADDI TILEGX_OPC_ADDI +#define TILE_OPC_ADDLI TILEGX_OPC_ADDLI +#define TILE_OPC_INFO TILEGX_OPC_INFO +#define TILE_OPC_INFOL TILEGX_OPC_INFOL +#define TILE_OPC_JRP TILEGX_OPC_JRP +#define TILE_OPC_MOVE TILEGX_OPC_MOVE +#define OPCODE_STORE TILEGX_OPC_ST +typedef long long bt_int_reg_t; +#else +#define OPCODE_STORE TILE_OPC_SW +typedef int bt_int_reg_t; +#endif + /** A decoded bundle used for backtracer analysis. */ struct BacktraceBundle { tile_bundle_bits bits; @@ -41,7 +59,7 @@ struct BacktraceBundle { /* This implementation only makes sense for native tools. */ /** Default function to read memory. */ static bool bt_read_memory(void *result, VirtualAddress addr, - size_t size, void *extra) + unsigned int size, void *extra) { /* FIXME: this should do some horrible signal stuff to catch * SEGV cleanly and fail. @@ -106,6 +124,12 @@ static bool bt_has_addi_sp(const struct BacktraceBundle *bundle, int *adjust) find_matching_insn(bundle, TILE_OPC_ADDI, vals, 2); if (insn == NULL) insn = find_matching_insn(bundle, TILE_OPC_ADDLI, vals, 2); +#if TILE_CHIP >= 10 + if (insn == NULL) + insn = find_matching_insn(bundle, TILEGX_OPC_ADDXLI, vals, 2); + if (insn == NULL) + insn = find_matching_insn(bundle, TILEGX_OPC_ADDXI, vals, 2); +#endif if (insn == NULL) return false; @@ -190,13 +214,52 @@ static inline bool bt_has_move_r52_sp(const struct BacktraceBundle *bundle) return find_matching_insn(bundle, TILE_OPC_MOVE, vals, 2) != NULL; } -/** Does this bundle contain the instruction 'sw sp, lr'? */ +/** Does this bundle contain a store of lr to sp? */ static inline bool bt_has_sw_sp_lr(const struct BacktraceBundle *bundle) { static const int vals[2] = { TREG_SP, TREG_LR }; - return find_matching_insn(bundle, TILE_OPC_SW, vals, 2) != NULL; + return find_matching_insn(bundle, OPCODE_STORE, vals, 2) != NULL; +} + +#if TILE_CHIP >= 10 +/** Track moveli values placed into registers. */ +static inline void bt_update_moveli(const struct BacktraceBundle *bundle, + int moveli_args[]) +{ + int i; + for (i = 0; i < bundle->num_insns; i++) { + const struct tile_decoded_instruction *insn = + &bundle->insns[i]; + + if (insn->opcode->mnemonic == TILEGX_OPC_MOVELI) { + int reg = insn->operand_values[0]; + moveli_args[reg] = insn->operand_values[1]; + } + } } +/** Does this bundle contain an 'add sp, sp, reg' instruction + * from a register that we saw a moveli into, and if so, what + * is the value in the register? + */ +static bool bt_has_add_sp(const struct BacktraceBundle *bundle, int *adjust, + int moveli_args[]) +{ + static const int vals[2] = { TREG_SP, TREG_SP }; + + const struct tile_decoded_instruction *insn = + find_matching_insn(bundle, TILEGX_OPC_ADDX, vals, 2); + if (insn) { + int reg = insn->operand_values[2]; + if (moveli_args[reg]) { + *adjust = moveli_args[reg]; + return true; + } + } + return false; +} +#endif + /** Locates the caller's PC and SP for a program starting at the * given address. */ @@ -227,6 +290,11 @@ static void find_caller_pc_and_caller_sp(CallerLocation *location, int next_bundle = 0; VirtualAddress pc; +#if TILE_CHIP >= 10 + /* Naively try to track moveli values to support addx for -m32. */ + int moveli_args[TILEGX_NUM_REGISTERS] = { 0 }; +#endif + /* Default to assuming that the caller's sp is the current sp. * This is necessary to handle the case where we start backtracing * right at the end of the epilog. @@ -380,7 +448,11 @@ static void find_caller_pc_and_caller_sp(CallerLocation *location, if (!sp_determined) { int adjust; - if (bt_has_addi_sp(&bundle, &adjust)) { + if (bt_has_addi_sp(&bundle, &adjust) +#if TILE_CHIP >= 10 + || bt_has_add_sp(&bundle, &adjust, moveli_args) +#endif + ) { location->sp_location = SP_LOC_OFFSET; if (adjust <= 0) { @@ -427,6 +499,11 @@ static void find_caller_pc_and_caller_sp(CallerLocation *location, sp_determined = true; } } + +#if TILE_CHIP >= 10 + /* Track moveli arguments for -m32 mode. */ + bt_update_moveli(&bundle, moveli_args); +#endif } if (bt_has_iret(&bundle)) { @@ -502,11 +579,10 @@ void backtrace_init(BacktraceIterator *state, break; } - /* The frame pointer should theoretically be aligned mod 8. If - * it's not even aligned mod 4 then something terrible happened - * and we should mark it as invalid. + /* If the frame pointer is not aligned to the basic word size + * something terrible happened and we should mark it as invalid. */ - if (fp % 4 != 0) + if (fp % sizeof(bt_int_reg_t) != 0) fp = -1; /* -1 means "don't know initial_frame_caller_pc". */ @@ -547,9 +623,16 @@ void backtrace_init(BacktraceIterator *state, state->read_memory_func_extra = read_memory_func_extra; } +/* Handle the case where the register holds more bits than the VA. */ +static bool valid_addr_reg(bt_int_reg_t reg) +{ + return ((VirtualAddress)reg == reg); +} + bool backtrace_next(BacktraceIterator *state) { - VirtualAddress next_fp, next_pc, next_frame[2]; + VirtualAddress next_fp, next_pc; + bt_int_reg_t next_frame[2]; if (state->fp == -1) { /* No parent frame. */ @@ -563,11 +646,9 @@ bool backtrace_next(BacktraceIterator *state) } next_fp = next_frame[1]; - if (next_fp % 4 != 0) { - /* Caller's frame pointer is suspect, so give up. - * Technically it should be aligned mod 8, but we will - * be forgiving here. - */ + if (!valid_addr_reg(next_frame[1]) || + next_fp % sizeof(bt_int_reg_t) != 0) { + /* Caller's frame pointer is suspect, so give up. */ return false; } @@ -585,7 +666,7 @@ bool backtrace_next(BacktraceIterator *state) } else { /* Get the caller PC from the frame linkage area. */ next_pc = next_frame[0]; - if (next_pc == 0 || + if (!valid_addr_reg(next_frame[0]) || next_pc == 0 || next_pc % TILE_BUNDLE_ALIGNMENT_IN_BYTES != 0) { /* The PC is suspect, so give up. */ return false; @@ -599,23 +680,3 @@ bool backtrace_next(BacktraceIterator *state) return true; } - -#else /* TILE_CHIP < 10 */ - -void backtrace_init(BacktraceIterator *state, - BacktraceMemoryReader read_memory_func, - void *read_memory_func_extra, - VirtualAddress pc, VirtualAddress lr, - VirtualAddress sp, VirtualAddress r52) -{ - state->pc = pc; - state->sp = sp; - state->fp = -1; - state->initial_frame_caller_pc = -1; - state->read_memory_func = read_memory_func; - state->read_memory_func_extra = read_memory_func_extra; -} - -bool backtrace_next(BacktraceIterator *state) { return false; } - -#endif /* TILE_CHIP < 10 */ diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c index b6268d3ae869..38a68b0b4581 100644 --- a/arch/tile/kernel/stack.c +++ b/arch/tile/kernel/stack.c @@ -108,7 +108,6 @@ static bool read_memory_func(void *result, VirtualAddress address, /* Return a pt_regs pointer for a valid fault handler frame */ static struct pt_regs *valid_fault_handler(struct KBacktraceIterator* kbt) { -#ifndef __tilegx__ const char *fault = NULL; /* happy compiler */ char fault_buf[64]; VirtualAddress sp = kbt->it.sp; @@ -146,7 +145,6 @@ static struct pt_regs *valid_fault_handler(struct KBacktraceIterator* kbt) } if (!kbt->profile || (INT_MASK(p->faultnum) & QUEUED_INTERRUPTS) == 0) return p; -#endif return NULL; } @@ -351,12 +349,6 @@ void tile_show_stack(struct KBacktraceIterator *kbt, int headers) kbt->task->pid, kbt->task->tgid, kbt->task->comm, smp_processor_id(), get_cycles()); } -#ifdef __tilegx__ - if (kbt->is_current) { - __insn_mtspr(SPR_SIM_CONTROL, - SIM_DUMP_SPR_ARG(SIM_DUMP_BACKTRACE)); - } -#endif kbt->verbose = 1; i = 0; for (; !KBacktraceIterator_end(kbt); KBacktraceIterator_next(kbt)) { diff --git a/arch/tile/kernel/traps.c b/arch/tile/kernel/traps.c index 3870abbeeaa2..0f362dc2c57f 100644 --- a/arch/tile/kernel/traps.c +++ b/arch/tile/kernel/traps.c @@ -128,7 +128,9 @@ static int special_ill(bundle_bits bundle, int *sigp, int *codep) #ifdef __tilegx__ if ((bundle & TILEGX_BUNDLE_MODE_MASK) != 0) return 0; - if (get_Opcode_X1(bundle) != UNARY_OPCODE_X1) + if (get_Opcode_X1(bundle) != RRR_0_OPCODE_X1) + return 0; + if (get_RRROpcodeExtension_X1(bundle) != UNARY_RRR_0_OPCODE_X1) return 0; if (get_UnaryOpcodeExtension_X1(bundle) != ILL_UNARY_OPCODE_X1) return 0; -- cgit v1.2.3 From ba00376b0b13f234d839541a7b36a5bf5c2a4036 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 13 Aug 2010 16:37:00 -0400 Subject: arch/tile: extend syscall ABI to set r1 on return as well. Until now, the tile architecture ABI for syscall return has just been that r0 holds the return value, and an error is only signalled like it is for kernel code, with a negative small number. However, this means that in multiple places in userspace we end up writing the same three-cycle idiom that tests for a small negative number for error. It seems cleaner to instead move that code into the kernel, and set r1 to hold zero on success or errno on failure; previously, r1 was just zeroed on return from the kernel (to avoid leaking kernel state). This way a single conditional branch after the syscall is sufficient to test for the failure case. The number of cycles taken is the same, but the error-checking code is in just one place, so total code size is smaller, and random userspace syscall code is easier to understand. Signed-off-by: Chris Metcalf --- arch/tile/kernel/intvec_32.S | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S index 3404c75f8e64..84f296ca9e63 100644 --- a/arch/tile/kernel/intvec_32.S +++ b/arch/tile/kernel/intvec_32.S @@ -952,7 +952,7 @@ STD_ENTRY(interrupt_return) * able to safely read all the remaining words on those cache * lines without waiting for the memory subsystem. */ - pop_reg_zero r0, r1, sp, PTREGS_OFFSET_REG(30) - PTREGS_OFFSET_REG(0) + pop_reg_zero r0, r28, sp, PTREGS_OFFSET_REG(30) - PTREGS_OFFSET_REG(0) pop_reg_zero r30, r2, sp, PTREGS_OFFSET_PC - PTREGS_OFFSET_REG(30) pop_reg_zero r21, r3, sp, PTREGS_OFFSET_EX1 - PTREGS_OFFSET_PC pop_reg_zero lr, r4, sp, PTREGS_OFFSET_REG(52) - PTREGS_OFFSET_EX1 @@ -1017,7 +1017,17 @@ STD_ENTRY(interrupt_return) { move r22, zero; move r23, zero } { move r24, zero; move r25, zero } { move r26, zero; move r27, zero } - { move r28, zero; move r29, zero } + + /* Set r1 to errno if we are returning an error, otherwise zero. */ + { + moveli r29, 1024 + sub r1, zero, r0 + } + slt_u r29, r1, r29 + { + mnz r1, r29, r1 + move r29, zero + } iret /* -- cgit v1.2.3 From d7627467b7a8dd6944885290a03a07ceb28c10eb Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 17 Aug 2010 23:52:56 +0100 Subject: Make do_execve() take a const filename pointer Make do_execve() take a const filename pointer so that kernel_execve() compiles correctly on ARM: arch/arm/kernel/sys_arm.c:88: warning: passing argument 1 of 'do_execve' discards qualifiers from pointer target type This also requires the argv and envp arguments to be consted twice, once for the pointer array and once for the strings the array points to. This is because do_execve() passes a pointer to the filename (now const) to copy_strings_kernel(). A simpler alternative would be to cast the filename pointer in do_execve() when it's passed to copy_strings_kernel(). do_execve() may not change any of the strings it is passed as part of the argv or envp lists as they are some of them in .rodata, so marking these strings as const should be fine. Further kernel_execve() and sys_execve() need to be changed to match. This has been test built on x86_64, frv, arm and mips. Signed-off-by: David Howells Tested-by: Ralf Baechle Acked-by: Russell King Signed-off-by: Linus Torvalds --- arch/tile/kernel/process.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index ed590ad0acdc..985cc28c74c5 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -543,8 +543,9 @@ long _sys_vfork(struct pt_regs *regs) /* * sys_execve() executes a new program. */ -long _sys_execve(char __user *path, char __user *__user *argv, - char __user *__user *envp, struct pt_regs *regs) +long _sys_execve(const char __user *path, + const char __user *const __user *argv, + const char __user *const __user *envp, struct pt_regs *regs) { long error; char *filename; -- cgit v1.2.3 From e6e6c46d759cd013cb57eba112a4129a3a353c4b Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 15 Sep 2010 11:16:05 -0400 Subject: arch/tile: finish const-ifying sys_execve() The sys_execve() implementation was properly const-ified but not the declaration, the syscall wrappers, or the compat version. This change completes the constification process. Signed-off-by: Chris Metcalf --- arch/tile/kernel/process.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index 985cc28c74c5..88be49e3aa25 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -561,8 +561,9 @@ out: } #ifdef CONFIG_COMPAT -long _compat_sys_execve(char __user *path, compat_uptr_t __user *argv, - compat_uptr_t __user *envp, struct pt_regs *regs) +long _compat_sys_execve(const char __user *path, + const compat_uptr_t __user *argv, + const compat_uptr_t __user *envp, struct pt_regs *regs) { long error; char *filename; -- cgit v1.2.3 From 74fca9da097b74117ae2cef9e5f0d9b0e28ccbb7 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 15 Sep 2010 11:16:08 -0400 Subject: arch/tile: Change struct sigcontext to be more useful Rather than just using pt_regs, it now contains the actual saved state explicitly, similar to pt_regs. By doing it this way, we provide a cleaner API for userspace (or equivalently, we avoid the need for libc to provide its own definition of sigcontext). While we're at it, move PT_FLAGS_xxx to where they are not visible from userspace. And always pass siginfo and mcontext to signal handlers, even if they claim they don't need it, since sometimes they actually try to use it anyway in practice. Signed-off-by: Chris Metcalf --- arch/tile/kernel/signal.c | 27 +++++++++++++++------------ arch/tile/kernel/stack.c | 2 +- 2 files changed, 16 insertions(+), 13 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/signal.c b/arch/tile/kernel/signal.c index 45b66a3c991f..ce183aa1492c 100644 --- a/arch/tile/kernel/signal.c +++ b/arch/tile/kernel/signal.c @@ -61,13 +61,19 @@ int restore_sigcontext(struct pt_regs *regs, /* Always make any pending restarted system calls return -EINTR */ current_thread_info()->restart_block.fn = do_no_restart_syscall; + /* + * Enforce that sigcontext is like pt_regs, and doesn't mess + * up our stack alignment rules. + */ + BUILD_BUG_ON(sizeof(struct sigcontext) != sizeof(struct pt_regs)); + BUILD_BUG_ON(sizeof(struct sigcontext) % 8 != 0); + for (i = 0; i < sizeof(struct pt_regs)/sizeof(long); ++i) - err |= __get_user(((long *)regs)[i], - &((long __user *)(&sc->regs))[i]); + err |= __get_user(regs->regs[i], &sc->gregs[i]); regs->faultnum = INT_SWINT_1_SIGRETURN; - err |= __get_user(*pr0, &sc->regs.regs[0]); + err |= __get_user(*pr0, &sc->gregs[0]); return err; } @@ -112,8 +118,7 @@ int setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs) int i, err = 0; for (i = 0; i < sizeof(struct pt_regs)/sizeof(long); ++i) - err |= __put_user(((long *)regs)[i], - &((long __user *)(&sc->regs))[i]); + err |= __put_user(regs->regs[i], &sc->gregs[i]); return err; } @@ -203,19 +208,17 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, * Set up registers for signal handler. * Registers that we don't modify keep the value they had from * user-space at the time we took the signal. + * We always pass siginfo and mcontext, regardless of SA_SIGINFO, + * since some things rely on this (e.g. glibc's debug/segfault.c). */ regs->pc = (unsigned long) ka->sa.sa_handler; regs->ex1 = PL_ICS_EX1(USER_PL, 1); /* set crit sec in handler */ regs->sp = (unsigned long) frame; regs->lr = restorer; regs->regs[0] = (unsigned long) usig; - - if (ka->sa.sa_flags & SA_SIGINFO) { - /* Need extra arguments, so mark to restore caller-saves. */ - regs->regs[1] = (unsigned long) &frame->info; - regs->regs[2] = (unsigned long) &frame->uc; - regs->flags |= PT_FLAGS_CALLER_SAVES; - } + regs->regs[1] = (unsigned long) &frame->info; + regs->regs[2] = (unsigned long) &frame->uc; + regs->flags |= PT_FLAGS_CALLER_SAVES; /* * Notify any tracer that was single-stepping it. diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c index 38a68b0b4581..ea2e0ce28380 100644 --- a/arch/tile/kernel/stack.c +++ b/arch/tile/kernel/stack.c @@ -175,7 +175,7 @@ static struct pt_regs *valid_sigframe(struct KBacktraceIterator* kbt) pr_err(" \n", frame->info.si_signo); } - return &frame->uc.uc_mcontext.regs; + return (struct pt_regs *)&frame->uc.uc_mcontext; } return NULL; } -- cgit v1.2.3 From a802fc685426303ab627b7ad3fd5c97b5dea7e00 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 15 Sep 2010 11:16:10 -0400 Subject: arch/tile: Save and restore extra user state for tilegx During context switch, save and restore a couple of additional bits of tilegx user state that can be persistently modified by userspace. Signed-off-by: Chris Metcalf --- arch/tile/kernel/process.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index 88be49e3aa25..c37b6e3873c1 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -408,6 +408,15 @@ static void save_arch_state(struct thread_struct *t) #if CHIP_HAS_PROC_STATUS_SPR() t->proc_status = __insn_mfspr(SPR_PROC_STATUS); #endif +#if !CHIP_HAS_FIXED_INTVEC_BASE() + t->interrupt_vector_base = __insn_mfspr(SPR_INTERRUPT_VECTOR_BASE_0); +#endif +#if CHIP_HAS_TILE_RTF_HWM() + t->tile_rtf_hwm = __insn_mfspr(SPR_TILE_RTF_HWM); +#endif +#if CHIP_HAS_DSTREAM_PF() + t->dstream_pf = __insn_mfspr(SPR_DSTREAM_PF); +#endif } static void restore_arch_state(const struct thread_struct *t) @@ -428,14 +437,14 @@ static void restore_arch_state(const struct thread_struct *t) #if CHIP_HAS_PROC_STATUS_SPR() __insn_mtspr(SPR_PROC_STATUS, t->proc_status); #endif +#if !CHIP_HAS_FIXED_INTVEC_BASE() + __insn_mtspr(SPR_INTERRUPT_VECTOR_BASE_0, t->interrupt_vector_base); +#endif #if CHIP_HAS_TILE_RTF_HWM() - /* - * Clear this whenever we switch back to a process in case - * the previous process was monkeying with it. Even if enabled - * in CBOX_MSR1 via TILE_RTF_HWM_MIN, it's still just a - * performance hint, so isn't worth a full save/restore. - */ - __insn_mtspr(SPR_TILE_RTF_HWM, 0); + __insn_mtspr(SPR_TILE_RTF_HWM, t->tile_rtf_hwm); +#endif +#if CHIP_HAS_DSTREAM_PF() + __insn_mtspr(SPR_DSTREAM_PF, t->dstream_pf); #endif } -- cgit v1.2.3 From 7040dea4d2a0609241c7a98a944b7c432c69db2e Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 15 Sep 2010 11:17:05 -0400 Subject: arch/tile: fix formatting bug in register dumps This cut-and-paste bug was caused by rewriting the register dump code to use only a single printk per line of output. Signed-off-by: Chris Metcalf --- arch/tile/kernel/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index c37b6e3873c1..84c29111756c 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -667,7 +667,7 @@ void show_regs(struct pt_regs *regs) regs->regs[51], regs->regs[52], regs->tp); pr_err(" sp : "REGFMT" lr : "REGFMT"\n", regs->sp, regs->lr); #else - for (i = 0; i < 52; i += 3) + for (i = 0; i < 52; i += 4) pr_err(" r%-2d: "REGFMT" r%-2d: "REGFMT " r%-2d: "REGFMT" r%-2d: "REGFMT"\n", i, regs->regs[i], i+1, regs->regs[i+1], -- cgit v1.2.3 From d1ea13c6e2cce0106531852daaa93dd97aec9580 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 23 Sep 2010 18:40:07 +0200 Subject: genirq: Cleanup irq_chip->typename leftovers 3 years transition phase is enough. Cleanup the last users and remove the cruft. Signed-off-by: Thomas Gleixner Cc: Leo Chen Cc: Hirokazu Takata Cc: Chris Metcalf Cc: Jeff Dike Cc: Chris Zankel --- arch/tile/kernel/irq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/irq.c b/arch/tile/kernel/irq.c index 596c60086930..9a27d563fc30 100644 --- a/arch/tile/kernel/irq.c +++ b/arch/tile/kernel/irq.c @@ -208,7 +208,7 @@ static void tile_irq_chip_eoi(unsigned int irq) } static struct irq_chip tile_irq_chip = { - .typename = "tile_irq_chip", + .name = "tile_irq_chip", .ack = tile_irq_chip_ack, .eoi = tile_irq_chip_eoi, .mask = tile_irq_chip_mask, @@ -288,7 +288,7 @@ int show_interrupts(struct seq_file *p, void *v) for_each_online_cpu(j) seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); #endif - seq_printf(p, " %14s", irq_desc[i].chip->typename); + seq_printf(p, " %14s", irq_desc[i].chip->name); seq_printf(p, " %s", action->name); for (action = action->next; action; action = action->next) -- cgit v1.2.3 From ea44e06e79a393fb577272399ef7521a143b6b59 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 24 Sep 2010 17:19:20 -0400 Subject: arch/tile: remove dead code from intvec_32.S This "bpt_code" instruction was killed off in our development line a while ago (the actual definition of bpt_code that is used is in kernel/traps.c) but I didn't push it for 2.6.36 because it seemed harmless and I didn't want to try to push more than absolutely necessary. However, we recently fixed a bug in our gcc that had been causing "-gdwarf2" not to be passed to the assembler, and passing this flag causes an erroneous assembler failure in the presence of code in a data section, sometimes. While we'd like to track down the bug in the assembler, we'd also like to make sure 2.6.36 builds with the current toolchain, so I'm removing this dead code as well. Signed-off-by: Chris Metcalf --- arch/tile/kernel/intvec_32.S | 7 ------- 1 file changed, 7 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S index 84f296ca9e63..8f58bdff20d7 100644 --- a/arch/tile/kernel/intvec_32.S +++ b/arch/tile/kernel/intvec_32.S @@ -1506,13 +1506,6 @@ handle_ill: } STD_ENDPROC(handle_ill) - .pushsection .rodata, "a" - .align 8 -bpt_code: - bpt - ENDPROC(bpt_code) - .popsection - /* Various stub interrupt handlers and syscall handlers */ STD_ENTRY_LOCAL(_kernel_double_fault) -- cgit v1.2.3 From d929b6aeaacbe78cbfef4a80e3eed1bf0464d984 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 14 Oct 2010 14:34:33 -0400 Subject: arch/tile: Use With this change we now include into the "tile" version of the header. To take full advantage of the prototypes there, we also change our naming convention for "struct pt_regs *" syscalls so that, e.g., _sys_execve() is the "true" syscall entry, which sets the appropriate register to point to the pt_regs before calling sys_execve(). While doing this I realized I no longer needed the fork and vfork entry point stubs, since those functions aren't in the generic syscall ABI, so I removed them as well. Signed-off-by: Chris Metcalf --- arch/tile/kernel/compat.c | 6 ++++++ arch/tile/kernel/compat_signal.c | 10 +++++----- arch/tile/kernel/intvec_32.S | 23 +++++++++-------------- arch/tile/kernel/process.c | 31 +++++++++++-------------------- arch/tile/kernel/signal.c | 6 +++--- arch/tile/kernel/sys.c | 9 +++++++++ 6 files changed, 43 insertions(+), 42 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/compat.c b/arch/tile/kernel/compat.c index b1e06d041555..5f24e54bf3c5 100644 --- a/arch/tile/kernel/compat.c +++ b/arch/tile/kernel/compat.c @@ -157,6 +157,12 @@ long tile_compat_sys_msgrcv(int msqid, /* Pass full 64-bit values through ptrace. */ #define compat_sys_ptrace tile_compat_sys_ptrace +/* Call the trampolines to manage pt_regs where necessary. */ +#define compat_sys_execve _compat_sys_execve +#define compat_sys_sigaltstack _compat_sys_sigaltstack +#define compat_sys_rt_sigreturn _compat_sys_rt_sigreturn +#define sys_clone _sys_clone + /* * Note that we can't include here since the header * guard will defeat us; checks for __SYSCALL as well. diff --git a/arch/tile/kernel/compat_signal.c b/arch/tile/kernel/compat_signal.c index 9c710db43f13..fb64b99959d4 100644 --- a/arch/tile/kernel/compat_signal.c +++ b/arch/tile/kernel/compat_signal.c @@ -256,9 +256,9 @@ int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from) return err; } -long _compat_sys_sigaltstack(const struct compat_sigaltstack __user *uss_ptr, - struct compat_sigaltstack __user *uoss_ptr, - struct pt_regs *regs) +long compat_sys_sigaltstack(const struct compat_sigaltstack __user *uss_ptr, + struct compat_sigaltstack __user *uoss_ptr, + struct pt_regs *regs) { stack_t uss, uoss; int ret; @@ -291,7 +291,7 @@ long _compat_sys_sigaltstack(const struct compat_sigaltstack __user *uss_ptr, return ret; } -long _compat_sys_rt_sigreturn(struct pt_regs *regs) +long compat_sys_rt_sigreturn(struct pt_regs *regs) { struct compat_rt_sigframe __user *frame = (struct compat_rt_sigframe __user *) compat_ptr(regs->sp); @@ -312,7 +312,7 @@ long _compat_sys_rt_sigreturn(struct pt_regs *regs) if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &r0)) goto badframe; - if (_compat_sys_sigaltstack(&frame->uc.uc_stack, NULL, regs) != 0) + if (compat_sys_sigaltstack(&frame->uc.uc_stack, NULL, regs) != 0) goto badframe; return r0; diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S index 8f58bdff20d7..7c7e9ac2580d 100644 --- a/arch/tile/kernel/intvec_32.S +++ b/arch/tile/kernel/intvec_32.S @@ -1524,28 +1524,23 @@ STD_ENTRY_LOCAL(bad_intr) /* Put address of pt_regs in reg and jump. */ #define PTREGS_SYSCALL(x, reg) \ - STD_ENTRY(x); \ + STD_ENTRY(_##x); \ { \ PTREGS_PTR(reg, PTREGS_OFFSET_BASE); \ - j _##x \ + j x \ }; \ - STD_ENDPROC(x) + STD_ENDPROC(_##x) PTREGS_SYSCALL(sys_execve, r3) PTREGS_SYSCALL(sys_sigaltstack, r2) PTREGS_SYSCALL(sys_rt_sigreturn, r0) +PTREGS_SYSCALL(sys_cmpxchg_badaddr, r1) -/* Save additional callee-saves to pt_regs, put address in reg and jump. */ -#define PTREGS_SYSCALL_ALL_REGS(x, reg) \ - STD_ENTRY(x); \ - push_extra_callee_saves reg; \ - j _##x; \ - STD_ENDPROC(x) - -PTREGS_SYSCALL_ALL_REGS(sys_fork, r0) -PTREGS_SYSCALL_ALL_REGS(sys_vfork, r0) -PTREGS_SYSCALL_ALL_REGS(sys_clone, r4) -PTREGS_SYSCALL_ALL_REGS(sys_cmpxchg_badaddr, r1) +/* Save additional callee-saves to pt_regs, put address in r4 and jump. */ +STD_ENTRY(_sys_clone) + push_extra_callee_saves r4 + j sys_clone + STD_ENDPROC(_sys_clone) /* * This entrypoint is taken for the cmpxchg and atomic_update fast diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index 84c29111756c..42ff73d5f637 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -528,14 +528,9 @@ struct task_struct *__sched _switch_to(struct task_struct *prev, return __switch_to(prev, next, next_current_ksp0(next)); } -long _sys_fork(struct pt_regs *regs) -{ - return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL); -} - -long _sys_clone(unsigned long clone_flags, unsigned long newsp, - void __user *parent_tidptr, void __user *child_tidptr, - struct pt_regs *regs) +SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, + void __user *, parent_tidptr, void __user *, child_tidptr, + struct pt_regs *, regs) { if (!newsp) newsp = regs->sp; @@ -543,18 +538,13 @@ long _sys_clone(unsigned long clone_flags, unsigned long newsp, parent_tidptr, child_tidptr); } -long _sys_vfork(struct pt_regs *regs) -{ - return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, - regs, 0, NULL, NULL); -} - /* * sys_execve() executes a new program. */ -long _sys_execve(const char __user *path, - const char __user *const __user *argv, - const char __user *const __user *envp, struct pt_regs *regs) +SYSCALL_DEFINE4(execve, const char __user *, path, + const char __user *const __user *, argv, + const char __user *const __user *, envp, + struct pt_regs *, regs) { long error; char *filename; @@ -570,9 +560,10 @@ out: } #ifdef CONFIG_COMPAT -long _compat_sys_execve(const char __user *path, - const compat_uptr_t __user *argv, - const compat_uptr_t __user *envp, struct pt_regs *regs) +long compat_sys_execve(const char __user *path, + const compat_uptr_t __user *argv, + const compat_uptr_t __user *envp, + struct pt_regs *regs) { long error; char *filename; diff --git a/arch/tile/kernel/signal.c b/arch/tile/kernel/signal.c index ce183aa1492c..fb28e85ae3ae 100644 --- a/arch/tile/kernel/signal.c +++ b/arch/tile/kernel/signal.c @@ -41,8 +41,8 @@ #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) -long _sys_sigaltstack(const stack_t __user *uss, - stack_t __user *uoss, struct pt_regs *regs) +SYSCALL_DEFINE3(sigaltstack, const stack_t __user *, uss, + stack_t __user *, uoss, struct pt_regs *, regs) { return do_sigaltstack(uss, uoss, regs->sp); } @@ -78,7 +78,7 @@ int restore_sigcontext(struct pt_regs *regs, } /* sigreturn() returns long since it restores r0 in the interrupted code. */ -long _sys_rt_sigreturn(struct pt_regs *regs) +SYSCALL_DEFINE1(rt_sigreturn, struct pt_regs *, regs) { struct rt_sigframe __user *frame = (struct rt_sigframe __user *)(regs->sp); diff --git a/arch/tile/kernel/sys.c b/arch/tile/kernel/sys.c index f0f87eab8c39..7e764669a022 100644 --- a/arch/tile/kernel/sys.c +++ b/arch/tile/kernel/sys.c @@ -110,6 +110,15 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, #define sys_sync_file_range sys_sync_file_range2 #endif +/* Call the trampolines to manage pt_regs where necessary. */ +#define sys_execve _sys_execve +#define sys_sigaltstack _sys_sigaltstack +#define sys_rt_sigreturn _sys_rt_sigreturn +#define sys_clone _sys_clone +#ifndef __tilegx__ +#define sys_cmpxchg_badaddr _sys_cmpxchg_badaddr +#endif + /* * Note that we can't include here since the header * guard will defeat us; checks for __SYSCALL as well. -- cgit v1.2.3 From d6f0f22c3c409f9ed88e64f881f8308730be76f0 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 14 Oct 2010 14:42:58 -0400 Subject: arch/tile: update some comments to clarify register usage. Signed-off-by: Chris Metcalf --- arch/tile/kernel/intvec_32.S | 2 ++ arch/tile/kernel/process.c | 7 ++++--- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S index 7c7e9ac2580d..c62c2f421ef3 100644 --- a/arch/tile/kernel/intvec_32.S +++ b/arch/tile/kernel/intvec_32.S @@ -1553,6 +1553,8 @@ STD_ENTRY(_sys_clone) * to be available to it on entry. It does not modify any callee-save * registers (including "lr"). It does not check what PL it is being * called at, so you'd better not call it other than at PL0. + * The wrapper assumes it only clobbers r20-r29, so if + * it ever is necessary to use more registers, be aware. * * It does not use the stack, but since it might be re-interrupted by * a page fault which would assume the stack was valid, it does diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index 42ff73d5f637..221f12bd27fa 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -214,9 +214,10 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, /* * Copy the callee-saved registers from the passed pt_regs struct * into the context-switch callee-saved registers area. - * We have to restore the callee-saved registers since we may - * be cloning a userspace task with userspace register state, - * and we won't be unwinding the same kernel frames to restore them. + * This way when we start the interrupt-return sequence, the + * callee-save registers will be correctly in registers, which + * is how we assume the compiler leaves them as we start doing + * the normal return-from-interrupt path after calling C code. * Zero out the C ABI save area to mark the top of the stack. */ ksp = (unsigned long) childregs; -- cgit v1.2.3 From 77d233036ea886398770f208aa22235acf0d011c Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 14 Oct 2010 14:47:35 -0400 Subject: arch/tile: Add a warning if we try to allocate too much vmalloc memory. Signed-off-by: Chris Metcalf --- arch/tile/kernel/setup.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index e7d54c73d5c1..c1a697e68546 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -1334,6 +1334,10 @@ static void __init pcpu_fc_populate_pte(unsigned long addr) pte_t *pte; BUG_ON(pgd_addr_invalid(addr)); + if (addr < VMALLOC_START || addr >= VMALLOC_END) + panic("PCPU addr %#lx outside vmalloc range %#lx..%#lx;" + " try increasing CONFIG_VMALLOC_RESERVE\n", + addr, VMALLOC_START, VMALLOC_END); pgd = swapper_pg_dir + pgd_index(addr); pud = pud_offset(pgd, addr); -- cgit v1.2.3 From a4dbc5ee52e8741522a2f87067b80116c6c7a934 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 14 Oct 2010 15:14:29 -0400 Subject: arch/tile: change lower bound on syscall error return to -4095 Previously we were using -1023, which is fine for normal syscall error returns, but the common value in use for other platforms is -4095, and one Tilera-specific driver does use values in the -1100 range, so tickled this bug. Signed-off-by: Chris Metcalf --- arch/tile/kernel/intvec_32.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S index c62c2f421ef3..f87c5c044d6b 100644 --- a/arch/tile/kernel/intvec_32.S +++ b/arch/tile/kernel/intvec_32.S @@ -1020,7 +1020,7 @@ STD_ENTRY(interrupt_return) /* Set r1 to errno if we are returning an error, otherwise zero. */ { - moveli r29, 1024 + moveli r29, 4096 sub r1, zero, r0 } slt_u r29, r1, r29 -- cgit v1.2.3 From dabe98c972091818762e02841ab1f982e573e7d0 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 14 Oct 2010 15:19:04 -0400 Subject: arch/tile: prevent corrupt top frame from causing backtracer runaway The backtracer will normally cut itself off after 100 frames anyway, but it's messy. With this change we notice that the frame being reported is the same as the last one, and cut off the dump with a message similar to what gdb displays in the same circumstance. Signed-off-by: Chris Metcalf --- arch/tile/kernel/stack.c | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c index ea2e0ce28380..0d54106be3d6 100644 --- a/arch/tile/kernel/stack.c +++ b/arch/tile/kernel/stack.c @@ -30,6 +30,10 @@ #include #include +#define KBT_ONGOING 0 /* Backtrace still ongoing */ +#define KBT_DONE 1 /* Backtrace cleanly completed */ +#define KBT_RUNNING 2 /* Can't run backtrace on a running task */ +#define KBT_LOOP 3 /* Backtrace entered a loop */ /* Is address on the specified kernel stack? */ static int in_kernel_stack(struct KBacktraceIterator *kbt, VirtualAddress sp) @@ -207,11 +211,11 @@ static int KBacktraceIterator_next_item_inclusive( for (;;) { do { if (!KBacktraceIterator_is_sigreturn(kbt)) - return 1; + return KBT_ONGOING; } while (backtrace_next(&kbt->it)); if (!KBacktraceIterator_restart(kbt)) - return 0; + return KBT_DONE; } } @@ -264,7 +268,7 @@ void KBacktraceIterator_init(struct KBacktraceIterator *kbt, kbt->pgtable = NULL; kbt->verbose = 0; /* override in caller if desired */ kbt->profile = 0; /* override in caller if desired */ - kbt->end = 0; + kbt->end = KBT_ONGOING; kbt->new_context = 0; if (is_current) { HV_PhysAddr pgdir_pa = hv_inquire_context().page_table; @@ -290,7 +294,7 @@ void KBacktraceIterator_init(struct KBacktraceIterator *kbt, if (regs == NULL) { if (is_current || t->state == TASK_RUNNING) { /* Can't do this; we need registers */ - kbt->end = 1; + kbt->end = KBT_RUNNING; return; } pc = get_switch_to_pc(); @@ -305,26 +309,29 @@ void KBacktraceIterator_init(struct KBacktraceIterator *kbt, } backtrace_init(&kbt->it, read_memory_func, kbt, pc, lr, sp, r52); - kbt->end = !KBacktraceIterator_next_item_inclusive(kbt); + kbt->end = KBacktraceIterator_next_item_inclusive(kbt); } EXPORT_SYMBOL(KBacktraceIterator_init); int KBacktraceIterator_end(struct KBacktraceIterator *kbt) { - return kbt->end; + return kbt->end != KBT_ONGOING; } EXPORT_SYMBOL(KBacktraceIterator_end); void KBacktraceIterator_next(struct KBacktraceIterator *kbt) { + VirtualAddress old_pc = kbt->it.pc, old_sp = kbt->it.sp; kbt->new_context = 0; - if (!backtrace_next(&kbt->it) && - !KBacktraceIterator_restart(kbt)) { - kbt->end = 1; - return; - } - - kbt->end = !KBacktraceIterator_next_item_inclusive(kbt); + if (!backtrace_next(&kbt->it) && !KBacktraceIterator_restart(kbt)) { + kbt->end = KBT_DONE; + return; + } + kbt->end = KBacktraceIterator_next_item_inclusive(kbt); + if (old_pc == kbt->it.pc && old_sp == kbt->it.sp) { + /* Trapped in a loop; give up. */ + kbt->end = KBT_LOOP; + } } EXPORT_SYMBOL(KBacktraceIterator_next); @@ -387,6 +394,8 @@ void tile_show_stack(struct KBacktraceIterator *kbt, int headers) break; } } + if (kbt->end == KBT_LOOP) + pr_err("Stack dump stopped; next frame identical to this one\n"); if (headers) pr_err("Stack dump complete\n"); } -- cgit v1.2.3 From 6038f373a3dc1f1c26496e60b6c40b164716f07e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 15 Aug 2010 18:52:59 +0200 Subject: llseek: automatically add .llseek fop All file_operations should get a .llseek operation so we can make nonseekable_open the default for future file operations without a .llseek pointer. The three cases that we can automatically detect are no_llseek, seq_lseek and default_llseek. For cases where we can we can automatically prove that the file offset is always ignored, we use noop_llseek, which maintains the current behavior of not returning an error from a seek. New drivers should normally not use noop_llseek but instead use no_llseek and call nonseekable_open at open time. Existing drivers can be converted to do the same when the maintainer knows for certain that no user code relies on calling seek on the device file. The generated code is often incorrectly indented and right now contains comments that clarify for each added line why a specific variant was chosen. In the version that gets submitted upstream, the comments will be gone and I will manually fix the indentation, because there does not seem to be a way to do that using coccinelle. Some amount of new code is currently sitting in linux-next that should get the same modifications, which I will do at the end of the merge window. Many thanks to Julia Lawall for helping me learn to write a semantic patch that does all this. ===== begin semantic patch ===== // This adds an llseek= method to all file operations, // as a preparation for making no_llseek the default. // // The rules are // - use no_llseek explicitly if we do nonseekable_open // - use seq_lseek for sequential files // - use default_llseek if we know we access f_pos // - use noop_llseek if we know we don't access f_pos, // but we still want to allow users to call lseek // @ open1 exists @ identifier nested_open; @@ nested_open(...) { <+... nonseekable_open(...) ...+> } @ open exists@ identifier open_f; identifier i, f; identifier open1.nested_open; @@ int open_f(struct inode *i, struct file *f) { <+... ( nonseekable_open(...) | nested_open(...) ) ...+> } @ read disable optional_qualifier exists @ identifier read_f; identifier f, p, s, off; type ssize_t, size_t, loff_t; expression E; identifier func; @@ ssize_t read_f(struct file *f, char *p, size_t s, loff_t *off) { <+... ( *off = E | *off += E | func(..., off, ...) | E = *off ) ...+> } @ read_no_fpos disable optional_qualifier exists @ identifier read_f; identifier f, p, s, off; type ssize_t, size_t, loff_t; @@ ssize_t read_f(struct file *f, char *p, size_t s, loff_t *off) { ... when != off } @ write @ identifier write_f; identifier f, p, s, off; type ssize_t, size_t, loff_t; expression E; identifier func; @@ ssize_t write_f(struct file *f, const char *p, size_t s, loff_t *off) { <+... ( *off = E | *off += E | func(..., off, ...) | E = *off ) ...+> } @ write_no_fpos @ identifier write_f; identifier f, p, s, off; type ssize_t, size_t, loff_t; @@ ssize_t write_f(struct file *f, const char *p, size_t s, loff_t *off) { ... when != off } @ fops0 @ identifier fops; @@ struct file_operations fops = { ... }; @ has_llseek depends on fops0 @ identifier fops0.fops; identifier llseek_f; @@ struct file_operations fops = { ... .llseek = llseek_f, ... }; @ has_read depends on fops0 @ identifier fops0.fops; identifier read_f; @@ struct file_operations fops = { ... .read = read_f, ... }; @ has_write depends on fops0 @ identifier fops0.fops; identifier write_f; @@ struct file_operations fops = { ... .write = write_f, ... }; @ has_open depends on fops0 @ identifier fops0.fops; identifier open_f; @@ struct file_operations fops = { ... .open = open_f, ... }; // use no_llseek if we call nonseekable_open //////////////////////////////////////////// @ nonseekable1 depends on !has_llseek && has_open @ identifier fops0.fops; identifier nso ~= "nonseekable_open"; @@ struct file_operations fops = { ... .open = nso, ... +.llseek = no_llseek, /* nonseekable */ }; @ nonseekable2 depends on !has_llseek @ identifier fops0.fops; identifier open.open_f; @@ struct file_operations fops = { ... .open = open_f, ... +.llseek = no_llseek, /* open uses nonseekable */ }; // use seq_lseek for sequential files ///////////////////////////////////// @ seq depends on !has_llseek @ identifier fops0.fops; identifier sr ~= "seq_read"; @@ struct file_operations fops = { ... .read = sr, ... +.llseek = seq_lseek, /* we have seq_read */ }; // use default_llseek if there is a readdir /////////////////////////////////////////// @ fops1 depends on !has_llseek && !nonseekable1 && !nonseekable2 && !seq @ identifier fops0.fops; identifier readdir_e; @@ // any other fop is used that changes pos struct file_operations fops = { ... .readdir = readdir_e, ... +.llseek = default_llseek, /* readdir is present */ }; // use default_llseek if at least one of read/write touches f_pos ///////////////////////////////////////////////////////////////// @ fops2 depends on !fops1 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @ identifier fops0.fops; identifier read.read_f; @@ // read fops use offset struct file_operations fops = { ... .read = read_f, ... +.llseek = default_llseek, /* read accesses f_pos */ }; @ fops3 depends on !fops1 && !fops2 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @ identifier fops0.fops; identifier write.write_f; @@ // write fops use offset struct file_operations fops = { ... .write = write_f, ... + .llseek = default_llseek, /* write accesses f_pos */ }; // Use noop_llseek if neither read nor write accesses f_pos /////////////////////////////////////////////////////////// @ fops4 depends on !fops1 && !fops2 && !fops3 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @ identifier fops0.fops; identifier read_no_fpos.read_f; identifier write_no_fpos.write_f; @@ // write fops use offset struct file_operations fops = { ... .write = write_f, .read = read_f, ... +.llseek = noop_llseek, /* read and write both use no f_pos */ }; @ depends on has_write && !has_read && !fops1 && !fops2 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @ identifier fops0.fops; identifier write_no_fpos.write_f; @@ struct file_operations fops = { ... .write = write_f, ... +.llseek = noop_llseek, /* write uses no f_pos */ }; @ depends on has_read && !has_write && !fops1 && !fops2 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @ identifier fops0.fops; identifier read_no_fpos.read_f; @@ struct file_operations fops = { ... .read = read_f, ... +.llseek = noop_llseek, /* read uses no f_pos */ }; @ depends on !has_read && !has_write && !fops1 && !fops2 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @ identifier fops0.fops; @@ struct file_operations fops = { ... +.llseek = noop_llseek, /* no read or write fn */ }; ===== End semantic patch ===== Signed-off-by: Arnd Bergmann Cc: Julia Lawall Cc: Christoph Hellwig --- arch/tile/kernel/hardwall.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/hardwall.c b/arch/tile/kernel/hardwall.c index 584b965dc824..1e54a7843410 100644 --- a/arch/tile/kernel/hardwall.c +++ b/arch/tile/kernel/hardwall.c @@ -774,6 +774,7 @@ static const struct file_operations dev_hardwall_fops = { #endif .flush = hardwall_flush, .release = hardwall_release, + .llseek = noop_llseek, }; static struct cdev hardwall_dev; -- cgit v1.2.3 From bf65e440e8248f22b2eacf8d47961bb9d52260f7 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 14 Oct 2010 16:00:11 -0400 Subject: arch/tile: add Tilera's header as an open-source header This change adds one of the Tilera standard headers to the set of headers shipped with Linux. The header provides methods for programmatically interacting with the Tilera simulator. The current provides inline assembly for the _sim_syscall function, so the declaration and definition previously provided manually in Linux are no longer needed. We now use the standard sim_validate_lines_evicted() method from rather than rolling our own direct call to sim_syscall(). Signed-off-by: Chris Metcalf --- arch/tile/kernel/entry.S | 22 ---------------------- 1 file changed, 22 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/entry.S b/arch/tile/kernel/entry.S index 3d01383b1b0e..80d13f013bb2 100644 --- a/arch/tile/kernel/entry.S +++ b/arch/tile/kernel/entry.S @@ -25,28 +25,6 @@ STD_ENTRY(current_text_addr) { move r0, lr; jrp lr } STD_ENDPROC(current_text_addr) -STD_ENTRY(_sim_syscall) - /* - * Wait for r0-r9 to be ready (and lr on the off chance we - * want the syscall to locate its caller), then make a magic - * simulator syscall. - * - * We carefully stall until the registers are readable in case they - * are the target of a slow load, etc. so that tile-sim will - * definitely be able to read all of them inside the magic syscall. - * - * Technically this is wrong for r3-r9 and lr, since an interrupt - * could come in and restore the registers with a slow load right - * before executing the mtspr. We may need to modify tile-sim to - * explicitly stall for this case, but we do not yet have - * a way to implement such a stall. - */ - { and zero, lr, r9 ; and zero, r8, r7 } - { and zero, r6, r5 ; and zero, r4, r3 } - { and zero, r2, r1 ; mtspr SIM_CONTROL, r0 } - { jrp lr } - STD_ENDPROC(_sim_syscall) - /* * Implement execve(). The i386 code has a note that forking from kernel * space results in no copy on write until the execve, so we should be -- cgit v1.2.3 From a78c942df64ef4cf495fd4d8715e48501bd7f8a4 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 14 Oct 2010 16:23:03 -0400 Subject: arch/tile: parameterize system PLs to support KVM port While not a port to KVM (yet), this change modifies the kernel to be able to build either at PL1 or at PL2 with a suitable config switch. Pushing up this change avoids handling branch merge issues going forward with the KVM work. Signed-off-by: Chris Metcalf --- arch/tile/kernel/entry.S | 12 ++++---- arch/tile/kernel/head_32.S | 5 ++-- arch/tile/kernel/intvec_32.S | 67 ++++++++++++++++++++++++-------------------- arch/tile/kernel/irq.c | 16 +++++------ arch/tile/kernel/messaging.c | 2 +- arch/tile/kernel/process.c | 12 +++++++- arch/tile/kernel/regs_32.S | 2 +- arch/tile/kernel/setup.c | 28 ++++++++++-------- arch/tile/kernel/smp.c | 2 +- arch/tile/kernel/traps.c | 2 +- 10 files changed, 85 insertions(+), 63 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/entry.S b/arch/tile/kernel/entry.S index 80d13f013bb2..fd8dc42abdcb 100644 --- a/arch/tile/kernel/entry.S +++ b/arch/tile/kernel/entry.S @@ -15,7 +15,9 @@ #include #include #include +#include #include +#include #ifdef __tilegx__ #define bnzt bnezt @@ -80,7 +82,7 @@ STD_ENTRY(KBacktraceIterator_init_current) STD_ENTRY(cpu_idle_on_new_stack) { move sp, r1 - mtspr SYSTEM_SAVE_1_0, r2 + mtspr SPR_SYSTEM_SAVE_K_0, r2 } jal free_thread_info j cpu_idle @@ -102,15 +104,15 @@ STD_ENTRY(smp_nap) STD_ENTRY(_cpu_idle) { lnk r0 - movei r1, 1 + movei r1, KERNEL_PL } { addli r0, r0, _cpu_idle_nap - . mtspr INTERRUPT_CRITICAL_SECTION, r1 } - IRQ_ENABLE(r2, r3) /* unmask, but still with ICS set */ - mtspr EX_CONTEXT_1_1, r1 /* PL1, ICS clear */ - mtspr EX_CONTEXT_1_0, r0 + IRQ_ENABLE(r2, r3) /* unmask, but still with ICS set */ + mtspr SPR_EX_CONTEXT_K_1, r1 /* Kernel PL, ICS clear */ + mtspr SPR_EX_CONTEXT_K_0, r0 iret .global _cpu_idle_nap _cpu_idle_nap: diff --git a/arch/tile/kernel/head_32.S b/arch/tile/kernel/head_32.S index 2b4f6c091701..90e7c4435693 100644 --- a/arch/tile/kernel/head_32.S +++ b/arch/tile/kernel/head_32.S @@ -23,6 +23,7 @@ #include #include #include +#include /* * This module contains the entry code for kernel images. It performs the @@ -76,7 +77,7 @@ ENTRY(_start) } 1: - /* Get our processor number and save it away in SAVE_1_0. */ + /* Get our processor number and save it away in SAVE_K_0. */ jal hv_inquire_topology mulll_uu r4, r1, r2 /* r1 == y, r2 == width */ add r4, r4, r0 /* r0 == x, so r4 == cpu == y*width + x */ @@ -124,7 +125,7 @@ ENTRY(_start) lw r0, r0 lw sp, r1 or r4, sp, r4 - mtspr SYSTEM_SAVE_1_0, r4 /* save ksp0 + cpu */ + mtspr SPR_SYSTEM_SAVE_K_0, r4 /* save ksp0 + cpu */ addi sp, sp, -STACK_TOP_DELTA { move lr, zero /* stop backtraces in the called function */ diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S index f87c5c044d6b..206dc7e1fe36 100644 --- a/arch/tile/kernel/intvec_32.S +++ b/arch/tile/kernel/intvec_32.S @@ -32,8 +32,8 @@ # error "No support for kernel preemption currently" #endif -#if INT_INTCTRL_1 < 32 || INT_INTCTRL_1 >= 48 -# error INT_INTCTRL_1 coded to set high interrupt mask +#if INT_INTCTRL_K < 32 || INT_INTCTRL_K >= 48 +# error INT_INTCTRL_K coded to set high interrupt mask #endif #define PTREGS_PTR(reg, ptreg) addli reg, sp, C_ABI_SAVE_AREA_SIZE + (ptreg) @@ -132,8 +132,8 @@ intvec_\vecname: /* Temporarily save a register so we have somewhere to work. */ - mtspr SYSTEM_SAVE_1_1, r0 - mfspr r0, EX_CONTEXT_1_1 + mtspr SPR_SYSTEM_SAVE_K_1, r0 + mfspr r0, SPR_EX_CONTEXT_K_1 /* The cmpxchg code clears sp to force us to reset it here on fault. */ { @@ -167,18 +167,18 @@ intvec_\vecname: * The page_fault handler may be downcalled directly by the * hypervisor even when Linux is running and has ICS set. * - * In this case the contents of EX_CONTEXT_1_1 reflect the + * In this case the contents of EX_CONTEXT_K_1 reflect the * previous fault and can't be relied on to choose whether or * not to reinitialize the stack pointer. So we add a test - * to see whether SYSTEM_SAVE_1_2 has the high bit set, + * to see whether SYSTEM_SAVE_K_2 has the high bit set, * and if so we don't reinitialize sp, since we must be coming * from Linux. (In fact the precise case is !(val & ~1), * but any Linux PC has to have the high bit set.) * - * Note that the hypervisor *always* sets SYSTEM_SAVE_1_2 for + * Note that the hypervisor *always* sets SYSTEM_SAVE_K_2 for * any path that turns into a downcall to one of our TLB handlers. */ - mfspr r0, SYSTEM_SAVE_1_2 + mfspr r0, SPR_SYSTEM_SAVE_K_2 { blz r0, 0f /* high bit in S_S_1_2 is for a PC to use */ move r0, sp @@ -187,12 +187,12 @@ intvec_\vecname: 2: /* - * SYSTEM_SAVE_1_0 holds the cpu number in the low bits, and + * SYSTEM_SAVE_K_0 holds the cpu number in the low bits, and * the current stack top in the higher bits. So we recover * our stack top by just masking off the low bits, then * point sp at the top aligned address on the actual stack page. */ - mfspr r0, SYSTEM_SAVE_1_0 + mfspr r0, SPR_SYSTEM_SAVE_K_0 mm r0, r0, zero, LOG2_THREAD_SIZE, 31 0: @@ -254,7 +254,7 @@ intvec_\vecname: sw sp, r3 addli sp, sp, PTREGS_OFFSET_PC - PTREGS_OFFSET_REG(3) } - mfspr r0, EX_CONTEXT_1_0 + mfspr r0, SPR_EX_CONTEXT_K_0 .ifc \processing,handle_syscall /* * Bump the saved PC by one bundle so that when we return, we won't @@ -267,7 +267,7 @@ intvec_\vecname: sw sp, r0 addli sp, sp, PTREGS_OFFSET_EX1 - PTREGS_OFFSET_PC } - mfspr r0, EX_CONTEXT_1_1 + mfspr r0, SPR_EX_CONTEXT_K_1 { sw sp, r0 addi sp, sp, PTREGS_OFFSET_FAULTNUM - PTREGS_OFFSET_EX1 @@ -289,7 +289,7 @@ intvec_\vecname: .endif addli sp, sp, PTREGS_OFFSET_REG(0) - PTREGS_OFFSET_FAULTNUM } - mfspr r0, SYSTEM_SAVE_1_1 /* Original r0 */ + mfspr r0, SPR_SYSTEM_SAVE_K_1 /* Original r0 */ { sw sp, r0 addi sp, sp, -PTREGS_OFFSET_REG(0) - 4 @@ -309,12 +309,12 @@ intvec_\vecname: * See discussion below at "finish_interrupt_save". */ .ifc \c_routine, do_page_fault - mfspr r2, SYSTEM_SAVE_1_3 /* address of page fault */ - mfspr r3, SYSTEM_SAVE_1_2 /* info about page fault */ + mfspr r2, SPR_SYSTEM_SAVE_K_3 /* address of page fault */ + mfspr r3, SPR_SYSTEM_SAVE_K_2 /* info about page fault */ .else .ifc \vecnum, INT_DOUBLE_FAULT { - mfspr r2, SYSTEM_SAVE_1_2 /* double fault info from HV */ + mfspr r2, SPR_SYSTEM_SAVE_K_2 /* double fault info from HV */ movei r3, 0 } .else @@ -467,7 +467,7 @@ intvec_\vecname: /* Load tp with our per-cpu offset. */ #ifdef CONFIG_SMP { - mfspr r20, SYSTEM_SAVE_1_0 + mfspr r20, SPR_SYSTEM_SAVE_K_0 moveli r21, lo16(__per_cpu_offset) } { @@ -487,7 +487,7 @@ intvec_\vecname: * We load flags in r32 here so we can jump to .Lrestore_regs * directly after do_page_fault_ics() if necessary. */ - mfspr r32, EX_CONTEXT_1_1 + mfspr r32, SPR_EX_CONTEXT_K_1 { andi r32, r32, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */ PTREGS_PTR(r21, PTREGS_OFFSET_FLAGS) @@ -957,11 +957,11 @@ STD_ENTRY(interrupt_return) pop_reg_zero r21, r3, sp, PTREGS_OFFSET_EX1 - PTREGS_OFFSET_PC pop_reg_zero lr, r4, sp, PTREGS_OFFSET_REG(52) - PTREGS_OFFSET_EX1 { - mtspr EX_CONTEXT_1_0, r21 + mtspr SPR_EX_CONTEXT_K_0, r21 move r5, zero } { - mtspr EX_CONTEXT_1_1, lr + mtspr SPR_EX_CONTEXT_K_1, lr andi lr, lr, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */ } @@ -1199,7 +1199,7 @@ STD_ENTRY(interrupt_return) STD_ENDPROC(interrupt_return) /* - * This interrupt variant clears the INT_INTCTRL_1 interrupt mask bit + * This interrupt variant clears the INT_INTCTRL_K interrupt mask bit * before returning, so we can properly get more downcalls. */ .pushsection .text.handle_interrupt_downcall,"ax" @@ -1208,11 +1208,11 @@ handle_interrupt_downcall: check_single_stepping normal, .Ldispatch_downcall .Ldispatch_downcall: - /* Clear INTCTRL_1 from the set of interrupts we ever enable. */ + /* Clear INTCTRL_K from the set of interrupts we ever enable. */ GET_INTERRUPTS_ENABLED_MASK_PTR(r30) { addi r30, r30, 4 - movei r31, INT_MASK(INT_INTCTRL_1) + movei r31, INT_MASK(INT_INTCTRL_K) } { lw r20, r30 @@ -1227,7 +1227,7 @@ handle_interrupt_downcall: } FEEDBACK_REENTER(handle_interrupt_downcall) - /* Allow INTCTRL_1 to be enabled next time we enable interrupts. */ + /* Allow INTCTRL_K to be enabled next time we enable interrupts. */ lw r20, r30 or r20, r20, r31 sw r30, r20 @@ -1509,7 +1509,7 @@ handle_ill: /* Various stub interrupt handlers and syscall handlers */ STD_ENTRY_LOCAL(_kernel_double_fault) - mfspr r1, EX_CONTEXT_1_0 + mfspr r1, SPR_EX_CONTEXT_K_0 move r2, lr move r3, sp move r4, r52 @@ -1518,7 +1518,7 @@ STD_ENTRY_LOCAL(_kernel_double_fault) STD_ENDPROC(_kernel_double_fault) STD_ENTRY_LOCAL(bad_intr) - mfspr r2, EX_CONTEXT_1_0 + mfspr r2, SPR_EX_CONTEXT_K_0 panic "Unhandled interrupt %#x: PC %#lx" STD_ENDPROC(bad_intr) @@ -1560,7 +1560,7 @@ STD_ENTRY(_sys_clone) * a page fault which would assume the stack was valid, it does * save/restore the stack pointer and zero it out to make sure it gets reset. * Since we always keep interrupts disabled, the hypervisor won't - * clobber our EX_CONTEXT_1_x registers, so we don't save/restore them + * clobber our EX_CONTEXT_K_x registers, so we don't save/restore them * (other than to advance the PC on return). * * We have to manually validate the user vs kernel address range @@ -1766,7 +1766,7 @@ ENTRY(sys_cmpxchg) /* Do slow mtspr here so the following "mf" waits less. */ { move sp, r27 - mtspr EX_CONTEXT_1_0, r28 + mtspr SPR_EX_CONTEXT_K_0, r28 } mf @@ -1785,7 +1785,7 @@ ENTRY(sys_cmpxchg) } { move sp, r27 - mtspr EX_CONTEXT_1_0, r28 + mtspr SPR_EX_CONTEXT_K_0, r28 } iret @@ -1813,7 +1813,7 @@ ENTRY(sys_cmpxchg) #endif /* Issue the slow SPR here while the tns result is in flight. */ - mfspr r28, EX_CONTEXT_1_0 + mfspr r28, SPR_EX_CONTEXT_K_0 { addi r28, r28, 8 /* return to the instruction after the swint1 */ @@ -1901,7 +1901,7 @@ ENTRY(sys_cmpxchg) .Lcmpxchg64_mismatch: { move sp, r27 - mtspr EX_CONTEXT_1_0, r28 + mtspr SPR_EX_CONTEXT_K_0, r28 } mf { @@ -1982,8 +1982,13 @@ int_unalign: int_hand INT_PERF_COUNT, PERF_COUNT, \ op_handle_perf_interrupt, handle_nmi int_hand INT_INTCTRL_3, INTCTRL_3, bad_intr +#if CONFIG_KERNEL_PL == 2 + dc_dispatch INT_INTCTRL_2, INTCTRL_2 + int_hand INT_INTCTRL_1, INTCTRL_1, bad_intr +#else int_hand INT_INTCTRL_2, INTCTRL_2, bad_intr dc_dispatch INT_INTCTRL_1, INTCTRL_1 +#endif int_hand INT_INTCTRL_0, INTCTRL_0, bad_intr int_hand INT_MESSAGE_RCV_DWNCL, MESSAGE_RCV_DWNCL, \ hv_message_intr, handle_interrupt_downcall diff --git a/arch/tile/kernel/irq.c b/arch/tile/kernel/irq.c index 596c60086930..394d554f21aa 100644 --- a/arch/tile/kernel/irq.c +++ b/arch/tile/kernel/irq.c @@ -61,9 +61,9 @@ static DEFINE_SPINLOCK(available_irqs_lock); #if CHIP_HAS_IPI() /* Use SPRs to manipulate device interrupts. */ -#define mask_irqs(irq_mask) __insn_mtspr(SPR_IPI_MASK_SET_1, irq_mask) -#define unmask_irqs(irq_mask) __insn_mtspr(SPR_IPI_MASK_RESET_1, irq_mask) -#define clear_irqs(irq_mask) __insn_mtspr(SPR_IPI_EVENT_RESET_1, irq_mask) +#define mask_irqs(irq_mask) __insn_mtspr(SPR_IPI_MASK_SET_K, irq_mask) +#define unmask_irqs(irq_mask) __insn_mtspr(SPR_IPI_MASK_RESET_K, irq_mask) +#define clear_irqs(irq_mask) __insn_mtspr(SPR_IPI_EVENT_RESET_K, irq_mask) #else /* Use HV to manipulate device interrupts. */ #define mask_irqs(irq_mask) hv_disable_intr(irq_mask) @@ -89,16 +89,16 @@ void tile_dev_intr(struct pt_regs *regs, int intnum) * masked by a previous interrupt. Then, mask out the ones * we're going to handle. */ - unsigned long masked = __insn_mfspr(SPR_IPI_MASK_1); - original_irqs = __insn_mfspr(SPR_IPI_EVENT_1) & ~masked; - __insn_mtspr(SPR_IPI_MASK_SET_1, original_irqs); + unsigned long masked = __insn_mfspr(SPR_IPI_MASK_K); + original_irqs = __insn_mfspr(SPR_IPI_EVENT_K) & ~masked; + __insn_mtspr(SPR_IPI_MASK_SET_K, original_irqs); #else /* * Hypervisor performs the equivalent of the Gx code above and * then puts the pending interrupt mask into a system save reg * for us to find. */ - original_irqs = __insn_mfspr(SPR_SYSTEM_SAVE_1_3); + original_irqs = __insn_mfspr(SPR_SYSTEM_SAVE_K_3); #endif remaining_irqs = original_irqs; @@ -225,7 +225,7 @@ void __cpuinit setup_irq_regs(void) /* Enable interrupt delivery. */ unmask_irqs(~0UL); #if CHIP_HAS_IPI() - raw_local_irq_unmask(INT_IPI_1); + raw_local_irq_unmask(INT_IPI_K); #endif } diff --git a/arch/tile/kernel/messaging.c b/arch/tile/kernel/messaging.c index 6d23ed271d10..997e3933f726 100644 --- a/arch/tile/kernel/messaging.c +++ b/arch/tile/kernel/messaging.c @@ -34,7 +34,7 @@ void __cpuinit init_messaging(void) panic("hv_register_message_state: error %d", rc); /* Make sure downcall interrupts will be enabled. */ - raw_local_irq_unmask(INT_INTCTRL_1); + raw_local_irq_unmask(INT_INTCTRL_K); } void hv_message_intr(struct pt_regs *regs, int intnum) diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index 221f12bd27fa..8430f45daea6 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -305,15 +305,25 @@ int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs) /* Allow user processes to access the DMA SPRs */ void grant_dma_mpls(void) { +#if CONFIG_KERNEL_PL == 2 + __insn_mtspr(SPR_MPL_DMA_CPL_SET_1, 1); + __insn_mtspr(SPR_MPL_DMA_NOTIFY_SET_1, 1); +#else __insn_mtspr(SPR_MPL_DMA_CPL_SET_0, 1); __insn_mtspr(SPR_MPL_DMA_NOTIFY_SET_0, 1); +#endif } /* Forbid user processes from accessing the DMA SPRs */ void restrict_dma_mpls(void) { +#if CONFIG_KERNEL_PL == 2 + __insn_mtspr(SPR_MPL_DMA_CPL_SET_2, 1); + __insn_mtspr(SPR_MPL_DMA_NOTIFY_SET_2, 1); +#else __insn_mtspr(SPR_MPL_DMA_CPL_SET_1, 1); __insn_mtspr(SPR_MPL_DMA_NOTIFY_SET_1, 1); +#endif } /* Pause the DMA engine, then save off its state registers. */ @@ -524,7 +534,7 @@ struct task_struct *__sched _switch_to(struct task_struct *prev, * Switch kernel SP, PC, and callee-saved registers. * In the context of the new task, return the old task pointer * (i.e. the task that actually called __switch_to). - * Pass the value to use for SYSTEM_SAVE_1_0 when we reset our sp. + * Pass the value to use for SYSTEM_SAVE_K_0 when we reset our sp. */ return __switch_to(prev, next, next_current_ksp0(next)); } diff --git a/arch/tile/kernel/regs_32.S b/arch/tile/kernel/regs_32.S index e88d6e122783..caa13101c264 100644 --- a/arch/tile/kernel/regs_32.S +++ b/arch/tile/kernel/regs_32.S @@ -85,7 +85,7 @@ STD_ENTRY_SECTION(__switch_to, .sched.text) { /* Update sp and ksp0 simultaneously to avoid backtracer warnings. */ move sp, r13 - mtspr SYSTEM_SAVE_1_0, r2 + mtspr SPR_SYSTEM_SAVE_K_0, r2 } FOR_EACH_CALLEE_SAVED_REG(LOAD_REG) .L__switch_to_pc: diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index c1a697e68546..6444a2bbe1a4 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -187,11 +187,11 @@ early_param("vmalloc", parse_vmalloc); #ifdef CONFIG_HIGHMEM /* - * Determine for each controller where its lowmem is mapped and how - * much of it is mapped there. On controller zero, the first few - * megabytes are mapped at 0xfd000000 as code, so in principle we - * could start our data mappings higher up, but for now we don't - * bother, to avoid additional confusion. + * Determine for each controller where its lowmem is mapped and how much of + * it is mapped there. On controller zero, the first few megabytes are + * already mapped in as code at MEM_SV_INTRPT, so in principle we could + * start our data mappings higher up, but for now we don't bother, to avoid + * additional confusion. * * One question is whether, on systems with more than 768 Mb and * controllers of different sizes, to map in a proportionate amount of @@ -876,6 +876,9 @@ void __cpuinit setup_cpu(int boot) #if CHIP_HAS_SN_PROC() raw_local_irq_unmask(INT_SNITLB_MISS); #endif +#ifdef __tilegx__ + raw_local_irq_unmask(INT_SINGLE_STEP_K); +#endif /* * Allow user access to many generic SPRs, like the cycle @@ -893,11 +896,12 @@ void __cpuinit setup_cpu(int boot) #endif /* - * Set the MPL for interrupt control 0 to user level. - * This includes access to the SYSTEM_SAVE and EX_CONTEXT SPRs, - * as well as the PL 0 interrupt mask. + * Set the MPL for interrupt control 0 & 1 to the corresponding + * values. This includes access to the SYSTEM_SAVE and EX_CONTEXT + * SPRs, as well as the interrupt mask. */ __insn_mtspr(SPR_MPL_INTCTRL_0_SET_0, 1); + __insn_mtspr(SPR_MPL_INTCTRL_1_SET_1, 1); /* Initialize IRQ support for this cpu. */ setup_irq_regs(); @@ -1033,7 +1037,7 @@ static void __init validate_va(void) * In addition, make sure we CAN'T use the end of memory, since * we use the last chunk of each pgd for the pgd_list. */ - int i, fc_fd_ok = 0; + int i, user_kernel_ok = 0; unsigned long max_va = 0; unsigned long list_va = ((PGD_LIST_OFFSET / sizeof(pgd_t)) << PGDIR_SHIFT); @@ -1044,13 +1048,13 @@ static void __init validate_va(void) break; if (range.start <= MEM_USER_INTRPT && range.start + range.size >= MEM_HV_INTRPT) - fc_fd_ok = 1; + user_kernel_ok = 1; if (range.start == 0) max_va = range.size; BUG_ON(range.start + range.size > list_va); } - if (!fc_fd_ok) - early_panic("Hypervisor not configured for VAs 0xfc/0xfd\n"); + if (!user_kernel_ok) + early_panic("Hypervisor not configured for user/kernel VAs\n"); if (max_va == 0) early_panic("Hypervisor not configured for low VAs\n"); if (max_va < KERNEL_HIGH_VADDR) diff --git a/arch/tile/kernel/smp.c b/arch/tile/kernel/smp.c index 1cb5ec79de04..75255d90aff3 100644 --- a/arch/tile/kernel/smp.c +++ b/arch/tile/kernel/smp.c @@ -212,7 +212,7 @@ void __init ipi_init(void) tile.x = cpu_x(cpu); tile.y = cpu_y(cpu); - if (hv_get_ipi_pte(tile, 1, &pte) != 0) + if (hv_get_ipi_pte(tile, KERNEL_PL, &pte) != 0) panic("Failed to initialize IPI for cpu %d\n", cpu); offset = hv_pte_get_pfn(pte) << PAGE_SHIFT; diff --git a/arch/tile/kernel/traps.c b/arch/tile/kernel/traps.c index 0f362dc2c57f..7826a8b17997 100644 --- a/arch/tile/kernel/traps.c +++ b/arch/tile/kernel/traps.c @@ -278,7 +278,7 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num, case INT_DOUBLE_FAULT: /* * For double fault, "reason" is actually passed as - * SYSTEM_SAVE_1_2, the hypervisor's double-fault info, so + * SYSTEM_SAVE_K_2, the hypervisor's double-fault info, so * we can provide the original fault number rather than * the uninteresting "INT_DOUBLE_FAULT" so the user can * learn what actually struck while PL0 ICS was set. -- cgit v1.2.3 From 233325b94999d4bb8df227bb39904a57509e4995 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 14 Oct 2010 16:32:41 -0400 Subject: arch/tile: enable single-step support for TILE-Gx This is not quite the complete support, since we're not yet shipping intvec_64.S, but it is the support relevant to the set of files we are currently shipping, and makes it easier to track changes between our internal sources and our public GIT repository. Signed-off-by: Chris Metcalf --- arch/tile/kernel/intvec_32.S | 7 +++- arch/tile/kernel/single_step.c | 73 +++++++++++++++++++++++++++++++++++++++++- arch/tile/kernel/traps.c | 2 +- 3 files changed, 79 insertions(+), 3 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S index 206dc7e1fe36..f5821626247f 100644 --- a/arch/tile/kernel/intvec_32.S +++ b/arch/tile/kernel/intvec_32.S @@ -1472,7 +1472,12 @@ handle_ill: lw r26, r24 sw r28, r26 - /* Clear TIF_SINGLESTEP */ + /* + * Clear TIF_SINGLESTEP to prevent recursion if we execute an ill. + * The normal non-arch flow redundantly clears TIF_SINGLESTEP, but we + * need to clear it here and can't really impose on all other arches. + * So what's another write between friends? + */ GET_THREAD_INFO(r0) addi r1, r0, THREAD_INFO_FLAGS_OFFSET diff --git a/arch/tile/kernel/single_step.c b/arch/tile/kernel/single_step.c index 5ec4b9c651f2..1eb3b39e36c7 100644 --- a/arch/tile/kernel/single_step.c +++ b/arch/tile/kernel/single_step.c @@ -15,7 +15,7 @@ * Derived from iLib's single-stepping code. */ -#ifndef __tilegx__ /* No support for single-step yet. */ +#ifndef __tilegx__ /* Hardware support for single step unavailable. */ /* These functions are only used on the TILE platform */ #include @@ -660,4 +660,75 @@ void single_step_once(struct pt_regs *regs) regs->pc += 8; } +#else +#include +#include +#include + +static DEFINE_PER_CPU(unsigned long, ss_saved_pc); + + +/* + * Called directly on the occasion of an interrupt. + * + * If the process doesn't have single step set, then we use this as an + * opportunity to turn single step off. + * + * It has been mentioned that we could conditionally turn off single stepping + * on each entry into the kernel and rely on single_step_once to turn it + * on for the processes that matter (as we already do), but this + * implementation is somewhat more efficient in that we muck with registers + * once on a bum interrupt rather than on every entry into the kernel. + * + * If SINGLE_STEP_CONTROL_K has CANCELED set, then an interrupt occurred, + * so we have to run through this process again before we can say that an + * instruction has executed. + * + * swint will set CANCELED, but it's a legitimate instruction. Fortunately + * it changes the PC. If it hasn't changed, then we know that the interrupt + * wasn't generated by swint and we'll need to run this process again before + * we can say an instruction has executed. + * + * If either CANCELED == 0 or the PC's changed, we send out SIGTRAPs and get + * on with our lives. + */ + +void gx_singlestep_handle(struct pt_regs *regs, int fault_num) +{ + unsigned long *ss_pc = &__get_cpu_var(ss_saved_pc); + struct thread_info *info = (void *)current_thread_info(); + int is_single_step = test_ti_thread_flag(info, TIF_SINGLESTEP); + unsigned long control = __insn_mfspr(SPR_SINGLE_STEP_CONTROL_K); + + if (is_single_step == 0) { + __insn_mtspr(SPR_SINGLE_STEP_EN_K_K, 0); + + } else if ((*ss_pc != regs->pc) || + (!(control & SPR_SINGLE_STEP_CONTROL_1__CANCELED_MASK))) { + + ptrace_notify(SIGTRAP); + control |= SPR_SINGLE_STEP_CONTROL_1__CANCELED_MASK; + control |= SPR_SINGLE_STEP_CONTROL_1__INHIBIT_MASK; + __insn_mtspr(SPR_SINGLE_STEP_CONTROL_K, control); + } +} + + +/* + * Called from need_singlestep. Set up the control registers and the enable + * register, then return back. + */ + +void single_step_once(struct pt_regs *regs) +{ + unsigned long *ss_pc = &__get_cpu_var(ss_saved_pc); + unsigned long control = __insn_mfspr(SPR_SINGLE_STEP_CONTROL_K); + + *ss_pc = regs->pc; + control |= SPR_SINGLE_STEP_CONTROL_1__CANCELED_MASK; + control |= SPR_SINGLE_STEP_CONTROL_1__INHIBIT_MASK; + __insn_mtspr(SPR_SINGLE_STEP_CONTROL_K, control); + __insn_mtspr(SPR_SINGLE_STEP_EN_K_K, 1 << USER_PL); +} + #endif /* !__tilegx__ */ diff --git a/arch/tile/kernel/traps.c b/arch/tile/kernel/traps.c index 7826a8b17997..5474fc2e77e8 100644 --- a/arch/tile/kernel/traps.c +++ b/arch/tile/kernel/traps.c @@ -260,7 +260,7 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num, address = regs->pc; break; case INT_UNALIGN_DATA: -#ifndef __tilegx__ /* FIXME: GX: no single-step yet */ +#ifndef __tilegx__ /* Emulated support for single step debugging */ if (unaligned_fixup >= 0) { struct single_step_state *state = current_thread_info()->step_state; -- cgit v1.2.3 From c569cac8b69397d8bc80f95bc6edf13ed902e28b Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 14 Oct 2010 16:46:22 -0400 Subject: arch/tile: support new info op generated by compiler This just syncs the backtracing support in the kernel to the upstream backtrace library. Signed-off-by: Chris Metcalf --- arch/tile/kernel/backtrace.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/backtrace.c b/arch/tile/kernel/backtrace.c index d3c41c1ff6bd..55a6a74974b4 100644 --- a/arch/tile/kernel/backtrace.c +++ b/arch/tile/kernel/backtrace.c @@ -369,6 +369,10 @@ static void find_caller_pc_and_caller_sp(CallerLocation *location, /* Weird; reserved value, ignore it. */ continue; } + if (info_operand & ENTRY_POINT_INFO_OP) { + /* This info op is ignored by the backtracer. */ + continue; + } /* Skip info ops which are not in the * "one_ago" mode we want right now. -- cgit v1.2.3 From ce7f2a39675ea5e8e27b3f3bb03e5041c7d10412 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 14 Oct 2010 16:48:00 -0400 Subject: arch/tile: make ptrace() work properly for TILE-Gx COMPAT mode Previously, we tried to pass 64-bit arguments through the "COMPAT" mode 32-bit syscall API, which turned out not to work well. Now we just use straight 32-bit arguments in COMPAT mode, thus requiring individual registers to be read/written with two syscalls. Of course this is uncommon, since usually all the registers are read or written at once. The restructuring applies to all the tile platforms, but is plausibly better than the original code in any case. Signed-off-by: Chris Metcalf --- arch/tile/kernel/compat.c | 4 +-- arch/tile/kernel/ptrace.c | 78 ++++++++++++++++++++++------------------------- 2 files changed, 38 insertions(+), 44 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/compat.c b/arch/tile/kernel/compat.c index 5f24e54bf3c5..77739cdd9462 100644 --- a/arch/tile/kernel/compat.c +++ b/arch/tile/kernel/compat.c @@ -154,8 +154,8 @@ long tile_compat_sys_msgrcv(int msqid, #define compat_sys_fstat64 sys_newfstat #define compat_sys_fstatat64 sys_newfstatat -/* Pass full 64-bit values through ptrace. */ -#define compat_sys_ptrace tile_compat_sys_ptrace +/* The native sys_ptrace dynamically handles compat binaries. */ +#define compat_sys_ptrace sys_ptrace /* Call the trampolines to manage pt_regs where necessary. */ #define compat_sys_execve _compat_sys_execve diff --git a/arch/tile/kernel/ptrace.c b/arch/tile/kernel/ptrace.c index 7161bd03d2fd..5b20c2874d51 100644 --- a/arch/tile/kernel/ptrace.c +++ b/arch/tile/kernel/ptrace.c @@ -31,25 +31,6 @@ void user_disable_single_step(struct task_struct *child) clear_tsk_thread_flag(child, TIF_SINGLESTEP); } -/* - * This routine will put a word on the process's privileged stack. - */ -static void putreg(struct task_struct *task, - unsigned long addr, unsigned long value) -{ - unsigned int regno = addr / sizeof(unsigned long); - struct pt_regs *childregs = task_pt_regs(task); - childregs->regs[regno] = value; - childregs->flags |= PT_FLAGS_RESTORE_REGS; -} - -static unsigned long getreg(struct task_struct *task, unsigned long addr) -{ - unsigned int regno = addr / sizeof(unsigned long); - struct pt_regs *childregs = task_pt_regs(task); - return childregs->regs[regno]; -} - /* * Called by kernel/ptrace.c when detaching.. */ @@ -66,59 +47,72 @@ void ptrace_disable(struct task_struct *child) long arch_ptrace(struct task_struct *child, long request, long addr, long data) { - unsigned long __user *datap; + unsigned long __user *datap = (long __user __force *)data; unsigned long tmp; int i; long ret = -EIO; - -#ifdef CONFIG_COMPAT - if (task_thread_info(current)->status & TS_COMPAT) - data = (u32)data; - if (task_thread_info(child)->status & TS_COMPAT) - addr = (u32)addr; -#endif - datap = (unsigned long __user __force *)data; + unsigned long *childregs; + char *childreg; switch (request) { case PTRACE_PEEKUSR: /* Read register from pt_regs. */ - if (addr & (sizeof(data)-1)) - break; if (addr < 0 || addr >= PTREGS_SIZE) break; - tmp = getreg(child, addr); /* Read register */ - ret = put_user(tmp, datap); + childreg = (char *)task_pt_regs(child) + addr; +#ifdef CONFIG_COMPAT + if (is_compat_task()) { + if (addr & (sizeof(compat_long_t)-1)) + break; + ret = put_user(*(compat_long_t *)childreg, + (compat_long_t __user *)datap); + } else +#endif + { + if (addr & (sizeof(long)-1)) + break; + ret = put_user(*(long *)childreg, datap); + } break; case PTRACE_POKEUSR: /* Write register in pt_regs. */ - if (addr & (sizeof(data)-1)) - break; if (addr < 0 || addr >= PTREGS_SIZE) break; - putreg(child, addr, data); /* Write register */ + childreg = (char *)task_pt_regs(child) + addr; +#ifdef CONFIG_COMPAT + if (is_compat_task()) { + if (addr & (sizeof(compat_long_t)-1)) + break; + *(compat_long_t *)childreg = data; + } else +#endif + { + if (addr & (sizeof(long)-1)) + break; + *(long *)childreg = data; + } ret = 0; break; case PTRACE_GETREGS: /* Get all registers from the child. */ if (!access_ok(VERIFY_WRITE, datap, PTREGS_SIZE)) break; - for (i = 0; i < PTREGS_SIZE; i += sizeof(long)) { - ret = __put_user(getreg(child, i), datap); + childregs = (long *)task_pt_regs(child); + for (i = 0; i < sizeof(struct pt_regs)/sizeof(long); ++i) { + ret = __put_user(childregs[i], &datap[i]); if (ret != 0) break; - datap++; } break; case PTRACE_SETREGS: /* Set all registers in the child. */ if (!access_ok(VERIFY_READ, datap, PTREGS_SIZE)) break; - for (i = 0; i < PTREGS_SIZE; i += sizeof(long)) { - ret = __get_user(tmp, datap); + childregs = (long *)task_pt_regs(child); + for (i = 0; i < sizeof(struct pt_regs)/sizeof(long); ++i) { + ret = __get_user(childregs[i], &datap[i]); if (ret != 0) break; - putreg(child, i, tmp); - datap++; } break; -- cgit v1.2.3 From e18105c128734b1671739ad4d85e216ebec28c61 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 14 Oct 2010 16:50:26 -0400 Subject: arch/tile: convert a BUG_ON to BUILD_BUG_ON Inspired by Akinobu Mita's cleanup work. Signed-off-by: Chris Metcalf --- arch/tile/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index 6444a2bbe1a4..f3a50e74f9a4 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -311,7 +311,7 @@ static void __init setup_memory(void) #endif /* We are using a char to hold the cpu_2_node[] mapping */ - BUG_ON(MAX_NUMNODES > 127); + BUILD_BUG_ON(MAX_NUMNODES > 127); /* Discover the ranges of memory available to us */ for (i = 0; ; ++i) { -- cgit v1.2.3 From 9b05a69e0534ec70bc94921936ffa05b330507cb Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 27 Oct 2010 15:33:47 -0700 Subject: ptrace: change signature of arch_ptrace() Fix up the arguments to arch_ptrace() to take account of the fact that @addr and @data are now unsigned long rather than long as of a preceding patch in this series. Signed-off-by: Namhyung Kim Cc: Acked-by: Roland McGrath Acked-by: David Howells Acked-by: Geert Uytterhoeven Acked-by: David S. Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/tile/kernel/ptrace.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/ptrace.c b/arch/tile/kernel/ptrace.c index 5b20c2874d51..f634729211d1 100644 --- a/arch/tile/kernel/ptrace.c +++ b/arch/tile/kernel/ptrace.c @@ -45,7 +45,8 @@ void ptrace_disable(struct task_struct *child) clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { unsigned long __user *datap = (long __user __force *)data; unsigned long tmp; @@ -98,7 +99,8 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) if (!access_ok(VERIFY_WRITE, datap, PTREGS_SIZE)) break; childregs = (long *)task_pt_regs(child); - for (i = 0; i < sizeof(struct pt_regs)/sizeof(long); ++i) { + for (i = 0; i < sizeof(struct pt_regs)/sizeof(unsigned long); + ++i) { ret = __put_user(childregs[i], &datap[i]); if (ret != 0) break; @@ -109,7 +111,8 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) if (!access_ok(VERIFY_READ, datap, PTREGS_SIZE)) break; childregs = (long *)task_pt_regs(child); - for (i = 0; i < sizeof(struct pt_regs)/sizeof(long); ++i) { + for (i = 0; i < sizeof(struct pt_regs)/sizeof(unsigned long); + ++i) { ret = __get_user(childregs[i], &datap[i]); if (ret != 0) break; -- cgit v1.2.3 From 8c0acac3676103113a2e259291a07c073ac07879 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 27 Oct 2010 15:34:04 -0700 Subject: ptrace: cleanup arch_ptrace() on tile Remove checking @addr less than 0 because @addr is now unsigned. Signed-off-by: Namhyung Kim Acked-by: Chris Metcalf Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/tile/kernel/ptrace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/ptrace.c b/arch/tile/kernel/ptrace.c index f634729211d1..9cd29884c09f 100644 --- a/arch/tile/kernel/ptrace.c +++ b/arch/tile/kernel/ptrace.c @@ -58,7 +58,7 @@ long arch_ptrace(struct task_struct *child, long request, switch (request) { case PTRACE_PEEKUSR: /* Read register from pt_regs. */ - if (addr < 0 || addr >= PTREGS_SIZE) + if (addr >= PTREGS_SIZE) break; childreg = (char *)task_pt_regs(child) + addr; #ifdef CONFIG_COMPAT @@ -77,7 +77,7 @@ long arch_ptrace(struct task_struct *child, long request, break; case PTRACE_POKEUSR: /* Write register in pt_regs. */ - if (addr < 0 || addr >= PTREGS_SIZE) + if (addr >= PTREGS_SIZE) break; childreg = (char *)task_pt_regs(child) + addr; #ifdef CONFIG_COMPAT -- cgit v1.2.3 From 61d8e11e519ee7912ab59610fba1aaf08e3c1d84 Mon Sep 17 00:00:00 2001 From: Zimny Lech Date: Wed, 27 Oct 2010 15:34:53 -0700 Subject: Remove duplicate includes from many files Signed-off-by: Zimny Lech Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/tile/kernel/setup.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index f3a50e74f9a4..ae51cad12da0 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -30,8 +30,6 @@ #include #include #include -#include -#include #include #include #include -- cgit v1.2.3 From 38a6f4266989c4dae68eccb1a5cb4580a48003e4 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 1 Nov 2010 15:21:35 -0400 Subject: arch/tile: complete migration to new kmap_atomic scheme This change makes KM_TYPE_NR independent of the actual deprecated list of km_type values, which are no longer used in tile code anywhere. For now we leave it set to 8, allowing that many nested mappings, and thus reserving 32MB of address space. A few remaining places using KM_* values were cleaned up as well. Signed-off-by: Chris Metcalf --- arch/tile/kernel/machine_kexec.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/machine_kexec.c b/arch/tile/kernel/machine_kexec.c index ba7a265d6179..0d8b9e933487 100644 --- a/arch/tile/kernel/machine_kexec.c +++ b/arch/tile/kernel/machine_kexec.c @@ -182,13 +182,13 @@ static void kexec_find_and_set_command_line(struct kimage *image) if ((entry & IND_SOURCE)) { void *va = - kmap_atomic_pfn(entry >> PAGE_SHIFT, KM_USER0); + kmap_atomic_pfn(entry >> PAGE_SHIFT); r = kexec_bn2cl(va); if (r) { command_line = r; break; } - kunmap_atomic(va, KM_USER0); + kunmap_atomic(va); } } @@ -198,7 +198,7 @@ static void kexec_find_and_set_command_line(struct kimage *image) hverr = hv_set_command_line( (HV_VirtAddr) command_line, strlen(command_line)); - kunmap_atomic(command_line, KM_USER0); + kunmap_atomic(command_line); } else { pr_info("%s: no command line found; making empty\n", __func__); -- cgit v1.2.3 From 5d966115de84c22cd4df029cb00be0e51fab6c10 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 1 Nov 2010 15:24:29 -0400 Subject: arch/tile: bomb raw_local_irq_ to arch_local_irq_ This completes the tile migration to the new naming scheme for the architecture-specific irq management code. Signed-off-by: Chris Metcalf --- arch/tile/kernel/early_printk.c | 2 +- arch/tile/kernel/hardwall.c | 4 ++-- arch/tile/kernel/irq.c | 4 ++-- arch/tile/kernel/messaging.c | 2 +- arch/tile/kernel/reboot.c | 6 +++--- arch/tile/kernel/setup.c | 8 ++++---- arch/tile/kernel/smp.c | 2 +- arch/tile/kernel/time.c | 8 ++++---- 8 files changed, 18 insertions(+), 18 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/early_printk.c b/arch/tile/kernel/early_printk.c index 2c54fd43a8a0..493a0e66d916 100644 --- a/arch/tile/kernel/early_printk.c +++ b/arch/tile/kernel/early_printk.c @@ -54,7 +54,7 @@ void early_printk(const char *fmt, ...) void early_panic(const char *fmt, ...) { va_list ap; - raw_local_irq_disable_all(); + arch_local_irq_disable_all(); va_start(ap, fmt); early_printk("Kernel panic - not syncing: "); early_vprintk(fmt, ap); diff --git a/arch/tile/kernel/hardwall.c b/arch/tile/kernel/hardwall.c index 1e54a7843410..70b829a15ae5 100644 --- a/arch/tile/kernel/hardwall.c +++ b/arch/tile/kernel/hardwall.c @@ -151,12 +151,12 @@ enum direction_protect { static void enable_firewall_interrupts(void) { - raw_local_irq_unmask_now(INT_UDN_FIREWALL); + arch_local_irq_unmask_now(INT_UDN_FIREWALL); } static void disable_firewall_interrupts(void) { - raw_local_irq_mask_now(INT_UDN_FIREWALL); + arch_local_irq_mask_now(INT_UDN_FIREWALL); } /* Set up hardwall on this cpu based on the passed hardwall_info. */ diff --git a/arch/tile/kernel/irq.c b/arch/tile/kernel/irq.c index e63917687e99..128805ef8f2c 100644 --- a/arch/tile/kernel/irq.c +++ b/arch/tile/kernel/irq.c @@ -26,7 +26,7 @@ #define IS_HW_CLEARED 1 /* - * The set of interrupts we enable for raw_local_irq_enable(). + * The set of interrupts we enable for arch_local_irq_enable(). * This is initialized to have just a single interrupt that the kernel * doesn't actually use as a sentinel. During kernel init, * interrupts are added as the kernel gets prepared to support them. @@ -225,7 +225,7 @@ void __cpuinit setup_irq_regs(void) /* Enable interrupt delivery. */ unmask_irqs(~0UL); #if CHIP_HAS_IPI() - raw_local_irq_unmask(INT_IPI_K); + arch_local_irq_unmask(INT_IPI_K); #endif } diff --git a/arch/tile/kernel/messaging.c b/arch/tile/kernel/messaging.c index 997e3933f726..0858ee6b520f 100644 --- a/arch/tile/kernel/messaging.c +++ b/arch/tile/kernel/messaging.c @@ -34,7 +34,7 @@ void __cpuinit init_messaging(void) panic("hv_register_message_state: error %d", rc); /* Make sure downcall interrupts will be enabled. */ - raw_local_irq_unmask(INT_INTCTRL_K); + arch_local_irq_unmask(INT_INTCTRL_K); } void hv_message_intr(struct pt_regs *regs, int intnum) diff --git a/arch/tile/kernel/reboot.c b/arch/tile/kernel/reboot.c index acd86d20beba..baa3d905fee2 100644 --- a/arch/tile/kernel/reboot.c +++ b/arch/tile/kernel/reboot.c @@ -27,7 +27,7 @@ void machine_halt(void) { warn_early_printk(); - raw_local_irq_disable_all(); + arch_local_irq_disable_all(); smp_send_stop(); hv_halt(); } @@ -35,14 +35,14 @@ void machine_halt(void) void machine_power_off(void) { warn_early_printk(); - raw_local_irq_disable_all(); + arch_local_irq_disable_all(); smp_send_stop(); hv_power_off(); } void machine_restart(char *cmd) { - raw_local_irq_disable_all(); + arch_local_irq_disable_all(); smp_send_stop(); hv_restart((HV_VirtAddr) "vmlinux", (HV_VirtAddr) cmd); } diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index ae51cad12da0..fb0b3cbeae14 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -868,14 +868,14 @@ void __cpuinit setup_cpu(int boot) /* Allow asynchronous TLB interrupts. */ #if CHIP_HAS_TILE_DMA() - raw_local_irq_unmask(INT_DMATLB_MISS); - raw_local_irq_unmask(INT_DMATLB_ACCESS); + arch_local_irq_unmask(INT_DMATLB_MISS); + arch_local_irq_unmask(INT_DMATLB_ACCESS); #endif #if CHIP_HAS_SN_PROC() - raw_local_irq_unmask(INT_SNITLB_MISS); + arch_local_irq_unmask(INT_SNITLB_MISS); #endif #ifdef __tilegx__ - raw_local_irq_unmask(INT_SINGLE_STEP_K); + arch_local_irq_unmask(INT_SINGLE_STEP_K); #endif /* diff --git a/arch/tile/kernel/smp.c b/arch/tile/kernel/smp.c index 75255d90aff3..9575b37a8b75 100644 --- a/arch/tile/kernel/smp.c +++ b/arch/tile/kernel/smp.c @@ -115,7 +115,7 @@ static void smp_start_cpu_interrupt(void) static void smp_stop_cpu_interrupt(void) { set_cpu_online(smp_processor_id(), 0); - raw_local_irq_disable_all(); + arch_local_irq_disable_all(); for (;;) asm("nap"); } diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c index 6bed820e1421..f2e156e44692 100644 --- a/arch/tile/kernel/time.c +++ b/arch/tile/kernel/time.c @@ -132,7 +132,7 @@ static int tile_timer_set_next_event(unsigned long ticks, { BUG_ON(ticks > MAX_TICK); __insn_mtspr(SPR_TILE_TIMER_CONTROL, ticks); - raw_local_irq_unmask_now(INT_TILE_TIMER); + arch_local_irq_unmask_now(INT_TILE_TIMER); return 0; } @@ -143,7 +143,7 @@ static int tile_timer_set_next_event(unsigned long ticks, static void tile_timer_set_mode(enum clock_event_mode mode, struct clock_event_device *evt) { - raw_local_irq_mask_now(INT_TILE_TIMER); + arch_local_irq_mask_now(INT_TILE_TIMER); } /* @@ -172,7 +172,7 @@ void __cpuinit setup_tile_timer(void) evt->cpumask = cpumask_of(smp_processor_id()); /* Start out with timer not firing. */ - raw_local_irq_mask_now(INT_TILE_TIMER); + arch_local_irq_mask_now(INT_TILE_TIMER); /* Register tile timer. */ clockevents_register_device(evt); @@ -188,7 +188,7 @@ void do_timer_interrupt(struct pt_regs *regs, int fault_num) * Mask the timer interrupt here, since we are a oneshot timer * and there are now by definition no events pending. */ - raw_local_irq_mask(INT_TILE_TIMER); + arch_local_irq_mask(INT_TILE_TIMER); /* Track time spent here in an interrupt context */ irq_enter(); -- cgit v1.2.3 From 34a89d26bdc4ba46a406fa3842239e921c493d44 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 28 Oct 2010 15:03:30 -0400 Subject: arch/tile: correct double syscall restart for nested signals This change is modelled on similar fixes for other architectures. The pt_regs "faultnum" member is set to the trap (fault) number that caused us to enter the kernel, and is INT_SWINT_1 for the syscall software interrupt. We already supported a pseudo value, INT_SWINT_1_SIGRETURN, that we used for the rt_sigreturn syscall; it avoided the case where one signal was handled, then we "tail-called" to another handler. This change avoids the similar case where we start to call one handler, then are preempted into another handler when we start trying to run the first handler. We clear ->faultnum after calling handle_signal(), and to be paranoid also in the case where there was no signal to deliver. Signed-off-by: Chris Metcalf --- arch/tile/kernel/signal.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/signal.c b/arch/tile/kernel/signal.c index fb28e85ae3ae..704ce0bce833 100644 --- a/arch/tile/kernel/signal.c +++ b/arch/tile/kernel/signal.c @@ -330,7 +330,7 @@ void do_signal(struct pt_regs *regs) current_thread_info()->status &= ~TS_RESTORE_SIGMASK; } - return; + goto done; } /* Did we come from a system call? */ @@ -358,4 +358,8 @@ void do_signal(struct pt_regs *regs) current_thread_info()->status &= ~TS_RESTORE_SIGMASK; sigprocmask(SIG_SETMASK, ¤t->saved_sigmask, NULL); } + +done: + /* Avoid double syscall restart if there are nested signals. */ + regs->faultnum = INT_SWINT_1_SIGRETURN; } -- cgit v1.2.3 From 1deb9c5dfb179819ecdbf80a1d121e26c63caab3 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 28 Oct 2010 15:47:06 -0400 Subject: arch/tile: don't allow user code to set the PL via ptrace or signal return The kernel was allowing any component of the pt_regs to be updated either by signal handlers writing to the stack, or by processes writing via PTRACE_POKEUSR or PTRACE_SETREGS, which meant they could set their PL up from 0 to 1 and get access to kernel code and data (or, in practice, cause a kernel panic). We now always reset the ex1 field, allowing the user to set their ICS bit only. Signed-off-by: Chris Metcalf --- arch/tile/kernel/ptrace.c | 39 +++++++++++++++++++++------------------ arch/tile/kernel/signal.c | 3 +++ 2 files changed, 24 insertions(+), 18 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/ptrace.c b/arch/tile/kernel/ptrace.c index 9cd29884c09f..e92e40527d6d 100644 --- a/arch/tile/kernel/ptrace.c +++ b/arch/tile/kernel/ptrace.c @@ -50,10 +50,10 @@ long arch_ptrace(struct task_struct *child, long request, { unsigned long __user *datap = (long __user __force *)data; unsigned long tmp; - int i; long ret = -EIO; - unsigned long *childregs; char *childreg; + struct pt_regs copyregs; + int ex1_offset; switch (request) { @@ -80,6 +80,16 @@ long arch_ptrace(struct task_struct *child, long request, if (addr >= PTREGS_SIZE) break; childreg = (char *)task_pt_regs(child) + addr; + + /* Guard against overwrites of the privilege level. */ + ex1_offset = PTREGS_OFFSET_EX1; +#if defined(CONFIG_COMPAT) && defined(__BIG_ENDIAN) + if (is_compat_task()) /* point at low word */ + ex1_offset += sizeof(compat_long_t); +#endif + if (addr == ex1_offset) + data = PL_ICS_EX1(USER_PL, EX1_ICS(data)); + #ifdef CONFIG_COMPAT if (is_compat_task()) { if (addr & (sizeof(compat_long_t)-1)) @@ -96,26 +106,19 @@ long arch_ptrace(struct task_struct *child, long request, break; case PTRACE_GETREGS: /* Get all registers from the child. */ - if (!access_ok(VERIFY_WRITE, datap, PTREGS_SIZE)) - break; - childregs = (long *)task_pt_regs(child); - for (i = 0; i < sizeof(struct pt_regs)/sizeof(unsigned long); - ++i) { - ret = __put_user(childregs[i], &datap[i]); - if (ret != 0) - break; + if (copy_to_user(datap, task_pt_regs(child), + sizeof(struct pt_regs)) == 0) { + ret = 0; } break; case PTRACE_SETREGS: /* Set all registers in the child. */ - if (!access_ok(VERIFY_READ, datap, PTREGS_SIZE)) - break; - childregs = (long *)task_pt_regs(child); - for (i = 0; i < sizeof(struct pt_regs)/sizeof(unsigned long); - ++i) { - ret = __get_user(childregs[i], &datap[i]); - if (ret != 0) - break; + if (copy_from_user(©regs, datap, + sizeof(struct pt_regs)) == 0) { + copyregs.ex1 = + PL_ICS_EX1(USER_PL, EX1_ICS(copyregs.ex1)); + *task_pt_regs(child) = copyregs; + ret = 0; } break; diff --git a/arch/tile/kernel/signal.c b/arch/tile/kernel/signal.c index 704ce0bce833..687719d4abd1 100644 --- a/arch/tile/kernel/signal.c +++ b/arch/tile/kernel/signal.c @@ -71,6 +71,9 @@ int restore_sigcontext(struct pt_regs *regs, for (i = 0; i < sizeof(struct pt_regs)/sizeof(long); ++i) err |= __get_user(regs->regs[i], &sc->gregs[i]); + /* Ensure that the PL is always set to USER_PL. */ + regs->ex1 = PL_ICS_EX1(USER_PL, EX1_ICS(regs->ex1)); + regs->faultnum = INT_SWINT_1_SIGRETURN; err |= __get_user(*pr0, &sc->gregs[0]); -- cgit v1.2.3 From 2c7387ef9969bb073c25ecbdcc5be30770267b16 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 28 Oct 2010 16:07:07 -0400 Subject: asm-generic/stat.h: support 64-bit file time_t for stat() The existing asm-generic/stat.h specifies st_mtime, etc., as a 32-value, and works well for 32-bit architectures (currently microblaze, score, and 32-bit tile). However, for 64-bit architectures it isn't sufficient to return 32 bits of time_t; this isn't good insurance against the 2037 rollover. (It also makes glibc support less convenient, since we can't use glibc's handy STAT_IS_KERNEL_STAT mode.) This change extends the two "timespec" fields for each of the three atime, mtime, and ctime fields from "int" to "long". As a result, on 32-bit platforms nothing changes, and 64-bit platforms will now work as expected. The only wrinkle is 32-bit userspace under 64-bit kernels taking advantage of COMPAT mode. For these, we leave the "struct stat64" definitions with the "int" versions of the time_t and nsec fields, so that architectures can implement compat_sys_stat64() and friends with sys_stat64(), etc., and get the expected 32-bit structure layout. This requires a field-by-field copy in the kernel, implemented by the code guarded under __ARCH_WANT_STAT64. This does mean that the shape of the "struct stat" and "struct stat64" structures is different on a 64-bit kernel, but only one of the two structures should ever be used by any given process: "struct stat" is meant for 64-bit userspace only, and "struct stat64" for 32-bit userspace only. (On a 32-bit kernel the two structures continue to have the same shape, since "long" is 32 bits.) The alternative is keeping the two structures the same shape on 64-bit kernels, which means a 64-bit time_t in "struct stat64" for 32-bit processes. This is a little unnatural since 32-bit userspace can't do anything with 64 bits of time_t information, since time_t is just "long", not "int64_t"; and in any case 32-bit userspace might expect to be running under a 32-bit kernel, which can't provide the high 32 bits anyway. In the case of a 32-bit kernel we'd then be extending the kernel's 32-bit time_t to 64 bits, then truncating it back to 32 bits again in userspace, for no particular reason. And, as mentioned above, if we have 64-bit time_t for 32-bit processes we can't easily use glibc's STAT_IS_KERNEL_STAT, since glibc's stat structure requires an embedded "struct timespec", which is a pair of "long" (32-bit) values in a 32-bit userspace. "Inventive" solutions are possible, but are pretty hacky. Signed-off-by: Chris Metcalf Acked-by: Arnd Bergmann --- arch/tile/kernel/compat.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/compat.c b/arch/tile/kernel/compat.c index 77739cdd9462..67617a05e602 100644 --- a/arch/tile/kernel/compat.c +++ b/arch/tile/kernel/compat.c @@ -148,11 +148,11 @@ long tile_compat_sys_msgrcv(int msqid, #define compat_sys_readahead sys32_readahead #define compat_sys_sync_file_range compat_sys_sync_file_range2 -/* The native 64-bit "struct stat" matches the 32-bit "struct stat64". */ -#define compat_sys_stat64 sys_newstat -#define compat_sys_lstat64 sys_newlstat -#define compat_sys_fstat64 sys_newfstat -#define compat_sys_fstatat64 sys_newfstatat +/* We leverage the "struct stat64" type for 32-bit time_t/nsec. */ +#define compat_sys_stat64 sys_stat64 +#define compat_sys_lstat64 sys_lstat64 +#define compat_sys_fstat64 sys_fstat64 +#define compat_sys_fstatat64 sys_fstatat64 /* The native sys_ptrace dynamically handles compat binaries. */ #define compat_sys_ptrace sys_ptrace -- cgit v1.2.3 From d02db4f8d79c5841ba32b326edb75ea6acd081ca Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 1 Nov 2010 12:46:10 -0400 Subject: arch/tile: mark "hardwall" device as non-seekable Arnd's recent patch series tagged this device with noop_llseek, conservatively. In fact, it should be no_llseek, which we arrange for by opening the device with nonseekable_open(). Signed-off-by: Chris Metcalf --- arch/tile/kernel/hardwall.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/hardwall.c b/arch/tile/kernel/hardwall.c index 70b829a15ae5..e910530436e6 100644 --- a/arch/tile/kernel/hardwall.c +++ b/arch/tile/kernel/hardwall.c @@ -768,13 +768,13 @@ static int hardwall_release(struct inode *inode, struct file *file) } static const struct file_operations dev_hardwall_fops = { + .open = nonseekable_open, .unlocked_ioctl = hardwall_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = hardwall_compat_ioctl, #endif .flush = hardwall_flush, .release = hardwall_release, - .llseek = noop_llseek, }; static struct cdev hardwall_dev; -- cgit v1.2.3 From 451a3c24b0135bce54542009b5fde43846c7cf67 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 17 Nov 2010 16:26:55 +0100 Subject: BKL: remove extraneous #include The big kernel lock has been removed from all these files at some point, leaving only the #include. Remove this too as a cleanup. Signed-off-by: Arnd Bergmann Signed-off-by: Linus Torvalds --- arch/tile/kernel/compat.c | 1 - arch/tile/kernel/compat_signal.c | 1 - arch/tile/kernel/signal.c | 1 - arch/tile/kernel/smpboot.c | 1 - arch/tile/kernel/sys.c | 1 - 5 files changed, 5 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/compat.c b/arch/tile/kernel/compat.c index 67617a05e602..dbc213adf5e1 100644 --- a/arch/tile/kernel/compat.c +++ b/arch/tile/kernel/compat.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/tile/kernel/compat_signal.c b/arch/tile/kernel/compat_signal.c index fb64b99959d4..543d6a33aa26 100644 --- a/arch/tile/kernel/compat_signal.c +++ b/arch/tile/kernel/compat_signal.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/tile/kernel/signal.c b/arch/tile/kernel/signal.c index 687719d4abd1..757407e36696 100644 --- a/arch/tile/kernel/signal.c +++ b/arch/tile/kernel/signal.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/tile/kernel/smpboot.c b/arch/tile/kernel/smpboot.c index 74d62d098edf..b949edcec200 100644 --- a/arch/tile/kernel/smpboot.c +++ b/arch/tile/kernel/smpboot.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/tile/kernel/sys.c b/arch/tile/kernel/sys.c index 7e764669a022..e2187d24a9b4 100644 --- a/arch/tile/kernel/sys.c +++ b/arch/tile/kernel/sys.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From f02cbbe657939489347cbda598401a56913ffcbd Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Tue, 2 Nov 2010 12:05:10 -0400 Subject: pci root complex: support for tile architecture This change enables PCI root complex support for TILEPro. Unlike TILE-Gx, TILEPro has no support for memory-mapped I/O, so the PCI support consists of hypervisor upcalls for PIO, DMA, etc. However, the performance is fine for the devices we have tested with so far (1Gb Ethernet, SATA, etc.). The header was tweaked to be a little bit more aggressive about disabling attempts to map/unmap IO port space. The hacky header was rolled into the header and the result was simplified. Both of the latter two headers were preliminary versions not meant for release before now - oh well. There is one quirk for our TILEmpower platform, which accidentally negotiates up to 5GT and needs to be kicked down to 2.5GT. Signed-off-by: Chris Metcalf --- arch/tile/kernel/Makefile | 1 + arch/tile/kernel/pci.c | 621 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 622 insertions(+) create mode 100644 arch/tile/kernel/pci.c (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/Makefile b/arch/tile/kernel/Makefile index 112b1e248f05..b4c8e8ec45dc 100644 --- a/arch/tile/kernel/Makefile +++ b/arch/tile/kernel/Makefile @@ -15,3 +15,4 @@ obj-$(CONFIG_SMP) += smpboot.o smp.o tlb.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o +obj-$(CONFIG_PCI) += pci.o diff --git a/arch/tile/kernel/pci.c b/arch/tile/kernel/pci.c new file mode 100644 index 000000000000..a1ee25be9ad9 --- /dev/null +++ b/arch/tile/kernel/pci.c @@ -0,0 +1,621 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + + +/* + * Initialization flow and process + * ------------------------------- + * + * This files containes the routines to search for PCI buses, + * enumerate the buses, and configure any attached devices. + * + * There are two entry points here: + * 1) tile_pci_init + * This sets up the pci_controller structs, and opens the + * FDs to the hypervisor. This is called from setup_arch() early + * in the boot process. + * 2) pcibios_init + * This probes the PCI bus(es) for any attached hardware. It's + * called by subsys_initcall. All of the real work is done by the + * generic Linux PCI layer. + * + */ + +/* + * This flag tells if the platform is TILEmpower that needs + * special configuration for the PLX switch chip. + */ +int __write_once tile_plx_gen1; + +static struct pci_controller controllers[TILE_NUM_PCIE]; +static int num_controllers; + +static struct pci_ops tile_cfg_ops; + + +/* + * We don't need to worry about the alignment of resources. + */ +resource_size_t pcibios_align_resource(void *data, const struct resource *res, + resource_size_t size, resource_size_t align) +{ + return res->start; +} +EXPORT_SYMBOL(pcibios_align_resource); + +/* + * Open a FD to the hypervisor PCI device. + * + * controller_id is the controller number, config type is 0 or 1 for + * config0 or config1 operations. + */ +static int __init tile_pcie_open(int controller_id, int config_type) +{ + char filename[32]; + int fd; + + sprintf(filename, "pcie/%d/config%d", controller_id, config_type); + + fd = hv_dev_open((HV_VirtAddr)filename, 0); + + return fd; +} + + +/* + * Get the IRQ numbers from the HV and set up the handlers for them. + */ +static int __init tile_init_irqs(int controller_id, + struct pci_controller *controller) +{ + char filename[32]; + int fd; + int ret; + int x; + struct pcie_rc_config rc_config; + + sprintf(filename, "pcie/%d/ctl", controller_id); + fd = hv_dev_open((HV_VirtAddr)filename, 0); + if (fd < 0) { + pr_err("PCI: hv_dev_open(%s) failed\n", filename); + return -1; + } + ret = hv_dev_pread(fd, 0, (HV_VirtAddr)(&rc_config), + sizeof(rc_config), PCIE_RC_CONFIG_MASK_OFF); + hv_dev_close(fd); + if (ret != sizeof(rc_config)) { + pr_err("PCI: wanted %zd bytes, got %d\n", + sizeof(rc_config), ret); + return -1; + } + /* Record irq_base so that we can map INTx to IRQ # later. */ + controller->irq_base = rc_config.intr; + + for (x = 0; x < 4; x++) + tile_irq_activate(rc_config.intr + x, + TILE_IRQ_HW_CLEAR); + + if (rc_config.plx_gen1) + controller->plx_gen1 = 1; + + return 0; +} + +/* + * First initialization entry point, called from setup_arch(). + * + * Find valid controllers and fill in pci_controller structs for each + * of them. + * + * Returns the number of controllers discovered. + */ +int __init tile_pci_init(void) +{ + int i; + + pr_info("PCI: Searching for controllers...\n"); + + /* Do any configuration we need before using the PCIe */ + + for (i = 0; i < TILE_NUM_PCIE; i++) { + int hv_cfg_fd0 = -1; + int hv_cfg_fd1 = -1; + int hv_mem_fd = -1; + char name[32]; + struct pci_controller *controller; + + /* + * Open the fd to the HV. If it fails then this + * device doesn't exist. + */ + hv_cfg_fd0 = tile_pcie_open(i, 0); + if (hv_cfg_fd0 < 0) + continue; + hv_cfg_fd1 = tile_pcie_open(i, 1); + if (hv_cfg_fd1 < 0) { + pr_err("PCI: Couldn't open config fd to HV " + "for controller %d\n", i); + goto err_cont; + } + + sprintf(name, "pcie/%d/mem", i); + hv_mem_fd = hv_dev_open((HV_VirtAddr)name, 0); + if (hv_mem_fd < 0) { + pr_err("PCI: Could not open mem fd to HV!\n"); + goto err_cont; + } + + pr_info("PCI: Found PCI controller #%d\n", i); + + controller = &controllers[num_controllers]; + + if (tile_init_irqs(i, controller)) { + pr_err("PCI: Could not initialize " + "IRQs, aborting.\n"); + goto err_cont; + } + + controller->index = num_controllers; + controller->hv_cfg_fd[0] = hv_cfg_fd0; + controller->hv_cfg_fd[1] = hv_cfg_fd1; + controller->hv_mem_fd = hv_mem_fd; + controller->first_busno = 0; + controller->last_busno = 0xff; + controller->ops = &tile_cfg_ops; + + num_controllers++; + continue; + +err_cont: + if (hv_cfg_fd0 >= 0) + hv_dev_close(hv_cfg_fd0); + if (hv_cfg_fd1 >= 0) + hv_dev_close(hv_cfg_fd1); + if (hv_mem_fd >= 0) + hv_dev_close(hv_mem_fd); + continue; + } + + /* + * Before using the PCIe, see if we need to do any platform-specific + * configuration, such as the PLX switch Gen 1 issue on TILEmpower. + */ + for (i = 0; i < num_controllers; i++) { + struct pci_controller *controller = &controllers[i]; + + if (controller->plx_gen1) + tile_plx_gen1 = 1; + } + + return num_controllers; +} + +/* + * (pin - 1) converts from the PCI standard's [1:4] convention to + * a normal [0:3] range. + */ +static int tile_map_irq(struct pci_dev *dev, u8 slot, u8 pin) +{ + struct pci_controller *controller = + (struct pci_controller *)dev->sysdata; + return (pin - 1) + controller->irq_base; +} + + +static void __init fixup_read_and_payload_sizes(void) +{ + struct pci_dev *dev = NULL; + int smallest_max_payload = 0x1; /* Tile maxes out at 256 bytes. */ + int max_read_size = 0x2; /* Limit to 512 byte reads. */ + u16 new_values; + + /* Scan for the smallest maximum payload size. */ + while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { + int pcie_caps_offset; + u32 devcap; + int max_payload; + + pcie_caps_offset = pci_find_capability(dev, PCI_CAP_ID_EXP); + if (pcie_caps_offset == 0) + continue; + + pci_read_config_dword(dev, pcie_caps_offset + PCI_EXP_DEVCAP, + &devcap); + max_payload = devcap & PCI_EXP_DEVCAP_PAYLOAD; + if (max_payload < smallest_max_payload) + smallest_max_payload = max_payload; + } + + /* Now, set the max_payload_size for all devices to that value. */ + new_values = (max_read_size << 12) | (smallest_max_payload << 5); + while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { + int pcie_caps_offset; + u16 devctl; + + pcie_caps_offset = pci_find_capability(dev, PCI_CAP_ID_EXP); + if (pcie_caps_offset == 0) + continue; + + pci_read_config_word(dev, pcie_caps_offset + PCI_EXP_DEVCTL, + &devctl); + devctl &= ~(PCI_EXP_DEVCTL_PAYLOAD | PCI_EXP_DEVCTL_READRQ); + devctl |= new_values; + pci_write_config_word(dev, pcie_caps_offset + PCI_EXP_DEVCTL, + devctl); + } +} + + +/* + * Second PCI initialization entry point, called by subsys_initcall. + * + * The controllers have been set up by the time we get here, by a call to + * tile_pci_init. + */ +static int __init pcibios_init(void) +{ + int i; + + pr_info("PCI: Probing PCI hardware\n"); + + /* + * Delay a bit in case devices aren't ready. Some devices are + * known to require at least 20ms here, but we use a more + * conservative value. + */ + mdelay(250); + + /* Scan all of the recorded PCI controllers. */ + for (i = 0; i < num_controllers; i++) { + struct pci_controller *controller = &controllers[i]; + struct pci_bus *bus; + + pr_info("PCI: initializing controller #%d\n", i); + + /* + * This comes from the generic Linux PCI driver. + * + * It reads the PCI tree for this bus into the Linux + * data structures. + * + * This is inlined in linux/pci.h and calls into + * pci_scan_bus_parented() in probe.c. + */ + bus = pci_scan_bus(0, controller->ops, controller); + controller->root_bus = bus; + controller->last_busno = bus->subordinate; + + } + + /* Do machine dependent PCI interrupt routing */ + pci_fixup_irqs(pci_common_swizzle, tile_map_irq); + + /* + * This comes from the generic Linux PCI driver. + * + * It allocates all of the resources (I/O memory, etc) + * associated with the devices read in above. + */ + + pci_assign_unassigned_resources(); + + /* Configure the max_read_size and max_payload_size values. */ + fixup_read_and_payload_sizes(); + + /* Record the I/O resources in the PCI controller structure. */ + for (i = 0; i < num_controllers; i++) { + struct pci_bus *root_bus = controllers[i].root_bus; + struct pci_bus *next_bus; + struct pci_dev *dev; + + list_for_each_entry(dev, &root_bus->devices, bus_list) { + /* Find the PCI host controller, ie. the 1st bridge. */ + if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && + (PCI_SLOT(dev->devfn) == 0)) { + next_bus = dev->subordinate; + controllers[i].mem_resources[0] = + *next_bus->resource[0]; + controllers[i].mem_resources[1] = + *next_bus->resource[1]; + controllers[i].mem_resources[2] = + *next_bus->resource[2]; + + break; + } + } + + } + + return 0; +} +subsys_initcall(pcibios_init); + +/* + * No bus fixups needed. + */ +void __devinit pcibios_fixup_bus(struct pci_bus *bus) +{ + /* Nothing needs to be done. */ +} + +/* + * This can be called from the generic PCI layer, but doesn't need to + * do anything. + */ +char __devinit *pcibios_setup(char *str) +{ + /* Nothing needs to be done. */ + return str; +} + +/* + * This is called from the generic Linux layer. + */ +void __init pcibios_update_irq(struct pci_dev *dev, int irq) +{ + pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); +} + +/* + * Enable memory and/or address decoding, as appropriate, for the + * device described by the 'dev' struct. + * + * This is called from the generic PCI layer, and can be called + * for bridges or endpoints. + */ +int pcibios_enable_device(struct pci_dev *dev, int mask) +{ + u16 cmd, old_cmd; + u8 header_type; + int i; + struct resource *r; + + pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type); + + pci_read_config_word(dev, PCI_COMMAND, &cmd); + old_cmd = cmd; + if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) { + /* + * For bridges, we enable both memory and I/O decoding + * in call cases. + */ + cmd |= PCI_COMMAND_IO; + cmd |= PCI_COMMAND_MEMORY; + } else { + /* + * For endpoints, we enable memory and/or I/O decoding + * only if they have a memory resource of that type. + */ + for (i = 0; i < 6; i++) { + r = &dev->resource[i]; + if (r->flags & IORESOURCE_UNSET) { + pr_err("PCI: Device %s not available " + "because of resource collisions\n", + pci_name(dev)); + return -EINVAL; + } + if (r->flags & IORESOURCE_IO) + cmd |= PCI_COMMAND_IO; + if (r->flags & IORESOURCE_MEM) + cmd |= PCI_COMMAND_MEMORY; + } + } + + /* + * We only write the command if it changed. + */ + if (cmd != old_cmd) + pci_write_config_word(dev, PCI_COMMAND, cmd); + return 0; +} + +void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max) +{ + unsigned long start = pci_resource_start(dev, bar); + unsigned long len = pci_resource_len(dev, bar); + unsigned long flags = pci_resource_flags(dev, bar); + + if (!len) + return NULL; + if (max && len > max) + len = max; + + if (!(flags & IORESOURCE_MEM)) { + pr_info("PCI: Trying to map invalid resource %#lx\n", flags); + start = 0; + } + + return (void __iomem *)start; +} +EXPORT_SYMBOL(pci_iomap); + + +/**************************************************************** + * + * Tile PCI config space read/write routines + * + ****************************************************************/ + +/* + * These are the normal read and write ops + * These are expanded with macros from pci_bus_read_config_byte() etc. + * + * devfn is the combined PCI slot & function. + * + * offset is in bytes, from the start of config space for the + * specified bus & slot. + */ + +static int __devinit tile_cfg_read(struct pci_bus *bus, + unsigned int devfn, + int offset, + int size, + u32 *val) +{ + struct pci_controller *controller = bus->sysdata; + int busnum = bus->number & 0xff; + int slot = (devfn >> 3) & 0x1f; + int function = devfn & 0x7; + u32 addr; + int config_mode = 1; + + /* + * There is no bridge between the Tile and bus 0, so we + * use config0 to talk to bus 0. + * + * If we're talking to a bus other than zero then we + * must have found a bridge. + */ + if (busnum == 0) { + /* + * We fake an empty slot for (busnum == 0) && (slot > 0), + * since there is only one slot on bus 0. + */ + if (slot) { + *val = 0xFFFFFFFF; + return 0; + } + config_mode = 0; + } + + addr = busnum << 20; /* Bus in 27:20 */ + addr |= slot << 15; /* Slot (device) in 19:15 */ + addr |= function << 12; /* Function is in 14:12 */ + addr |= (offset & 0xFFF); /* byte address in 0:11 */ + + return hv_dev_pread(controller->hv_cfg_fd[config_mode], 0, + (HV_VirtAddr)(val), size, addr); +} + + +/* + * See tile_cfg_read() for relevent comments. + * Note that "val" is the value to write, not a pointer to that value. + */ +static int __devinit tile_cfg_write(struct pci_bus *bus, + unsigned int devfn, + int offset, + int size, + u32 val) +{ + struct pci_controller *controller = bus->sysdata; + int busnum = bus->number & 0xff; + int slot = (devfn >> 3) & 0x1f; + int function = devfn & 0x7; + u32 addr; + int config_mode = 1; + HV_VirtAddr valp = (HV_VirtAddr)&val; + + /* + * For bus 0 slot 0 we use config 0 accesses. + */ + if (busnum == 0) { + /* + * We fake an empty slot for (busnum == 0) && (slot > 0), + * since there is only one slot on bus 0. + */ + if (slot) + return 0; + config_mode = 0; + } + + addr = busnum << 20; /* Bus in 27:20 */ + addr |= slot << 15; /* Slot (device) in 19:15 */ + addr |= function << 12; /* Function is in 14:12 */ + addr |= (offset & 0xFFF); /* byte address in 0:11 */ + +#ifdef __BIG_ENDIAN + /* Point to the correct part of the 32-bit "val". */ + valp += 4 - size; +#endif + + return hv_dev_pwrite(controller->hv_cfg_fd[config_mode], 0, + valp, size, addr); +} + + +static struct pci_ops tile_cfg_ops = { + .read = tile_cfg_read, + .write = tile_cfg_write, +}; + + +/* + * In the following, each PCI controller's mem_resources[1] + * represents its (non-prefetchable) PCI memory resource. + * mem_resources[0] and mem_resources[2] refer to its PCI I/O and + * prefetchable PCI memory resources, respectively. + * For more details, see pci_setup_bridge() in setup-bus.c. + * By comparing the target PCI memory address against the + * end address of controller 0, we can determine the controller + * that should accept the PCI memory access. + */ +#define TILE_READ(size, type) \ +type _tile_read##size(unsigned long addr) \ +{ \ + type val; \ + int idx = 0; \ + if (addr > controllers[0].mem_resources[1].end && \ + addr > controllers[0].mem_resources[2].end) \ + idx = 1; \ + if (hv_dev_pread(controllers[idx].hv_mem_fd, 0, \ + (HV_VirtAddr)(&val), sizeof(type), addr)) \ + pr_err("PCI: read %zd bytes at 0x%lX failed\n", \ + sizeof(type), addr); \ + return val; \ +} \ +EXPORT_SYMBOL(_tile_read##size) + +TILE_READ(b, u8); +TILE_READ(w, u16); +TILE_READ(l, u32); +TILE_READ(q, u64); + +#define TILE_WRITE(size, type) \ +void _tile_write##size(type val, unsigned long addr) \ +{ \ + int idx = 0; \ + if (addr > controllers[0].mem_resources[1].end && \ + addr > controllers[0].mem_resources[2].end) \ + idx = 1; \ + if (hv_dev_pwrite(controllers[idx].hv_mem_fd, 0, \ + (HV_VirtAddr)(&val), sizeof(type), addr)) \ + pr_err("PCI: write %zd bytes at 0x%lX failed\n", \ + sizeof(type), addr); \ +} \ +EXPORT_SYMBOL(_tile_write##size) + +TILE_WRITE(b, u8); +TILE_WRITE(w, u16); +TILE_WRITE(l, u32); +TILE_WRITE(q, u64); -- cgit v1.2.3 From 4d658d13c90f14cf3510ca15cafe2f4aa9e23d64 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 24 Nov 2010 13:42:15 -0500 Subject: arch/tile: make glibc's sysconf(_SC_NPROCESSORS_CONF) work correctly glibc assumes that it can count /sys/devices/system/cpu/cpu* to get the number of configured cpus. For this to be valid on tile, we need to generate a "cpu" entry for all cpus, including the ones that are not currently allocated for Linux's use. Signed-off-by: Chris Metcalf --- arch/tile/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index fb0b3cbeae14..f18573643ed1 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -840,7 +840,7 @@ static int __init topology_init(void) for_each_online_node(i) register_one_node(i); - for_each_present_cpu(i) + for (i = 0; i < smp_height * smp_width; ++i) register_cpu(&cpu_devices[i], i); return 0; -- cgit v1.2.3 From bc4cf2bb271b2d557fc510426755da786fc985be Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Tue, 14 Dec 2010 15:57:49 -0500 Subject: arch/tile: handle CLONE_SETTLS in copy_thread(), not user space Previously we were just setting up the "tp" register in the new task as started by clone() in libc. However, this is not quite right, since in principle a signal might be delivered to the new task before it had its TLS set up. (Of course, this race window still exists for resetting the libc getpid() cached value in the new task, in principle. But in any case, we are now doing this exactly the way all other architectures do it.) This change is important for 2.6.37 since the tile glibc we will be submitting upstream will not set TLS in user space any more, so it will only work on a kernel that has this fix. It should also be taken for 2.6.36.x in the stable tree if possible. Signed-off-by: Chris Metcalf Cc: stable --- arch/tile/kernel/process.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index 8430f45daea6..e90eb53173b0 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -211,6 +211,13 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, childregs->regs[0] = 0; /* return value is zero */ childregs->sp = sp; /* override with new user stack pointer */ + /* + * If CLONE_SETTLS is set, set "tp" in the new task to "r4", + * which is passed in as arg #5 to sys_clone(). + */ + if (clone_flags & CLONE_SETTLS) + childregs->tp = regs->regs[4]; + /* * Copy the callee-saved registers from the passed pt_regs struct * into the context-switch callee-saved registers area. @@ -539,6 +546,7 @@ struct task_struct *__sched _switch_to(struct task_struct *prev, return __switch_to(prev, next, next_current_ksp0(next)); } +/* Note there is an implicit fifth argument if (clone_flags & CLONE_SETTLS). */ SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, void __user *, parent_tidptr, void __user *, child_tidptr, struct pt_regs *, regs) -- cgit v1.2.3 From 81711cee933599fa114abb0d258d8bbabef8adfb Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Tue, 14 Dec 2010 16:07:25 -0500 Subject: arch/tile: handle rt_sigreturn() more cleanly The current tile rt_sigreturn() syscall pattern uses the common idiom of loading up pt_regs with all the saved registers from the time of the signal, then anticipating the fact that we will clobber the ABI "return value" register (r0) as we return from the syscall by setting the rt_sigreturn return value to whatever random value was in the pt_regs for r0. However, this breaks in our 64-bit kernel when running "compat" tasks, since we always sign-extend the "return value" register to properly handle returned pointers that are in the upper 2GB of the 32-bit compat address space. Doing this to the sigreturn path then causes occasional random corruption of the 64-bit r0 register. Instead, we stop doing the crazy "load the return-value register" hack in sigreturn. We already have some sigreturn-specific assembly code that we use to pass the pt_regs pointer to C code. We extend that code to also set the link register to point to a spot a few instructions after the usual syscall return address so we don't clobber the saved r0. Now it no longer matters what the rt_sigreturn syscall returns, and the pt_regs structure can be cleanly and completely reloaded. Signed-off-by: Chris Metcalf --- arch/tile/kernel/compat_signal.c | 6 +++--- arch/tile/kernel/intvec_32.S | 24 +++++++++++++++++++++--- arch/tile/kernel/signal.c | 10 ++++------ 3 files changed, 28 insertions(+), 12 deletions(-) (limited to 'arch/tile/kernel') diff --git a/arch/tile/kernel/compat_signal.c b/arch/tile/kernel/compat_signal.c index 543d6a33aa26..dbb0dfc7bece 100644 --- a/arch/tile/kernel/compat_signal.c +++ b/arch/tile/kernel/compat_signal.c @@ -290,12 +290,12 @@ long compat_sys_sigaltstack(const struct compat_sigaltstack __user *uss_ptr, return ret; } +/* The assembly shim for this function arranges to ignore the return value. */ long compat_sys_rt_sigreturn(struct pt_regs *regs) { struct compat_rt_sigframe __user *frame = (struct compat_rt_sigframe __user *) compat_ptr(regs->sp); sigset_t set; - long r0; if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) goto badframe; @@ -308,13 +308,13 @@ long compat_sys_rt_sigreturn(struct pt_regs *regs) recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); - if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &r0)) + if (restore_sigcontext(regs, &frame->uc.uc_mcontext)) goto badframe; if (compat_sys_sigaltstack(&frame->uc.uc_stack, NULL, regs) != 0) goto badframe; - return r0; + return 0; badframe: force_sig(SIGSEGV, current); diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S index f5821626247f..5eed4a02bf62 100644 --- a/arch/tile/kernel/intvec_32.S +++ b/arch/tile/kernel/intvec_32.S @@ -1342,8 +1342,8 @@ handle_syscall: lw r20, r20 /* Jump to syscall handler. */ - jalr r20; .Lhandle_syscall_link: - FEEDBACK_REENTER(handle_syscall) + jalr r20 +.Lhandle_syscall_link: /* value of "lr" after "jalr r20" above */ /* * Write our r0 onto the stack so it gets restored instead @@ -1352,6 +1352,9 @@ handle_syscall: PTREGS_PTR(r29, PTREGS_OFFSET_REG(0)) sw r29, r0 +.Lsyscall_sigreturn_skip: + FEEDBACK_REENTER(handle_syscall) + /* Do syscall trace again, if requested. */ lw r30, r31 andi r30, r30, _TIF_SYSCALL_TRACE @@ -1536,9 +1539,24 @@ STD_ENTRY_LOCAL(bad_intr) }; \ STD_ENDPROC(_##x) +/* + * Special-case sigreturn to not write r0 to the stack on return. + * This is technically more efficient, but it also avoids difficulties + * in the 64-bit OS when handling 32-bit compat code, since we must not + * sign-extend r0 for the sigreturn return-value case. + */ +#define PTREGS_SYSCALL_SIGRETURN(x, reg) \ + STD_ENTRY(_##x); \ + addli lr, lr, .Lsyscall_sigreturn_skip - .Lhandle_syscall_link; \ + { \ + PTREGS_PTR(reg, PTREGS_OFFSET_BASE); \ + j x \ + }; \ + STD_ENDPROC(_##x) + PTREGS_SYSCALL(sys_execve, r3) PTREGS_SYSCALL(sys_sigaltstack, r2) -PTREGS_SYSCALL(sys_rt_sigreturn, r0) +PTREGS_SYSCALL_SIGRETURN(sys_rt_sigreturn, r0) PTREGS_SYSCALL(sys_cmpxchg_badaddr, r1) /* Save additional callee-saves to pt_regs, put address in r4 and jump. */ diff --git a/arch/tile/kernel/signal.c b/arch/tile/kernel/signal.c index 757407e36696..1260321155f1 100644 --- a/arch/tile/kernel/signal.c +++ b/arch/tile/kernel/signal.c @@ -52,7 +52,7 @@ SYSCALL_DEFINE3(sigaltstack, const stack_t __user *, uss, */ int restore_sigcontext(struct pt_regs *regs, - struct sigcontext __user *sc, long *pr0) + struct sigcontext __user *sc) { int err = 0; int i; @@ -75,17 +75,15 @@ int restore_sigcontext(struct pt_regs *regs, regs->faultnum = INT_SWINT_1_SIGRETURN; - err |= __get_user(*pr0, &sc->gregs[0]); return err; } -/* sigreturn() returns long since it restores r0 in the interrupted code. */ +/* The assembly shim for this function arranges to ignore the return value. */ SYSCALL_DEFINE1(rt_sigreturn, struct pt_regs *, regs) { struct rt_sigframe __user *frame = (struct rt_sigframe __user *)(regs->sp); sigset_t set; - long r0; if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) goto badframe; @@ -98,13 +96,13 @@ SYSCALL_DEFINE1(rt_sigreturn, struct pt_regs *, regs) recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); - if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &r0)) + if (restore_sigcontext(regs, &frame->uc.uc_mcontext)) goto badframe; if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT) goto badframe; - return r0; + return 0; badframe: force_sig(SIGSEGV, current); -- cgit v1.2.3