diff options
Diffstat (limited to 'arch/tile/kernel')
28 files changed, 1409 insertions, 731 deletions
diff --git a/arch/tile/kernel/Makefile b/arch/tile/kernel/Makefile index b4dbc057baad..5de99248d8df 100644 --- a/arch/tile/kernel/Makefile +++ b/arch/tile/kernel/Makefile @@ -3,16 +3,15 @@ # extra-y := vmlinux.lds head_$(BITS).o -obj-y := backtrace.o entry.o init_task.o irq.o messaging.o \ +obj-y := backtrace.o entry.o irq.o messaging.o \ pci-dma.o proc.o process.o ptrace.o reboot.o \ setup.o signal.o single_step.o stack.o sys.o sysfs.o time.o traps.o \ intvec_$(BITS).o regs_$(BITS).o tile-desc_$(BITS).o obj-$(CONFIG_HARDWALL) += hardwall.o -obj-$(CONFIG_TILEGX) += futex_64.o obj-$(CONFIG_COMPAT) += compat.o compat_signal.o obj-$(CONFIG_SMP) += smpboot.o smp.o tlb.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o -obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o +obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel_$(BITS).o obj-$(CONFIG_PCI) += pci.o diff --git a/arch/tile/kernel/backtrace.c b/arch/tile/kernel/backtrace.c index 9092ce8aa6b4..f8b74ca83b92 100644 --- a/arch/tile/kernel/backtrace.c +++ b/arch/tile/kernel/backtrace.c @@ -14,6 +14,7 @@ #include <linux/kernel.h> #include <linux/string.h> +#include <asm/byteorder.h> #include <asm/backtrace.h> #include <asm/tile-desc.h> #include <arch/abi.h> @@ -336,8 +337,12 @@ static void find_caller_pc_and_caller_sp(CallerLocation *location, bytes_to_prefetch / sizeof(tile_bundle_bits); } - /* Decode the next bundle. */ - bundle.bits = prefetched_bundles[next_bundle++]; + /* + * Decode the next bundle. + * TILE always stores instruction bundles in little-endian + * mode, even when the chip is running in big-endian mode. + */ + bundle.bits = le64_to_cpu(prefetched_bundles[next_bundle++]); bundle.num_insns = parse_insn_tile(bundle.bits, pc, bundle.insns); num_info_ops = bt_get_info_ops(&bundle, info_operands); diff --git a/arch/tile/kernel/compat_signal.c b/arch/tile/kernel/compat_signal.c index 77763ccd5a7d..474571b84085 100644 --- a/arch/tile/kernel/compat_signal.c +++ b/arch/tile/kernel/compat_signal.c @@ -118,8 +118,6 @@ struct compat_rt_sigframe { struct compat_ucontext uc; }; -#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) - long compat_sys_rt_sigaction(int sig, struct compat_sigaction __user *act, struct compat_sigaction __user *oact, size_t sigsetsize) @@ -302,7 +300,6 @@ long compat_sys_rt_sigreturn(struct pt_regs *regs) if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) goto badframe; - sigdelsetmask(&set, ~_BLOCKABLE); set_current_blocked(&set); if (restore_sigcontext(regs, &frame->uc.uc_mcontext)) @@ -403,19 +400,17 @@ int compat_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, * Set up registers for signal handler. * Registers that we don't modify keep the value they had from * user-space at the time we took the signal. + * We always pass siginfo and mcontext, regardless of SA_SIGINFO, + * since some things rely on this (e.g. glibc's debug/segfault.c). */ regs->pc = ptr_to_compat_reg(ka->sa.sa_handler); regs->ex1 = PL_ICS_EX1(USER_PL, 1); /* set crit sec in handler */ regs->sp = ptr_to_compat_reg(frame); regs->lr = restorer; regs->regs[0] = (unsigned long) usig; - - if (ka->sa.sa_flags & SA_SIGINFO) { - /* Need extra arguments, so mark to restore caller-saves. */ - regs->regs[1] = ptr_to_compat_reg(&frame->info); - regs->regs[2] = ptr_to_compat_reg(&frame->uc); - regs->flags |= PT_FLAGS_CALLER_SAVES; - } + regs->regs[1] = ptr_to_compat_reg(&frame->info); + regs->regs[2] = ptr_to_compat_reg(&frame->uc); + regs->flags |= PT_FLAGS_CALLER_SAVES; /* * Notify any tracer that was single-stepping it. diff --git a/arch/tile/kernel/entry.S b/arch/tile/kernel/entry.S index 431e9ae60488..c31637baff28 100644 --- a/arch/tile/kernel/entry.S +++ b/arch/tile/kernel/entry.S @@ -68,23 +68,10 @@ STD_ENTRY(KBacktraceIterator_init_current) jrp lr /* keep backtracer happy */ STD_ENDPROC(KBacktraceIterator_init_current) -/* - * Reset our stack to r1/r2 (sp and ksp0+cpu respectively), then - * free the old stack (passed in r0) and re-invoke cpu_idle(). - * We update sp and ksp0 simultaneously to avoid backtracer warnings. - */ -STD_ENTRY(cpu_idle_on_new_stack) - { - move sp, r1 - mtspr SPR_SYSTEM_SAVE_K_0, r2 - } - jal free_thread_info - j cpu_idle - STD_ENDPROC(cpu_idle_on_new_stack) - /* Loop forever on a nap during SMP boot. */ STD_ENTRY(smp_nap) nap + nop /* avoid provoking the icache prefetch with a jump */ j smp_nap /* we are not architecturally guaranteed not to exit nap */ jrp lr /* clue in the backtracer */ STD_ENDPROC(smp_nap) @@ -99,11 +86,13 @@ STD_ENTRY(smp_nap) */ STD_ENTRY(_cpu_idle) movei r1, 1 + IRQ_ENABLE_LOAD(r2, r3) mtspr INTERRUPT_CRITICAL_SECTION, r1 - IRQ_ENABLE(r2, r3) /* unmask, but still with ICS set */ + IRQ_ENABLE_APPLY(r2, r3) /* unmask, but still with ICS set */ mtspr INTERRUPT_CRITICAL_SECTION, zero .global _cpu_idle_nap _cpu_idle_nap: nap + nop /* avoid provoking the icache prefetch with a jump */ jrp lr STD_ENDPROC(_cpu_idle) diff --git a/arch/tile/kernel/hardwall.c b/arch/tile/kernel/hardwall.c index 8c41891aab34..20273ee37deb 100644 --- a/arch/tile/kernel/hardwall.c +++ b/arch/tile/kernel/hardwall.c @@ -33,59 +33,157 @@ /* - * This data structure tracks the rectangle data, etc., associated - * one-to-one with a "struct file *" from opening HARDWALL_FILE. + * Implement a per-cpu "hardwall" resource class such as UDN or IPI. + * We use "hardwall" nomenclature throughout for historical reasons. + * The lock here controls access to the list data structure as well as + * to the items on the list. + */ +struct hardwall_type { + int index; + int is_xdn; + int is_idn; + int disabled; + const char *name; + struct list_head list; + spinlock_t lock; + struct proc_dir_entry *proc_dir; +}; + +enum hardwall_index { + HARDWALL_UDN = 0, +#ifndef __tilepro__ + HARDWALL_IDN = 1, + HARDWALL_IPI = 2, +#endif + _HARDWALL_TYPES +}; + +static struct hardwall_type hardwall_types[] = { + { /* user-space access to UDN */ + 0, + 1, + 0, + 0, + "udn", + LIST_HEAD_INIT(hardwall_types[HARDWALL_UDN].list), + __SPIN_LOCK_INITIALIZER(hardwall_types[HARDWALL_UDN].lock), + NULL + }, +#ifndef __tilepro__ + { /* user-space access to IDN */ + 1, + 1, + 1, + 1, /* disabled pending hypervisor support */ + "idn", + LIST_HEAD_INIT(hardwall_types[HARDWALL_IDN].list), + __SPIN_LOCK_INITIALIZER(hardwall_types[HARDWALL_IDN].lock), + NULL + }, + { /* access to user-space IPI */ + 2, + 0, + 0, + 0, + "ipi", + LIST_HEAD_INIT(hardwall_types[HARDWALL_IPI].list), + __SPIN_LOCK_INITIALIZER(hardwall_types[HARDWALL_IPI].lock), + NULL + }, +#endif +}; + +/* + * This data structure tracks the cpu data, etc., associated + * one-to-one with a "struct file *" from opening a hardwall device file. * Note that the file's private data points back to this structure. */ struct hardwall_info { - struct list_head list; /* "rectangles" list */ + struct list_head list; /* for hardwall_types.list */ struct list_head task_head; /* head of tasks in this hardwall */ - struct cpumask cpumask; /* cpus in the rectangle */ + struct hardwall_type *type; /* type of this resource */ + struct cpumask cpumask; /* cpus reserved */ + int id; /* integer id for this hardwall */ + int teardown_in_progress; /* are we tearing this one down? */ + + /* Remaining fields only valid for user-network resources. */ int ulhc_x; /* upper left hand corner x coord */ int ulhc_y; /* upper left hand corner y coord */ int width; /* rectangle width */ int height; /* rectangle height */ - int id; /* integer id for this hardwall */ - int teardown_in_progress; /* are we tearing this one down? */ +#if CHIP_HAS_REV1_XDN() + atomic_t xdn_pending_count; /* cores in phase 1 of drain */ +#endif }; -/* Currently allocated hardwall rectangles */ -static LIST_HEAD(rectangles); /* /proc/tile/hardwall */ static struct proc_dir_entry *hardwall_proc_dir; /* Functions to manage files in /proc/tile/hardwall. */ -static void hardwall_add_proc(struct hardwall_info *rect); -static void hardwall_remove_proc(struct hardwall_info *rect); - -/* - * Guard changes to the hardwall data structures. - * This could be finer grained (e.g. one lock for the list of hardwall - * rectangles, then separate embedded locks for each one's list of tasks), - * but there are subtle correctness issues when trying to start with - * a task's "hardwall" pointer and lock the correct rectangle's embedded - * lock in the presence of a simultaneous deactivation, so it seems - * easier to have a single lock, given that none of these data - * structures are touched very frequently during normal operation. - */ -static DEFINE_SPINLOCK(hardwall_lock); +static void hardwall_add_proc(struct hardwall_info *); +static void hardwall_remove_proc(struct hardwall_info *); /* Allow disabling UDN access. */ -static int udn_disabled; static int __init noudn(char *str) { pr_info("User-space UDN access is disabled\n"); - udn_disabled = 1; + hardwall_types[HARDWALL_UDN].disabled = 1; return 0; } early_param("noudn", noudn); +#ifndef __tilepro__ +/* Allow disabling IDN access. */ +static int __init noidn(char *str) +{ + pr_info("User-space IDN access is disabled\n"); + hardwall_types[HARDWALL_IDN].disabled = 1; + return 0; +} +early_param("noidn", noidn); + +/* Allow disabling IPI access. */ +static int __init noipi(char *str) +{ + pr_info("User-space IPI access is disabled\n"); + hardwall_types[HARDWALL_IPI].disabled = 1; + return 0; +} +early_param("noipi", noipi); +#endif + /* - * Low-level primitives + * Low-level primitives for UDN/IDN */ +#ifdef __tilepro__ +#define mtspr_XDN(hwt, name, val) \ + do { (void)(hwt); __insn_mtspr(SPR_UDN_##name, (val)); } while (0) +#define mtspr_MPL_XDN(hwt, name, val) \ + do { (void)(hwt); __insn_mtspr(SPR_MPL_UDN_##name, (val)); } while (0) +#define mfspr_XDN(hwt, name) \ + ((void)(hwt), __insn_mfspr(SPR_UDN_##name)) +#else +#define mtspr_XDN(hwt, name, val) \ + do { \ + if ((hwt)->is_idn) \ + __insn_mtspr(SPR_IDN_##name, (val)); \ + else \ + __insn_mtspr(SPR_UDN_##name, (val)); \ + } while (0) +#define mtspr_MPL_XDN(hwt, name, val) \ + do { \ + if ((hwt)->is_idn) \ + __insn_mtspr(SPR_MPL_IDN_##name, (val)); \ + else \ + __insn_mtspr(SPR_MPL_UDN_##name, (val)); \ + } while (0) +#define mfspr_XDN(hwt, name) \ + ((hwt)->is_idn ? __insn_mfspr(SPR_IDN_##name) : __insn_mfspr(SPR_UDN_##name)) +#endif + /* Set a CPU bit if the CPU is online. */ #define cpu_online_set(cpu, dst) do { \ if (cpu_online(cpu)) \ @@ -101,7 +199,7 @@ static int contains(struct hardwall_info *r, int x, int y) } /* Compute the rectangle parameters and validate the cpumask. */ -static int setup_rectangle(struct hardwall_info *r, struct cpumask *mask) +static int check_rectangle(struct hardwall_info *r, struct cpumask *mask) { int x, y, cpu, ulhc, lrhc; @@ -114,8 +212,6 @@ static int setup_rectangle(struct hardwall_info *r, struct cpumask *mask) r->ulhc_y = cpu_y(ulhc); r->width = cpu_x(lrhc) - r->ulhc_x + 1; r->height = cpu_y(lrhc) - r->ulhc_y + 1; - cpumask_copy(&r->cpumask, mask); - r->id = ulhc; /* The ulhc cpu id can be the hardwall id. */ /* Width and height must be positive */ if (r->width <= 0 || r->height <= 0) @@ -128,7 +224,7 @@ static int setup_rectangle(struct hardwall_info *r, struct cpumask *mask) return -EINVAL; /* - * Note that offline cpus can't be drained when this UDN + * Note that offline cpus can't be drained when this user network * rectangle eventually closes. We used to detect this * situation and print a warning, but it annoyed users and * they ignored it anyway, so now we just return without a @@ -137,16 +233,6 @@ static int setup_rectangle(struct hardwall_info *r, struct cpumask *mask) return 0; } -/* Do the two given rectangles overlap on any cpu? */ -static int overlaps(struct hardwall_info *a, struct hardwall_info *b) -{ - return a->ulhc_x + a->width > b->ulhc_x && /* A not to the left */ - b->ulhc_x + b->width > a->ulhc_x && /* B not to the left */ - a->ulhc_y + a->height > b->ulhc_y && /* A not above */ - b->ulhc_y + b->height > a->ulhc_y; /* B not above */ -} - - /* * Hardware management of hardwall setup, teardown, trapping, * and enabling/disabling PL0 access to the networks. @@ -157,23 +243,35 @@ enum direction_protect { N_PROTECT = (1 << 0), E_PROTECT = (1 << 1), S_PROTECT = (1 << 2), - W_PROTECT = (1 << 3) + W_PROTECT = (1 << 3), + C_PROTECT = (1 << 4), }; -static void enable_firewall_interrupts(void) +static inline int xdn_which_interrupt(struct hardwall_type *hwt) +{ +#ifndef __tilepro__ + if (hwt->is_idn) + return INT_IDN_FIREWALL; +#endif + return INT_UDN_FIREWALL; +} + +static void enable_firewall_interrupts(struct hardwall_type *hwt) { - arch_local_irq_unmask_now(INT_UDN_FIREWALL); + arch_local_irq_unmask_now(xdn_which_interrupt(hwt)); } -static void disable_firewall_interrupts(void) +static void disable_firewall_interrupts(struct hardwall_type *hwt) { - arch_local_irq_mask_now(INT_UDN_FIREWALL); + arch_local_irq_mask_now(xdn_which_interrupt(hwt)); } /* Set up hardwall on this cpu based on the passed hardwall_info. */ -static void hardwall_setup_ipi_func(void *info) +static void hardwall_setup_func(void *info) { struct hardwall_info *r = info; + struct hardwall_type *hwt = r->type; + int cpu = smp_processor_id(); int x = cpu % smp_width; int y = cpu / smp_width; @@ -187,13 +285,12 @@ static void hardwall_setup_ipi_func(void *info) if (y == r->ulhc_y + r->height - 1) bits |= S_PROTECT; BUG_ON(bits == 0); - __insn_mtspr(SPR_UDN_DIRECTION_PROTECT, bits); - enable_firewall_interrupts(); - + mtspr_XDN(hwt, DIRECTION_PROTECT, bits); + enable_firewall_interrupts(hwt); } /* Set up all cpus on edge of rectangle to enable/disable hardwall SPRs. */ -static void hardwall_setup(struct hardwall_info *r) +static void hardwall_protect_rectangle(struct hardwall_info *r) { int x, y, cpu, delta; struct cpumask rect_cpus; @@ -217,37 +314,50 @@ static void hardwall_setup(struct hardwall_info *r) } /* Then tell all the cpus to set up their protection SPR */ - on_each_cpu_mask(&rect_cpus, hardwall_setup_ipi_func, r, 1); + on_each_cpu_mask(&rect_cpus, hardwall_setup_func, r, 1); } void __kprobes do_hardwall_trap(struct pt_regs* regs, int fault_num) { struct hardwall_info *rect; + struct hardwall_type *hwt; struct task_struct *p; struct siginfo info; - int x, y; int cpu = smp_processor_id(); int found_processes; unsigned long flags; - struct pt_regs *old_regs = set_irq_regs(regs); + irq_enter(); + /* Figure out which network trapped. */ + switch (fault_num) { +#ifndef __tilepro__ + case INT_IDN_FIREWALL: + hwt = &hardwall_types[HARDWALL_IDN]; + break; +#endif + case INT_UDN_FIREWALL: + hwt = &hardwall_types[HARDWALL_UDN]; + break; + default: + BUG(); + } + BUG_ON(hwt->disabled); + /* This tile trapped a network access; find the rectangle. */ - x = cpu % smp_width; - y = cpu / smp_width; - spin_lock_irqsave(&hardwall_lock, flags); - list_for_each_entry(rect, &rectangles, list) { - if (contains(rect, x, y)) + spin_lock_irqsave(&hwt->lock, flags); + list_for_each_entry(rect, &hwt->list, list) { + if (cpumask_test_cpu(cpu, &rect->cpumask)) break; } /* * It shouldn't be possible not to find this cpu on the * rectangle list, since only cpus in rectangles get hardwalled. - * The hardwall is only removed after the UDN is drained. + * The hardwall is only removed after the user network is drained. */ - BUG_ON(&rect->list == &rectangles); + BUG_ON(&rect->list == &hwt->list); /* * If we already started teardown on this hardwall, don't worry; @@ -255,30 +365,32 @@ void __kprobes do_hardwall_trap(struct pt_regs* regs, int fault_num) * to quiesce. */ if (rect->teardown_in_progress) { - pr_notice("cpu %d: detected hardwall violation %#lx" + pr_notice("cpu %d: detected %s hardwall violation %#lx" " while teardown already in progress\n", - cpu, (long) __insn_mfspr(SPR_UDN_DIRECTION_PROTECT)); + cpu, hwt->name, + (long)mfspr_XDN(hwt, DIRECTION_PROTECT)); goto done; } /* * Kill off any process that is activated in this rectangle. * We bypass security to deliver the signal, since it must be - * one of the activated processes that generated the UDN + * one of the activated processes that generated the user network * message that caused this trap, and all the activated * processes shared a single open file so are pretty tightly * bound together from a security point of view to begin with. */ rect->teardown_in_progress = 1; wmb(); /* Ensure visibility of rectangle before notifying processes. */ - pr_notice("cpu %d: detected hardwall violation %#lx...\n", - cpu, (long) __insn_mfspr(SPR_UDN_DIRECTION_PROTECT)); + pr_notice("cpu %d: detected %s hardwall violation %#lx...\n", + cpu, hwt->name, (long)mfspr_XDN(hwt, DIRECTION_PROTECT)); info.si_signo = SIGILL; info.si_errno = 0; info.si_code = ILL_HARDWALL; found_processes = 0; - list_for_each_entry(p, &rect->task_head, thread.hardwall_list) { - BUG_ON(p->thread.hardwall != rect); + list_for_each_entry(p, &rect->task_head, + thread.hardwall[hwt->index].list) { + BUG_ON(p->thread.hardwall[hwt->index].info != rect); if (!(p->flags & PF_EXITING)) { found_processes = 1; pr_notice("hardwall: killing %d\n", p->pid); @@ -289,7 +401,7 @@ void __kprobes do_hardwall_trap(struct pt_regs* regs, int fault_num) pr_notice("hardwall: no associated processes!\n"); done: - spin_unlock_irqrestore(&hardwall_lock, flags); + spin_unlock_irqrestore(&hwt->lock, flags); /* * We have to disable firewall interrupts now, or else when we @@ -298,48 +410,87 @@ void __kprobes do_hardwall_trap(struct pt_regs* regs, int fault_num) * haven't yet drained the network, and that would allow packets * to cross out of the hardwall region. */ - disable_firewall_interrupts(); + disable_firewall_interrupts(hwt); irq_exit(); set_irq_regs(old_regs); } -/* Allow access from user space to the UDN. */ -void grant_network_mpls(void) +/* Allow access from user space to the user network. */ +void grant_hardwall_mpls(struct hardwall_type *hwt) { - __insn_mtspr(SPR_MPL_UDN_ACCESS_SET_0, 1); - __insn_mtspr(SPR_MPL_UDN_AVAIL_SET_0, 1); - __insn_mtspr(SPR_MPL_UDN_COMPLETE_SET_0, 1); - __insn_mtspr(SPR_MPL_UDN_TIMER_SET_0, 1); +#ifndef __tilepro__ + if (!hwt->is_xdn) { + __insn_mtspr(SPR_MPL_IPI_0_SET_0, 1); + return; + } +#endif + mtspr_MPL_XDN(hwt, ACCESS_SET_0, 1); + mtspr_MPL_XDN(hwt, AVAIL_SET_0, 1); + mtspr_MPL_XDN(hwt, COMPLETE_SET_0, 1); + mtspr_MPL_XDN(hwt, TIMER_SET_0, 1); #if !CHIP_HAS_REV1_XDN() - __insn_mtspr(SPR_MPL_UDN_REFILL_SET_0, 1); - __insn_mtspr(SPR_MPL_UDN_CA_SET_0, 1); + mtspr_MPL_XDN(hwt, REFILL_SET_0, 1); + mtspr_MPL_XDN(hwt, CA_SET_0, 1); #endif } -/* Deny access from user space to the UDN. */ -void restrict_network_mpls(void) +/* Deny access from user space to the user network. */ +void restrict_hardwall_mpls(struct hardwall_type *hwt) { - __insn_mtspr(SPR_MPL_UDN_ACCESS_SET_1, 1); - __insn_mtspr(SPR_MPL_UDN_AVAIL_SET_1, 1); - __insn_mtspr(SPR_MPL_UDN_COMPLETE_SET_1, 1); - __insn_mtspr(SPR_MPL_UDN_TIMER_SET_1, 1); +#ifndef __tilepro__ + if (!hwt->is_xdn) { + __insn_mtspr(SPR_MPL_IPI_0_SET_1, 1); + return; + } +#endif + mtspr_MPL_XDN(hwt, ACCESS_SET_1, 1); + mtspr_MPL_XDN(hwt, AVAIL_SET_1, 1); + mtspr_MPL_XDN(hwt, COMPLETE_SET_1, 1); + mtspr_MPL_XDN(hwt, TIMER_SET_1, 1); #if !CHIP_HAS_REV1_XDN() - __insn_mtspr(SPR_MPL_UDN_REFILL_SET_1, 1); - __insn_mtspr(SPR_MPL_UDN_CA_SET_1, 1); + mtspr_MPL_XDN(hwt, REFILL_SET_1, 1); + mtspr_MPL_XDN(hwt, CA_SET_1, 1); #endif } +/* Restrict or deny as necessary for the task we're switching to. */ +void hardwall_switch_tasks(struct task_struct *prev, + struct task_struct *next) +{ + int i; + for (i = 0; i < HARDWALL_TYPES; ++i) { + if (prev->thread.hardwall[i].info != NULL) { + if (next->thread.hardwall[i].info == NULL) + restrict_hardwall_mpls(&hardwall_types[i]); + } else if (next->thread.hardwall[i].info != NULL) { + grant_hardwall_mpls(&hardwall_types[i]); + } + } +} + +/* Does this task have the right to IPI the given cpu? */ +int hardwall_ipi_valid(int cpu) +{ +#ifdef __tilegx__ + struct hardwall_info *info = + current->thread.hardwall[HARDWALL_IPI].info; + return info && cpumask_test_cpu(cpu, &info->cpumask); +#else + return 0; +#endif +} /* - * Code to create, activate, deactivate, and destroy hardwall rectangles. + * Code to create, activate, deactivate, and destroy hardwall resources. */ -/* Create a hardwall for the given rectangle */ -static struct hardwall_info *hardwall_create( - size_t size, const unsigned char __user *bits) +/* Create a hardwall for the given resource */ +static struct hardwall_info *hardwall_create(struct hardwall_type *hwt, + size_t size, + const unsigned char __user *bits) { - struct hardwall_info *iter, *rect; + struct hardwall_info *iter, *info; struct cpumask mask; unsigned long flags; int rc; @@ -370,55 +521,62 @@ static struct hardwall_info *hardwall_create( } } - /* Allocate a new rectangle optimistically. */ - rect = kmalloc(sizeof(struct hardwall_info), + /* Allocate a new hardwall_info optimistically. */ + info = kmalloc(sizeof(struct hardwall_info), GFP_KERNEL | __GFP_ZERO); - if (rect == NULL) + if (info == NULL) return ERR_PTR(-ENOMEM); - INIT_LIST_HEAD(&rect->task_head); + INIT_LIST_HEAD(&info->task_head); + info->type = hwt; /* Compute the rectangle size and validate that it's plausible. */ - rc = setup_rectangle(rect, &mask); - if (rc != 0) { - kfree(rect); - return ERR_PTR(rc); + cpumask_copy(&info->cpumask, &mask); + info->id = find_first_bit(cpumask_bits(&mask), nr_cpumask_bits); + if (hwt->is_xdn) { + rc = check_rectangle(info, &mask); + if (rc != 0) { + kfree(info); + return ERR_PTR(rc); + } } /* Confirm it doesn't overlap and add it to the list. */ - spin_lock_irqsave(&hardwall_lock, flags); - list_for_each_entry(iter, &rectangles, list) { - if (overlaps(iter, rect)) { - spin_unlock_irqrestore(&hardwall_lock, flags); - kfree(rect); + spin_lock_irqsave(&hwt->lock, flags); + list_for_each_entry(iter, &hwt->list, list) { + if (cpumask_intersects(&iter->cpumask, &info->cpumask)) { + spin_unlock_irqrestore(&hwt->lock, flags); + kfree(info); return ERR_PTR(-EBUSY); } } - list_add_tail(&rect->list, &rectangles); - spin_unlock_irqrestore(&hardwall_lock, flags); + list_add_tail(&info->list, &hwt->list); + spin_unlock_irqrestore(&hwt->lock, flags); /* Set up appropriate hardwalling on all affected cpus. */ - hardwall_setup(rect); + if (hwt->is_xdn) + hardwall_protect_rectangle(info); /* Create a /proc/tile/hardwall entry. */ - hardwall_add_proc(rect); + hardwall_add_proc(info); - return rect; + return info; } /* Activate a given hardwall on this cpu for this process. */ -static int hardwall_activate(struct hardwall_info *rect) +static int hardwall_activate(struct hardwall_info *info) { - int cpu, x, y; + int cpu; unsigned long flags; struct task_struct *p = current; struct thread_struct *ts = &p->thread; + struct hardwall_type *hwt; - /* Require a rectangle. */ - if (rect == NULL) + /* Require a hardwall. */ + if (info == NULL) return -ENODATA; - /* Not allowed to activate a rectangle that is being torn down. */ - if (rect->teardown_in_progress) + /* Not allowed to activate a hardwall that is being torn down. */ + if (info->teardown_in_progress) return -EINVAL; /* @@ -428,78 +586,87 @@ static int hardwall_activate(struct hardwall_info *rect) if (cpumask_weight(&p->cpus_allowed) != 1) return -EPERM; - /* Make sure we are bound to a cpu in this rectangle. */ + /* Make sure we are bound to a cpu assigned to this resource. */ cpu = smp_processor_id(); BUG_ON(cpumask_first(&p->cpus_allowed) != cpu); - x = cpu_x(cpu); - y = cpu_y(cpu); - if (!contains(rect, x, y)) + if (!cpumask_test_cpu(cpu, &info->cpumask)) return -EINVAL; /* If we are already bound to this hardwall, it's a no-op. */ - if (ts->hardwall) { - BUG_ON(ts->hardwall != rect); + hwt = info->type; + if (ts->hardwall[hwt->index].info) { + BUG_ON(ts->hardwall[hwt->index].info != info); return 0; } - /* Success! This process gets to use the user networks on this cpu. */ - ts->hardwall = rect; - spin_lock_irqsave(&hardwall_lock, flags); - list_add(&ts->hardwall_list, &rect->task_head); - spin_unlock_irqrestore(&hardwall_lock, flags); - grant_network_mpls(); - printk(KERN_DEBUG "Pid %d (%s) activated for hardwall: cpu %d\n", - p->pid, p->comm, cpu); + /* Success! This process gets to use the resource on this cpu. */ + ts->hardwall[hwt->index].info = info; + spin_lock_irqsave(&hwt->lock, flags); + list_add(&ts->hardwall[hwt->index].list, &info->task_head); + spin_unlock_irqrestore(&hwt->lock, flags); + grant_hardwall_mpls(hwt); + printk(KERN_DEBUG "Pid %d (%s) activated for %s hardwall: cpu %d\n", + p->pid, p->comm, hwt->name, cpu); return 0; } /* - * Deactivate a task's hardwall. Must hold hardwall_lock. + * Deactivate a task's hardwall. Must hold lock for hardwall_type. * This method may be called from free_task(), so we don't want to * rely on too many fields of struct task_struct still being valid. * We assume the cpus_allowed, pid, and comm fields are still valid. */ -static void _hardwall_deactivate(struct task_struct *task) +static void _hardwall_deactivate(struct hardwall_type *hwt, + struct task_struct *task) { struct thread_struct *ts = &task->thread; if (cpumask_weight(&task->cpus_allowed) != 1) { - pr_err("pid %d (%s) releasing networks with" + pr_err("pid %d (%s) releasing %s hardwall with" " an affinity mask containing %d cpus!\n", - task->pid, task->comm, + task->pid, task->comm, hwt->name, cpumask_weight(&task->cpus_allowed)); BUG(); } - BUG_ON(ts->hardwall == NULL); - ts->hardwall = NULL; - list_del(&ts->hardwall_list); + BUG_ON(ts->hardwall[hwt->index].info == NULL); + ts->hardwall[hwt->index].info = NULL; + list_del(&ts->hardwall[hwt->index].list); if (task == current) - restrict_network_mpls(); + restrict_hardwall_mpls(hwt); } /* Deactivate a task's hardwall. */ -int hardwall_deactivate(struct task_struct *task) +static int hardwall_deactivate(struct hardwall_type *hwt, + struct task_struct *task) { unsigned long flags; int activated; - spin_lock_irqsave(&hardwall_lock, flags); - activated = (task->thread.hardwall != NULL); + spin_lock_irqsave(&hwt->lock, flags); + activated = (task->thread.hardwall[hwt->index].info != NULL); if (activated) - _hardwall_deactivate(task); - spin_unlock_irqrestore(&hardwall_lock, flags); + _hardwall_deactivate(hwt, task); + spin_unlock_irqrestore(&hwt->lock, flags); if (!activated) return -EINVAL; - printk(KERN_DEBUG "Pid %d (%s) deactivated for hardwall: cpu %d\n", - task->pid, task->comm, smp_processor_id()); + printk(KERN_DEBUG "Pid %d (%s) deactivated for %s hardwall: cpu %d\n", + task->pid, task->comm, hwt->name, smp_processor_id()); return 0; } -/* Stop a UDN switch before draining the network. */ -static void stop_udn_switch(void *ignored) +void hardwall_deactivate_all(struct task_struct *task) +{ + int i; + for (i = 0; i < HARDWALL_TYPES; ++i) + if (task->thread.hardwall[i].info) + hardwall_deactivate(&hardwall_types[i], task); +} + +/* Stop the switch before draining the network. */ +static void stop_xdn_switch(void *arg) { #if !CHIP_HAS_REV1_XDN() /* Freeze the switch and the demux. */ @@ -507,13 +674,71 @@ static void stop_udn_switch(void *ignored) SPR_UDN_SP_FREEZE__SP_FRZ_MASK | SPR_UDN_SP_FREEZE__DEMUX_FRZ_MASK | SPR_UDN_SP_FREEZE__NON_DEST_EXT_MASK); +#else + /* + * Drop all packets bound for the core or off the edge. + * We rely on the normal hardwall protection setup code + * to have set the low four bits to trigger firewall interrupts, + * and shift those bits up to trigger "drop on send" semantics, + * plus adding "drop on send to core" for all switches. + * In practice it seems the switches latch the DIRECTION_PROTECT + * SPR so they won't start dropping if they're already + * delivering the last message to the core, but it doesn't + * hurt to enable it here. + */ + struct hardwall_type *hwt = arg; + unsigned long protect = mfspr_XDN(hwt, DIRECTION_PROTECT); + mtspr_XDN(hwt, DIRECTION_PROTECT, (protect | C_PROTECT) << 5); #endif } +static void empty_xdn_demuxes(struct hardwall_type *hwt) +{ +#ifndef __tilepro__ + if (hwt->is_idn) { + while (__insn_mfspr(SPR_IDN_DATA_AVAIL) & (1 << 0)) + (void) __tile_idn0_receive(); + while (__insn_mfspr(SPR_IDN_DATA_AVAIL) & (1 << 1)) + (void) __tile_idn1_receive(); + return; + } +#endif + while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 0)) + (void) __tile_udn0_receive(); + while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 1)) + (void) __tile_udn1_receive(); + while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 2)) + (void) __tile_udn2_receive(); + while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 3)) + (void) __tile_udn3_receive(); +} + /* Drain all the state from a stopped switch. */ -static void drain_udn_switch(void *ignored) +static void drain_xdn_switch(void *arg) { -#if !CHIP_HAS_REV1_XDN() + struct hardwall_info *info = arg; + struct hardwall_type *hwt = info->type; + +#if CHIP_HAS_REV1_XDN() + /* + * The switches have been configured to drop any messages + * destined for cores (or off the edge of the rectangle). + * But the current message may continue to be delivered, + * so we wait until all the cores have finished any pending + * messages before we stop draining. + */ + int pending = mfspr_XDN(hwt, PENDING); + while (pending--) { + empty_xdn_demuxes(hwt); + if (hwt->is_idn) + __tile_idn_send(0); + else + __tile_udn_send(0); + } + atomic_dec(&info->xdn_pending_count); + while (atomic_read(&info->xdn_pending_count)) + empty_xdn_demuxes(hwt); +#else int i; int from_tile_words, ca_count; @@ -533,15 +758,7 @@ static void drain_udn_switch(void *ignored) (void) __insn_mfspr(SPR_UDN_DEMUX_WRITE_FIFO); /* Empty out demuxes. */ - while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 0)) - (void) __tile_udn0_receive(); - while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 1)) - (void) __tile_udn1_receive(); - while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 2)) - (void) __tile_udn2_receive(); - while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 3)) - (void) __tile_udn3_receive(); - BUG_ON((__insn_mfspr(SPR_UDN_DATA_AVAIL) & 0xF) != 0); + empty_xdn_demuxes(hwt); /* Empty out catch all. */ ca_count = __insn_mfspr(SPR_UDN_DEMUX_CA_COUNT); @@ -563,21 +780,25 @@ static void drain_udn_switch(void *ignored) #endif } -/* Reset random UDN state registers at boot up and during hardwall teardown. */ -void reset_network_state(void) +/* Reset random XDN state registers at boot up and during hardwall teardown. */ +static void reset_xdn_network_state(struct hardwall_type *hwt) { -#if !CHIP_HAS_REV1_XDN() - /* Reset UDN coordinates to their standard value */ - unsigned int cpu = smp_processor_id(); - unsigned int x = cpu % smp_width; - unsigned int y = cpu / smp_width; -#endif - - if (udn_disabled) + if (hwt->disabled) return; + /* Clear out other random registers so we have a clean slate. */ + mtspr_XDN(hwt, DIRECTION_PROTECT, 0); + mtspr_XDN(hwt, AVAIL_EN, 0); + mtspr_XDN(hwt, DEADLOCK_TIMEOUT, 0); + #if !CHIP_HAS_REV1_XDN() - __insn_mtspr(SPR_UDN_TILE_COORD, (x << 18) | (y << 7)); + /* Reset UDN coordinates to their standard value */ + { + unsigned int cpu = smp_processor_id(); + unsigned int x = cpu % smp_width; + unsigned int y = cpu / smp_width; + __insn_mtspr(SPR_UDN_TILE_COORD, (x << 18) | (y << 7)); + } /* Set demux tags to predefined values and enable them. */ __insn_mtspr(SPR_UDN_TAG_VALID, 0xf); @@ -585,56 +806,50 @@ void reset_network_state(void) __insn_mtspr(SPR_UDN_TAG_1, (1 << 1)); __insn_mtspr(SPR_UDN_TAG_2, (1 << 2)); __insn_mtspr(SPR_UDN_TAG_3, (1 << 3)); -#endif - /* Clear out other random registers so we have a clean slate. */ - __insn_mtspr(SPR_UDN_AVAIL_EN, 0); - __insn_mtspr(SPR_UDN_DEADLOCK_TIMEOUT, 0); -#if !CHIP_HAS_REV1_XDN() + /* Set other rev0 random registers to a clean state. */ __insn_mtspr(SPR_UDN_REFILL_EN, 0); __insn_mtspr(SPR_UDN_DEMUX_QUEUE_SEL, 0); __insn_mtspr(SPR_UDN_SP_FIFO_SEL, 0); -#endif /* Start the switch and demux. */ -#if !CHIP_HAS_REV1_XDN() __insn_mtspr(SPR_UDN_SP_FREEZE, 0); #endif } -/* Restart a UDN switch after draining. */ -static void restart_udn_switch(void *ignored) +void reset_network_state(void) { - reset_network_state(); - - /* Disable firewall interrupts. */ - __insn_mtspr(SPR_UDN_DIRECTION_PROTECT, 0); - disable_firewall_interrupts(); + reset_xdn_network_state(&hardwall_types[HARDWALL_UDN]); +#ifndef __tilepro__ + reset_xdn_network_state(&hardwall_types[HARDWALL_IDN]); +#endif } -/* Build a struct cpumask containing all valid tiles in bounding rectangle. */ -static void fill_mask(struct hardwall_info *r, struct cpumask *result) +/* Restart an XDN switch after draining. */ +static void restart_xdn_switch(void *arg) { - int x, y, cpu; + struct hardwall_type *hwt = arg; - cpumask_clear(result); +#if CHIP_HAS_REV1_XDN() + /* One last drain step to avoid races with injection and draining. */ + empty_xdn_demuxes(hwt); +#endif - cpu = r->ulhc_y * smp_width + r->ulhc_x; - for (y = 0; y < r->height; ++y, cpu += smp_width - r->width) { - for (x = 0; x < r->width; ++x, ++cpu) - cpu_online_set(cpu, result); - } + reset_xdn_network_state(hwt); + + /* Disable firewall interrupts. */ + disable_firewall_interrupts(hwt); } /* Last reference to a hardwall is gone, so clear the network. */ -static void hardwall_destroy(struct hardwall_info *rect) +static void hardwall_destroy(struct hardwall_info *info) { struct task_struct *task; + struct hardwall_type *hwt; unsigned long flags; - struct cpumask mask; - /* Make sure this file actually represents a rectangle. */ - if (rect == NULL) + /* Make sure this file actually represents a hardwall. */ + if (info == NULL) return; /* @@ -644,39 +859,53 @@ static void hardwall_destroy(struct hardwall_info *rect) * deactivate any remaining tasks before freeing the * hardwall_info object itself. */ - spin_lock_irqsave(&hardwall_lock, flags); - list_for_each_entry(task, &rect->task_head, thread.hardwall_list) - _hardwall_deactivate(task); - spin_unlock_irqrestore(&hardwall_lock, flags); - - /* Drain the UDN. */ - printk(KERN_DEBUG "Clearing hardwall rectangle %dx%d %d,%d\n", - rect->width, rect->height, rect->ulhc_x, rect->ulhc_y); - fill_mask(rect, &mask); - on_each_cpu_mask(&mask, stop_udn_switch, NULL, 1); - on_each_cpu_mask(&mask, drain_udn_switch, NULL, 1); + hwt = info->type; + info->teardown_in_progress = 1; + spin_lock_irqsave(&hwt->lock, flags); + list_for_each_entry(task, &info->task_head, + thread.hardwall[hwt->index].list) + _hardwall_deactivate(hwt, task); + spin_unlock_irqrestore(&hwt->lock, flags); + + if (hwt->is_xdn) { + /* Configure the switches for draining the user network. */ + printk(KERN_DEBUG + "Clearing %s hardwall rectangle %dx%d %d,%d\n", + hwt->name, info->width, info->height, + info->ulhc_x, info->ulhc_y); + on_each_cpu_mask(&info->cpumask, stop_xdn_switch, hwt, 1); + + /* Drain the network. */ +#if CHIP_HAS_REV1_XDN() + atomic_set(&info->xdn_pending_count, + cpumask_weight(&info->cpumask)); + on_each_cpu_mask(&info->cpumask, drain_xdn_switch, info, 0); +#else + on_each_cpu_mask(&info->cpumask, drain_xdn_switch, info, 1); +#endif - /* Restart switch and disable firewall. */ - on_each_cpu_mask(&mask, restart_udn_switch, NULL, 1); + /* Restart switch and disable firewall. */ + on_each_cpu_mask(&info->cpumask, restart_xdn_switch, hwt, 1); + } /* Remove the /proc/tile/hardwall entry. */ - hardwall_remove_proc(rect); - - /* Now free the rectangle from the list. */ - spin_lock_irqsave(&hardwall_lock, flags); - BUG_ON(!list_empty(&rect->task_head)); - list_del(&rect->list); - spin_unlock_irqrestore(&hardwall_lock, flags); - kfree(rect); + hardwall_remove_proc(info); + + /* Now free the hardwall from the list. */ + spin_lock_irqsave(&hwt->lock, flags); + BUG_ON(!list_empty(&info->task_head)); + list_del(&info->list); + spin_unlock_irqrestore(&hwt->lock, flags); + kfree(info); } static int hardwall_proc_show(struct seq_file *sf, void *v) { - struct hardwall_info *rect = sf->private; + struct hardwall_info *info = sf->private; char buf[256]; - int rc = cpulist_scnprintf(buf, sizeof(buf), &rect->cpumask); + int rc = cpulist_scnprintf(buf, sizeof(buf), &info->cpumask); buf[rc++] = '\n'; seq_write(sf, buf, rc); return 0; @@ -695,31 +924,45 @@ static const struct file_operations hardwall_proc_fops = { .release = single_release, }; -static void hardwall_add_proc(struct hardwall_info *rect) +static void hardwall_add_proc(struct hardwall_info *info) { char buf[64]; - snprintf(buf, sizeof(buf), "%d", rect->id); - proc_create_data(buf, 0444, hardwall_proc_dir, - &hardwall_proc_fops, rect); + snprintf(buf, sizeof(buf), "%d", info->id); + proc_create_data(buf, 0444, info->type->proc_dir, + &hardwall_proc_fops, info); } -static void hardwall_remove_proc(struct hardwall_info *rect) +static void hardwall_remove_proc(struct hardwall_info *info) { char buf[64]; - snprintf(buf, sizeof(buf), "%d", rect->id); - remove_proc_entry(buf, hardwall_proc_dir); + snprintf(buf, sizeof(buf), "%d", info->id); + remove_proc_entry(buf, info->type->proc_dir); } int proc_pid_hardwall(struct task_struct *task, char *buffer) { - struct hardwall_info *rect = task->thread.hardwall; - return rect ? sprintf(buffer, "%d\n", rect->id) : 0; + int i; + int n = 0; + for (i = 0; i < HARDWALL_TYPES; ++i) { + struct hardwall_info *info = task->thread.hardwall[i].info; + if (info) + n += sprintf(&buffer[n], "%s: %d\n", + info->type->name, info->id); + } + return n; } void proc_tile_hardwall_init(struct proc_dir_entry *root) { - if (!udn_disabled) - hardwall_proc_dir = proc_mkdir("hardwall", root); + int i; + for (i = 0; i < HARDWALL_TYPES; ++i) { + struct hardwall_type *hwt = &hardwall_types[i]; + if (hwt->disabled) + continue; + if (hardwall_proc_dir == NULL) + hardwall_proc_dir = proc_mkdir("hardwall", root); + hwt->proc_dir = proc_mkdir(hwt->name, hardwall_proc_dir); + } } @@ -729,34 +972,45 @@ void proc_tile_hardwall_init(struct proc_dir_entry *root) static long hardwall_ioctl(struct file *file, unsigned int a, unsigned long b) { - struct hardwall_info *rect = file->private_data; + struct hardwall_info *info = file->private_data; + int minor = iminor(file->f_mapping->host); + struct hardwall_type* hwt; if (_IOC_TYPE(a) != HARDWALL_IOCTL_BASE) return -EINVAL; + BUILD_BUG_ON(HARDWALL_TYPES != _HARDWALL_TYPES); + BUILD_BUG_ON(HARDWALL_TYPES != + sizeof(hardwall_types)/sizeof(hardwall_types[0])); + + if (minor < 0 || minor >= HARDWALL_TYPES) + return -EINVAL; + hwt = &hardwall_types[minor]; + WARN_ON(info && hwt != info->type); + switch (_IOC_NR(a)) { case _HARDWALL_CREATE: - if (udn_disabled) + if (hwt->disabled) return -ENOSYS; - if (rect != NULL) + if (info != NULL) return -EALREADY; - rect = hardwall_create(_IOC_SIZE(a), - (const unsigned char __user *)b); - if (IS_ERR(rect)) - return PTR_ERR(rect); - file->private_data = rect; + info = hardwall_create(hwt, _IOC_SIZE(a), + (const unsigned char __user *)b); + if (IS_ERR(info)) + return PTR_ERR(info); + file->private_data = info; return 0; case _HARDWALL_ACTIVATE: - return hardwall_activate(rect); + return hardwall_activate(info); case _HARDWALL_DEACTIVATE: - if (current->thread.hardwall != rect) + if (current->thread.hardwall[hwt->index].info != info) return -EINVAL; - return hardwall_deactivate(current); + return hardwall_deactivate(hwt, current); case _HARDWALL_GET_ID: - return rect ? rect->id : -EINVAL; + return info ? info->id : -EINVAL; default: return -EINVAL; @@ -775,26 +1029,28 @@ static long hardwall_compat_ioctl(struct file *file, /* The user process closed the file; revoke access to user networks. */ static int hardwall_flush(struct file *file, fl_owner_t owner) { - struct hardwall_info *rect = file->private_data; + struct hardwall_info *info = file->private_data; struct task_struct *task, *tmp; unsigned long flags; - if (rect) { + if (info) { /* * NOTE: if multiple threads are activated on this hardwall * file, the other threads will continue having access to the - * UDN until they are context-switched out and back in again. + * user network until they are context-switched out and back + * in again. * * NOTE: A NULL files pointer means the task is being torn * down, so in that case we also deactivate it. */ - spin_lock_irqsave(&hardwall_lock, flags); - list_for_each_entry_safe(task, tmp, &rect->task_head, - thread.hardwall_list) { + struct hardwall_type *hwt = info->type; + spin_lock_irqsave(&hwt->lock, flags); + list_for_each_entry_safe(task, tmp, &info->task_head, + thread.hardwall[hwt->index].list) { if (task->files == owner || task->files == NULL) - _hardwall_deactivate(task); + _hardwall_deactivate(hwt, task); } - spin_unlock_irqrestore(&hardwall_lock, flags); + spin_unlock_irqrestore(&hwt->lock, flags); } return 0; @@ -824,11 +1080,11 @@ static int __init dev_hardwall_init(void) int rc; dev_t dev; - rc = alloc_chrdev_region(&dev, 0, 1, "hardwall"); + rc = alloc_chrdev_region(&dev, 0, HARDWALL_TYPES, "hardwall"); if (rc < 0) return rc; cdev_init(&hardwall_dev, &dev_hardwall_fops); - rc = cdev_add(&hardwall_dev, dev, 1); + rc = cdev_add(&hardwall_dev, dev, HARDWALL_TYPES); if (rc < 0) return rc; diff --git a/arch/tile/kernel/head_32.S b/arch/tile/kernel/head_32.S index 1a39b7c1c87e..f71bfeeaf1a9 100644 --- a/arch/tile/kernel/head_32.S +++ b/arch/tile/kernel/head_32.S @@ -69,7 +69,7 @@ ENTRY(_start) } { moveli lr, lo16(1f) - move r5, zero + moveli r5, CTX_PAGE_FLAG } { auli lr, lr, ha16(1f) @@ -141,11 +141,11 @@ ENTRY(empty_zero_page) .macro PTE va, cpa, bits1, no_org=0 .ifeq \no_org - .org swapper_pg_dir + HV_L1_INDEX(\va) * HV_PTE_SIZE + .org swapper_pg_dir + PGD_INDEX(\va) * HV_PTE_SIZE .endif .word HV_PTE_PAGE | HV_PTE_DIRTY | HV_PTE_PRESENT | HV_PTE_ACCESSED | \ (HV_PTE_MODE_CACHE_NO_L3 << HV_PTE_INDEX_MODE) - .word (\bits1) | (HV_CPA_TO_PFN(\cpa) << (HV_PTE_INDEX_PFN - 32)) + .word (\bits1) | (HV_CPA_TO_PTFN(\cpa) << (HV_PTE_INDEX_PTFN - 32)) .endm __PAGE_ALIGNED_DATA @@ -166,7 +166,7 @@ ENTRY(swapper_pg_dir) /* The true text VAs are mapped as VA = PA + MEM_SV_INTRPT */ PTE MEM_SV_INTRPT, 0, (1 << (HV_PTE_INDEX_READABLE - 32)) | \ (1 << (HV_PTE_INDEX_EXECUTABLE - 32)) - .org swapper_pg_dir + HV_L1_SIZE + .org swapper_pg_dir + PGDIR_SIZE END(swapper_pg_dir) /* diff --git a/arch/tile/kernel/head_64.S b/arch/tile/kernel/head_64.S index 6bc3a932fe45..f9a2734f7b82 100644 --- a/arch/tile/kernel/head_64.S +++ b/arch/tile/kernel/head_64.S @@ -114,7 +114,7 @@ ENTRY(_start) shl16insli r0, r0, hw0(swapper_pg_dir - PAGE_OFFSET) } { - move r3, zero + moveli r3, CTX_PAGE_FLAG j hv_install_context } 1: @@ -210,19 +210,19 @@ ENTRY(empty_zero_page) .macro PTE cpa, bits1 .quad HV_PTE_PAGE | HV_PTE_DIRTY | HV_PTE_PRESENT | HV_PTE_ACCESSED |\ HV_PTE_GLOBAL | (HV_PTE_MODE_CACHE_NO_L3 << HV_PTE_INDEX_MODE) |\ - (\bits1) | (HV_CPA_TO_PFN(\cpa) << HV_PTE_INDEX_PFN) + (\bits1) | (HV_CPA_TO_PTFN(\cpa) << HV_PTE_INDEX_PTFN) .endm __PAGE_ALIGNED_DATA .align PAGE_SIZE ENTRY(swapper_pg_dir) - .org swapper_pg_dir + HV_L0_INDEX(PAGE_OFFSET) * HV_PTE_SIZE + .org swapper_pg_dir + PGD_INDEX(PAGE_OFFSET) * HV_PTE_SIZE .Lsv_data_pmd: .quad 0 /* PTE temp_data_pmd - PAGE_OFFSET, 0 */ - .org swapper_pg_dir + HV_L0_INDEX(MEM_SV_START) * HV_PTE_SIZE + .org swapper_pg_dir + PGD_INDEX(MEM_SV_START) * HV_PTE_SIZE .Lsv_code_pmd: .quad 0 /* PTE temp_code_pmd - PAGE_OFFSET, 0 */ - .org swapper_pg_dir + HV_L0_SIZE + .org swapper_pg_dir + SIZEOF_PGD END(swapper_pg_dir) .align HV_PAGE_TABLE_ALIGN @@ -233,11 +233,11 @@ ENTRY(temp_data_pmd) * permissions later. */ .set addr, 0 - .rept HV_L1_ENTRIES + .rept PTRS_PER_PMD PTE addr, HV_PTE_READABLE | HV_PTE_WRITABLE - .set addr, addr + HV_PAGE_SIZE_LARGE + .set addr, addr + HPAGE_SIZE .endr - .org temp_data_pmd + HV_L1_SIZE + .org temp_data_pmd + SIZEOF_PMD END(temp_data_pmd) .align HV_PAGE_TABLE_ALIGN @@ -248,11 +248,11 @@ ENTRY(temp_code_pmd) * permissions later. */ .set addr, 0 - .rept HV_L1_ENTRIES + .rept PTRS_PER_PMD PTE addr, HV_PTE_READABLE | HV_PTE_EXECUTABLE - .set addr, addr + HV_PAGE_SIZE_LARGE + .set addr, addr + HPAGE_SIZE .endr - .org temp_code_pmd + HV_L1_SIZE + .org temp_code_pmd + SIZEOF_PMD END(temp_code_pmd) /* diff --git a/arch/tile/kernel/hvglue.lds b/arch/tile/kernel/hvglue.lds index 2b7cd0a659a9..d44c5a67a1ed 100644 --- a/arch/tile/kernel/hvglue.lds +++ b/arch/tile/kernel/hvglue.lds @@ -55,4 +55,5 @@ hv_store_mapping = TEXT_OFFSET + 0x106a0; hv_inquire_realpa = TEXT_OFFSET + 0x106c0; hv_flush_all = TEXT_OFFSET + 0x106e0; hv_get_ipi_pte = TEXT_OFFSET + 0x10700; -hv_glue_internals = TEXT_OFFSET + 0x10720; +hv_set_pte_super_shift = TEXT_OFFSET + 0x10720; +hv_glue_internals = TEXT_OFFSET + 0x10740; diff --git a/arch/tile/kernel/init_task.c b/arch/tile/kernel/init_task.c deleted file mode 100644 index 928b31870669..000000000000 --- a/arch/tile/kernel/init_task.c +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright 2010 Tilera Corporation. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, version 2. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for - * more details. - */ - -#include <linux/mm.h> -#include <linux/fs.h> -#include <linux/init_task.h> -#include <linux/mqueue.h> -#include <linux/module.h> -#include <linux/start_kernel.h> -#include <linux/uaccess.h> - -static struct signal_struct init_signals = INIT_SIGNALS(init_signals); -static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); - -/* - * Initial thread structure. - * - * We need to make sure that this is THREAD_SIZE aligned due to the - * way process stacks are handled. This is done by having a special - * "init_task" linker map entry.. - */ -union thread_union init_thread_union __init_task_data = { - INIT_THREAD_INFO(init_task) -}; - -/* - * Initial task structure. - * - * All other task structs will be allocated on slabs in fork.c - */ -struct task_struct init_task = INIT_TASK(init_task); -EXPORT_SYMBOL(init_task); - -/* - * per-CPU stack and boot info. - */ -DEFINE_PER_CPU(unsigned long, boot_sp) = - (unsigned long)init_stack + THREAD_SIZE; - -#ifdef CONFIG_SMP -DEFINE_PER_CPU(unsigned long, boot_pc) = (unsigned long)start_kernel; -#else -/* - * The variable must be __initdata since it references __init code. - * With CONFIG_SMP it is per-cpu data, which is exempt from validation. - */ -unsigned long __initdata boot_pc = (unsigned long)start_kernel; -#endif diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S index aecc8ed5f39b..6943515100f8 100644 --- a/arch/tile/kernel/intvec_32.S +++ b/arch/tile/kernel/intvec_32.S @@ -799,6 +799,10 @@ handle_interrupt: * This routine takes a boolean in r30 indicating if this is an NMI. * If so, we also expect a boolean in r31 indicating whether to * re-enable the oprofile interrupts. + * + * Note that .Lresume_userspace is jumped to directly in several + * places, and we need to make sure r30 is set correctly in those + * callers as well. */ STD_ENTRY(interrupt_return) /* If we're resuming to kernel space, don't check thread flags. */ @@ -835,6 +839,18 @@ STD_ENTRY(interrupt_return) FEEDBACK_REENTER(interrupt_return) /* + * Use r33 to hold whether we have already loaded the callee-saves + * into ptregs. We don't want to do it twice in this loop, since + * then we'd clobber whatever changes are made by ptrace, etc. + * Get base of stack in r32. + */ + { + GET_THREAD_INFO(r32) + movei r33, 0 + } + +.Lretry_work_pending: + /* * Disable interrupts so as to make sure we don't * miss an interrupt that sets any of the thread flags (like * need_resched or sigpending) between sampling and the iret. @@ -844,9 +860,6 @@ STD_ENTRY(interrupt_return) IRQ_DISABLE(r20, r21) TRACE_IRQS_OFF /* Note: clobbers registers r0-r29 */ - /* Get base of stack in r32; note r30/31 are used as arguments here. */ - GET_THREAD_INFO(r32) - /* Check to see if there is any work to do before returning to user. */ { @@ -862,16 +875,18 @@ STD_ENTRY(interrupt_return) /* * Make sure we have all the registers saved for signal - * handling or single-step. Call out to C code to figure out - * exactly what we need to do for each flag bit, then if - * necessary, reload the flags and recheck. + * handling, notify-resume, or single-step. Call out to C + * code to figure out exactly what we need to do for each flag bit, + * then if necessary, reload the flags and recheck. */ - push_extra_callee_saves r0 { PTREGS_PTR(r0, PTREGS_OFFSET_BASE) - jal do_work_pending + bnz r33, 1f } - bnz r0, .Lresume_userspace + push_extra_callee_saves r0 + movei r33, 1 +1: jal do_work_pending + bnz r0, .Lretry_work_pending /* * In the NMI case we @@ -1176,10 +1191,12 @@ handle_syscall: add r20, r20, tp lw r21, r20 addi r21, r21, 1 - sw r20, r21 + { + sw r20, r21 + GET_THREAD_INFO(r31) + } /* Trace syscalls, if requested. */ - GET_THREAD_INFO(r31) addi r31, r31, THREAD_INFO_FLAGS_OFFSET lw r30, r31 andi r30, r30, _TIF_SYSCALL_TRACE @@ -1237,7 +1254,10 @@ handle_syscall: bzt r30, 1f jal do_syscall_trace FEEDBACK_REENTER(handle_syscall) -1: j .Lresume_userspace /* jump into middle of interrupt_return */ +1: { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } .Linvalid_syscall: /* Report an invalid syscall back to the user program */ @@ -1246,7 +1266,10 @@ handle_syscall: movei r28, -ENOSYS } sw r29, r28 - j .Lresume_userspace /* jump into middle of interrupt_return */ + { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } STD_ENDPROC(handle_syscall) /* Return the address for oprofile to suppress in backtraces. */ @@ -1262,7 +1285,10 @@ STD_ENTRY(ret_from_fork) jal sim_notify_fork jal schedule_tail FEEDBACK_REENTER(ret_from_fork) - j .Lresume_userspace /* jump into middle of interrupt_return */ + { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } STD_ENDPROC(ret_from_fork) /* @@ -1349,7 +1375,10 @@ handle_ill: 3: /* set PC and continue */ lw r26, r24 - sw r28, r26 + { + sw r28, r26 + GET_THREAD_INFO(r0) + } /* * Clear TIF_SINGLESTEP to prevent recursion if we execute an ill. @@ -1357,7 +1386,6 @@ handle_ill: * need to clear it here and can't really impose on all other arches. * So what's another write between friends? */ - GET_THREAD_INFO(r0) addi r1, r0, THREAD_INFO_FLAGS_OFFSET { @@ -1376,7 +1404,10 @@ handle_ill: jal send_sigtrap /* issue a SIGTRAP */ FEEDBACK_REENTER(handle_ill) - j .Lresume_userspace /* jump into middle of interrupt_return */ + { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } .Ldispatch_normal_ill: { diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S index 79c93e10ba27..7c06d597ffd0 100644 --- a/arch/tile/kernel/intvec_64.S +++ b/arch/tile/kernel/intvec_64.S @@ -22,6 +22,7 @@ #include <asm/irqflags.h> #include <asm/asm-offsets.h> #include <asm/types.h> +#include <asm/signal.h> #include <hv/hypervisor.h> #include <arch/abi.h> #include <arch/interrupts.h> @@ -219,7 +220,9 @@ intvec_\vecname: * This routine saves just the first four registers, plus the * stack context so we can do proper backtracing right away, * and defers to handle_interrupt to save the rest. - * The backtracer needs pc, ex1, lr, sp, r52, and faultnum. + * The backtracer needs pc, ex1, lr, sp, r52, and faultnum, + * and needs sp set to its final location at the bottom of + * the stack frame. */ addli r0, r0, PTREGS_OFFSET_LR - (PTREGS_SIZE + KSTK_PTREGS_GAP) wh64 r0 /* cache line 7 */ @@ -449,23 +452,6 @@ intvec_\vecname: push_reg r5, r52 st r52, r4 - /* Load tp with our per-cpu offset. */ -#ifdef CONFIG_SMP - { - mfspr r20, SPR_SYSTEM_SAVE_K_0 - moveli r21, hw2_last(__per_cpu_offset) - } - { - shl16insli r21, r21, hw1(__per_cpu_offset) - bfextu r20, r20, 0, LOG2_THREAD_SIZE-1 - } - shl16insli r21, r21, hw0(__per_cpu_offset) - shl3add r20, r20, r21 - ld tp, r20 -#else - move tp, zero -#endif - /* * If we will be returning to the kernel, we will need to * reset the interrupt masks to the state they had before. @@ -488,6 +474,44 @@ intvec_\vecname: .endif st r21, r32 + /* + * we've captured enough state to the stack (including in + * particular our EX_CONTEXT state) that we can now release + * the interrupt critical section and replace it with our + * standard "interrupts disabled" mask value. This allows + * synchronous interrupts (and profile interrupts) to punch + * through from this point onwards. + * + * It's important that no code before this point touch memory + * other than our own stack (to keep the invariant that this + * is all that gets touched under ICS), and that no code after + * this point reference any interrupt-specific SPR, in particular + * the EX_CONTEXT_K_ values. + */ + .ifc \function,handle_nmi + IRQ_DISABLE_ALL(r20) + .else + IRQ_DISABLE(r20, r21) + .endif + mtspr INTERRUPT_CRITICAL_SECTION, zero + + /* Load tp with our per-cpu offset. */ +#ifdef CONFIG_SMP + { + mfspr r20, SPR_SYSTEM_SAVE_K_0 + moveli r21, hw2_last(__per_cpu_offset) + } + { + shl16insli r21, r21, hw1(__per_cpu_offset) + bfextu r20, r20, 0, LOG2_THREAD_SIZE-1 + } + shl16insli r21, r21, hw0(__per_cpu_offset) + shl3add r20, r20, r21 + ld tp, r20 +#else + move tp, zero +#endif + #ifdef __COLLECT_LINKER_FEEDBACK__ /* * Notify the feedback routines that we were in the @@ -512,21 +536,6 @@ intvec_\vecname: #endif /* - * we've captured enough state to the stack (including in - * particular our EX_CONTEXT state) that we can now release - * the interrupt critical section and replace it with our - * standard "interrupts disabled" mask value. This allows - * synchronous interrupts (and profile interrupts) to punch - * through from this point onwards. - */ - .ifc \function,handle_nmi - IRQ_DISABLE_ALL(r20) - .else - IRQ_DISABLE(r20, r21) - .endif - mtspr INTERRUPT_CRITICAL_SECTION, zero - - /* * Prepare the first 256 stack bytes to be rapidly accessible * without having to fetch the background data. */ @@ -605,6 +614,10 @@ handle_interrupt: * This routine takes a boolean in r30 indicating if this is an NMI. * If so, we also expect a boolean in r31 indicating whether to * re-enable the oprofile interrupts. + * + * Note that .Lresume_userspace is jumped to directly in several + * places, and we need to make sure r30 is set correctly in those + * callers as well. */ STD_ENTRY(interrupt_return) /* If we're resuming to kernel space, don't check thread flags. */ @@ -642,6 +655,20 @@ STD_ENTRY(interrupt_return) FEEDBACK_REENTER(interrupt_return) /* + * Use r33 to hold whether we have already loaded the callee-saves + * into ptregs. We don't want to do it twice in this loop, since + * then we'd clobber whatever changes are made by ptrace, etc. + */ + { + movei r33, 0 + move r32, sp + } + + /* Get base of stack in r32. */ + EXTRACT_THREAD_INFO(r32) + +.Lretry_work_pending: + /* * Disable interrupts so as to make sure we don't * miss an interrupt that sets any of the thread flags (like * need_resched or sigpending) between sampling and the iret. @@ -651,9 +678,6 @@ STD_ENTRY(interrupt_return) IRQ_DISABLE(r20, r21) TRACE_IRQS_OFF /* Note: clobbers registers r0-r29 */ - /* Get base of stack in r32; note r30/31 are used as arguments here. */ - GET_THREAD_INFO(r32) - /* Check to see if there is any work to do before returning to user. */ { @@ -669,16 +693,18 @@ STD_ENTRY(interrupt_return) /* * Make sure we have all the registers saved for signal - * handling or single-step. Call out to C code to figure out + * handling or notify-resume. Call out to C code to figure out * exactly what we need to do for each flag bit, then if * necessary, reload the flags and recheck. */ - push_extra_callee_saves r0 { PTREGS_PTR(r0, PTREGS_OFFSET_BASE) - jal do_work_pending + bnez r33, 1f } - bnez r0, .Lresume_userspace + push_extra_callee_saves r0 + movei r33, 1 +1: jal do_work_pending + bnez r0, .Lretry_work_pending /* * In the NMI case we @@ -718,9 +744,10 @@ STD_ENTRY(interrupt_return) beqzt r30, .Lrestore_regs j 3f 2: TRACE_IRQS_ON + IRQ_ENABLE_LOAD(r20, r21) movei r0, 1 mtspr INTERRUPT_CRITICAL_SECTION, r0 - IRQ_ENABLE(r20, r21) + IRQ_ENABLE_APPLY(r20, r21) beqzt r30, .Lrestore_regs 3: @@ -737,7 +764,6 @@ STD_ENTRY(interrupt_return) * that will save some cycles if this turns out to be a syscall. */ .Lrestore_regs: - FEEDBACK_REENTER(interrupt_return) /* called from elsewhere */ /* * Rotate so we have one high bit and one low bit to test. @@ -963,11 +989,16 @@ handle_syscall: shl16insli r20, r20, hw0(irq_stat + IRQ_CPUSTAT_SYSCALL_COUNT_OFFSET) add r20, r20, tp ld4s r21, r20 - addi r21, r21, 1 - st4 r20, r21 + { + addi r21, r21, 1 + move r31, sp + } + { + st4 r20, r21 + EXTRACT_THREAD_INFO(r31) + } /* Trace syscalls, if requested. */ - GET_THREAD_INFO(r31) addi r31, r31, THREAD_INFO_FLAGS_OFFSET ld r30, r31 andi r30, r30, _TIF_SYSCALL_TRACE @@ -1039,11 +1070,28 @@ handle_syscall: /* Do syscall trace again, if requested. */ ld r30, r31 - andi r30, r30, _TIF_SYSCALL_TRACE - beqzt r30, 1f + andi r0, r30, _TIF_SYSCALL_TRACE + { + andi r0, r30, _TIF_SINGLESTEP + beqzt r0, 1f + } jal do_syscall_trace FEEDBACK_REENTER(handle_syscall) -1: j .Lresume_userspace /* jump into middle of interrupt_return */ + andi r0, r30, _TIF_SINGLESTEP + +1: beqzt r0, 2f + + /* Single stepping -- notify ptrace. */ + { + movei r0, SIGTRAP + jal ptrace_notify + } + FEEDBACK_REENTER(handle_syscall) + +2: { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } .Lcompat_syscall: /* @@ -1077,7 +1125,10 @@ handle_syscall: movei r28, -ENOSYS } st r29, r28 - j .Lresume_userspace /* jump into middle of interrupt_return */ + { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } STD_ENDPROC(handle_syscall) /* Return the address for oprofile to suppress in backtraces. */ @@ -1093,7 +1144,10 @@ STD_ENTRY(ret_from_fork) jal sim_notify_fork jal schedule_tail FEEDBACK_REENTER(ret_from_fork) - j .Lresume_userspace + { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } STD_ENDPROC(ret_from_fork) /* Various stub interrupt handlers and syscall handlers */ @@ -1156,6 +1210,18 @@ int_unalign: push_extra_callee_saves r0 j do_trap +/* Fill the return address stack with nonzero entries. */ +STD_ENTRY(fill_ra_stack) + { + move r0, lr + jal 1f + } +1: jal 2f +2: jal 3f +3: jal 4f +4: jrp r0 + STD_ENDPROC(fill_ra_stack) + /* Include .intrpt1 array of interrupt vectors */ .section ".intrpt1", "ax" @@ -1166,7 +1232,7 @@ int_unalign: #define do_hardwall_trap bad_intr #endif - int_hand INT_MEM_ERROR, MEM_ERROR, bad_intr + int_hand INT_MEM_ERROR, MEM_ERROR, do_trap int_hand INT_SINGLE_STEP_3, SINGLE_STEP_3, bad_intr #if CONFIG_KERNEL_PL == 2 int_hand INT_SINGLE_STEP_2, SINGLE_STEP_2, gx_singlestep_handle @@ -1191,7 +1257,7 @@ int_unalign: int_hand INT_UNALIGN_DATA, UNALIGN_DATA, int_unalign int_hand INT_DTLB_MISS, DTLB_MISS, do_page_fault int_hand INT_DTLB_ACCESS, DTLB_ACCESS, do_page_fault - int_hand INT_IDN_FIREWALL, IDN_FIREWALL, bad_intr + int_hand INT_IDN_FIREWALL, IDN_FIREWALL, do_hardwall_trap int_hand INT_UDN_FIREWALL, UDN_FIREWALL, do_hardwall_trap int_hand INT_TILE_TIMER, TILE_TIMER, do_timer_interrupt int_hand INT_IDN_TIMER, IDN_TIMER, bad_intr diff --git a/arch/tile/kernel/machine_kexec.c b/arch/tile/kernel/machine_kexec.c index 6255f2eab112..f0b54a934712 100644 --- a/arch/tile/kernel/machine_kexec.c +++ b/arch/tile/kernel/machine_kexec.c @@ -31,6 +31,8 @@ #include <asm/pgalloc.h> #include <asm/cacheflush.h> #include <asm/checksum.h> +#include <asm/tlbflush.h> +#include <asm/homecache.h> #include <hv/hypervisor.h> @@ -222,11 +224,22 @@ struct page *kimage_alloc_pages_arch(gfp_t gfp_mask, unsigned int order) return alloc_pages_node(0, gfp_mask, order); } +/* + * Address range in which pa=va mapping is set in setup_quasi_va_is_pa(). + * For tilepro, PAGE_OFFSET is used since this is the largest possbile value + * for tilepro, while for tilegx, we limit it to entire middle level page + * table which we assume has been allocated and is undoubtedly large enough. + */ +#ifndef __tilegx__ +#define QUASI_VA_IS_PA_ADDR_RANGE PAGE_OFFSET +#else +#define QUASI_VA_IS_PA_ADDR_RANGE PGDIR_SIZE +#endif + static void setup_quasi_va_is_pa(void) { - HV_PTE *pgtable; HV_PTE pte; - int i; + unsigned long i; /* * Flush our TLB to prevent conflicts between the previous contents @@ -234,16 +247,22 @@ static void setup_quasi_va_is_pa(void) */ local_flush_tlb_all(); - /* setup VA is PA, at least up to PAGE_OFFSET */ - - pgtable = (HV_PTE *)current->mm->pgd; + /* + * setup VA is PA, at least up to QUASI_VA_IS_PA_ADDR_RANGE. + * Note here we assume that level-1 page table is defined by + * HPAGE_SIZE. + */ pte = hv_pte(_PAGE_KERNEL | _PAGE_HUGE_PAGE); pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_NO_L3); - - for (i = 0; i < pgd_index(PAGE_OFFSET); i++) { + for (i = 0; i < (QUASI_VA_IS_PA_ADDR_RANGE >> HPAGE_SHIFT); i++) { + unsigned long vaddr = i << HPAGE_SHIFT; + pgd_t *pgd = pgd_offset(current->mm, vaddr); + pud_t *pud = pud_offset(pgd, vaddr); + pte_t *ptep = (pte_t *) pmd_offset(pud, vaddr); unsigned long pfn = i << (HPAGE_SHIFT - PAGE_SHIFT); + if (pfn_valid(pfn)) - __set_pte(&pgtable[i], pfn_pte(pfn, pte)); + __set_pte(ptep, pfn_pte(pfn, pte)); } } @@ -251,6 +270,7 @@ static void setup_quasi_va_is_pa(void) void machine_kexec(struct kimage *image) { void *reboot_code_buffer; + pte_t *ptep; void (*rnk)(unsigned long, void *, unsigned long) __noreturn; @@ -266,8 +286,10 @@ void machine_kexec(struct kimage *image) */ homecache_change_page_home(image->control_code_page, 0, smp_processor_id()); - reboot_code_buffer = vmap(&image->control_code_page, 1, 0, - __pgprot(_PAGE_KERNEL | _PAGE_EXECUTABLE)); + reboot_code_buffer = page_address(image->control_code_page); + BUG_ON(reboot_code_buffer == NULL); + ptep = virt_to_pte(NULL, (unsigned long)reboot_code_buffer); + __set_pte(ptep, pte_mkexec(*ptep)); memcpy(reboot_code_buffer, relocate_new_kernel, relocate_new_kernel_size); __flush_icache_range( diff --git a/arch/tile/kernel/module.c b/arch/tile/kernel/module.c index b90ab9925674..001cbfa10ac6 100644 --- a/arch/tile/kernel/module.c +++ b/arch/tile/kernel/module.c @@ -67,6 +67,8 @@ void *module_alloc(unsigned long size) area = __get_vm_area(size, VM_ALLOC, MEM_MODULE_START, MEM_MODULE_END); if (!area) goto error; + area->nr_pages = npages; + area->pages = pages; if (map_vm_area(area, prot_rwx, &pages)) { vunmap(area->addr); @@ -157,7 +159,17 @@ int apply_relocate_add(Elf_Shdr *sechdrs, switch (ELF_R_TYPE(rel[i].r_info)) { -#define MUNGE(func) (*location = ((*location & ~func(-1)) | func(value))) +#ifdef __LITTLE_ENDIAN +# define MUNGE(func) \ + (*location = ((*location & ~func(-1)) | func(value))) +#else +/* + * Instructions are always little-endian, so when we read them as data, + * we have to swap them around before and after modifying them. + */ +# define MUNGE(func) \ + (*location = swab64((swab64(*location) & ~func(-1)) | func(value))) +#endif #ifndef __tilegx__ case R_TILE_32: diff --git a/arch/tile/kernel/pci.c b/arch/tile/kernel/pci.c index a1bb59eecc18..b56d12bf5900 100644 --- a/arch/tile/kernel/pci.c +++ b/arch/tile/kernel/pci.c @@ -141,7 +141,7 @@ static int __devinit tile_init_irqs(int controller_id, * * Returns the number of controllers discovered. */ -int __devinit tile_pci_init(void) +int __init tile_pci_init(void) { int i; @@ -287,7 +287,7 @@ static void __devinit fixup_read_and_payload_sizes(void) * The controllers have been set up by the time we get here, by a call to * tile_pci_init. */ -int __devinit pcibios_init(void) +int __init pcibios_init(void) { int i; diff --git a/arch/tile/kernel/proc.c b/arch/tile/kernel/proc.c index 7a9327046404..dafc447b5125 100644 --- a/arch/tile/kernel/proc.c +++ b/arch/tile/kernel/proc.c @@ -22,6 +22,7 @@ #include <linux/proc_fs.h> #include <linux/sysctl.h> #include <linux/hardirq.h> +#include <linux/hugetlb.h> #include <linux/mman.h> #include <asm/unaligned.h> #include <asm/pgtable.h> @@ -146,7 +147,6 @@ static ctl_table unaligned_table[] = { }, {} }; -#endif static struct ctl_path tile_path[] = { { .procname = "tile" }, @@ -155,10 +155,9 @@ static struct ctl_path tile_path[] = { static int __init proc_sys_tile_init(void) { -#ifndef __tilegx__ /* FIXME: GX: no support for unaligned access yet */ register_sysctl_paths(tile_path, unaligned_table); -#endif return 0; } arch_initcall(proc_sys_tile_init); +#endif diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index 30caecac94dc..6be799150501 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -28,6 +28,7 @@ #include <linux/tracehook.h> #include <linux/signal.h> #include <asm/stack.h> +#include <asm/switch_to.h> #include <asm/homecache.h> #include <asm/syscalls.h> #include <asm/traps.h> @@ -113,27 +114,10 @@ void cpu_idle(void) } } -struct thread_info *alloc_thread_info_node(struct task_struct *task, int node) -{ - struct page *page; - gfp_t flags = GFP_KERNEL; - -#ifdef CONFIG_DEBUG_STACK_USAGE - flags |= __GFP_ZERO; -#endif - - page = alloc_pages_node(node, flags, THREAD_SIZE_ORDER); - if (!page) - return NULL; - - return (struct thread_info *)page_address(page); -} - /* - * Free a thread_info node, and all of its derivative - * data structures. + * Release a thread_info structure */ -void free_thread_info(struct thread_info *info) +void arch_release_thread_info(struct thread_info *info) { struct single_step_state *step_state = info->step_state; @@ -144,10 +128,10 @@ void free_thread_info(struct thread_info *info) * Calling deactivate here just frees up the data structures. * If the task we're freeing held the last reference to a * hardwall fd, it would have been released prior to this point - * anyway via exit_files(), and "hardwall" would be NULL by now. + * anyway via exit_files(), and the hardwall_task.info pointers + * would be NULL by now. */ - if (info->task->thread.hardwall) - hardwall_deactivate(info->task); + hardwall_deactivate_all(info->task); #endif if (step_state) { @@ -168,8 +152,6 @@ void free_thread_info(struct thread_info *info) */ kfree(step_state); } - - free_pages((unsigned long)info, THREAD_SIZE_ORDER); } static void save_arch_state(struct thread_struct *t); @@ -263,7 +245,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, #ifdef CONFIG_HARDWALL /* New thread does not own any networks. */ - p->thread.hardwall = NULL; + memset(&p->thread.hardwall[0], 0, + sizeof(struct hardwall_task) * HARDWALL_TYPES); #endif @@ -285,7 +268,7 @@ struct task_struct *validate_current(void) static struct task_struct corrupt = { .comm = "<corrupt>" }; struct task_struct *tsk = current; if (unlikely((unsigned long)tsk < PAGE_OFFSET || - (void *)tsk > high_memory || + (high_memory && (void *)tsk > high_memory) || ((unsigned long)tsk & (__alignof__(*tsk) - 1)) != 0)) { pr_err("Corrupt 'current' %p (sp %#lx)\n", tsk, stack_pointer); tsk = &corrupt; @@ -533,12 +516,7 @@ struct task_struct *__sched _switch_to(struct task_struct *prev, #ifdef CONFIG_HARDWALL /* Enable or disable access to the network registers appropriately. */ - if (prev->thread.hardwall != NULL) { - if (next->thread.hardwall == NULL) - restrict_network_mpls(); - } else if (next->thread.hardwall != NULL) { - grant_network_mpls(); - } + hardwall_switch_tasks(prev, next); #endif /* @@ -566,6 +544,10 @@ struct task_struct *__sched _switch_to(struct task_struct *prev, */ int do_work_pending(struct pt_regs *regs, u32 thread_info_flags) { + /* If we enter in kernel mode, do nothing and exit the caller loop. */ + if (!user_mode(regs)) + return 0; + if (thread_info_flags & _TIF_NEED_RESCHED) { schedule(); return 1; @@ -583,13 +565,10 @@ int do_work_pending(struct pt_regs *regs, u32 thread_info_flags) if (thread_info_flags & _TIF_NOTIFY_RESUME) { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); - if (current->replacement_session_keyring) - key_replace_session_keyring(); return 1; } if (thread_info_flags & _TIF_SINGLESTEP) { - if ((regs->ex1 & SPR_EX_CONTEXT_1_1__PL_MASK) == 0) - single_step_once(regs); + single_step_once(regs); return 0; } panic("work_pending: bad flags %#x\n", thread_info_flags); diff --git a/arch/tile/kernel/relocate_kernel.S b/arch/tile/kernel/relocate_kernel_32.S index 010b418515f8..010b418515f8 100644 --- a/arch/tile/kernel/relocate_kernel.S +++ b/arch/tile/kernel/relocate_kernel_32.S diff --git a/arch/tile/kernel/relocate_kernel_64.S b/arch/tile/kernel/relocate_kernel_64.S new file mode 100644 index 000000000000..1c09a4f5a4ea --- /dev/null +++ b/arch/tile/kernel/relocate_kernel_64.S @@ -0,0 +1,260 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * copy new kernel into place and then call hv_reexec + * + */ + +#include <linux/linkage.h> +#include <arch/chip.h> +#include <asm/page.h> +#include <hv/hypervisor.h> + +#undef RELOCATE_NEW_KERNEL_VERBOSE + +STD_ENTRY(relocate_new_kernel) + + move r30, r0 /* page list */ + move r31, r1 /* address of page we are on */ + move r32, r2 /* start address of new kernel */ + + shrui r1, r1, PAGE_SHIFT + addi r1, r1, 1 + shli sp, r1, PAGE_SHIFT + addi sp, sp, -8 + /* we now have a stack (whether we need one or not) */ + + moveli r40, hw2_last(hv_console_putc) + shl16insli r40, r40, hw1(hv_console_putc) + shl16insli r40, r40, hw0(hv_console_putc) + +#ifdef RELOCATE_NEW_KERNEL_VERBOSE + moveli r0, 'r' + jalr r40 + + moveli r0, '_' + jalr r40 + + moveli r0, 'n' + jalr r40 + + moveli r0, '_' + jalr r40 + + moveli r0, 'k' + jalr r40 + + moveli r0, '\n' + jalr r40 +#endif + + /* + * Throughout this code r30 is pointer to the element of page + * list we are working on. + * + * Normally we get to the next element of the page list by + * incrementing r30 by eight. The exception is if the element + * on the page list is an IND_INDIRECTION in which case we use + * the element with the low bits masked off as the new value + * of r30. + * + * To get this started, we need the value passed to us (which + * will always be an IND_INDIRECTION) in memory somewhere with + * r30 pointing at it. To do that, we push the value passed + * to us on the stack and make r30 point to it. + */ + + st sp, r30 + move r30, sp + addi sp, sp, -16 + +#if CHIP_HAS_CBOX_HOME_MAP() + /* + * On TILE-GX, we need to flush all tiles' caches, since we may + * have been doing hash-for-home caching there. Note that we + * must do this _after_ we're completely done modifying any memory + * other than our output buffer (which we know is locally cached). + * We want the caches to be fully clean when we do the reexec, + * because the hypervisor is going to do this flush again at that + * point, and we don't want that second flush to overwrite any memory. + */ + { + move r0, zero /* cache_pa */ + moveli r1, hw2_last(HV_FLUSH_EVICT_L2) + } + { + shl16insli r1, r1, hw1(HV_FLUSH_EVICT_L2) + movei r2, -1 /* cache_cpumask; -1 means all client tiles */ + } + { + shl16insli r1, r1, hw0(HV_FLUSH_EVICT_L2) /* cache_control */ + move r3, zero /* tlb_va */ + } + { + move r4, zero /* tlb_length */ + move r5, zero /* tlb_pgsize */ + } + { + move r6, zero /* tlb_cpumask */ + move r7, zero /* asids */ + } + { + moveli r20, hw2_last(hv_flush_remote) + move r8, zero /* asidcount */ + } + shl16insli r20, r20, hw1(hv_flush_remote) + shl16insli r20, r20, hw0(hv_flush_remote) + + jalr r20 +#endif + + /* r33 is destination pointer, default to zero */ + + moveli r33, 0 + +.Lloop: ld r10, r30 + + andi r9, r10, 0xf /* low 4 bits tell us what type it is */ + xor r10, r10, r9 /* r10 is now value with low 4 bits stripped */ + + cmpeqi r0, r9, 0x1 /* IND_DESTINATION */ + beqzt r0, .Ltry2 + + move r33, r10 + +#ifdef RELOCATE_NEW_KERNEL_VERBOSE + moveli r0, 'd' + jalr r40 +#endif + + addi r30, r30, 8 + j .Lloop + +.Ltry2: + cmpeqi r0, r9, 0x2 /* IND_INDIRECTION */ + beqzt r0, .Ltry4 + + move r30, r10 + +#ifdef RELOCATE_NEW_KERNEL_VERBOSE + moveli r0, 'i' + jalr r40 +#endif + + j .Lloop + +.Ltry4: + cmpeqi r0, r9, 0x4 /* IND_DONE */ + beqzt r0, .Ltry8 + + mf + +#ifdef RELOCATE_NEW_KERNEL_VERBOSE + moveli r0, 'D' + jalr r40 + moveli r0, '\n' + jalr r40 +#endif + + move r0, r32 + + moveli r41, hw2_last(hv_reexec) + shl16insli r41, r41, hw1(hv_reexec) + shl16insli r41, r41, hw0(hv_reexec) + + jalr r41 + + /* we should not get here */ + + moveli r0, '?' + jalr r40 + moveli r0, '\n' + jalr r40 + + j .Lhalt + +.Ltry8: cmpeqi r0, r9, 0x8 /* IND_SOURCE */ + beqz r0, .Lerr /* unknown type */ + + /* copy page at r10 to page at r33 */ + + move r11, r33 + + moveli r0, hw2_last(PAGE_SIZE) + shl16insli r0, r0, hw1(PAGE_SIZE) + shl16insli r0, r0, hw0(PAGE_SIZE) + add r33, r33, r0 + + /* copy word at r10 to word at r11 until r11 equals r33 */ + + /* We know page size must be multiple of 8, so we can unroll + * 8 times safely without any edge case checking. + * + * Issue a flush of the destination every 8 words to avoid + * incoherence when starting the new kernel. (Now this is + * just good paranoia because the hv_reexec call will also + * take care of this.) + */ + +1: + { ld r0, r10; addi r10, r10, 8 } + { st r11, r0; addi r11, r11, 8 } + { ld r0, r10; addi r10, r10, 8 } + { st r11, r0; addi r11, r11, 8 } + { ld r0, r10; addi r10, r10, 8 } + { st r11, r0; addi r11, r11, 8 } + { ld r0, r10; addi r10, r10, 8 } + { st r11, r0; addi r11, r11, 8 } + { ld r0, r10; addi r10, r10, 8 } + { st r11, r0; addi r11, r11, 8 } + { ld r0, r10; addi r10, r10, 8 } + { st r11, r0; addi r11, r11, 8 } + { ld r0, r10; addi r10, r10, 8 } + { st r11, r0; addi r11, r11, 8 } + { ld r0, r10; addi r10, r10, 8 } + { st r11, r0 } + { flush r11 ; addi r11, r11, 8 } + + cmpeq r0, r33, r11 + beqzt r0, 1b + +#ifdef RELOCATE_NEW_KERNEL_VERBOSE + moveli r0, 's' + jalr r40 +#endif + + addi r30, r30, 8 + j .Lloop + + +.Lerr: moveli r0, 'e' + jalr r40 + moveli r0, 'r' + jalr r40 + moveli r0, 'r' + jalr r40 + moveli r0, '\n' + jalr r40 +.Lhalt: + moveli r41, hw2_last(hv_halt) + shl16insli r41, r41, hw1(hv_halt) + shl16insli r41, r41, hw0(hv_halt) + + jalr r41 + STD_ENDPROC(relocate_new_kernel) + + .section .rodata,"a" + + .globl relocate_new_kernel_size +relocate_new_kernel_size: + .long .Lend_relocate_new_kernel - relocate_new_kernel diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index 5f85d8b34dbb..dd87f3420390 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -28,6 +28,8 @@ #include <linux/highmem.h> #include <linux/smp.h> #include <linux/timex.h> +#include <linux/hugetlb.h> +#include <linux/start_kernel.h> #include <asm/setup.h> #include <asm/sections.h> #include <asm/cacheflush.h> @@ -49,9 +51,6 @@ char chip_model[64] __write_once; struct pglist_data node_data[MAX_NUMNODES] __read_mostly; EXPORT_SYMBOL(node_data); -/* We only create bootmem data on node 0. */ -static bootmem_data_t __initdata node0_bdata; - /* Information on the NUMA nodes that we compute early */ unsigned long __cpuinitdata node_start_pfn[MAX_NUMNODES]; unsigned long __cpuinitdata node_end_pfn[MAX_NUMNODES]; @@ -61,6 +60,22 @@ unsigned long __initdata node_free_pfn[MAX_NUMNODES]; static unsigned long __initdata node_percpu[MAX_NUMNODES]; +/* + * per-CPU stack and boot info. + */ +DEFINE_PER_CPU(unsigned long, boot_sp) = + (unsigned long)init_stack + THREAD_SIZE; + +#ifdef CONFIG_SMP +DEFINE_PER_CPU(unsigned long, boot_pc) = (unsigned long)start_kernel; +#else +/* + * The variable must be __initdata since it references __init code. + * With CONFIG_SMP it is per-cpu data, which is exempt from validation. + */ +unsigned long __initdata boot_pc = (unsigned long)start_kernel; +#endif + #ifdef CONFIG_HIGHMEM /* Page frame index of end of lowmem on each controller. */ unsigned long __cpuinitdata node_lowmem_end_pfn[MAX_NUMNODES]; @@ -103,13 +118,11 @@ unsigned long __initdata pci_reserve_end_pfn = -1U; static int __init setup_maxmem(char *str) { - long maxmem_mb; - if (str == NULL || strict_strtol(str, 0, &maxmem_mb) != 0 || - maxmem_mb == 0) + unsigned long long maxmem; + if (str == NULL || (maxmem = memparse(str, NULL)) == 0) return -EINVAL; - maxmem_pfn = (maxmem_mb >> (HPAGE_SHIFT - 20)) << - (HPAGE_SHIFT - PAGE_SHIFT); + maxmem_pfn = (maxmem >> HPAGE_SHIFT) << (HPAGE_SHIFT - PAGE_SHIFT); pr_info("Forcing RAM used to no more than %dMB\n", maxmem_pfn >> (20 - PAGE_SHIFT)); return 0; @@ -119,14 +132,15 @@ early_param("maxmem", setup_maxmem); static int __init setup_maxnodemem(char *str) { char *endp; - long maxnodemem_mb, node; + unsigned long long maxnodemem; + long node; node = str ? simple_strtoul(str, &endp, 0) : INT_MAX; - if (node >= MAX_NUMNODES || *endp != ':' || - strict_strtol(endp+1, 0, &maxnodemem_mb) != 0) + if (node >= MAX_NUMNODES || *endp != ':') return -EINVAL; - maxnodemem_pfn[node] = (maxnodemem_mb >> (HPAGE_SHIFT - 20)) << + maxnodemem = memparse(endp+1, NULL); + maxnodemem_pfn[node] = (maxnodemem >> HPAGE_SHIFT) << (HPAGE_SHIFT - PAGE_SHIFT); pr_info("Forcing RAM used on node %ld to no more than %dMB\n", node, maxnodemem_pfn[node] >> (20 - PAGE_SHIFT)); @@ -519,37 +533,96 @@ static void __init setup_memory(void) #endif } -static void __init setup_bootmem_allocator(void) +/* + * On 32-bit machines, we only put bootmem on the low controller, + * since PAs > 4GB can't be used in bootmem. In principle one could + * imagine, e.g., multiple 1 GB controllers all of which could support + * bootmem, but in practice using controllers this small isn't a + * particularly interesting scenario, so we just keep it simple and + * use only the first controller for bootmem on 32-bit machines. + */ +static inline int node_has_bootmem(int nid) { - unsigned long bootmap_size, first_alloc_pfn, last_alloc_pfn; +#ifdef CONFIG_64BIT + return 1; +#else + return nid == 0; +#endif +} - /* Provide a node 0 bdata. */ - NODE_DATA(0)->bdata = &node0_bdata; +static inline unsigned long alloc_bootmem_pfn(int nid, + unsigned long size, + unsigned long goal) +{ + void *kva = __alloc_bootmem_node(NODE_DATA(nid), size, + PAGE_SIZE, goal); + unsigned long pfn = kaddr_to_pfn(kva); + BUG_ON(goal && PFN_PHYS(pfn) != goal); + return pfn; +} -#ifdef CONFIG_PCI - /* Don't let boot memory alias the PCI region. */ - last_alloc_pfn = min(max_low_pfn, pci_reserve_start_pfn); +static void __init setup_bootmem_allocator_node(int i) +{ + unsigned long start, end, mapsize, mapstart; + + if (node_has_bootmem(i)) { + NODE_DATA(i)->bdata = &bootmem_node_data[i]; + } else { + /* Share controller zero's bdata for now. */ + NODE_DATA(i)->bdata = &bootmem_node_data[0]; + return; + } + + /* Skip up to after the bss in node 0. */ + start = (i == 0) ? min_low_pfn : node_start_pfn[i]; + + /* Only lowmem, if we're a HIGHMEM build. */ +#ifdef CONFIG_HIGHMEM + end = node_lowmem_end_pfn[i]; #else - last_alloc_pfn = max_low_pfn; + end = node_end_pfn[i]; #endif - /* - * Initialize the boot-time allocator (with low memory only): - * The first argument says where to put the bitmap, and the - * second says where the end of allocatable memory is. - */ - bootmap_size = init_bootmem(min_low_pfn, last_alloc_pfn); + /* No memory here. */ + if (end == start) + return; + + /* Figure out where the bootmem bitmap is located. */ + mapsize = bootmem_bootmap_pages(end - start); + if (i == 0) { + /* Use some space right before the heap on node 0. */ + mapstart = start; + start += mapsize; + } else { + /* Allocate bitmap on node 0 to avoid page table issues. */ + mapstart = alloc_bootmem_pfn(0, PFN_PHYS(mapsize), 0); + } + /* Initialize a node. */ + init_bootmem_node(NODE_DATA(i), mapstart, start, end); + + /* Free all the space back into the allocator. */ + free_bootmem(PFN_PHYS(start), PFN_PHYS(end - start)); + +#if defined(CONFIG_PCI) /* - * Let the bootmem allocator use all the space we've given it - * except for its own bitmap. + * Throw away any memory aliased by the PCI region. FIXME: this + * is a temporary hack to work around bug 10502, and needs to be + * fixed properly. */ - first_alloc_pfn = min_low_pfn + PFN_UP(bootmap_size); - if (first_alloc_pfn >= last_alloc_pfn) - early_panic("Not enough memory on controller 0 for bootmem\n"); + if (pci_reserve_start_pfn < end && pci_reserve_end_pfn > start) + reserve_bootmem(PFN_PHYS(pci_reserve_start_pfn), + PFN_PHYS(pci_reserve_end_pfn - + pci_reserve_start_pfn), + BOOTMEM_EXCLUSIVE); +#endif +} - free_bootmem(PFN_PHYS(first_alloc_pfn), - PFN_PHYS(last_alloc_pfn - first_alloc_pfn)); +static void __init setup_bootmem_allocator(void) +{ + int i; + for (i = 0; i < MAX_NUMNODES; ++i) + setup_bootmem_allocator_node(i); #ifdef CONFIG_KEXEC if (crashk_res.start != crashk_res.end) @@ -580,14 +653,6 @@ static int __init percpu_size(void) return size; } -static inline unsigned long alloc_bootmem_pfn(int size, unsigned long goal) -{ - void *kva = __alloc_bootmem(size, PAGE_SIZE, goal); - unsigned long pfn = kaddr_to_pfn(kva); - BUG_ON(goal && PFN_PHYS(pfn) != goal); - return pfn; -} - static void __init zone_sizes_init(void) { unsigned long zones_size[MAX_NR_ZONES] = { 0 }; @@ -625,21 +690,22 @@ static void __init zone_sizes_init(void) * though, there'll be no lowmem, so we just alloc_bootmem * the memmap. There will be no percpu memory either. */ - if (__pfn_to_highbits(start) == 0) { - /* In low PAs, allocate via bootmem. */ + if (i != 0 && cpu_isset(i, isolnodes)) { + node_memmap_pfn[i] = + alloc_bootmem_pfn(0, memmap_size, 0); + BUG_ON(node_percpu[i] != 0); + } else if (node_has_bootmem(start)) { unsigned long goal = 0; node_memmap_pfn[i] = - alloc_bootmem_pfn(memmap_size, goal); + alloc_bootmem_pfn(i, memmap_size, 0); if (kdata_huge) goal = PFN_PHYS(lowmem_end) - node_percpu[i]; if (node_percpu[i]) node_percpu_pfn[i] = - alloc_bootmem_pfn(node_percpu[i], goal); - } else if (cpu_isset(i, isolnodes)) { - node_memmap_pfn[i] = alloc_bootmem_pfn(memmap_size, 0); - BUG_ON(node_percpu[i] != 0); + alloc_bootmem_pfn(i, node_percpu[i], + goal); } else { - /* In high PAs, just reserve some pages. */ + /* In non-bootmem zones, just reserve some pages. */ node_memmap_pfn[i] = node_free_pfn[i]; node_free_pfn[i] += PFN_UP(memmap_size); if (!kdata_huge) { @@ -663,16 +729,9 @@ static void __init zone_sizes_init(void) zones_size[ZONE_NORMAL] = end - start; #endif - /* - * Everyone shares node 0's bootmem allocator, but - * we use alloc_remap(), above, to put the actual - * struct page array on the individual controllers, - * which is most of the data that we actually care about. - * We can't place bootmem allocators on the other - * controllers since the bootmem allocator can only - * operate on 32-bit physical addresses. - */ - NODE_DATA(i)->bdata = NODE_DATA(0)->bdata; + /* Take zone metadata from controller 0 if we're isolnode. */ + if (node_isset(i, isolnodes)) + NODE_DATA(i)->bdata = &bootmem_node_data[0]; free_area_init_node(i, zones_size, start, NULL); printk(KERN_DEBUG " Normal zone: %ld per-cpu pages\n", @@ -855,6 +914,22 @@ subsys_initcall(topology_init); #endif /* CONFIG_NUMA */ +/* + * Initialize hugepage support on this cpu. We do this on all cores + * early in boot: before argument parsing for the boot cpu, and after + * argument parsing but before the init functions run on the secondaries. + * So the values we set up here in the hypervisor may be overridden on + * the boot cpu as arguments are parsed. + */ +static __cpuinit void init_super_pages(void) +{ +#ifdef CONFIG_HUGETLB_SUPER_PAGES + int i; + for (i = 0; i < HUGE_SHIFT_ENTRIES; ++i) + hv_set_pte_super_shift(i, huge_shift[i]); +#endif +} + /** * setup_cpu() - Do all necessary per-cpu, tile-specific initialization. * @boot: Is this the boot cpu? @@ -909,10 +984,19 @@ void __cpuinit setup_cpu(int boot) /* Reset the network state on this cpu. */ reset_network_state(); #endif + + init_super_pages(); } #ifdef CONFIG_BLK_DEV_INITRD +/* + * Note that the kernel can potentially support other compression + * techniques than gz, though we don't do so by default. If we ever + * decide to do so we can either look for other filename extensions, + * or just allow a file with this name to be compressed with an + * arbitrary compressor (somewhat counterintuitively). + */ static int __initdata set_initramfs_file; static char __initdata initramfs_file[128] = "initramfs.cpio.gz"; @@ -928,9 +1012,9 @@ static int __init setup_initramfs_file(char *str) early_param("initramfs_file", setup_initramfs_file); /* - * We look for an additional "initramfs.cpio.gz" file in the hvfs. + * We look for an "initramfs.cpio.gz" file in the hvfs. * If there is one, we allocate some memory for it and it will be - * unpacked to the initramfs after any built-in initramfs_data. + * unpacked to the initramfs. */ static void __init load_hv_initrd(void) { @@ -1100,7 +1184,7 @@ EXPORT_SYMBOL(hash_for_home_map); /* * cpu_cacheable_map lists all the cpus whose caches the hypervisor can - * flush on our behalf. It is set to cpu_possible_map OR'ed with + * flush on our behalf. It is set to cpu_possible_mask OR'ed with * hash_for_home_map, and it is what should be passed to * hv_flush_remote() to flush all caches. Note that if there are * dedicated hypervisor driver tiles that have authorized use of their @@ -1186,7 +1270,7 @@ static void __init setup_cpu_maps(void) sizeof(cpu_lotar_map)); if (rc < 0) { pr_err("warning: no HV_INQ_TILES_LOTAR; using AVAIL\n"); - cpu_lotar_map = cpu_possible_map; + cpu_lotar_map = *cpu_possible_mask; } #if CHIP_HAS_CBOX_HOME_MAP() @@ -1196,9 +1280,9 @@ static void __init setup_cpu_maps(void) sizeof(hash_for_home_map)); if (rc < 0) early_panic("hv_inquire_tiles(HFH_CACHE) failed: rc %d\n", rc); - cpumask_or(&cpu_cacheable_map, &cpu_possible_map, &hash_for_home_map); + cpumask_or(&cpu_cacheable_map, cpu_possible_mask, &hash_for_home_map); #else - cpu_cacheable_map = cpu_possible_map; + cpu_cacheable_map = *cpu_possible_mask; #endif } @@ -1390,13 +1474,13 @@ void __init setup_per_cpu_areas(void) for (i = 0; i < size; i += PAGE_SIZE, ++pfn, ++pg) { /* Update the vmalloc mapping and page home. */ - pte_t *ptep = - virt_to_pte(NULL, (unsigned long)ptr + i); + unsigned long addr = (unsigned long)ptr + i; + pte_t *ptep = virt_to_pte(NULL, addr); pte_t pte = *ptep; BUG_ON(pfn != pte_pfn(pte)); pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_TILE_L3); pte = set_remote_cache_cpu(pte, cpu); - set_pte(ptep, pte); + set_pte_at(&init_mm, addr, ptep, pte); /* Update the lowmem mapping for consistency. */ lowmem_va = (unsigned long)pfn_to_kaddr(pfn); @@ -1409,7 +1493,7 @@ void __init setup_per_cpu_areas(void) BUG_ON(pte_huge(*ptep)); } BUG_ON(pfn != pte_pfn(*ptep)); - set_pte(ptep, pte); + set_pte_at(&init_mm, lowmem_va, ptep, pte); } } diff --git a/arch/tile/kernel/signal.c b/arch/tile/kernel/signal.c index f79d4b88c747..e29b0553211d 100644 --- a/arch/tile/kernel/signal.c +++ b/arch/tile/kernel/signal.c @@ -37,8 +37,6 @@ #define DEBUG_SIG 0 -#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) - SYSCALL_DEFINE3(sigaltstack, const stack_t __user *, uss, stack_t __user *, uoss, struct pt_regs *, regs) { @@ -96,7 +94,6 @@ SYSCALL_DEFINE1(rt_sigreturn, struct pt_regs *, regs) if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) goto badframe; - sigdelsetmask(&set, ~_BLOCKABLE); set_current_blocked(&set); if (restore_sigcontext(regs, &frame->uc.uc_mcontext)) @@ -242,10 +239,11 @@ give_sigsegv: * OK, we're invoking a handler */ -static int handle_signal(unsigned long sig, siginfo_t *info, - struct k_sigaction *ka, sigset_t *oldset, +static void handle_signal(unsigned long sig, siginfo_t *info, + struct k_sigaction *ka, struct pt_regs *regs) { + sigset_t *oldset = sigmask_to_save(); int ret; /* Are we from a system call? */ @@ -278,15 +276,9 @@ static int handle_signal(unsigned long sig, siginfo_t *info, else #endif ret = setup_rt_frame(sig, ka, info, oldset, regs); - if (ret == 0) { - /* This code is only called from system calls or from - * the work_pending path in the return-to-user code, and - * either way we can re-enable interrupts unconditionally. - */ - block_sigmask(ka, sig); - } - - return ret; + if (ret) + return; + signal_delivered(sig, info, ka, regs, 0); } /* @@ -299,7 +291,6 @@ void do_signal(struct pt_regs *regs) siginfo_t info; int signr; struct k_sigaction ka; - sigset_t *oldset; /* * i386 will check if we're coming from kernel mode and bail out @@ -308,24 +299,10 @@ void do_signal(struct pt_regs *regs) * helpful, we can reinstate the check on "!user_mode(regs)". */ - if (current_thread_info()->status & TS_RESTORE_SIGMASK) - oldset = ¤t->saved_sigmask; - else - oldset = ¤t->blocked; - signr = get_signal_to_deliver(&info, &ka, regs, NULL); if (signr > 0) { /* Whee! Actually deliver the signal. */ - if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { - /* - * A signal was successfully delivered; the saved - * sigmask will have been stored in the signal frame, - * and will be restored by sigreturn, so we can simply - * clear the TS_RESTORE_SIGMASK flag. - */ - current_thread_info()->status &= ~TS_RESTORE_SIGMASK; - } - + handle_signal(signr, &info, &ka, regs); goto done; } @@ -350,10 +327,7 @@ void do_signal(struct pt_regs *regs) } /* If there's no signal to deliver, just put the saved sigmask back. */ - if (current_thread_info()->status & TS_RESTORE_SIGMASK) { - current_thread_info()->status &= ~TS_RESTORE_SIGMASK; - sigprocmask(SIG_SETMASK, ¤t->saved_sigmask, NULL); - } + restore_saved_sigmask(); done: /* Avoid double syscall restart if there are nested signals. */ diff --git a/arch/tile/kernel/single_step.c b/arch/tile/kernel/single_step.c index bc1eb586e24d..27742e87e255 100644 --- a/arch/tile/kernel/single_step.c +++ b/arch/tile/kernel/single_step.c @@ -153,9 +153,25 @@ static tile_bundle_bits rewrite_load_store_unaligned( if (((unsigned long)addr % size) == 0) return bundle; -#ifndef __LITTLE_ENDIAN -# error We assume little-endian representation with copy_xx_user size 2 here -#endif + /* + * Return SIGBUS with the unaligned address, if requested. + * Note that we return SIGBUS even for completely invalid addresses + * as long as they are in fact unaligned; this matches what the + * tilepro hardware would be doing, if it could provide us with the + * actual bad address in an SPR, which it doesn't. + */ + if (unaligned_fixup == 0) { + siginfo_t info = { + .si_signo = SIGBUS, + .si_code = BUS_ADRALN, + .si_addr = addr + }; + trace_unhandled_signal("unaligned trap", regs, + (unsigned long)addr, SIGBUS); + force_sig_info(info.si_signo, &info, current); + return (tilepro_bundle_bits) 0; + } + /* Handle unaligned load/store */ if (mem_op == MEMOP_LOAD || mem_op == MEMOP_LOAD_POSTINCR) { unsigned short val_16; @@ -176,8 +192,19 @@ static tile_bundle_bits rewrite_load_store_unaligned( state->update = 1; } } else { + unsigned short val_16; val = (val_reg == TREG_ZERO) ? 0 : regs->regs[val_reg]; - err = copy_to_user(addr, &val, size); + switch (size) { + case 2: + val_16 = val; + err = copy_to_user(addr, &val_16, sizeof(val_16)); + break; + case 4: + err = copy_to_user(addr, &val, sizeof(val)); + break; + default: + BUG(); + } } if (err) { @@ -192,18 +219,6 @@ static tile_bundle_bits rewrite_load_store_unaligned( return (tile_bundle_bits) 0; } - if (unaligned_fixup == 0) { - siginfo_t info = { - .si_signo = SIGBUS, - .si_code = BUS_ADRALN, - .si_addr = addr - }; - trace_unhandled_signal("unaligned trap", regs, - (unsigned long)addr, SIGBUS); - force_sig_info(info.si_signo, &info, current); - return (tile_bundle_bits) 0; - } - if (unaligned_printk || unaligned_fixup_count == 0) { pr_info("Process %d/%s: PC %#lx: Fixup of" " unaligned %s at %#lx.\n", @@ -339,12 +354,10 @@ void single_step_once(struct pt_regs *regs) } /* allocate a cache line of writable, executable memory */ - down_write(¤t->mm->mmap_sem); - buffer = (void __user *) do_mmap(NULL, 0, 64, + buffer = (void __user *) vm_mmap(NULL, 0, 64, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0); - up_write(¤t->mm->mmap_sem); if (IS_ERR((void __force *)buffer)) { kfree(state); diff --git a/arch/tile/kernel/smp.c b/arch/tile/kernel/smp.c index a44e103c5a63..cbc73a8b8fe1 100644 --- a/arch/tile/kernel/smp.c +++ b/arch/tile/kernel/smp.c @@ -103,7 +103,7 @@ static void smp_stop_cpu_interrupt(void) set_cpu_online(smp_processor_id(), 0); arch_local_irq_disable_all(); for (;;) - asm("nap"); + asm("nap; nop"); } /* This function calls the 'stop' function on all other CPUs in the system. */ @@ -113,6 +113,12 @@ void smp_send_stop(void) send_IPI_allbutself(MSG_TAG_STOP_CPU); } +/* On panic, just wait; we may get an smp_send_stop() later on. */ +void panic_smp_self_stop(void) +{ + while (1) + asm("nap; nop"); +} /* * Dispatch code called from hv_message_intr() for HV_MSG_TILE hv messages. @@ -197,7 +203,7 @@ void __init ipi_init(void) if (hv_get_ipi_pte(tile, KERNEL_PL, &pte) != 0) panic("Failed to initialize IPI for cpu %d\n", cpu); - offset = hv_pte_get_pfn(pte) << PAGE_SHIFT; + offset = PFN_PHYS(pte_pfn(pte)); ipi_mappings[cpu] = ioremap_prot(offset, PAGE_SIZE, pte); } #endif diff --git a/arch/tile/kernel/smpboot.c b/arch/tile/kernel/smpboot.c index b949edcec200..84873fbe8f27 100644 --- a/arch/tile/kernel/smpboot.c +++ b/arch/tile/kernel/smpboot.c @@ -196,6 +196,8 @@ void __cpuinit online_secondary(void) /* This must be done before setting cpu_online_mask */ wmb(); + notify_cpu_starting(smp_processor_id()); + /* * We need to hold call_lock, so there is no inconsistency * between the time smp_call_function() determines number of @@ -220,7 +222,7 @@ void __cpuinit online_secondary(void) cpu_idle(); } -int __cpuinit __cpu_up(unsigned int cpu) +int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *tidle) { /* Wait 5s total for all CPUs for them to come online */ static int timeout; diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c index 37ee4d037e0b..b2f44c28dda6 100644 --- a/arch/tile/kernel/stack.c +++ b/arch/tile/kernel/stack.c @@ -21,10 +21,12 @@ #include <linux/stacktrace.h> #include <linux/uaccess.h> #include <linux/mmzone.h> +#include <linux/dcache.h> +#include <linux/fs.h> #include <asm/backtrace.h> #include <asm/page.h> -#include <asm/tlbflush.h> #include <asm/ucontext.h> +#include <asm/switch_to.h> #include <asm/sigframe.h> #include <asm/stack.h> #include <arch/abi.h> @@ -44,72 +46,23 @@ static int in_kernel_stack(struct KBacktraceIterator *kbt, unsigned long sp) return sp >= kstack_base && sp < kstack_base + THREAD_SIZE; } -/* Is address valid for reading? */ -static int valid_address(struct KBacktraceIterator *kbt, unsigned long address) -{ - HV_PTE *l1_pgtable = kbt->pgtable; - HV_PTE *l2_pgtable; - unsigned long pfn; - HV_PTE pte; - struct page *page; - - if (l1_pgtable == NULL) - return 0; /* can't read user space in other tasks */ - -#ifdef CONFIG_64BIT - /* Find the real l1_pgtable by looking in the l0_pgtable. */ - pte = l1_pgtable[HV_L0_INDEX(address)]; - if (!hv_pte_get_present(pte)) - return 0; - pfn = hv_pte_get_pfn(pte); - if (pte_huge(pte)) { - if (!pfn_valid(pfn)) { - pr_err("L0 huge page has bad pfn %#lx\n", pfn); - return 0; - } - return hv_pte_get_present(pte) && hv_pte_get_readable(pte); - } - page = pfn_to_page(pfn); - BUG_ON(PageHighMem(page)); /* No HIGHMEM on 64-bit. */ - l1_pgtable = (HV_PTE *)pfn_to_kaddr(pfn); -#endif - pte = l1_pgtable[HV_L1_INDEX(address)]; - if (!hv_pte_get_present(pte)) - return 0; - pfn = hv_pte_get_pfn(pte); - if (pte_huge(pte)) { - if (!pfn_valid(pfn)) { - pr_err("huge page has bad pfn %#lx\n", pfn); - return 0; - } - return hv_pte_get_present(pte) && hv_pte_get_readable(pte); - } - - page = pfn_to_page(pfn); - if (PageHighMem(page)) { - pr_err("L2 page table not in LOWMEM (%#llx)\n", - HV_PFN_TO_CPA(pfn)); - return 0; - } - l2_pgtable = (HV_PTE *)pfn_to_kaddr(pfn); - pte = l2_pgtable[HV_L2_INDEX(address)]; - return hv_pte_get_present(pte) && hv_pte_get_readable(pte); -} - /* Callback for backtracer; basically a glorified memcpy */ static bool read_memory_func(void *result, unsigned long address, unsigned int size, void *vkbt) { int retval; struct KBacktraceIterator *kbt = (struct KBacktraceIterator *)vkbt; + + if (address == 0) + return 0; if (__kernel_text_address(address)) { /* OK to read kernel code. */ } else if (address >= PAGE_OFFSET) { /* We only tolerate kernel-space reads of this task's stack */ if (!in_kernel_stack(kbt, address)) return 0; - } else if (!valid_address(kbt, address)) { - return 0; /* invalid user-space address */ + } else if (!kbt->is_current) { + return 0; /* can't read from other user address spaces */ } pagefault_disable(); retval = __copy_from_user_inatomic(result, @@ -127,6 +80,8 @@ static struct pt_regs *valid_fault_handler(struct KBacktraceIterator* kbt) unsigned long sp = kbt->it.sp; struct pt_regs *p; + if (sp % sizeof(long) != 0) + return NULL; if (!in_kernel_stack(kbt, sp)) return NULL; if (!in_kernel_stack(kbt, sp + C_ABI_SAVE_AREA_SIZE + PTREGS_SIZE-1)) @@ -169,27 +124,27 @@ static int is_sigreturn(unsigned long pc) } /* Return a pt_regs pointer for a valid signal handler frame */ -static struct pt_regs *valid_sigframe(struct KBacktraceIterator* kbt) +static struct pt_regs *valid_sigframe(struct KBacktraceIterator* kbt, + struct rt_sigframe* kframe) { BacktraceIterator *b = &kbt->it; - if (b->pc == VDSO_BASE) { - struct rt_sigframe *frame; - unsigned long sigframe_top = - b->sp + sizeof(struct rt_sigframe) - 1; - if (!valid_address(kbt, b->sp) || - !valid_address(kbt, sigframe_top)) { - if (kbt->verbose) - pr_err(" (odd signal: sp %#lx?)\n", - (unsigned long)(b->sp)); + if (b->pc == VDSO_BASE && b->sp < PAGE_OFFSET && + b->sp % sizeof(long) == 0) { + int retval; + pagefault_disable(); + retval = __copy_from_user_inatomic( + kframe, (void __user __force *)b->sp, + sizeof(*kframe)); + pagefault_enable(); + if (retval != 0 || + (unsigned int)(kframe->info.si_signo) >= _NSIG) return NULL; - } - frame = (struct rt_sigframe *)b->sp; if (kbt->verbose) { pr_err(" <received signal %d>\n", - frame->info.si_signo); + kframe->info.si_signo); } - return (struct pt_regs *)&frame->uc.uc_mcontext; + return (struct pt_regs *)&kframe->uc.uc_mcontext; } return NULL; } @@ -202,10 +157,11 @@ static int KBacktraceIterator_is_sigreturn(struct KBacktraceIterator *kbt) static int KBacktraceIterator_restart(struct KBacktraceIterator *kbt) { struct pt_regs *p; + struct rt_sigframe kframe; p = valid_fault_handler(kbt); if (p == NULL) - p = valid_sigframe(kbt); + p = valid_sigframe(kbt, &kframe); if (p == NULL) return 0; backtrace_init(&kbt->it, read_memory_func, kbt, @@ -265,41 +221,19 @@ void KBacktraceIterator_init(struct KBacktraceIterator *kbt, /* * Set up callback information. We grab the kernel stack base - * so we will allow reads of that address range, and if we're - * asking about the current process we grab the page table - * so we can check user accesses before trying to read them. - * We flush the TLB to avoid any weird skew issues. + * so we will allow reads of that address range. */ - is_current = (t == NULL); + is_current = (t == NULL || t == current); kbt->is_current = is_current; if (is_current) t = validate_current(); kbt->task = t; - kbt->pgtable = NULL; kbt->verbose = 0; /* override in caller if desired */ kbt->profile = 0; /* override in caller if desired */ kbt->end = KBT_ONGOING; - kbt->new_context = 0; - if (is_current) { - HV_PhysAddr pgdir_pa = hv_inquire_context().page_table; - if (pgdir_pa == (unsigned long)swapper_pg_dir - PAGE_OFFSET) { - /* - * Not just an optimization: this also allows - * this to work at all before va/pa mappings - * are set up. - */ - kbt->pgtable = swapper_pg_dir; - } else { - struct page *page = pfn_to_page(PFN_DOWN(pgdir_pa)); - if (!PageHighMem(page)) - kbt->pgtable = __va(pgdir_pa); - else - pr_err("page table not in LOWMEM" - " (%#llx)\n", pgdir_pa); - } - local_flush_tlb_all(); + kbt->new_context = 1; + if (is_current) validate_stack(regs); - } if (regs == NULL) { if (is_current || t->state == TASK_RUNNING) { @@ -345,6 +279,78 @@ void KBacktraceIterator_next(struct KBacktraceIterator *kbt) } EXPORT_SYMBOL(KBacktraceIterator_next); +static void describe_addr(struct KBacktraceIterator *kbt, + unsigned long address, + int have_mmap_sem, char *buf, size_t bufsize) +{ + struct vm_area_struct *vma; + size_t namelen, remaining; + unsigned long size, offset, adjust; + char *p, *modname; + const char *name; + int rc; + + /* + * Look one byte back for every caller frame (i.e. those that + * aren't a new context) so we look up symbol data for the + * call itself, not the following instruction, which may be on + * a different line (or in a different function). + */ + adjust = !kbt->new_context; + address -= adjust; + + if (address >= PAGE_OFFSET) { + /* Handle kernel symbols. */ + BUG_ON(bufsize < KSYM_NAME_LEN); + name = kallsyms_lookup(address, &size, &offset, + &modname, buf); + if (name == NULL) { + buf[0] = '\0'; + return; + } + namelen = strlen(buf); + remaining = (bufsize - 1) - namelen; + p = buf + namelen; + rc = snprintf(p, remaining, "+%#lx/%#lx ", + offset + adjust, size); + if (modname && rc < remaining) + snprintf(p + rc, remaining - rc, "[%s] ", modname); + buf[bufsize-1] = '\0'; + return; + } + + /* If we don't have the mmap_sem, we can't show any more info. */ + buf[0] = '\0'; + if (!have_mmap_sem) + return; + + /* Find vma info. */ + vma = find_vma(kbt->task->mm, address); + if (vma == NULL || address < vma->vm_start) { + snprintf(buf, bufsize, "[unmapped address] "); + return; + } + + if (vma->vm_file) { + char *s; + p = d_path(&vma->vm_file->f_path, buf, bufsize); + if (IS_ERR(p)) + p = "?"; + s = strrchr(p, '/'); + if (s) + p = s+1; + } else { + p = "anon"; + } + + /* Generate a string description of the vma info. */ + namelen = strlen(p); + remaining = (bufsize - 1) - namelen; + memmove(buf, p, namelen); + snprintf(buf + namelen, remaining, "[%lx+%lx] ", + vma->vm_start, vma->vm_end - vma->vm_start); +} + /* * This method wraps the backtracer's more generic support. * It is only invoked from the architecture-specific code; show_stack() @@ -353,6 +359,7 @@ EXPORT_SYMBOL(KBacktraceIterator_next); void tile_show_stack(struct KBacktraceIterator *kbt, int headers) { int i; + int have_mmap_sem = 0; if (headers) { /* @@ -369,31 +376,16 @@ void tile_show_stack(struct KBacktraceIterator *kbt, int headers) kbt->verbose = 1; i = 0; for (; !KBacktraceIterator_end(kbt); KBacktraceIterator_next(kbt)) { - char *modname; - const char *name; - unsigned long address = kbt->it.pc; - unsigned long offset, size; char namebuf[KSYM_NAME_LEN+100]; + unsigned long address = kbt->it.pc; - if (address >= PAGE_OFFSET) - name = kallsyms_lookup(address, &size, &offset, - &modname, namebuf); - else - name = NULL; - - if (!name) - namebuf[0] = '\0'; - else { - size_t namelen = strlen(namebuf); - size_t remaining = (sizeof(namebuf) - 1) - namelen; - char *p = namebuf + namelen; - int rc = snprintf(p, remaining, "+%#lx/%#lx ", - offset, size); - if (modname && rc < remaining) - snprintf(p + rc, remaining - rc, - "[%s] ", modname); - namebuf[sizeof(namebuf)-1] = '\0'; - } + /* Try to acquire the mmap_sem as we pass into userspace. */ + if (address < PAGE_OFFSET && !have_mmap_sem && kbt->task->mm) + have_mmap_sem = + down_read_trylock(&kbt->task->mm->mmap_sem); + + describe_addr(kbt, address, have_mmap_sem, + namebuf, sizeof(namebuf)); pr_err(" frame %d: 0x%lx %s(sp 0x%lx)\n", i++, address, namebuf, (unsigned long)(kbt->it.sp)); @@ -408,6 +400,8 @@ void tile_show_stack(struct KBacktraceIterator *kbt, int headers) pr_err("Stack dump stopped; next frame identical to this one\n"); if (headers) pr_err("Stack dump complete\n"); + if (have_mmap_sem) + up_read(&kbt->task->mm->mmap_sem); } EXPORT_SYMBOL(tile_show_stack); diff --git a/arch/tile/kernel/sys.c b/arch/tile/kernel/sys.c index cb44ba7ccd2d..b08095b402d6 100644 --- a/arch/tile/kernel/sys.c +++ b/arch/tile/kernel/sys.c @@ -32,11 +32,17 @@ #include <asm/syscalls.h> #include <asm/pgtable.h> #include <asm/homecache.h> +#include <asm/cachectl.h> #include <arch/chip.h> -SYSCALL_DEFINE0(flush_cache) +SYSCALL_DEFINE3(cacheflush, unsigned long, addr, unsigned long, len, + unsigned long, flags) { - homecache_evict(cpumask_of(smp_processor_id())); + if (flags & DCACHE) + homecache_evict(cpumask_of(smp_processor_id())); + if (flags & ICACHE) + flush_remote(0, HV_FLUSH_EVICT_L1I, mm_cpumask(current->mm), + 0, 0, 0, NULL, NULL, 0); return 0; } diff --git a/arch/tile/kernel/sysfs.c b/arch/tile/kernel/sysfs.c index 71ae728e9d0b..e25b0a89c18f 100644 --- a/arch/tile/kernel/sysfs.c +++ b/arch/tile/kernel/sysfs.c @@ -93,6 +93,10 @@ HV_CONF_ATTR(mezz_part, HV_CONFSTR_MEZZ_PART_NUM) HV_CONF_ATTR(mezz_serial, HV_CONFSTR_MEZZ_SERIAL_NUM) HV_CONF_ATTR(mezz_revision, HV_CONFSTR_MEZZ_REV) HV_CONF_ATTR(mezz_description, HV_CONFSTR_MEZZ_DESC) +HV_CONF_ATTR(cpumod_part, HV_CONFSTR_CPUMOD_PART_NUM) +HV_CONF_ATTR(cpumod_serial, HV_CONFSTR_CPUMOD_SERIAL_NUM) +HV_CONF_ATTR(cpumod_revision, HV_CONFSTR_CPUMOD_REV) +HV_CONF_ATTR(cpumod_description,HV_CONFSTR_CPUMOD_DESC) HV_CONF_ATTR(switch_control, HV_CONFSTR_SWITCH_CONTROL) static struct attribute *board_attrs[] = { @@ -104,6 +108,10 @@ static struct attribute *board_attrs[] = { &dev_attr_mezz_serial.attr, &dev_attr_mezz_revision.attr, &dev_attr_mezz_description.attr, + &dev_attr_cpumod_part.attr, + &dev_attr_cpumod_serial.attr, + &dev_attr_cpumod_revision.attr, + &dev_attr_cpumod_description.attr, &dev_attr_switch_control.attr, NULL }; diff --git a/arch/tile/kernel/tlb.c b/arch/tile/kernel/tlb.c index a5f241c24cac..3fd54d5bbd4c 100644 --- a/arch/tile/kernel/tlb.c +++ b/arch/tile/kernel/tlb.c @@ -15,6 +15,7 @@ #include <linux/cpumask.h> #include <linux/module.h> +#include <linux/hugetlb.h> #include <asm/tlbflush.h> #include <asm/homecache.h> #include <hv/hypervisor.h> @@ -49,25 +50,25 @@ void flush_tlb_current_task(void) flush_tlb_mm(current->mm); } -void flush_tlb_page_mm(const struct vm_area_struct *vma, struct mm_struct *mm, +void flush_tlb_page_mm(struct vm_area_struct *vma, struct mm_struct *mm, unsigned long va) { - unsigned long size = hv_page_size(vma); + unsigned long size = vma_kernel_pagesize(vma); int cache = (vma->vm_flags & VM_EXEC) ? HV_FLUSH_EVICT_L1I : 0; flush_remote(0, cache, mm_cpumask(mm), va, size, size, mm_cpumask(mm), NULL, 0); } -void flush_tlb_page(const struct vm_area_struct *vma, unsigned long va) +void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) { flush_tlb_page_mm(vma, vma->vm_mm, va); } EXPORT_SYMBOL(flush_tlb_page); -void flush_tlb_range(const struct vm_area_struct *vma, +void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - unsigned long size = hv_page_size(vma); + unsigned long size = vma_kernel_pagesize(vma); struct mm_struct *mm = vma->vm_mm; int cache = (vma->vm_flags & VM_EXEC) ? HV_FLUSH_EVICT_L1I : 0; flush_remote(0, cache, mm_cpumask(mm), start, end - start, size, diff --git a/arch/tile/kernel/traps.c b/arch/tile/kernel/traps.c index 2bb6602a1ee7..5b19a23c8908 100644 --- a/arch/tile/kernel/traps.c +++ b/arch/tile/kernel/traps.c @@ -195,12 +195,31 @@ static int special_ill(bundle_bits bundle, int *sigp, int *codep) return 1; } +static const char *const int_name[] = { + [INT_MEM_ERROR] = "Memory error", + [INT_ILL] = "Illegal instruction", + [INT_GPV] = "General protection violation", + [INT_UDN_ACCESS] = "UDN access", + [INT_IDN_ACCESS] = "IDN access", +#if CHIP_HAS_SN() + [INT_SN_ACCESS] = "SN access", +#endif + [INT_SWINT_3] = "Software interrupt 3", + [INT_SWINT_2] = "Software interrupt 2", + [INT_SWINT_0] = "Software interrupt 0", + [INT_UNALIGN_DATA] = "Unaligned data", + [INT_DOUBLE_FAULT] = "Double fault", +#ifdef __tilegx__ + [INT_ILL_TRANS] = "Illegal virtual address", +#endif +}; + void __kprobes do_trap(struct pt_regs *regs, int fault_num, unsigned long reason) { siginfo_t info = { 0 }; int signo, code; - unsigned long address; + unsigned long address = 0; bundle_bits instr; /* Re-enable interrupts. */ @@ -211,10 +230,17 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num, * current process and hope for the best. */ if (!user_mode(regs)) { + const char *name; if (fixup_exception(regs)) /* only UNALIGN_DATA in practice */ return; - pr_alert("Kernel took bad trap %d at PC %#lx\n", - fault_num, regs->pc); + if (fault_num >= 0 && + fault_num < sizeof(int_name)/sizeof(int_name[0]) && + int_name[fault_num] != NULL) + name = int_name[fault_num]; + else + name = "Unknown interrupt"; + pr_alert("Kernel took bad trap %d (%s) at PC %#lx\n", + fault_num, name, regs->pc); if (fault_num == INT_GPV) pr_alert("GPV_REASON is %#lx\n", reason); show_regs(regs); @@ -223,6 +249,10 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num, } switch (fault_num) { + case INT_MEM_ERROR: + signo = SIGBUS; + code = BUS_OBJERR; + break; case INT_ILL: if (copy_from_user(&instr, (void __user *)regs->pc, sizeof(instr))) { @@ -289,7 +319,10 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num, address = regs->pc; break; #ifdef __tilegx__ - case INT_ILL_TRANS: + case INT_ILL_TRANS: { + /* Avoid a hardware erratum with the return address stack. */ + fill_ra_stack(); + signo = SIGSEGV; code = SEGV_MAPERR; if (reason & SPR_ILL_TRANS_REASON__I_STREAM_VA_RMASK) @@ -297,6 +330,7 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num, else address = 0; /* FIXME: GX: single-step for address */ break; + } #endif default: panic("Unexpected do_trap interrupt number %d", fault_num); @@ -308,7 +342,8 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num, info.si_addr = (void __user *)address; if (signo == SIGILL) info.si_trapno = fault_num; - trace_unhandled_signal("trap", regs, address, signo); + if (signo != SIGTRAP) + trace_unhandled_signal("trap", regs, address, signo); force_sig_info(signo, &info, current); } |
