diff options
Diffstat (limited to 'arch/sparc')
67 files changed, 1929 insertions, 1040 deletions
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 56442d2d7bbc..b4f7a98acbc5 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -20,6 +20,7 @@ config SPARC select HAVE_OPROFILE select HAVE_ARCH_KGDB if !SMP || SPARC64 select HAVE_ARCH_TRACEHOOK + select HAVE_EXIT_THREAD select SYSCTL_EXCEPTION_TRACE select ARCH_WANT_OPTIONAL_GPIOLIB select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE @@ -182,9 +183,9 @@ config NR_CPUS int "Maximum number of CPUs" depends on SMP range 2 32 if SPARC32 - range 2 1024 if SPARC64 + range 2 4096 if SPARC64 default 32 if SPARC32 - default 64 if SPARC64 + default 4096 if SPARC64 source kernel/Kconfig.hz @@ -549,9 +550,6 @@ config SYSVIPC_COMPAT depends on COMPAT && SYSVIPC default y -config KEYS_COMPAT - def_bool y if COMPAT && KEYS - endmenu source "net/Kconfig" diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h index f2fbf9e16faf..29070c9a70f9 100644 --- a/arch/sparc/include/asm/atomic_64.h +++ b/arch/sparc/include/asm/atomic_64.h @@ -74,7 +74,11 @@ ATOMIC_OP(xor) #define atomic64_add_negative(i, v) (atomic64_add_return(i, v) < 0) #define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n))) -#define atomic_xchg(v, new) (xchg(&((v)->counter), new)) + +static inline int atomic_xchg(atomic_t *v, int new) +{ + return xchg(&v->counter, new); +} static inline int __atomic_add_unless(atomic_t *v, int a, int u) { diff --git a/arch/sparc/include/asm/futex_64.h b/arch/sparc/include/asm/futex_64.h index 4e899b0dabf7..1cfd89d92208 100644 --- a/arch/sparc/include/asm/futex_64.h +++ b/arch/sparc/include/asm/futex_64.h @@ -29,22 +29,14 @@ : "r" (uaddr), "r" (oparg), "i" (-EFAULT) \ : "memory") -static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) +static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, + u32 __user *uaddr) { - int op = (encoded_op >> 28) & 7; - int cmp = (encoded_op >> 24) & 15; - int oparg = (encoded_op << 8) >> 20; - int cmparg = (encoded_op << 20) >> 20; int oldval = 0, ret, tem; - if (unlikely(!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))) - return -EFAULT; if (unlikely((((unsigned long) uaddr) & 0x3UL))) return -EINVAL; - if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) - oparg = 1 << oparg; - pagefault_disable(); switch (op) { @@ -69,17 +61,9 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) pagefault_enable(); - if (!ret) { - switch (cmp) { - case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; - case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; - case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; - case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; - case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; - case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; - default: ret = -ENOSYS; - } - } + if (!ret) + *oval = oldval; + return ret; } diff --git a/arch/sparc/include/asm/mmu_64.h b/arch/sparc/include/asm/mmu_64.h index 70067ce184b1..83b36a5371ff 100644 --- a/arch/sparc/include/asm/mmu_64.h +++ b/arch/sparc/include/asm/mmu_64.h @@ -52,7 +52,7 @@ #define CTX_NR_MASK TAG_CONTEXT_BITS #define CTX_HW_MASK (CTX_NR_MASK | CTX_PGSZ_MASK) -#define CTX_FIRST_VERSION ((_AC(1,UL) << CTX_VERSION_SHIFT) + _AC(1,UL)) +#define CTX_FIRST_VERSION BIT(CTX_VERSION_SHIFT) #define CTX_VALID(__ctx) \ (!(((__ctx.sparc64_ctx_val) ^ tlb_context_cache) & CTX_VERSION_MASK)) #define CTX_HWBITS(__ctx) ((__ctx.sparc64_ctx_val) & CTX_HW_MASK) @@ -92,7 +92,8 @@ struct tsb_config { typedef struct { spinlock_t lock; unsigned long sparc64_ctx_val; - unsigned long huge_pte_count; + unsigned long hugetlb_pte_count; + unsigned long thp_pte_count; struct tsb_config tsb_block[MM_NUM_TSBS]; struct hv_tsb_descr tsb_descr[MM_NUM_TSBS]; } mm_context_t; diff --git a/arch/sparc/include/asm/mmu_context_64.h b/arch/sparc/include/asm/mmu_context_64.h index b84be675e507..0cdeb2b483a0 100644 --- a/arch/sparc/include/asm/mmu_context_64.h +++ b/arch/sparc/include/asm/mmu_context_64.h @@ -17,22 +17,19 @@ extern spinlock_t ctx_alloc_lock; extern unsigned long tlb_context_cache; extern unsigned long mmu_context_bmap[]; +DECLARE_PER_CPU(struct mm_struct *, per_cpu_secondary_mm); void get_new_mmu_context(struct mm_struct *mm); -#ifdef CONFIG_SMP -void smp_new_mmu_context_version(void); -#else -#define smp_new_mmu_context_version() do { } while (0) -#endif - int init_new_context(struct task_struct *tsk, struct mm_struct *mm); void destroy_context(struct mm_struct *mm); void __tsb_context_switch(unsigned long pgd_pa, struct tsb_config *tsb_base, struct tsb_config *tsb_huge, - unsigned long tsb_descr_pa); + unsigned long tsb_descr_pa, + unsigned long secondary_ctx); -static inline void tsb_context_switch(struct mm_struct *mm) +static inline void tsb_context_switch_ctx(struct mm_struct *mm, + unsigned long ctx) { __tsb_context_switch(__pa(mm->pgd), &mm->context.tsb_block[0], @@ -43,9 +40,12 @@ static inline void tsb_context_switch(struct mm_struct *mm) #else NULL #endif - , __pa(&mm->context.tsb_descr[0])); + , __pa(&mm->context.tsb_descr[0]), + ctx); } +#define tsb_context_switch(X) tsb_context_switch_ctx(X, 0) + void tsb_grow(struct mm_struct *mm, unsigned long tsb_index, unsigned long mm_rss); @@ -74,8 +74,9 @@ void __flush_tlb_mm(unsigned long, unsigned long); static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, struct task_struct *tsk) { unsigned long ctx_valid, flags; - int cpu; + int cpu = smp_processor_id(); + per_cpu(per_cpu_secondary_mm, cpu) = mm; if (unlikely(mm == &init_mm)) return; @@ -114,14 +115,12 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str * cpu0 to update it's TSB because at that point the cpu_vm_mask * only had cpu1 set in it. */ - load_secondary_context(mm); - tsb_context_switch(mm); + tsb_context_switch_ctx(mm, CTX_HWBITS(mm->context)); /* Any time a processor runs a context on an address space * for the first time, we must flush that context out of the * local TLB. */ - cpu = smp_processor_id(); if (!ctx_valid || !cpumask_test_cpu(cpu, mm_cpumask(mm))) { cpumask_set_cpu(cpu, mm_cpumask(mm)); __flush_tlb_mm(CTX_HWBITS(mm->context), @@ -131,26 +130,7 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str } #define deactivate_mm(tsk,mm) do { } while (0) - -/* Activate a new MM instance for the current task. */ -static inline void activate_mm(struct mm_struct *active_mm, struct mm_struct *mm) -{ - unsigned long flags; - int cpu; - - spin_lock_irqsave(&mm->context.lock, flags); - if (!CTX_VALID(mm->context)) - get_new_mmu_context(mm); - cpu = smp_processor_id(); - if (!cpumask_test_cpu(cpu, mm_cpumask(mm))) - cpumask_set_cpu(cpu, mm_cpumask(mm)); - - load_secondary_context(mm); - __flush_tlb_mm(CTX_HWBITS(mm->context), SECONDARY_CONTEXT); - tsb_context_switch(mm); - spin_unlock_irqrestore(&mm->context.lock, flags); -} - +#define activate_mm(active_mm, mm) switch_mm(active_mm, mm, NULL) #endif /* !(__ASSEMBLY__) */ #endif /* !(__SPARC64_MMU_CONTEXT_H) */ diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h index 8c2a8c937540..c1263fc390db 100644 --- a/arch/sparc/include/asm/page_64.h +++ b/arch/sparc/include/asm/page_64.h @@ -25,6 +25,7 @@ #define HPAGE_MASK (~(HPAGE_SIZE - 1UL)) #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA +#define REAL_HPAGE_PER_HPAGE (_AC(1,UL) << (HPAGE_SHIFT - REAL_HPAGE_SHIFT)) #endif #ifndef __ASSEMBLY__ diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h index 91b963a887b7..29c3b400f949 100644 --- a/arch/sparc/include/asm/pgtable_32.h +++ b/arch/sparc/include/asm/pgtable_32.h @@ -91,9 +91,9 @@ extern unsigned long pfn_base; * ZERO_PAGE is a global shared page that is always zero: used * for zero-mapped memory areas etc.. */ -extern unsigned long empty_zero_page; +extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; -#define ZERO_PAGE(vaddr) (virt_to_page(&empty_zero_page)) +#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) /* * In general all page table modifications should use the V8 atomic diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 408b715c95a5..9d81579f3d54 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -668,26 +668,27 @@ static inline unsigned long pmd_pfn(pmd_t pmd) return pte_pfn(pte); } -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -static inline unsigned long pmd_dirty(pmd_t pmd) +#define __HAVE_ARCH_PMD_WRITE +static inline unsigned long pmd_write(pmd_t pmd) { pte_t pte = __pte(pmd_val(pmd)); - return pte_dirty(pte); + return pte_write(pte); } -static inline unsigned long pmd_young(pmd_t pmd) +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static inline unsigned long pmd_dirty(pmd_t pmd) { pte_t pte = __pte(pmd_val(pmd)); - return pte_young(pte); + return pte_dirty(pte); } -static inline unsigned long pmd_write(pmd_t pmd) +static inline unsigned long pmd_young(pmd_t pmd) { pte_t pte = __pte(pmd_val(pmd)); - return pte_write(pte); + return pte_young(pte); } static inline unsigned long pmd_trans_huge(pmd_t pmd) diff --git a/arch/sparc/include/asm/pil.h b/arch/sparc/include/asm/pil.h index 266937030546..522b43db2ed3 100644 --- a/arch/sparc/include/asm/pil.h +++ b/arch/sparc/include/asm/pil.h @@ -20,7 +20,6 @@ #define PIL_SMP_CALL_FUNC 1 #define PIL_SMP_RECEIVE_SIGNAL 2 #define PIL_SMP_CAPTURE 3 -#define PIL_SMP_CTX_NEW_VERSION 4 #define PIL_DEVICE_IRQ 5 #define PIL_SMP_CALL_FUNC_SNGL 6 #define PIL_DEFERRED_PCR_WORK 7 diff --git a/arch/sparc/include/asm/setup.h b/arch/sparc/include/asm/setup.h index 29d64b1758ed..3fae200dd251 100644 --- a/arch/sparc/include/asm/setup.h +++ b/arch/sparc/include/asm/setup.h @@ -16,7 +16,7 @@ extern char reboot_command[]; */ extern unsigned char boot_cpu_id; -extern unsigned long empty_zero_page; +extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; extern int serial_console; static inline int con_is_present(void) @@ -59,8 +59,11 @@ extern atomic_t dcpage_flushes; extern atomic_t dcpage_flushes_xcall; extern int sysctl_tsb_ratio; -#endif +#ifdef CONFIG_SERIAL_SUNHV +void sunhv_migrate_hvcons_irq(int cpu); +#endif +#endif void sun_do_break(void); extern int stop_a_enabled; extern int scons_pwroff; diff --git a/arch/sparc/include/asm/trap_block.h b/arch/sparc/include/asm/trap_block.h index ec9c04de3664..ff05992dae7a 100644 --- a/arch/sparc/include/asm/trap_block.h +++ b/arch/sparc/include/asm/trap_block.h @@ -54,6 +54,7 @@ extern struct trap_per_cpu trap_block[NR_CPUS]; void init_cur_cpu_trap(struct thread_info *); void setup_tba(void); extern int ncpus_probed; +extern u64 cpu_mondo_counter[NR_CPUS]; unsigned long real_hard_smp_processor_id(void); diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h index ea6e9a20f3ff..f428512481f9 100644 --- a/arch/sparc/include/asm/uaccess_64.h +++ b/arch/sparc/include/asm/uaccess_64.h @@ -98,7 +98,6 @@ struct exception_table_entry { unsigned int insn, fixup; }; -void __ret_efault(void); void __retl_efault(void); /* Uh, these should become the main single-value transfer routines.. @@ -179,20 +178,6 @@ int __put_user_bad(void); __gu_ret; \ }) -#define __get_user_nocheck_ret(data, addr, size, type, retval) ({ \ - register unsigned long __gu_val __asm__ ("l1"); \ - switch (size) { \ - case 1: __get_user_asm_ret(__gu_val, ub, addr, retval); break; \ - case 2: __get_user_asm_ret(__gu_val, uh, addr, retval); break; \ - case 4: __get_user_asm_ret(__gu_val, uw, addr, retval); break; \ - case 8: __get_user_asm_ret(__gu_val, x, addr, retval); break; \ - default: \ - if (__get_user_bad()) \ - return retval; \ - } \ - data = (__force type) __gu_val; \ -}) - #define __get_user_asm(x, size, addr, ret) \ __asm__ __volatile__( \ "/* Get user asm, inline. */\n" \ @@ -214,80 +199,35 @@ __asm__ __volatile__( \ : "=r" (ret), "=r" (x) : "r" (__m(addr)), \ "i" (-EFAULT)) -#define __get_user_asm_ret(x, size, addr, retval) \ -if (__builtin_constant_p(retval) && retval == -EFAULT) \ - __asm__ __volatile__( \ - "/* Get user asm ret, inline. */\n" \ - "1:\t" "ld"#size "a [%1] %%asi, %0\n\n\t" \ - ".section __ex_table,\"a\"\n\t" \ - ".align 4\n\t" \ - ".word 1b,__ret_efault\n\n\t" \ - ".previous\n\t" \ - : "=r" (x) : "r" (__m(addr))); \ -else \ - __asm__ __volatile__( \ - "/* Get user asm ret, inline. */\n" \ - "1:\t" "ld"#size "a [%1] %%asi, %0\n\n\t" \ - ".section .fixup,#alloc,#execinstr\n\t" \ - ".align 4\n" \ - "3:\n\t" \ - "ret\n\t" \ - " restore %%g0, %2, %%o0\n\n\t" \ - ".previous\n\t" \ - ".section __ex_table,\"a\"\n\t" \ - ".align 4\n\t" \ - ".word 1b, 3b\n\n\t" \ - ".previous\n\t" \ - : "=r" (x) : "r" (__m(addr)), "i" (retval)) - int __get_user_bad(void); unsigned long __must_check ___copy_from_user(void *to, const void __user *from, unsigned long size); -unsigned long copy_from_user_fixup(void *to, const void __user *from, - unsigned long size); static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long size) { - unsigned long ret = ___copy_from_user(to, from, size); - - if (unlikely(ret)) - ret = copy_from_user_fixup(to, from, size); - - return ret; + return ___copy_from_user(to, from, size); } #define __copy_from_user copy_from_user unsigned long __must_check ___copy_to_user(void __user *to, const void *from, unsigned long size); -unsigned long copy_to_user_fixup(void __user *to, const void *from, - unsigned long size); static inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long size) { - unsigned long ret = ___copy_to_user(to, from, size); - - if (unlikely(ret)) - ret = copy_to_user_fixup(to, from, size); - return ret; + return ___copy_to_user(to, from, size); } #define __copy_to_user copy_to_user unsigned long __must_check ___copy_in_user(void __user *to, const void __user *from, unsigned long size); -unsigned long copy_in_user_fixup(void __user *to, void __user *from, - unsigned long size); static inline unsigned long __must_check copy_in_user(void __user *to, void __user *from, unsigned long size) { - unsigned long ret = ___copy_in_user(to, from, size); - - if (unlikely(ret)) - ret = copy_in_user_fixup(to, from, size); - return ret; + return ___copy_in_user(to, from, size); } #define __copy_in_user copy_in_user diff --git a/arch/sparc/include/asm/vio.h b/arch/sparc/include/asm/vio.h index 8174f6cdbbbb..9dca7a892978 100644 --- a/arch/sparc/include/asm/vio.h +++ b/arch/sparc/include/asm/vio.h @@ -327,6 +327,7 @@ struct vio_dev { int compat_len; u64 dev_no; + u64 id; unsigned long channel_id; diff --git a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c index f87a55d77094..9b3f2e212b37 100644 --- a/arch/sparc/kernel/ds.c +++ b/arch/sparc/kernel/ds.c @@ -908,7 +908,7 @@ static int register_services(struct ds_info *dp) pbuf.req.handle = cp->handle; pbuf.req.major = 1; pbuf.req.minor = 0; - strcpy(pbuf.req.svc_id, cp->service_id); + strcpy(pbuf.id_buf, cp->service_id); err = __ds_send(lp, &pbuf, msg_len); if (err > 0) diff --git a/arch/sparc/kernel/dtlb_prot.S b/arch/sparc/kernel/dtlb_prot.S index d668ca149e64..4087a62f96b0 100644 --- a/arch/sparc/kernel/dtlb_prot.S +++ b/arch/sparc/kernel/dtlb_prot.S @@ -25,13 +25,13 @@ /* PROT ** ICACHE line 2: More real fault processing */ ldxa [%g4] ASI_DMMU, %g5 ! Put tagaccess in %g5 + srlx %g5, PAGE_SHIFT, %g5 + sllx %g5, PAGE_SHIFT, %g5 ! Clear context ID bits bgu,pn %xcc, winfix_trampoline ! Yes, perform winfixup mov FAULT_CODE_DTLB | FAULT_CODE_WRITE, %g4 ba,pt %xcc, sparc64_realfault_common ! Nope, normal fault nop nop - nop - nop /* PROT ** ICACHE line 3: Unused... */ nop diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index 51faf92ace00..7eeeb1d5a410 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -922,47 +922,11 @@ prom_tba: .xword 0 tlb_type: .word 0 /* Must NOT end up in BSS */ .section ".fixup",#alloc,#execinstr - .globl __ret_efault, __retl_efault, __ret_one, __retl_one -ENTRY(__ret_efault) - ret - restore %g0, -EFAULT, %o0 -ENDPROC(__ret_efault) - ENTRY(__retl_efault) retl mov -EFAULT, %o0 ENDPROC(__retl_efault) -ENTRY(__retl_one) - retl - mov 1, %o0 -ENDPROC(__retl_one) - -ENTRY(__retl_one_fp) - VISExitHalf - retl - mov 1, %o0 -ENDPROC(__retl_one_fp) - -ENTRY(__ret_one_asi) - wr %g0, ASI_AIUS, %asi - ret - restore %g0, 1, %o0 -ENDPROC(__ret_one_asi) - -ENTRY(__retl_one_asi) - wr %g0, ASI_AIUS, %asi - retl - mov 1, %o0 -ENDPROC(__retl_one_asi) - -ENTRY(__retl_one_asi_fp) - wr %g0, ASI_AIUS, %asi - VISExitHalf - retl - mov 1, %o0 -ENDPROC(__retl_one_asi_fp) - ENTRY(__retl_o1) retl mov %o1, %o0 diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c index e22416ce56ea..bfbde8c4ffb2 100644 --- a/arch/sparc/kernel/irq_64.c +++ b/arch/sparc/kernel/irq_64.c @@ -1034,17 +1034,26 @@ static void __init init_cpu_send_mondo_info(struct trap_per_cpu *tb) { #ifdef CONFIG_SMP unsigned long page; + void *mondo, *p; - BUILD_BUG_ON((NR_CPUS * sizeof(u16)) > (PAGE_SIZE - 64)); + BUILD_BUG_ON((NR_CPUS * sizeof(u16)) > PAGE_SIZE); + + /* Make sure mondo block is 64byte aligned */ + p = kzalloc(127, GFP_KERNEL); + if (!p) { + prom_printf("SUN4V: Error, cannot allocate mondo block.\n"); + prom_halt(); + } + mondo = (void *)(((unsigned long)p + 63) & ~0x3f); + tb->cpu_mondo_block_pa = __pa(mondo); page = get_zeroed_page(GFP_KERNEL); if (!page) { - prom_printf("SUN4V: Error, cannot allocate cpu mondo page.\n"); + prom_printf("SUN4V: Error, cannot allocate cpu list page.\n"); prom_halt(); } - tb->cpu_mondo_block_pa = __pa(page); - tb->cpu_list_pa = __pa(page + 64); + tb->cpu_list_pa = __pa(page); #endif } diff --git a/arch/sparc/kernel/jump_label.c b/arch/sparc/kernel/jump_label.c index 59bbeff55024..07933b9e9ce0 100644 --- a/arch/sparc/kernel/jump_label.c +++ b/arch/sparc/kernel/jump_label.c @@ -13,19 +13,30 @@ void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type) { - u32 val; u32 *insn = (u32 *) (unsigned long) entry->code; + u32 val; if (type == JUMP_LABEL_JMP) { s32 off = (s32)entry->target - (s32)entry->code; + bool use_v9_branch = false; + + BUG_ON(off & 3); #ifdef CONFIG_SPARC64 - /* ba,pt %xcc, . + (off << 2) */ - val = 0x10680000 | ((u32) off >> 2); -#else - /* ba . + (off << 2) */ - val = 0x10800000 | ((u32) off >> 2); + if (off <= 0xfffff && off >= -0x100000) + use_v9_branch = true; #endif + if (use_v9_branch) { + /* WDISP19 - target is . + immed << 2 */ + /* ba,pt %xcc, . + off */ + val = 0x10680000 | (((u32) off >> 2) & 0x7ffff); + } else { + /* WDISP22 - target is . + immed << 2 */ + BUG_ON(off > 0x7fffff); + BUG_ON(off < -0x800000); + /* ba . + off */ + val = 0x10800000 | (((u32) off >> 2) & 0x3fffff); + } } else { val = 0x01000000; } diff --git a/arch/sparc/kernel/kernel.h b/arch/sparc/kernel/kernel.h index e7f652be9e61..44f32dd4477f 100644 --- a/arch/sparc/kernel/kernel.h +++ b/arch/sparc/kernel/kernel.h @@ -37,7 +37,6 @@ void handle_stdfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr /* smp_64.c */ void __irq_entry smp_call_function_client(int irq, struct pt_regs *regs); void __irq_entry smp_call_function_single_client(int irq, struct pt_regs *regs); -void __irq_entry smp_new_mmu_context_version_client(int irq, struct pt_regs *regs); void __irq_entry smp_penguin_jailcell(int irq, struct pt_regs *regs); void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs); diff --git a/arch/sparc/kernel/ktlb.S b/arch/sparc/kernel/ktlb.S index ef0d8e9e1210..f22bec0db645 100644 --- a/arch/sparc/kernel/ktlb.S +++ b/arch/sparc/kernel/ktlb.S @@ -20,6 +20,10 @@ kvmap_itlb: mov TLB_TAG_ACCESS, %g4 ldxa [%g4] ASI_IMMU, %g4 + /* The kernel executes in context zero, therefore we do not + * need to clear the context ID bits out of %g4 here. + */ + /* sun4v_itlb_miss branches here with the missing virtual * address already loaded into %g4 */ @@ -128,6 +132,10 @@ kvmap_dtlb: mov TLB_TAG_ACCESS, %g4 ldxa [%g4] ASI_DMMU, %g4 + /* The kernel executes in context zero, therefore we do not + * need to clear the context ID bits out of %g4 here. + */ + /* sun4v_dtlb_miss branches here with the missing virtual * address already loaded into %g4 */ @@ -251,6 +259,10 @@ kvmap_dtlb_longpath: nop .previous + /* The kernel executes in context zero, therefore we do not + * need to clear the context ID bits out of %g5 here. + */ + be,pt %xcc, sparc64_realfault_common mov FAULT_CODE_DTLB, %g4 ba,pt %xcc, winfix_trampoline diff --git a/arch/sparc/kernel/ldc.c b/arch/sparc/kernel/ldc.c index 59d503866431..9cc600b2d68c 100644 --- a/arch/sparc/kernel/ldc.c +++ b/arch/sparc/kernel/ldc.c @@ -1733,9 +1733,14 @@ static int read_nonraw(struct ldc_channel *lp, void *buf, unsigned int size) lp->rcv_nxt = p->seqid; + /* + * If this is a control-only packet, there is nothing + * else to do but advance the rx queue since the packet + * was already processed above. + */ if (!(p->type & LDC_DATA)) { new = rx_advance(lp, new); - goto no_data; + break; } if (p->stype & (LDC_ACK | LDC_NACK)) { err = data_ack_nack(lp, p); diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c index 9f9614df9e1e..c2b202d763a1 100644 --- a/arch/sparc/kernel/pci.c +++ b/arch/sparc/kernel/pci.c @@ -245,6 +245,18 @@ static void pci_parse_of_addrs(struct platform_device *op, } } +static void pci_init_dev_archdata(struct dev_archdata *sd, void *iommu, + void *stc, void *host_controller, + struct platform_device *op, + int numa_node) +{ + sd->iommu = iommu; + sd->stc = stc; + sd->host_controller = host_controller; + sd->op = op; + sd->numa_node = numa_node; +} + static struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm, struct device_node *node, struct pci_bus *bus, int devfn) @@ -259,13 +271,10 @@ static struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm, if (!dev) return NULL; + op = of_find_device_by_node(node); sd = &dev->dev.archdata; - sd->iommu = pbm->iommu; - sd->stc = &pbm->stc; - sd->host_controller = pbm; - sd->op = op = of_find_device_by_node(node); - sd->numa_node = pbm->numa_node; - + pci_init_dev_archdata(sd, pbm->iommu, &pbm->stc, pbm, op, + pbm->numa_node); sd = &op->dev.archdata; sd->iommu = pbm->iommu; sd->stc = &pbm->stc; @@ -1003,9 +1012,13 @@ int pcibios_add_device(struct pci_dev *dev) * Copy dev_archdata from PF to VF */ if (dev->is_virtfn) { + struct dev_archdata *psd; + pdev = dev->physfn; - memcpy(&dev->dev.archdata, &pdev->dev.archdata, - sizeof(struct dev_archdata)); + psd = &pdev->dev.archdata; + pci_init_dev_archdata(&dev->dev.archdata, psd->iommu, + psd->stc, psd->host_controller, NULL, + psd->numa_node); } return 0; } diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 6596f66ce112..a5d0c2f08110 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -926,6 +926,8 @@ static void read_in_all_counters(struct cpu_hw_events *cpuc) sparc_perf_event_update(cp, &cp->hw, cpuc->current_idx[i]); cpuc->current_idx[i] = PIC_NO_INDEX; + if (cp->hw.state & PERF_HES_STOPPED) + cp->hw.state |= PERF_HES_ARCH; } } } @@ -958,10 +960,12 @@ static void calculate_single_pcr(struct cpu_hw_events *cpuc) enc = perf_event_get_enc(cpuc->events[i]); cpuc->pcr[0] &= ~mask_for_index(idx); - if (hwc->state & PERF_HES_STOPPED) + if (hwc->state & PERF_HES_ARCH) { cpuc->pcr[0] |= nop_for_index(idx); - else + } else { cpuc->pcr[0] |= event_encoding(enc, idx); + hwc->state = 0; + } } out: cpuc->pcr[0] |= cpuc->event[0]->hw.config_base; @@ -987,6 +991,9 @@ static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc) cpuc->current_idx[i] = idx; + if (cp->hw.state & PERF_HES_ARCH) + continue; + sparc_pmu_start(cp, PERF_EF_RELOAD); } out: @@ -1078,6 +1085,8 @@ static void sparc_pmu_start(struct perf_event *event, int flags) event->hw.state = 0; sparc_pmu_enable_event(cpuc, &event->hw, idx); + + perf_event_update_userpage(event); } static void sparc_pmu_stop(struct perf_event *event, int flags) @@ -1370,9 +1379,9 @@ static int sparc_pmu_add(struct perf_event *event, int ef_flags) cpuc->events[n0] = event->hw.event_base; cpuc->current_idx[n0] = PIC_NO_INDEX; - event->hw.state = PERF_HES_UPTODATE; + event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; if (!(ef_flags & PERF_EF_START)) - event->hw.state |= PERF_HES_STOPPED; + event->hw.state |= PERF_HES_ARCH; /* * If group events scheduling transaction was started, diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index c5113c7ce2fd..b7780a5bef11 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -184,21 +184,21 @@ unsigned long thread_saved_pc(struct task_struct *tsk) /* * Free current thread data structures etc.. */ -void exit_thread(void) +void exit_thread(struct task_struct *tsk) { #ifndef CONFIG_SMP - if(last_task_used_math == current) { + if (last_task_used_math == tsk) { #else - if (test_thread_flag(TIF_USEDFPU)) { + if (test_ti_thread_flag(task_thread_info(tsk), TIF_USEDFPU)) { #endif /* Keep process from leaving FPU in a bogon state. */ put_psr(get_psr() | PSR_EF); - fpsave(¤t->thread.float_regs[0], ¤t->thread.fsr, - ¤t->thread.fpqueue[0], ¤t->thread.fpqdepth); + fpsave(&tsk->thread.float_regs[0], &tsk->thread.fsr, + &tsk->thread.fpqueue[0], &tsk->thread.fpqdepth); #ifndef CONFIG_SMP last_task_used_math = NULL; #else - clear_thread_flag(TIF_USEDFPU); + clear_ti_thread_flag(task_thread_info(tsk), TIF_USEDFPU); #endif } } diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index 46a59643bb1c..7c3fc5886038 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -417,9 +417,9 @@ unsigned long thread_saved_pc(struct task_struct *tsk) } /* Free current thread data structures etc.. */ -void exit_thread(void) +void exit_thread(struct task_struct *tsk) { - struct thread_info *t = current_thread_info(); + struct thread_info *t = task_thread_info(tsk); if (t->utraps) { if (t->utraps[0] < 2) diff --git a/arch/sparc/kernel/ptrace_64.c b/arch/sparc/kernel/ptrace_64.c index 9ddc4928a089..c1566170964f 100644 --- a/arch/sparc/kernel/ptrace_64.c +++ b/arch/sparc/kernel/ptrace_64.c @@ -311,7 +311,7 @@ static int genregs64_set(struct task_struct *target, } if (!ret) { - unsigned long y; + unsigned long y = regs->y; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &y, diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c index c3c12efe0bc0..9c0c8fd0b292 100644 --- a/arch/sparc/kernel/signal_32.c +++ b/arch/sparc/kernel/signal_32.c @@ -89,7 +89,7 @@ asmlinkage void do_sigreturn(struct pt_regs *regs) sf = (struct signal_frame __user *) regs->u_regs[UREG_FP]; /* 1. Make sure we are not getting garbage from the user */ - if (!invalid_frame_pointer(sf, sizeof(*sf))) + if (invalid_frame_pointer(sf, sizeof(*sf))) goto segv_and_exit; if (get_user(ufp, &sf->info.si_regs.u_regs[UREG_FP])) @@ -150,7 +150,7 @@ asmlinkage void do_rt_sigreturn(struct pt_regs *regs) synchronize_user_stack(); sf = (struct rt_signal_frame __user *) regs->u_regs[UREG_FP]; - if (!invalid_frame_pointer(sf, sizeof(*sf))) + if (invalid_frame_pointer(sf, sizeof(*sf))) goto segv; if (get_user(ufp, &sf->regs.u_regs[UREG_FP])) diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 19cd08d18672..46866b2097e8 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -617,22 +617,48 @@ retry: } } -/* Multi-cpu list version. */ +#define CPU_MONDO_COUNTER(cpuid) (cpu_mondo_counter[cpuid]) +#define MONDO_USEC_WAIT_MIN 2 +#define MONDO_USEC_WAIT_MAX 100 +#define MONDO_RETRY_LIMIT 500000 + +/* Multi-cpu list version. + * + * Deliver xcalls to 'cnt' number of cpus in 'cpu_list'. + * Sometimes not all cpus receive the mondo, requiring us to re-send + * the mondo until all cpus have received, or cpus are truly stuck + * unable to receive mondo, and we timeout. + * Occasionally a target cpu strand is borrowed briefly by hypervisor to + * perform guest service, such as PCIe error handling. Consider the + * service time, 1 second overall wait is reasonable for 1 cpu. + * Here two in-between mondo check wait time are defined: 2 usec for + * single cpu quick turn around and up to 100usec for large cpu count. + * Deliver mondo to large number of cpus could take longer, we adjusts + * the retry count as long as target cpus are making forward progress. + */ static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt) { - int retries, this_cpu, prev_sent, i, saw_cpu_error; + int this_cpu, tot_cpus, prev_sent, i, rem; + int usec_wait, retries, tot_retries; + u16 first_cpu = 0xffff; + unsigned long xc_rcvd = 0; unsigned long status; + int ecpuerror_id = 0; + int enocpu_id = 0; u16 *cpu_list; + u16 cpu; this_cpu = smp_processor_id(); - cpu_list = __va(tb->cpu_list_pa); - - saw_cpu_error = 0; - retries = 0; + usec_wait = cnt * MONDO_USEC_WAIT_MIN; + if (usec_wait > MONDO_USEC_WAIT_MAX) + usec_wait = MONDO_USEC_WAIT_MAX; + retries = tot_retries = 0; + tot_cpus = cnt; prev_sent = 0; + do { - int forward_progress, n_sent; + int n_sent, mondo_delivered, target_cpu_busy; status = sun4v_cpu_mondo_send(cnt, tb->cpu_list_pa, @@ -640,94 +666,113 @@ static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt) /* HV_EOK means all cpus received the xcall, we're done. */ if (likely(status == HV_EOK)) - break; + goto xcall_done; + + /* If not these non-fatal errors, panic */ + if (unlikely((status != HV_EWOULDBLOCK) && + (status != HV_ECPUERROR) && + (status != HV_ENOCPU))) + goto fatal_errors; /* First, see if we made any forward progress. * + * Go through the cpu_list, count the target cpus that have + * received our mondo (n_sent), and those that did not (rem). + * Re-pack cpu_list with the cpus remain to be retried in the + * front - this simplifies tracking the truly stalled cpus. + * * The hypervisor indicates successful sends by setting * cpu list entries to the value 0xffff. + * + * EWOULDBLOCK means some target cpus did not receive the + * mondo and retry usually helps. + * + * ECPUERROR means at least one target cpu is in error state, + * it's usually safe to skip the faulty cpu and retry. + * + * ENOCPU means one of the target cpu doesn't belong to the + * domain, perhaps offlined which is unexpected, but not + * fatal and it's okay to skip the offlined cpu. */ + rem = 0; n_sent = 0; for (i = 0; i < cnt; i++) { - if (likely(cpu_list[i] == 0xffff)) + cpu = cpu_list[i]; + if (likely(cpu == 0xffff)) { n_sent++; + } else if ((status == HV_ECPUERROR) && + (sun4v_cpu_state(cpu) == HV_CPU_STATE_ERROR)) { + ecpuerror_id = cpu + 1; + } else if (status == HV_ENOCPU && !cpu_online(cpu)) { + enocpu_id = cpu + 1; + } else { + cpu_list[rem++] = cpu; + } } - forward_progress = 0; - if (n_sent > prev_sent) - forward_progress = 1; + /* No cpu remained, we're done. */ + if (rem == 0) + break; - prev_sent = n_sent; + /* Otherwise, update the cpu count for retry. */ + cnt = rem; - /* If we get a HV_ECPUERROR, then one or more of the cpus - * in the list are in error state. Use the cpu_state() - * hypervisor call to find out which cpus are in error state. + /* Record the overall number of mondos received by the + * first of the remaining cpus. */ - if (unlikely(status == HV_ECPUERROR)) { - for (i = 0; i < cnt; i++) { - long err; - u16 cpu; + if (first_cpu != cpu_list[0]) { + first_cpu = cpu_list[0]; + xc_rcvd = CPU_MONDO_COUNTER(first_cpu); + } - cpu = cpu_list[i]; - if (cpu == 0xffff) - continue; + /* Was any mondo delivered successfully? */ + mondo_delivered = (n_sent > prev_sent); + prev_sent = n_sent; - err = sun4v_cpu_state(cpu); - if (err == HV_CPU_STATE_ERROR) { - saw_cpu_error = (cpu + 1); - cpu_list[i] = 0xffff; - } - } - } else if (unlikely(status != HV_EWOULDBLOCK)) - goto fatal_mondo_error; + /* or, was any target cpu busy processing other mondos? */ + target_cpu_busy = (xc_rcvd < CPU_MONDO_COUNTER(first_cpu)); + xc_rcvd = CPU_MONDO_COUNTER(first_cpu); - /* Don't bother rewriting the CPU list, just leave the - * 0xffff and non-0xffff entries in there and the - * hypervisor will do the right thing. - * - * Only advance timeout state if we didn't make any - * forward progress. + /* Retry count is for no progress. If we're making progress, + * reset the retry count. */ - if (unlikely(!forward_progress)) { - if (unlikely(++retries > 10000)) - goto fatal_mondo_timeout; - - /* Delay a little bit to let other cpus catch up - * on their cpu mondo queue work. - */ - udelay(2 * cnt); + if (likely(mondo_delivered || target_cpu_busy)) { + tot_retries += retries; + retries = 0; + } else if (unlikely(retries > MONDO_RETRY_LIMIT)) { + goto fatal_mondo_timeout; } - } while (1); - if (unlikely(saw_cpu_error)) - goto fatal_mondo_cpu_error; + /* Delay a little bit to let other cpus catch up on + * their cpu mondo queue work. + */ + if (!mondo_delivered) + udelay(usec_wait); - return; + retries++; + } while (1); -fatal_mondo_cpu_error: - printk(KERN_CRIT "CPU[%d]: SUN4V mondo cpu error, some target cpus " - "(including %d) were in error state\n", - this_cpu, saw_cpu_error - 1); +xcall_done: + if (unlikely(ecpuerror_id > 0)) { + pr_crit("CPU[%d]: SUN4V mondo cpu error, target cpu(%d) was in error state\n", + this_cpu, ecpuerror_id - 1); + } else if (unlikely(enocpu_id > 0)) { + pr_crit("CPU[%d]: SUN4V mondo cpu error, target cpu(%d) does not belong to the domain\n", + this_cpu, enocpu_id - 1); + } return; +fatal_errors: + /* fatal errors include bad alignment, etc */ + pr_crit("CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) mondo_block_pa(%lx)\n", + this_cpu, tot_cpus, tb->cpu_list_pa, tb->cpu_mondo_block_pa); + panic("Unexpected SUN4V mondo error %lu\n", status); + fatal_mondo_timeout: - printk(KERN_CRIT "CPU[%d]: SUN4V mondo timeout, no forward " - " progress after %d retries.\n", - this_cpu, retries); - goto dump_cpu_list_and_out; - -fatal_mondo_error: - printk(KERN_CRIT "CPU[%d]: Unexpected SUN4V mondo error %lu\n", - this_cpu, status); - printk(KERN_CRIT "CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) " - "mondo_block_pa(%lx)\n", - this_cpu, cnt, tb->cpu_list_pa, tb->cpu_mondo_block_pa); - -dump_cpu_list_and_out: - printk(KERN_CRIT "CPU[%d]: CPU list [ ", this_cpu); - for (i = 0; i < cnt; i++) - printk("%u ", cpu_list[i]); - printk("]\n"); + /* some cpus being non-responsive to the cpu mondo */ + pr_crit("CPU[%d]: SUN4V mondo timeout, cpu(%d) made no forward progress after %d retries. Total target cpus(%d).\n", + this_cpu, first_cpu, (tot_retries + retries), tot_cpus); + panic("SUN4V mondo timeout panic\n"); } static void (*xcall_deliver_impl)(struct trap_per_cpu *, int); @@ -959,37 +1004,6 @@ void flush_dcache_page_all(struct mm_struct *mm, struct page *page) preempt_enable(); } -void __irq_entry smp_new_mmu_context_version_client(int irq, struct pt_regs *regs) -{ - struct mm_struct *mm; - unsigned long flags; - - clear_softint(1 << irq); - - /* See if we need to allocate a new TLB context because - * the version of the one we are using is now out of date. - */ - mm = current->active_mm; - if (unlikely(!mm || (mm == &init_mm))) - return; - - spin_lock_irqsave(&mm->context.lock, flags); - - if (unlikely(!CTX_VALID(mm->context))) - get_new_mmu_context(mm); - - spin_unlock_irqrestore(&mm->context.lock, flags); - - load_secondary_context(mm); - __flush_tlb_mm(CTX_HWBITS(mm->context), - SECONDARY_CONTEXT); -} - -void smp_new_mmu_context_version(void) -{ - smp_cross_call(&xcall_new_mmu_context_version, 0, 0, 0); -} - #ifdef CONFIG_KGDB void kgdb_roundup_cpus(unsigned long flags) { @@ -1429,8 +1443,12 @@ void smp_send_stop(void) int cpu; if (tlb_type == hypervisor) { + int this_cpu = smp_processor_id(); +#ifdef CONFIG_SERIAL_SUNHV + sunhv_migrate_hvcons_irq(this_cpu); +#endif for_each_online_cpu(cpu) { - if (cpu == smp_processor_id()) + if (cpu == this_cpu) continue; #ifdef CONFIG_SUN_LDOMS if (ldom_domaining_enabled) { diff --git a/arch/sparc/kernel/sparc_ksyms_64.c b/arch/sparc/kernel/sparc_ksyms_64.c index a92d5d2c46a3..51b25325a961 100644 --- a/arch/sparc/kernel/sparc_ksyms_64.c +++ b/arch/sparc/kernel/sparc_ksyms_64.c @@ -27,7 +27,6 @@ EXPORT_SYMBOL(__flushw_user); EXPORT_SYMBOL_GPL(real_hard_smp_processor_id); /* from head_64.S */ -EXPORT_SYMBOL(__ret_efault); EXPORT_SYMBOL(tlb_type); EXPORT_SYMBOL(sun4v_chip_type); EXPORT_SYMBOL(prom_root_node); diff --git a/arch/sparc/kernel/sun4v_ivec.S b/arch/sparc/kernel/sun4v_ivec.S index 559bc5e9c199..34631995859a 100644 --- a/arch/sparc/kernel/sun4v_ivec.S +++ b/arch/sparc/kernel/sun4v_ivec.S @@ -26,6 +26,21 @@ sun4v_cpu_mondo: ldxa [%g0] ASI_SCRATCHPAD, %g4 sub %g4, TRAP_PER_CPU_FAULT_INFO, %g4 + /* Get smp_processor_id() into %g3 */ + sethi %hi(trap_block), %g5 + or %g5, %lo(trap_block), %g5 + sub %g4, %g5, %g3 + srlx %g3, TRAP_BLOCK_SZ_SHIFT, %g3 + + /* Increment cpu_mondo_counter[smp_processor_id()] */ + sethi %hi(cpu_mondo_counter), %g5 + or %g5, %lo(cpu_mondo_counter), %g5 + sllx %g3, 3, %g3 + add %g5, %g3, %g5 + ldx [%g5], %g3 + add %g3, 1, %g3 + stx %g3, [%g5] + /* Get CPU mondo queue base phys address into %g7. */ ldx [%g4 + TRAP_PER_CPU_CPU_MONDO_PA], %g7 diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c index 646988d4c1a3..740f43b9b541 100644 --- a/arch/sparc/kernel/sys_sparc_32.c +++ b/arch/sparc/kernel/sys_sparc_32.c @@ -201,23 +201,27 @@ SYSCALL_DEFINE5(rt_sigaction, int, sig, asmlinkage long sys_getdomainname(char __user *name, int len) { - int nlen, err; - + int nlen, err; + char tmp[__NEW_UTS_LEN + 1]; + if (len < 0) return -EINVAL; - down_read(&uts_sem); - + down_read(&uts_sem); + nlen = strlen(utsname()->domainname) + 1; err = -EINVAL; if (nlen > len) - goto out; + goto out_unlock; + memcpy(tmp, utsname()->domainname, nlen); - err = -EFAULT; - if (!copy_to_user(name, utsname()->domainname, nlen)) - err = 0; + up_read(&uts_sem); -out: + if (copy_to_user(name, tmp, nlen)) + return -EFAULT; + return 0; + +out_unlock: up_read(&uts_sem); return err; } diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index c690c8e16a96..7301fa2091bc 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -118,7 +118,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi vma = find_vma(mm, addr); if (task_size - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } @@ -181,7 +181,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, vma = find_vma(mm, addr); if (task_size - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } @@ -264,7 +264,7 @@ static unsigned long mmap_rnd(void) unsigned long rnd = 0UL; if (current->flags & PF_RANDOMIZE) { - unsigned long val = get_random_int(); + unsigned long val = get_random_long(); if (test_thread_flag(TIF_32BIT)) rnd = (val % (1UL << (23UL-PAGE_SHIFT))); else @@ -524,23 +524,27 @@ extern void check_pending(int signum); SYSCALL_DEFINE2(getdomainname, char __user *, name, int, len) { - int nlen, err; + int nlen, err; + char tmp[__NEW_UTS_LEN + 1]; if (len < 0) return -EINVAL; - down_read(&uts_sem); - + down_read(&uts_sem); + nlen = strlen(utsname()->domainname) + 1; err = -EINVAL; if (nlen > len) - goto out; + goto out_unlock; + memcpy(tmp, utsname()->domainname, nlen); + + up_read(&uts_sem); - err = -EFAULT; - if (!copy_to_user(name, utsname()->domainname, nlen)) - err = 0; + if (copy_to_user(name, tmp, nlen)) + return -EFAULT; + return 0; -out: +out_unlock: up_read(&uts_sem); return err; } diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index d21cd625c0de..d883c5951e8b 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c @@ -85,7 +85,7 @@ static void dump_tl1_traplog(struct tl1_traplog *p) void bad_trap(struct pt_regs *regs, long lvl) { - char buffer[32]; + char buffer[36]; siginfo_t info; if (notify_die(DIE_TRAP, "bad trap", regs, @@ -116,7 +116,7 @@ void bad_trap(struct pt_regs *regs, long lvl) void bad_trap_tl1(struct pt_regs *regs, long lvl) { - char buffer[32]; + char buffer[36]; if (notify_die(DIE_TRAP_TL1, "bad trap tl1", regs, 0, lvl, SIGTRAP) == NOTIFY_STOP) @@ -2659,6 +2659,7 @@ void do_getpsr(struct pt_regs *regs) } } +u64 cpu_mondo_counter[NR_CPUS] = {0}; struct trap_per_cpu trap_block[NR_CPUS]; EXPORT_SYMBOL(trap_block); diff --git a/arch/sparc/kernel/tsb.S b/arch/sparc/kernel/tsb.S index be98685c14c6..7d961f6e3907 100644 --- a/arch/sparc/kernel/tsb.S +++ b/arch/sparc/kernel/tsb.S @@ -29,13 +29,17 @@ */ tsb_miss_dtlb: mov TLB_TAG_ACCESS, %g4 + ldxa [%g4] ASI_DMMU, %g4 + srlx %g4, PAGE_SHIFT, %g4 ba,pt %xcc, tsb_miss_page_table_walk - ldxa [%g4] ASI_DMMU, %g4 + sllx %g4, PAGE_SHIFT, %g4 tsb_miss_itlb: mov TLB_TAG_ACCESS, %g4 + ldxa [%g4] ASI_IMMU, %g4 + srlx %g4, PAGE_SHIFT, %g4 ba,pt %xcc, tsb_miss_page_table_walk - ldxa [%g4] ASI_IMMU, %g4 + sllx %g4, PAGE_SHIFT, %g4 /* At this point we have: * %g1 -- PAGE_SIZE TSB entry address @@ -284,6 +288,10 @@ tsb_do_dtlb_fault: nop .previous + /* Clear context ID bits. */ + srlx %g5, PAGE_SHIFT, %g5 + sllx %g5, PAGE_SHIFT, %g5 + be,pt %xcc, sparc64_realfault_common mov FAULT_CODE_DTLB, %g4 ba,pt %xcc, winfix_trampoline @@ -367,6 +375,7 @@ tsb_flush: * %o1: TSB base config pointer * %o2: TSB huge config pointer, or NULL if none * %o3: Hypervisor TSB descriptor physical address + * %o4: Secondary context to load, if non-zero * * We have to run this whole thing with interrupts * disabled so that the current cpu doesn't change @@ -379,6 +388,17 @@ __tsb_context_switch: rdpr %pstate, %g1 wrpr %g1, PSTATE_IE, %pstate + brz,pn %o4, 1f + mov SECONDARY_CONTEXT, %o5 + +661: stxa %o4, [%o5] ASI_DMMU + .section .sun4v_1insn_patch, "ax" + .word 661b + stxa %o4, [%o5] ASI_MMU + .previous + flush %g6 + +1: TRAP_LOAD_TRAP_BLOCK(%g2, %g3) stx %o0, [%g2 + TRAP_PER_CPU_PGD_PADDR] @@ -462,13 +482,16 @@ __tsb_context_switch: .type copy_tsb,#function copy_tsb: /* %o0=old_tsb_base, %o1=old_tsb_size * %o2=new_tsb_base, %o3=new_tsb_size + * %o4=page_size_shift */ sethi %uhi(TSB_PASS_BITS), %g7 srlx %o3, 4, %o3 - add %o0, %o1, %g1 /* end of old tsb */ + add %o0, %o1, %o1 /* end of old tsb */ sllx %g7, 32, %g7 sub %o3, 1, %o3 /* %o3 == new tsb hash mask */ + mov %o4, %g1 /* page_size_shift */ + 661: prefetcha [%o0] ASI_N, #one_read .section .tsb_phys_patch, "ax" .word 661b @@ -493,9 +516,9 @@ copy_tsb: /* %o0=old_tsb_base, %o1=old_tsb_size /* This can definitely be computed faster... */ srlx %o0, 4, %o5 /* Build index */ and %o5, 511, %o5 /* Mask index */ - sllx %o5, PAGE_SHIFT, %o5 /* Put into vaddr position */ + sllx %o5, %g1, %o5 /* Put into vaddr position */ or %o4, %o5, %o4 /* Full VADDR. */ - srlx %o4, PAGE_SHIFT, %o4 /* Shift down to create index */ + srlx %o4, %g1, %o4 /* Shift down to create index */ and %o4, %o3, %o4 /* Mask with new_tsb_nents-1 */ sllx %o4, 4, %o4 /* Shift back up into tsb ent offset */ TSB_STORE(%o2 + %o4, %g2) /* Store TAG */ @@ -503,7 +526,7 @@ copy_tsb: /* %o0=old_tsb_base, %o1=old_tsb_size TSB_STORE(%o2 + %o4, %g3) /* Store TTE */ 80: add %o0, 16, %o0 - cmp %o0, %g1 + cmp %o0, %o1 bne,pt %xcc, 90b nop diff --git a/arch/sparc/kernel/ttable_64.S b/arch/sparc/kernel/ttable_64.S index c6dfdaa29e20..170ead662f2a 100644 --- a/arch/sparc/kernel/ttable_64.S +++ b/arch/sparc/kernel/ttable_64.S @@ -50,7 +50,7 @@ tl0_resv03e: BTRAP(0x3e) BTRAP(0x3f) BTRAP(0x40) tl0_irq1: TRAP_IRQ(smp_call_function_client, 1) tl0_irq2: TRAP_IRQ(smp_receive_signal_client, 2) tl0_irq3: TRAP_IRQ(smp_penguin_jailcell, 3) -tl0_irq4: TRAP_IRQ(smp_new_mmu_context_version_client, 4) +tl0_irq4: BTRAP(0x44) #else tl0_irq1: BTRAP(0x41) tl0_irq2: BTRAP(0x42) diff --git a/arch/sparc/kernel/vio.c b/arch/sparc/kernel/vio.c index cb5789c9f961..34824ca396f0 100644 --- a/arch/sparc/kernel/vio.c +++ b/arch/sparc/kernel/vio.c @@ -284,13 +284,16 @@ static struct vio_dev *vio_create_one(struct mdesc_handle *hp, u64 mp, if (!id) { dev_set_name(&vdev->dev, "%s", bus_id_name); vdev->dev_no = ~(u64)0; + vdev->id = ~(u64)0; } else if (!cfg_handle) { dev_set_name(&vdev->dev, "%s-%llu", bus_id_name, *id); vdev->dev_no = *id; + vdev->id = ~(u64)0; } else { dev_set_name(&vdev->dev, "%s-%llu-%llu", bus_id_name, *cfg_handle, *id); vdev->dev_no = *cfg_handle; + vdev->id = *id; } vdev->dev.parent = parent; @@ -333,27 +336,84 @@ static void vio_add(struct mdesc_handle *hp, u64 node) (void) vio_create_one(hp, node, &root_vdev->dev); } +struct vio_md_node_query { + const char *type; + u64 dev_no; + u64 id; +}; + static int vio_md_node_match(struct device *dev, void *arg) { + struct vio_md_node_query *query = (struct vio_md_node_query *) arg; struct vio_dev *vdev = to_vio_dev(dev); - if (vdev->mp == (u64) arg) - return 1; + if (vdev->dev_no != query->dev_no) + return 0; + if (vdev->id != query->id) + return 0; + if (strcmp(vdev->type, query->type)) + return 0; - return 0; + return 1; } static void vio_remove(struct mdesc_handle *hp, u64 node) { + const char *type; + const u64 *id, *cfg_handle; + u64 a; + struct vio_md_node_query query; struct device *dev; - dev = device_find_child(&root_vdev->dev, (void *) node, + type = mdesc_get_property(hp, node, "device-type", NULL); + if (!type) { + type = mdesc_get_property(hp, node, "name", NULL); + if (!type) + type = mdesc_node_name(hp, node); + } + + query.type = type; + + id = mdesc_get_property(hp, node, "id", NULL); + cfg_handle = NULL; + mdesc_for_each_arc(a, hp, node, MDESC_ARC_TYPE_BACK) { + u64 target; + + target = mdesc_arc_target(hp, a); + cfg_handle = mdesc_get_property(hp, target, + "cfg-handle", NULL); + if (cfg_handle) + break; + } + + if (!id) { + query.dev_no = ~(u64)0; + query.id = ~(u64)0; + } else if (!cfg_handle) { + query.dev_no = *id; + query.id = ~(u64)0; + } else { + query.dev_no = *cfg_handle; + query.id = *id; + } + + dev = device_find_child(&root_vdev->dev, &query, vio_md_node_match); if (dev) { printk(KERN_INFO "VIO: Removing device %s\n", dev_name(dev)); device_unregister(dev); put_device(dev); + } else { + if (!id) + printk(KERN_ERR "VIO: Removed unknown %s node.\n", + type); + else if (!cfg_handle) + printk(KERN_ERR "VIO: Removed unknown %s node %llu.\n", + type, *id); + else + printk(KERN_ERR "VIO: Removed unknown %s node %llu-%llu.\n", + type, *cfg_handle, *id); } } diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index 4a41d412dd3d..7d02b1fef025 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -52,6 +52,7 @@ SECTIONS LOCK_TEXT KPROBES_TEXT IRQENTRY_TEXT + SOFTIRQENTRY_TEXT *(.gnu.warning) } = 0 _etext = .; diff --git a/arch/sparc/lib/GENcopy_from_user.S b/arch/sparc/lib/GENcopy_from_user.S index b7d0bd6b1406..69a439fa2fc1 100644 --- a/arch/sparc/lib/GENcopy_from_user.S +++ b/arch/sparc/lib/GENcopy_from_user.S @@ -3,11 +3,11 @@ * Copyright (C) 2007 David S. Miller (davem@davemloft.net) */ -#define EX_LD(x) \ +#define EX_LD(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one; \ + .word 98b, y; \ .text; \ .align 4; diff --git a/arch/sparc/lib/GENcopy_to_user.S b/arch/sparc/lib/GENcopy_to_user.S index 780550e1afc7..9947427ce354 100644 --- a/arch/sparc/lib/GENcopy_to_user.S +++ b/arch/sparc/lib/GENcopy_to_user.S @@ -3,11 +3,11 @@ * Copyright (C) 2007 David S. Miller (davem@davemloft.net) */ -#define EX_ST(x) \ +#define EX_ST(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one; \ + .word 98b, y; \ .text; \ .align 4; diff --git a/arch/sparc/lib/GENmemcpy.S b/arch/sparc/lib/GENmemcpy.S index 89358ee94851..059ea24ad73d 100644 --- a/arch/sparc/lib/GENmemcpy.S +++ b/arch/sparc/lib/GENmemcpy.S @@ -4,21 +4,18 @@ */ #ifdef __KERNEL__ +#include <linux/linkage.h> #define GLOBAL_SPARE %g7 #else #define GLOBAL_SPARE %g5 #endif #ifndef EX_LD -#define EX_LD(x) x +#define EX_LD(x,y) x #endif #ifndef EX_ST -#define EX_ST(x) x -#endif - -#ifndef EX_RETVAL -#define EX_RETVAL(x) x +#define EX_ST(x,y) x #endif #ifndef LOAD @@ -45,6 +42,29 @@ .register %g3,#scratch .text + +#ifndef EX_RETVAL +#define EX_RETVAL(x) x +ENTRY(GEN_retl_o4_1) + add %o4, %o2, %o4 + retl + add %o4, 1, %o0 +ENDPROC(GEN_retl_o4_1) +ENTRY(GEN_retl_g1_8) + add %g1, %o2, %g1 + retl + add %g1, 8, %o0 +ENDPROC(GEN_retl_g1_8) +ENTRY(GEN_retl_o2_4) + retl + add %o2, 4, %o0 +ENDPROC(GEN_retl_o2_4) +ENTRY(GEN_retl_o2_1) + retl + add %o2, 1, %o0 +ENDPROC(GEN_retl_o2_1) +#endif + .align 64 .globl FUNC_NAME @@ -73,8 +93,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ sub %g0, %o4, %o4 sub %o2, %o4, %o2 1: subcc %o4, 1, %o4 - EX_LD(LOAD(ldub, %o1, %g1)) - EX_ST(STORE(stb, %g1, %o0)) + EX_LD(LOAD(ldub, %o1, %g1),GEN_retl_o4_1) + EX_ST(STORE(stb, %g1, %o0),GEN_retl_o4_1) add %o1, 1, %o1 bne,pt %XCC, 1b add %o0, 1, %o0 @@ -82,8 +102,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ andn %o2, 0x7, %g1 sub %o2, %g1, %o2 1: subcc %g1, 0x8, %g1 - EX_LD(LOAD(ldx, %o1, %g2)) - EX_ST(STORE(stx, %g2, %o0)) + EX_LD(LOAD(ldx, %o1, %g2),GEN_retl_g1_8) + EX_ST(STORE(stx, %g2, %o0),GEN_retl_g1_8) add %o1, 0x8, %o1 bne,pt %XCC, 1b add %o0, 0x8, %o0 @@ -100,8 +120,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 1: subcc %o2, 4, %o2 - EX_LD(LOAD(lduw, %o1, %g1)) - EX_ST(STORE(stw, %g1, %o1 + %o3)) + EX_LD(LOAD(lduw, %o1, %g1),GEN_retl_o2_4) + EX_ST(STORE(stw, %g1, %o1 + %o3),GEN_retl_o2_4) bgu,pt %XCC, 1b add %o1, 4, %o1 @@ -111,8 +131,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ .align 32 90: subcc %o2, 1, %o2 - EX_LD(LOAD(ldub, %o1, %g1)) - EX_ST(STORE(stb, %g1, %o1 + %o3)) + EX_LD(LOAD(ldub, %o1, %g1),GEN_retl_o2_1) + EX_ST(STORE(stb, %g1, %o1 + %o3),GEN_retl_o2_1) bgu,pt %XCC, 90b add %o1, 1, %o1 retl diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index 3269b0234093..4f2384a4286a 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -38,7 +38,7 @@ lib-$(CONFIG_SPARC64) += NG4patch.o NG4copy_page.o NG4clear_page.o NG4memset.o lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o -lib-$(CONFIG_SPARC64) += copy_in_user.o user_fixup.o memmove.o +lib-$(CONFIG_SPARC64) += copy_in_user.o memmove.o lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o obj-$(CONFIG_SPARC64) += iomap.o diff --git a/arch/sparc/lib/NG2copy_from_user.S b/arch/sparc/lib/NG2copy_from_user.S index d5242b8c4f94..b79a6998d87c 100644 --- a/arch/sparc/lib/NG2copy_from_user.S +++ b/arch/sparc/lib/NG2copy_from_user.S @@ -3,19 +3,19 @@ * Copyright (C) 2007 David S. Miller (davem@davemloft.net) */ -#define EX_LD(x) \ +#define EX_LD(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_asi;\ + .word 98b, y; \ .text; \ .align 4; -#define EX_LD_FP(x) \ +#define EX_LD_FP(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_asi_fp;\ + .word 98b, y##_fp; \ .text; \ .align 4; diff --git a/arch/sparc/lib/NG2copy_to_user.S b/arch/sparc/lib/NG2copy_to_user.S index 4e962d993b10..dcec55f254ab 100644 --- a/arch/sparc/lib/NG2copy_to_user.S +++ b/arch/sparc/lib/NG2copy_to_user.S @@ -3,19 +3,19 @@ * Copyright (C) 2007 David S. Miller (davem@davemloft.net) */ -#define EX_ST(x) \ +#define EX_ST(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_asi;\ + .word 98b, y; \ .text; \ .align 4; -#define EX_ST_FP(x) \ +#define EX_ST_FP(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_asi_fp;\ + .word 98b, y##_fp; \ .text; \ .align 4; diff --git a/arch/sparc/lib/NG2memcpy.S b/arch/sparc/lib/NG2memcpy.S index d5f585df2f3f..c629dbd121b6 100644 --- a/arch/sparc/lib/NG2memcpy.S +++ b/arch/sparc/lib/NG2memcpy.S @@ -4,6 +4,7 @@ */ #ifdef __KERNEL__ +#include <linux/linkage.h> #include <asm/visasm.h> #include <asm/asi.h> #define GLOBAL_SPARE %g7 @@ -32,21 +33,17 @@ #endif #ifndef EX_LD -#define EX_LD(x) x +#define EX_LD(x,y) x #endif #ifndef EX_LD_FP -#define EX_LD_FP(x) x +#define EX_LD_FP(x,y) x #endif #ifndef EX_ST -#define EX_ST(x) x +#define EX_ST(x,y) x #endif #ifndef EX_ST_FP -#define EX_ST_FP(x) x -#endif - -#ifndef EX_RETVAL -#define EX_RETVAL(x) x +#define EX_ST_FP(x,y) x #endif #ifndef LOAD @@ -140,45 +137,110 @@ fsrc2 %x6, %f12; \ fsrc2 %x7, %f14; #define FREG_LOAD_1(base, x0) \ - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)) + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1) #define FREG_LOAD_2(base, x0, x1) \ - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); #define FREG_LOAD_3(base, x0, x1, x2) \ - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ - EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); #define FREG_LOAD_4(base, x0, x1, x2, x3) \ - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ - EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \ - EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); #define FREG_LOAD_5(base, x0, x1, x2, x3, x4) \ - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ - EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \ - EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \ - EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); #define FREG_LOAD_6(base, x0, x1, x2, x3, x4, x5) \ - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ - EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \ - EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \ - EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); \ - EX_LD_FP(LOAD(ldd, base + 0x28, %x5)); + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1); #define FREG_LOAD_7(base, x0, x1, x2, x3, x4, x5, x6) \ - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ - EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \ - EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \ - EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); \ - EX_LD_FP(LOAD(ldd, base + 0x28, %x5)); \ - EX_LD_FP(LOAD(ldd, base + 0x30, %x6)); + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x30, %x6), NG2_retl_o2_plus_g1); .register %g2,#scratch .register %g3,#scratch .text +#ifndef EX_RETVAL +#define EX_RETVAL(x) x +__restore_fp: + VISExitHalf +__restore_asi: + retl + wr %g0, ASI_AIUS, %asi +ENTRY(NG2_retl_o2) + ba,pt %xcc, __restore_asi + mov %o2, %o0 +ENDPROC(NG2_retl_o2) +ENTRY(NG2_retl_o2_plus_1) + ba,pt %xcc, __restore_asi + add %o2, 1, %o0 +ENDPROC(NG2_retl_o2_plus_1) +ENTRY(NG2_retl_o2_plus_4) + ba,pt %xcc, __restore_asi + add %o2, 4, %o0 +ENDPROC(NG2_retl_o2_plus_4) +ENTRY(NG2_retl_o2_plus_8) + ba,pt %xcc, __restore_asi + add %o2, 8, %o0 +ENDPROC(NG2_retl_o2_plus_8) +ENTRY(NG2_retl_o2_plus_o4_plus_1) + add %o4, 1, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG2_retl_o2_plus_o4_plus_1) +ENTRY(NG2_retl_o2_plus_o4_plus_8) + add %o4, 8, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG2_retl_o2_plus_o4_plus_8) +ENTRY(NG2_retl_o2_plus_o4_plus_16) + add %o4, 16, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG2_retl_o2_plus_o4_plus_16) +ENTRY(NG2_retl_o2_plus_g1_fp) + ba,pt %xcc, __restore_fp + add %o2, %g1, %o0 +ENDPROC(NG2_retl_o2_plus_g1_fp) +ENTRY(NG2_retl_o2_plus_g1_plus_64_fp) + add %g1, 64, %g1 + ba,pt %xcc, __restore_fp + add %o2, %g1, %o0 +ENDPROC(NG2_retl_o2_plus_g1_plus_64_fp) +ENTRY(NG2_retl_o2_plus_g1_plus_1) + add %g1, 1, %g1 + ba,pt %xcc, __restore_asi + add %o2, %g1, %o0 +ENDPROC(NG2_retl_o2_plus_g1_plus_1) +ENTRY(NG2_retl_o2_and_7_plus_o4) + and %o2, 7, %o2 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG2_retl_o2_and_7_plus_o4) +ENTRY(NG2_retl_o2_and_7_plus_o4_plus_8) + and %o2, 7, %o2 + add %o4, 8, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG2_retl_o2_and_7_plus_o4_plus_8) +#endif + .align 64 .globl FUNC_NAME @@ -230,8 +292,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ sub %g0, %o4, %o4 ! bytes to align dst sub %o2, %o4, %o2 1: subcc %o4, 1, %o4 - EX_LD(LOAD(ldub, %o1, %g1)) - EX_ST(STORE(stb, %g1, %o0)) + EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_o4_plus_1) + EX_ST(STORE(stb, %g1, %o0), NG2_retl_o2_plus_o4_plus_1) add %o1, 1, %o1 bne,pt %XCC, 1b add %o0, 1, %o0 @@ -281,11 +343,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ nop /* fall through for 0 < low bits < 8 */ 110: sub %o4, 64, %g2 - EX_LD_FP(LOAD_BLK(%g2, %f0)) -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) - EX_LD_FP(LOAD_BLK(%o4, %f16)) + EX_LD_FP(LOAD_BLK(%g2, %f0), NG2_retl_o2_plus_g1) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f14, f16) - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) FREG_MOVE_8(f16, f18, f20, f22, f24, f26, f28, f30) subcc %g1, 64, %g1 add %o4, 64, %o4 @@ -296,10 +358,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 120: sub %o4, 56, %g2 FREG_LOAD_7(%g2, f0, f2, f4, f6, f8, f10, f12) -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) - EX_LD_FP(LOAD_BLK(%o4, %f16)) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f16, f18) - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) FREG_MOVE_7(f18, f20, f22, f24, f26, f28, f30) subcc %g1, 64, %g1 add %o4, 64, %o4 @@ -310,10 +372,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 130: sub %o4, 48, %g2 FREG_LOAD_6(%g2, f0, f2, f4, f6, f8, f10) -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) - EX_LD_FP(LOAD_BLK(%o4, %f16)) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) FREG_FROB(f0, f2, f4, f6, f8, f10, f16, f18, f20) - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) FREG_MOVE_6(f20, f22, f24, f26, f28, f30) subcc %g1, 64, %g1 add %o4, 64, %o4 @@ -324,10 +386,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 140: sub %o4, 40, %g2 FREG_LOAD_5(%g2, f0, f2, f4, f6, f8) -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) - EX_LD_FP(LOAD_BLK(%o4, %f16)) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) FREG_FROB(f0, f2, f4, f6, f8, f16, f18, f20, f22) - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) FREG_MOVE_5(f22, f24, f26, f28, f30) subcc %g1, 64, %g1 add %o4, 64, %o4 @@ -338,10 +400,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 150: sub %o4, 32, %g2 FREG_LOAD_4(%g2, f0, f2, f4, f6) -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) - EX_LD_FP(LOAD_BLK(%o4, %f16)) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) FREG_FROB(f0, f2, f4, f6, f16, f18, f20, f22, f24) - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) FREG_MOVE_4(f24, f26, f28, f30) subcc %g1, 64, %g1 add %o4, 64, %o4 @@ -352,10 +414,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 160: sub %o4, 24, %g2 FREG_LOAD_3(%g2, f0, f2, f4) -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) - EX_LD_FP(LOAD_BLK(%o4, %f16)) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) FREG_FROB(f0, f2, f4, f16, f18, f20, f22, f24, f26) - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) FREG_MOVE_3(f26, f28, f30) subcc %g1, 64, %g1 add %o4, 64, %o4 @@ -366,10 +428,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 170: sub %o4, 16, %g2 FREG_LOAD_2(%g2, f0, f2) -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) - EX_LD_FP(LOAD_BLK(%o4, %f16)) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) FREG_FROB(f0, f2, f16, f18, f20, f22, f24, f26, f28) - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) FREG_MOVE_2(f28, f30) subcc %g1, 64, %g1 add %o4, 64, %o4 @@ -380,10 +442,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 180: sub %o4, 8, %g2 FREG_LOAD_1(%g2, f0) -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) - EX_LD_FP(LOAD_BLK(%o4, %f16)) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) FREG_FROB(f0, f16, f18, f20, f22, f24, f26, f28, f30) - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) FREG_MOVE_1(f30) subcc %g1, 64, %g1 add %o4, 64, %o4 @@ -393,10 +455,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ nop 190: -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) subcc %g1, 64, %g1 - EX_LD_FP(LOAD_BLK(%o4, %f0)) - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) + EX_LD_FP(LOAD_BLK(%o4, %f0), NG2_retl_o2_plus_g1_plus_64) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1_plus_64) add %o4, 64, %o4 bne,pt %xcc, 1b LOAD(prefetch, %o4 + 64, #one_read) @@ -423,28 +485,28 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ andn %o2, 0xf, %o4 and %o2, 0xf, %o2 1: subcc %o4, 0x10, %o4 - EX_LD(LOAD(ldx, %o1, %o5)) + EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_o4_plus_16) add %o1, 0x08, %o1 - EX_LD(LOAD(ldx, %o1, %g1)) + EX_LD(LOAD(ldx, %o1, %g1), NG2_retl_o2_plus_o4_plus_16) sub %o1, 0x08, %o1 - EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE)) + EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_16) add %o1, 0x8, %o1 - EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE)) + EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_8) bgu,pt %XCC, 1b add %o1, 0x8, %o1 73: andcc %o2, 0x8, %g0 be,pt %XCC, 1f nop sub %o2, 0x8, %o2 - EX_LD(LOAD(ldx, %o1, %o5)) - EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE)) + EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_8) + EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_8) add %o1, 0x8, %o1 1: andcc %o2, 0x4, %g0 be,pt %XCC, 1f nop sub %o2, 0x4, %o2 - EX_LD(LOAD(lduw, %o1, %o5)) - EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE)) + EX_LD(LOAD(lduw, %o1, %o5), NG2_retl_o2_plus_4) + EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4) add %o1, 0x4, %o1 1: cmp %o2, 0 be,pt %XCC, 85f @@ -460,8 +522,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ sub %o2, %g1, %o2 1: subcc %g1, 1, %g1 - EX_LD(LOAD(ldub, %o1, %o5)) - EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE)) + EX_LD(LOAD(ldub, %o1, %o5), NG2_retl_o2_plus_g1_plus_1) + EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_g1_plus_1) bgu,pt %icc, 1b add %o1, 1, %o1 @@ -477,16 +539,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 8: mov 64, GLOBAL_SPARE andn %o1, 0x7, %o1 - EX_LD(LOAD(ldx, %o1, %g2)) + EX_LD(LOAD(ldx, %o1, %g2), NG2_retl_o2) sub GLOBAL_SPARE, %g1, GLOBAL_SPARE andn %o2, 0x7, %o4 sllx %g2, %g1, %g2 1: add %o1, 0x8, %o1 - EX_LD(LOAD(ldx, %o1, %g3)) + EX_LD(LOAD(ldx, %o1, %g3), NG2_retl_o2_and_7_plus_o4) subcc %o4, 0x8, %o4 srlx %g3, GLOBAL_SPARE, %o5 or %o5, %g2, %o5 - EX_ST(STORE(stx, %o5, %o0)) + EX_ST(STORE(stx, %o5, %o0), NG2_retl_o2_and_7_plus_o4_plus_8) add %o0, 0x8, %o0 bgu,pt %icc, 1b sllx %g3, %g1, %g2 @@ -506,8 +568,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 1: subcc %o2, 4, %o2 - EX_LD(LOAD(lduw, %o1, %g1)) - EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE)) + EX_LD(LOAD(lduw, %o1, %g1), NG2_retl_o2_plus_4) + EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4) bgu,pt %XCC, 1b add %o1, 4, %o1 @@ -517,8 +579,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ .align 32 90: subcc %o2, 1, %o2 - EX_LD(LOAD(ldub, %o1, %g1)) - EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE)) + EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_1) + EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_1) bgu,pt %XCC, 90b add %o1, 1, %o1 retl diff --git a/arch/sparc/lib/NG4copy_from_user.S b/arch/sparc/lib/NG4copy_from_user.S index 2e8ee7ad07a9..16a286c1a528 100644 --- a/arch/sparc/lib/NG4copy_from_user.S +++ b/arch/sparc/lib/NG4copy_from_user.S @@ -3,19 +3,19 @@ * Copyright (C) 2012 David S. Miller (davem@davemloft.net) */ -#define EX_LD(x) \ +#define EX_LD(x, y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_asi;\ + .word 98b, y; \ .text; \ .align 4; -#define EX_LD_FP(x) \ +#define EX_LD_FP(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_asi_fp;\ + .word 98b, y##_fp; \ .text; \ .align 4; diff --git a/arch/sparc/lib/NG4copy_to_user.S b/arch/sparc/lib/NG4copy_to_user.S index be0bf4590df8..6b0276ffc858 100644 --- a/arch/sparc/lib/NG4copy_to_user.S +++ b/arch/sparc/lib/NG4copy_to_user.S @@ -3,19 +3,19 @@ * Copyright (C) 2012 David S. Miller (davem@davemloft.net) */ -#define EX_ST(x) \ +#define EX_ST(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_asi;\ + .word 98b, y; \ .text; \ .align 4; -#define EX_ST_FP(x) \ +#define EX_ST_FP(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_asi_fp;\ + .word 98b, y##_fp; \ .text; \ .align 4; diff --git a/arch/sparc/lib/NG4memcpy.S b/arch/sparc/lib/NG4memcpy.S index 8e13ee1f4454..75bb93b1437f 100644 --- a/arch/sparc/lib/NG4memcpy.S +++ b/arch/sparc/lib/NG4memcpy.S @@ -4,6 +4,7 @@ */ #ifdef __KERNEL__ +#include <linux/linkage.h> #include <asm/visasm.h> #include <asm/asi.h> #define GLOBAL_SPARE %g7 @@ -46,22 +47,19 @@ #endif #ifndef EX_LD -#define EX_LD(x) x +#define EX_LD(x,y) x #endif #ifndef EX_LD_FP -#define EX_LD_FP(x) x +#define EX_LD_FP(x,y) x #endif #ifndef EX_ST -#define EX_ST(x) x +#define EX_ST(x,y) x #endif #ifndef EX_ST_FP -#define EX_ST_FP(x) x +#define EX_ST_FP(x,y) x #endif -#ifndef EX_RETVAL -#define EX_RETVAL(x) x -#endif #ifndef LOAD #define LOAD(type,addr,dest) type [addr], dest @@ -94,6 +92,158 @@ .register %g3,#scratch .text +#ifndef EX_RETVAL +#define EX_RETVAL(x) x +__restore_asi_fp: + VISExitHalf +__restore_asi: + retl + wr %g0, ASI_AIUS, %asi + +ENTRY(NG4_retl_o2) + ba,pt %xcc, __restore_asi + mov %o2, %o0 +ENDPROC(NG4_retl_o2) +ENTRY(NG4_retl_o2_plus_1) + ba,pt %xcc, __restore_asi + add %o2, 1, %o0 +ENDPROC(NG4_retl_o2_plus_1) +ENTRY(NG4_retl_o2_plus_4) + ba,pt %xcc, __restore_asi + add %o2, 4, %o0 +ENDPROC(NG4_retl_o2_plus_4) +ENTRY(NG4_retl_o2_plus_o5) + ba,pt %xcc, __restore_asi + add %o2, %o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5) +ENTRY(NG4_retl_o2_plus_o5_plus_4) + add %o5, 4, %o5 + ba,pt %xcc, __restore_asi + add %o2, %o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5_plus_4) +ENTRY(NG4_retl_o2_plus_o5_plus_8) + add %o5, 8, %o5 + ba,pt %xcc, __restore_asi + add %o2, %o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5_plus_8) +ENTRY(NG4_retl_o2_plus_o5_plus_16) + add %o5, 16, %o5 + ba,pt %xcc, __restore_asi + add %o2, %o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5_plus_16) +ENTRY(NG4_retl_o2_plus_o5_plus_24) + add %o5, 24, %o5 + ba,pt %xcc, __restore_asi + add %o2, %o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5_plus_24) +ENTRY(NG4_retl_o2_plus_o5_plus_32) + add %o5, 32, %o5 + ba,pt %xcc, __restore_asi + add %o2, %o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5_plus_32) +ENTRY(NG4_retl_o2_plus_g1) + ba,pt %xcc, __restore_asi + add %o2, %g1, %o0 +ENDPROC(NG4_retl_o2_plus_g1) +ENTRY(NG4_retl_o2_plus_g1_plus_1) + add %g1, 1, %g1 + ba,pt %xcc, __restore_asi + add %o2, %g1, %o0 +ENDPROC(NG4_retl_o2_plus_g1_plus_1) +ENTRY(NG4_retl_o2_plus_g1_plus_8) + add %g1, 8, %g1 + ba,pt %xcc, __restore_asi + add %o2, %g1, %o0 +ENDPROC(NG4_retl_o2_plus_g1_plus_8) +ENTRY(NG4_retl_o2_plus_o4) + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4) +ENTRY(NG4_retl_o2_plus_o4_plus_8) + add %o4, 8, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_8) +ENTRY(NG4_retl_o2_plus_o4_plus_16) + add %o4, 16, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_16) +ENTRY(NG4_retl_o2_plus_o4_plus_24) + add %o4, 24, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_24) +ENTRY(NG4_retl_o2_plus_o4_plus_32) + add %o4, 32, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_32) +ENTRY(NG4_retl_o2_plus_o4_plus_40) + add %o4, 40, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_40) +ENTRY(NG4_retl_o2_plus_o4_plus_48) + add %o4, 48, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_48) +ENTRY(NG4_retl_o2_plus_o4_plus_56) + add %o4, 56, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_56) +ENTRY(NG4_retl_o2_plus_o4_plus_64) + add %o4, 64, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_64) +ENTRY(NG4_retl_o2_plus_o4_fp) + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_fp) +ENTRY(NG4_retl_o2_plus_o4_plus_8_fp) + add %o4, 8, %o4 + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_8_fp) +ENTRY(NG4_retl_o2_plus_o4_plus_16_fp) + add %o4, 16, %o4 + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_16_fp) +ENTRY(NG4_retl_o2_plus_o4_plus_24_fp) + add %o4, 24, %o4 + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_24_fp) +ENTRY(NG4_retl_o2_plus_o4_plus_32_fp) + add %o4, 32, %o4 + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_32_fp) +ENTRY(NG4_retl_o2_plus_o4_plus_40_fp) + add %o4, 40, %o4 + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_40_fp) +ENTRY(NG4_retl_o2_plus_o4_plus_48_fp) + add %o4, 48, %o4 + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_48_fp) +ENTRY(NG4_retl_o2_plus_o4_plus_56_fp) + add %o4, 56, %o4 + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_56_fp) +ENTRY(NG4_retl_o2_plus_o4_plus_64_fp) + add %o4, 64, %o4 + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_64_fp) +#endif .align 64 .globl FUNC_NAME @@ -124,12 +274,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ brz,pt %g1, 51f sub %o2, %g1, %o2 -1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2)) + +1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1) add %o1, 1, %o1 subcc %g1, 1, %g1 add %o0, 1, %o0 bne,pt %icc, 1b - EX_ST(STORE(stb, %g2, %o0 - 0x01)) + EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1) 51: LOAD(prefetch, %o1 + 0x040, #n_reads_strong) LOAD(prefetch, %o1 + 0x080, #n_reads_strong) @@ -154,43 +305,43 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ brz,pt %g1, .Llarge_aligned sub %o2, %g1, %o2 -1: EX_LD(LOAD(ldx, %o1 + 0x00, %g2)) +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1) add %o1, 8, %o1 subcc %g1, 8, %g1 add %o0, 8, %o0 bne,pt %icc, 1b - EX_ST(STORE(stx, %g2, %o0 - 0x08)) + EX_ST(STORE(stx, %g2, %o0 - 0x08), NG4_retl_o2_plus_g1_plus_8) .Llarge_aligned: /* len >= 0x80 && src 8-byte aligned && dest 8-byte aligned */ andn %o2, 0x3f, %o4 sub %o2, %o4, %o2 -1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o4) add %o1, 0x40, %o1 - EX_LD(LOAD(ldx, %o1 - 0x38, %g2)) + EX_LD(LOAD(ldx, %o1 - 0x38, %g2), NG4_retl_o2_plus_o4) subcc %o4, 0x40, %o4 - EX_LD(LOAD(ldx, %o1 - 0x30, %g3)) - EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE)) - EX_LD(LOAD(ldx, %o1 - 0x20, %o5)) - EX_ST(STORE_INIT(%g1, %o0)) + EX_LD(LOAD(ldx, %o1 - 0x30, %g3), NG4_retl_o2_plus_o4_plus_64) + EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE), NG4_retl_o2_plus_o4_plus_64) + EX_LD(LOAD(ldx, %o1 - 0x20, %o5), NG4_retl_o2_plus_o4_plus_64) + EX_ST(STORE_INIT(%g1, %o0), NG4_retl_o2_plus_o4_plus_64) add %o0, 0x08, %o0 - EX_ST(STORE_INIT(%g2, %o0)) + EX_ST(STORE_INIT(%g2, %o0), NG4_retl_o2_plus_o4_plus_56) add %o0, 0x08, %o0 - EX_LD(LOAD(ldx, %o1 - 0x18, %g2)) - EX_ST(STORE_INIT(%g3, %o0)) + EX_LD(LOAD(ldx, %o1 - 0x18, %g2), NG4_retl_o2_plus_o4_plus_48) + EX_ST(STORE_INIT(%g3, %o0), NG4_retl_o2_plus_o4_plus_48) add %o0, 0x08, %o0 - EX_LD(LOAD(ldx, %o1 - 0x10, %g3)) - EX_ST(STORE_INIT(GLOBAL_SPARE, %o0)) + EX_LD(LOAD(ldx, %o1 - 0x10, %g3), NG4_retl_o2_plus_o4_plus_40) + EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), NG4_retl_o2_plus_o4_plus_40) add %o0, 0x08, %o0 - EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE)) - EX_ST(STORE_INIT(%o5, %o0)) + EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE), NG4_retl_o2_plus_o4_plus_32) + EX_ST(STORE_INIT(%o5, %o0), NG4_retl_o2_plus_o4_plus_32) add %o0, 0x08, %o0 - EX_ST(STORE_INIT(%g2, %o0)) + EX_ST(STORE_INIT(%g2, %o0), NG4_retl_o2_plus_o4_plus_24) add %o0, 0x08, %o0 - EX_ST(STORE_INIT(%g3, %o0)) + EX_ST(STORE_INIT(%g3, %o0), NG4_retl_o2_plus_o4_plus_16) add %o0, 0x08, %o0 - EX_ST(STORE_INIT(GLOBAL_SPARE, %o0)) + EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), NG4_retl_o2_plus_o4_plus_8) add %o0, 0x08, %o0 bne,pt %icc, 1b LOAD(prefetch, %o1 + 0x200, #n_reads_strong) @@ -216,17 +367,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ sub %o2, %o4, %o2 alignaddr %o1, %g0, %g1 add %o1, %o4, %o1 - EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0)) -1: EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2)) + EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0), NG4_retl_o2_plus_o4) +1: EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2), NG4_retl_o2_plus_o4) subcc %o4, 0x40, %o4 - EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4)) - EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6)) - EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8)) - EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10)) - EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12)) - EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14)) + EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4), NG4_retl_o2_plus_o4_plus_64) + EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6), NG4_retl_o2_plus_o4_plus_64) + EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8), NG4_retl_o2_plus_o4_plus_64) + EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10), NG4_retl_o2_plus_o4_plus_64) + EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12), NG4_retl_o2_plus_o4_plus_64) + EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14), NG4_retl_o2_plus_o4_plus_64) faligndata %f0, %f2, %f16 - EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0)) + EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0), NG4_retl_o2_plus_o4_plus_64) faligndata %f2, %f4, %f18 add %g1, 0x40, %g1 faligndata %f4, %f6, %f20 @@ -235,14 +386,14 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ faligndata %f10, %f12, %f26 faligndata %f12, %f14, %f28 faligndata %f14, %f0, %f30 - EX_ST_FP(STORE(std, %f16, %o0 + 0x00)) - EX_ST_FP(STORE(std, %f18, %o0 + 0x08)) - EX_ST_FP(STORE(std, %f20, %o0 + 0x10)) - EX_ST_FP(STORE(std, %f22, %o0 + 0x18)) - EX_ST_FP(STORE(std, %f24, %o0 + 0x20)) - EX_ST_FP(STORE(std, %f26, %o0 + 0x28)) - EX_ST_FP(STORE(std, %f28, %o0 + 0x30)) - EX_ST_FP(STORE(std, %f30, %o0 + 0x38)) + EX_ST_FP(STORE(std, %f16, %o0 + 0x00), NG4_retl_o2_plus_o4_plus_64) + EX_ST_FP(STORE(std, %f18, %o0 + 0x08), NG4_retl_o2_plus_o4_plus_56) + EX_ST_FP(STORE(std, %f20, %o0 + 0x10), NG4_retl_o2_plus_o4_plus_48) + EX_ST_FP(STORE(std, %f22, %o0 + 0x18), NG4_retl_o2_plus_o4_plus_40) + EX_ST_FP(STORE(std, %f24, %o0 + 0x20), NG4_retl_o2_plus_o4_plus_32) + EX_ST_FP(STORE(std, %f26, %o0 + 0x28), NG4_retl_o2_plus_o4_plus_24) + EX_ST_FP(STORE(std, %f28, %o0 + 0x30), NG4_retl_o2_plus_o4_plus_16) + EX_ST_FP(STORE(std, %f30, %o0 + 0x38), NG4_retl_o2_plus_o4_plus_8) add %o0, 0x40, %o0 bne,pt %icc, 1b LOAD(prefetch, %g1 + 0x200, #n_reads_strong) @@ -270,37 +421,38 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ andncc %o2, 0x20 - 1, %o5 be,pn %icc, 2f sub %o2, %o5, %o2 -1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) - EX_LD(LOAD(ldx, %o1 + 0x08, %g2)) - EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE)) - EX_LD(LOAD(ldx, %o1 + 0x18, %o4)) +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5) + EX_LD(LOAD(ldx, %o1 + 0x08, %g2), NG4_retl_o2_plus_o5) + EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE), NG4_retl_o2_plus_o5) + EX_LD(LOAD(ldx, %o1 + 0x18, %o4), NG4_retl_o2_plus_o5) add %o1, 0x20, %o1 subcc %o5, 0x20, %o5 - EX_ST(STORE(stx, %g1, %o0 + 0x00)) - EX_ST(STORE(stx, %g2, %o0 + 0x08)) - EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10)) - EX_ST(STORE(stx, %o4, %o0 + 0x18)) + EX_ST(STORE(stx, %g1, %o0 + 0x00), NG4_retl_o2_plus_o5_plus_32) + EX_ST(STORE(stx, %g2, %o0 + 0x08), NG4_retl_o2_plus_o5_plus_24) + EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10), NG4_retl_o2_plus_o5_plus_24) + EX_ST(STORE(stx, %o4, %o0 + 0x18), NG4_retl_o2_plus_o5_plus_8) bne,pt %icc, 1b add %o0, 0x20, %o0 2: andcc %o2, 0x18, %o5 be,pt %icc, 3f sub %o2, %o5, %o2 -1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) + +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5) add %o1, 0x08, %o1 add %o0, 0x08, %o0 subcc %o5, 0x08, %o5 bne,pt %icc, 1b - EX_ST(STORE(stx, %g1, %o0 - 0x08)) + EX_ST(STORE(stx, %g1, %o0 - 0x08), NG4_retl_o2_plus_o5_plus_8) 3: brz,pt %o2, .Lexit cmp %o2, 0x04 bl,pn %icc, .Ltiny nop - EX_LD(LOAD(lduw, %o1 + 0x00, %g1)) + EX_LD(LOAD(lduw, %o1 + 0x00, %g1), NG4_retl_o2) add %o1, 0x04, %o1 add %o0, 0x04, %o0 subcc %o2, 0x04, %o2 bne,pn %icc, .Ltiny - EX_ST(STORE(stw, %g1, %o0 - 0x04)) + EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_4) ba,a,pt %icc, .Lexit .Lmedium_unaligned: /* First get dest 8 byte aligned. */ @@ -309,12 +461,12 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ brz,pt %g1, 2f sub %o2, %g1, %o2 -1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2)) +1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1) add %o1, 1, %o1 subcc %g1, 1, %g1 add %o0, 1, %o0 bne,pt %icc, 1b - EX_ST(STORE(stb, %g2, %o0 - 0x01)) + EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1) 2: and %o1, 0x7, %g1 brz,pn %g1, .Lmedium_noprefetch @@ -322,16 +474,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ mov 64, %g2 sub %g2, %g1, %g2 andn %o1, 0x7, %o1 - EX_LD(LOAD(ldx, %o1 + 0x00, %o4)) + EX_LD(LOAD(ldx, %o1 + 0x00, %o4), NG4_retl_o2) sllx %o4, %g1, %o4 andn %o2, 0x08 - 1, %o5 sub %o2, %o5, %o2 -1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3)) +1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3), NG4_retl_o2_plus_o5) add %o1, 0x08, %o1 subcc %o5, 0x08, %o5 srlx %g3, %g2, GLOBAL_SPARE or GLOBAL_SPARE, %o4, GLOBAL_SPARE - EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00)) + EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00), NG4_retl_o2_plus_o5_plus_8) add %o0, 0x08, %o0 bne,pt %icc, 1b sllx %g3, %g1, %o4 @@ -342,17 +494,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ba,pt %icc, .Lsmall_unaligned .Ltiny: - EX_LD(LOAD(ldub, %o1 + 0x00, %g1)) + EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2) subcc %o2, 1, %o2 be,pn %icc, .Lexit - EX_ST(STORE(stb, %g1, %o0 + 0x00)) - EX_LD(LOAD(ldub, %o1 + 0x01, %g1)) + EX_ST(STORE(stb, %g1, %o0 + 0x00), NG4_retl_o2_plus_1) + EX_LD(LOAD(ldub, %o1 + 0x01, %g1), NG4_retl_o2) subcc %o2, 1, %o2 be,pn %icc, .Lexit - EX_ST(STORE(stb, %g1, %o0 + 0x01)) - EX_LD(LOAD(ldub, %o1 + 0x02, %g1)) + EX_ST(STORE(stb, %g1, %o0 + 0x01), NG4_retl_o2_plus_1) + EX_LD(LOAD(ldub, %o1 + 0x02, %g1), NG4_retl_o2) ba,pt %icc, .Lexit - EX_ST(STORE(stb, %g1, %o0 + 0x02)) + EX_ST(STORE(stb, %g1, %o0 + 0x02), NG4_retl_o2) .Lsmall: andcc %g2, 0x3, %g0 @@ -360,22 +512,22 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ andn %o2, 0x4 - 1, %o5 sub %o2, %o5, %o2 1: - EX_LD(LOAD(lduw, %o1 + 0x00, %g1)) + EX_LD(LOAD(lduw, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5) add %o1, 0x04, %o1 subcc %o5, 0x04, %o5 add %o0, 0x04, %o0 bne,pt %icc, 1b - EX_ST(STORE(stw, %g1, %o0 - 0x04)) + EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_o5_plus_4) brz,pt %o2, .Lexit nop ba,a,pt %icc, .Ltiny .Lsmall_unaligned: -1: EX_LD(LOAD(ldub, %o1 + 0x00, %g1)) +1: EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2) add %o1, 1, %o1 add %o0, 1, %o0 subcc %o2, 1, %o2 bne,pt %icc, 1b - EX_ST(STORE(stb, %g1, %o0 - 0x01)) + EX_ST(STORE(stb, %g1, %o0 - 0x01), NG4_retl_o2_plus_1) ba,a,pt %icc, .Lexit .size FUNC_NAME, .-FUNC_NAME diff --git a/arch/sparc/lib/NGcopy_from_user.S b/arch/sparc/lib/NGcopy_from_user.S index 5d1e4d1ac21e..9cd42fcbc781 100644 --- a/arch/sparc/lib/NGcopy_from_user.S +++ b/arch/sparc/lib/NGcopy_from_user.S @@ -3,11 +3,11 @@ * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net) */ -#define EX_LD(x) \ +#define EX_LD(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __ret_one_asi;\ + .word 98b, y; \ .text; \ .align 4; diff --git a/arch/sparc/lib/NGcopy_to_user.S b/arch/sparc/lib/NGcopy_to_user.S index ff630dcb273c..5c358afd464e 100644 --- a/arch/sparc/lib/NGcopy_to_user.S +++ b/arch/sparc/lib/NGcopy_to_user.S @@ -3,11 +3,11 @@ * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net) */ -#define EX_ST(x) \ +#define EX_ST(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __ret_one_asi;\ + .word 98b, y; \ .text; \ .align 4; diff --git a/arch/sparc/lib/NGmemcpy.S b/arch/sparc/lib/NGmemcpy.S index 96a14caf6966..d88c4ed50a00 100644 --- a/arch/sparc/lib/NGmemcpy.S +++ b/arch/sparc/lib/NGmemcpy.S @@ -4,6 +4,7 @@ */ #ifdef __KERNEL__ +#include <linux/linkage.h> #include <asm/asi.h> #include <asm/thread_info.h> #define GLOBAL_SPARE %g7 @@ -27,15 +28,11 @@ #endif #ifndef EX_LD -#define EX_LD(x) x +#define EX_LD(x,y) x #endif #ifndef EX_ST -#define EX_ST(x) x -#endif - -#ifndef EX_RETVAL -#define EX_RETVAL(x) x +#define EX_ST(x,y) x #endif #ifndef LOAD @@ -79,6 +76,92 @@ .register %g3,#scratch .text +#ifndef EX_RETVAL +#define EX_RETVAL(x) x +__restore_asi: + ret + wr %g0, ASI_AIUS, %asi + restore +ENTRY(NG_ret_i2_plus_i4_plus_1) + ba,pt %xcc, __restore_asi + add %i2, %i5, %i0 +ENDPROC(NG_ret_i2_plus_i4_plus_1) +ENTRY(NG_ret_i2_plus_g1) + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1) +ENTRY(NG_ret_i2_plus_g1_minus_8) + sub %g1, 8, %g1 + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1_minus_8) +ENTRY(NG_ret_i2_plus_g1_minus_16) + sub %g1, 16, %g1 + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1_minus_16) +ENTRY(NG_ret_i2_plus_g1_minus_24) + sub %g1, 24, %g1 + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1_minus_24) +ENTRY(NG_ret_i2_plus_g1_minus_32) + sub %g1, 32, %g1 + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1_minus_32) +ENTRY(NG_ret_i2_plus_g1_minus_40) + sub %g1, 40, %g1 + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1_minus_40) +ENTRY(NG_ret_i2_plus_g1_minus_48) + sub %g1, 48, %g1 + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1_minus_48) +ENTRY(NG_ret_i2_plus_g1_minus_56) + sub %g1, 56, %g1 + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1_minus_56) +ENTRY(NG_ret_i2_plus_i4) + ba,pt %xcc, __restore_asi + add %i2, %i4, %i0 +ENDPROC(NG_ret_i2_plus_i4) +ENTRY(NG_ret_i2_plus_i4_minus_8) + sub %i4, 8, %i4 + ba,pt %xcc, __restore_asi + add %i2, %i4, %i0 +ENDPROC(NG_ret_i2_plus_i4_minus_8) +ENTRY(NG_ret_i2_plus_8) + ba,pt %xcc, __restore_asi + add %i2, 8, %i0 +ENDPROC(NG_ret_i2_plus_8) +ENTRY(NG_ret_i2_plus_4) + ba,pt %xcc, __restore_asi + add %i2, 4, %i0 +ENDPROC(NG_ret_i2_plus_4) +ENTRY(NG_ret_i2_plus_1) + ba,pt %xcc, __restore_asi + add %i2, 1, %i0 +ENDPROC(NG_ret_i2_plus_1) +ENTRY(NG_ret_i2_plus_g1_plus_1) + add %g1, 1, %g1 + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1_plus_1) +ENTRY(NG_ret_i2) + ba,pt %xcc, __restore_asi + mov %i2, %i0 +ENDPROC(NG_ret_i2) +ENTRY(NG_ret_i2_and_7_plus_i4) + and %i2, 7, %i2 + ba,pt %xcc, __restore_asi + add %i2, %i4, %i0 +ENDPROC(NG_ret_i2_and_7_plus_i4) +#endif + .align 64 .globl FUNC_NAME @@ -126,8 +209,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ sub %g0, %i4, %i4 ! bytes to align dst sub %i2, %i4, %i2 1: subcc %i4, 1, %i4 - EX_LD(LOAD(ldub, %i1, %g1)) - EX_ST(STORE(stb, %g1, %o0)) + EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_i4_plus_1) + EX_ST(STORE(stb, %g1, %o0), NG_ret_i2_plus_i4_plus_1) add %i1, 1, %i1 bne,pt %XCC, 1b add %o0, 1, %o0 @@ -160,7 +243,7 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ and %i4, 0x7, GLOBAL_SPARE sll GLOBAL_SPARE, 3, GLOBAL_SPARE mov 64, %i5 - EX_LD(LOAD_TWIN(%i1, %g2, %g3)) + EX_LD(LOAD_TWIN(%i1, %g2, %g3), NG_ret_i2_plus_g1) sub %i5, GLOBAL_SPARE, %i5 mov 16, %o4 mov 32, %o5 @@ -178,31 +261,31 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ srlx WORD3, PRE_SHIFT, TMP; \ or WORD2, TMP, WORD2; -8: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3)) +8: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1) MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1) LOAD(prefetch, %i1 + %i3, #one_read) - EX_ST(STORE_INIT(%g2, %o0 + 0x00)) - EX_ST(STORE_INIT(%g3, %o0 + 0x08)) + EX_ST(STORE_INIT(%g2, %o0 + 0x00), NG_ret_i2_plus_g1) + EX_ST(STORE_INIT(%g3, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8) - EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3)) + EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16) MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1) - EX_ST(STORE_INIT(%o2, %o0 + 0x10)) - EX_ST(STORE_INIT(%o3, %o0 + 0x18)) + EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16) + EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24) - EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3)) + EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32) MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1) - EX_ST(STORE_INIT(%g2, %o0 + 0x20)) - EX_ST(STORE_INIT(%g3, %o0 + 0x28)) + EX_ST(STORE_INIT(%g2, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32) + EX_ST(STORE_INIT(%g3, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40) - EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3)) + EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48) add %i1, 64, %i1 MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1) - EX_ST(STORE_INIT(%o2, %o0 + 0x30)) - EX_ST(STORE_INIT(%o3, %o0 + 0x38)) + EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48) + EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56) subcc %g1, 64, %g1 bne,pt %XCC, 8b @@ -211,31 +294,31 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ ba,pt %XCC, 60f add %i1, %i4, %i1 -9: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3)) +9: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1) MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1) LOAD(prefetch, %i1 + %i3, #one_read) - EX_ST(STORE_INIT(%g3, %o0 + 0x00)) - EX_ST(STORE_INIT(%o2, %o0 + 0x08)) + EX_ST(STORE_INIT(%g3, %o0 + 0x00), NG_ret_i2_plus_g1) + EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8) - EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3)) + EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16) MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1) - EX_ST(STORE_INIT(%o3, %o0 + 0x10)) - EX_ST(STORE_INIT(%g2, %o0 + 0x18)) + EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16) + EX_ST(STORE_INIT(%g2, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24) - EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3)) + EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32) MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1) - EX_ST(STORE_INIT(%g3, %o0 + 0x20)) - EX_ST(STORE_INIT(%o2, %o0 + 0x28)) + EX_ST(STORE_INIT(%g3, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32) + EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40) - EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3)) + EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48) add %i1, 64, %i1 MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1) - EX_ST(STORE_INIT(%o3, %o0 + 0x30)) - EX_ST(STORE_INIT(%g2, %o0 + 0x38)) + EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48) + EX_ST(STORE_INIT(%g2, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56) subcc %g1, 64, %g1 bne,pt %XCC, 9b @@ -249,25 +332,25 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ * one twin load ahead, then add 8 back into source when * we finish the loop. */ - EX_LD(LOAD_TWIN(%i1, %o4, %o5)) + EX_LD(LOAD_TWIN(%i1, %o4, %o5), NG_ret_i2_plus_g1) mov 16, %o7 mov 32, %g2 mov 48, %g3 mov 64, %o1 -1: EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3)) +1: EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1) LOAD(prefetch, %i1 + %o1, #one_read) - EX_ST(STORE_INIT(%o5, %o0 + 0x00)) ! initializes cache line - EX_ST(STORE_INIT(%o2, %o0 + 0x08)) - EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5)) - EX_ST(STORE_INIT(%o3, %o0 + 0x10)) - EX_ST(STORE_INIT(%o4, %o0 + 0x18)) - EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3)) - EX_ST(STORE_INIT(%o5, %o0 + 0x20)) - EX_ST(STORE_INIT(%o2, %o0 + 0x28)) - EX_LD(LOAD_TWIN(%i1 + %o1, %o4, %o5)) + EX_ST(STORE_INIT(%o5, %o0 + 0x00), NG_ret_i2_plus_g1) ! initializes cache line + EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8) + EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16) + EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16) + EX_ST(STORE_INIT(%o4, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24) + EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32) + EX_ST(STORE_INIT(%o5, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32) + EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40) + EX_LD(LOAD_TWIN(%i1 + %o1, %o4, %o5), NG_ret_i2_plus_g1_minus_48) add %i1, 64, %i1 - EX_ST(STORE_INIT(%o3, %o0 + 0x30)) - EX_ST(STORE_INIT(%o4, %o0 + 0x38)) + EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48) + EX_ST(STORE_INIT(%o4, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56) subcc %g1, 64, %g1 bne,pt %XCC, 1b add %o0, 64, %o0 @@ -282,20 +365,20 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ mov 32, %g2 mov 48, %g3 mov 64, %o1 -1: EX_LD(LOAD_TWIN(%i1 + %g0, %o4, %o5)) - EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3)) +1: EX_LD(LOAD_TWIN(%i1 + %g0, %o4, %o5), NG_ret_i2_plus_g1) + EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1) LOAD(prefetch, %i1 + %o1, #one_read) - EX_ST(STORE_INIT(%o4, %o0 + 0x00)) ! initializes cache line - EX_ST(STORE_INIT(%o5, %o0 + 0x08)) - EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5)) - EX_ST(STORE_INIT(%o2, %o0 + 0x10)) - EX_ST(STORE_INIT(%o3, %o0 + 0x18)) - EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3)) + EX_ST(STORE_INIT(%o4, %o0 + 0x00), NG_ret_i2_plus_g1) ! initializes cache line + EX_ST(STORE_INIT(%o5, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8) + EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16) + EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16) + EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24) + EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32) add %i1, 64, %i1 - EX_ST(STORE_INIT(%o4, %o0 + 0x20)) - EX_ST(STORE_INIT(%o5, %o0 + 0x28)) - EX_ST(STORE_INIT(%o2, %o0 + 0x30)) - EX_ST(STORE_INIT(%o3, %o0 + 0x38)) + EX_ST(STORE_INIT(%o4, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32) + EX_ST(STORE_INIT(%o5, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40) + EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48) + EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56) subcc %g1, 64, %g1 bne,pt %XCC, 1b add %o0, 64, %o0 @@ -321,28 +404,28 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ andn %i2, 0xf, %i4 and %i2, 0xf, %i2 1: subcc %i4, 0x10, %i4 - EX_LD(LOAD(ldx, %i1, %o4)) + EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_i4) add %i1, 0x08, %i1 - EX_LD(LOAD(ldx, %i1, %g1)) + EX_LD(LOAD(ldx, %i1, %g1), NG_ret_i2_plus_i4) sub %i1, 0x08, %i1 - EX_ST(STORE(stx, %o4, %i1 + %i3)) + EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_i4) add %i1, 0x8, %i1 - EX_ST(STORE(stx, %g1, %i1 + %i3)) + EX_ST(STORE(stx, %g1, %i1 + %i3), NG_ret_i2_plus_i4_minus_8) bgu,pt %XCC, 1b add %i1, 0x8, %i1 73: andcc %i2, 0x8, %g0 be,pt %XCC, 1f nop sub %i2, 0x8, %i2 - EX_LD(LOAD(ldx, %i1, %o4)) - EX_ST(STORE(stx, %o4, %i1 + %i3)) + EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_8) + EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_8) add %i1, 0x8, %i1 1: andcc %i2, 0x4, %g0 be,pt %XCC, 1f nop sub %i2, 0x4, %i2 - EX_LD(LOAD(lduw, %i1, %i5)) - EX_ST(STORE(stw, %i5, %i1 + %i3)) + EX_LD(LOAD(lduw, %i1, %i5), NG_ret_i2_plus_4) + EX_ST(STORE(stw, %i5, %i1 + %i3), NG_ret_i2_plus_4) add %i1, 0x4, %i1 1: cmp %i2, 0 be,pt %XCC, 85f @@ -358,8 +441,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ sub %i2, %g1, %i2 1: subcc %g1, 1, %g1 - EX_LD(LOAD(ldub, %i1, %i5)) - EX_ST(STORE(stb, %i5, %i1 + %i3)) + EX_LD(LOAD(ldub, %i1, %i5), NG_ret_i2_plus_g1_plus_1) + EX_ST(STORE(stb, %i5, %i1 + %i3), NG_ret_i2_plus_g1_plus_1) bgu,pt %icc, 1b add %i1, 1, %i1 @@ -375,16 +458,16 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ 8: mov 64, %i3 andn %i1, 0x7, %i1 - EX_LD(LOAD(ldx, %i1, %g2)) + EX_LD(LOAD(ldx, %i1, %g2), NG_ret_i2) sub %i3, %g1, %i3 andn %i2, 0x7, %i4 sllx %g2, %g1, %g2 1: add %i1, 0x8, %i1 - EX_LD(LOAD(ldx, %i1, %g3)) + EX_LD(LOAD(ldx, %i1, %g3), NG_ret_i2_and_7_plus_i4) subcc %i4, 0x8, %i4 srlx %g3, %i3, %i5 or %i5, %g2, %i5 - EX_ST(STORE(stx, %i5, %o0)) + EX_ST(STORE(stx, %i5, %o0), NG_ret_i2_and_7_plus_i4) add %o0, 0x8, %o0 bgu,pt %icc, 1b sllx %g3, %g1, %g2 @@ -404,8 +487,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ 1: subcc %i2, 4, %i2 - EX_LD(LOAD(lduw, %i1, %g1)) - EX_ST(STORE(stw, %g1, %i1 + %i3)) + EX_LD(LOAD(lduw, %i1, %g1), NG_ret_i2_plus_4) + EX_ST(STORE(stw, %g1, %i1 + %i3), NG_ret_i2_plus_4) bgu,pt %XCC, 1b add %i1, 4, %i1 @@ -415,8 +498,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ .align 32 90: subcc %i2, 1, %i2 - EX_LD(LOAD(ldub, %i1, %g1)) - EX_ST(STORE(stb, %g1, %i1 + %i3)) + EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_1) + EX_ST(STORE(stb, %g1, %i1 + %i3), NG_ret_i2_plus_1) bgu,pt %XCC, 90b add %i1, 1, %i1 ret diff --git a/arch/sparc/lib/U1copy_from_user.S b/arch/sparc/lib/U1copy_from_user.S index ecc5692fa2b4..bb6ff73229e3 100644 --- a/arch/sparc/lib/U1copy_from_user.S +++ b/arch/sparc/lib/U1copy_from_user.S @@ -3,19 +3,19 @@ * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com) */ -#define EX_LD(x) \ +#define EX_LD(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one; \ + .word 98b, y; \ .text; \ .align 4; -#define EX_LD_FP(x) \ +#define EX_LD_FP(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_fp;\ + .word 98b, y; \ .text; \ .align 4; diff --git a/arch/sparc/lib/U1copy_to_user.S b/arch/sparc/lib/U1copy_to_user.S index 9eea392e44d4..ed92ce739558 100644 --- a/arch/sparc/lib/U1copy_to_user.S +++ b/arch/sparc/lib/U1copy_to_user.S @@ -3,19 +3,19 @@ * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com) */ -#define EX_ST(x) \ +#define EX_ST(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one; \ + .word 98b, y; \ .text; \ .align 4; -#define EX_ST_FP(x) \ +#define EX_ST_FP(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_fp;\ + .word 98b, y; \ .text; \ .align 4; diff --git a/arch/sparc/lib/U1memcpy.S b/arch/sparc/lib/U1memcpy.S index 3e6209ebb7d7..f30d2ab2c371 100644 --- a/arch/sparc/lib/U1memcpy.S +++ b/arch/sparc/lib/U1memcpy.S @@ -5,6 +5,7 @@ */ #ifdef __KERNEL__ +#include <linux/linkage.h> #include <asm/visasm.h> #include <asm/asi.h> #define GLOBAL_SPARE g7 @@ -23,21 +24,17 @@ #endif #ifndef EX_LD -#define EX_LD(x) x +#define EX_LD(x,y) x #endif #ifndef EX_LD_FP -#define EX_LD_FP(x) x +#define EX_LD_FP(x,y) x #endif #ifndef EX_ST -#define EX_ST(x) x +#define EX_ST(x,y) x #endif #ifndef EX_ST_FP -#define EX_ST_FP(x) x -#endif - -#ifndef EX_RETVAL -#define EX_RETVAL(x) x +#define EX_ST_FP(x,y) x #endif #ifndef LOAD @@ -78,53 +75,169 @@ faligndata %f7, %f8, %f60; \ faligndata %f8, %f9, %f62; -#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt) \ - EX_LD_FP(LOAD_BLK(%src, %fdest)); \ - EX_ST_FP(STORE_BLK(%fsrc, %dest)); \ - add %src, 0x40, %src; \ - subcc %len, 0x40, %len; \ - be,pn %xcc, jmptgt; \ - add %dest, 0x40, %dest; \ - -#define LOOP_CHUNK1(src, dest, len, branch_dest) \ - MAIN_LOOP_CHUNK(src, dest, f0, f48, len, branch_dest) -#define LOOP_CHUNK2(src, dest, len, branch_dest) \ - MAIN_LOOP_CHUNK(src, dest, f16, f48, len, branch_dest) -#define LOOP_CHUNK3(src, dest, len, branch_dest) \ - MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest) +#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, jmptgt) \ + EX_LD_FP(LOAD_BLK(%src, %fdest), U1_gs_80_fp); \ + EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp); \ + add %src, 0x40, %src; \ + subcc %GLOBAL_SPARE, 0x40, %GLOBAL_SPARE; \ + be,pn %xcc, jmptgt; \ + add %dest, 0x40, %dest; \ + +#define LOOP_CHUNK1(src, dest, branch_dest) \ + MAIN_LOOP_CHUNK(src, dest, f0, f48, branch_dest) +#define LOOP_CHUNK2(src, dest, branch_dest) \ + MAIN_LOOP_CHUNK(src, dest, f16, f48, branch_dest) +#define LOOP_CHUNK3(src, dest, branch_dest) \ + MAIN_LOOP_CHUNK(src, dest, f32, f48, branch_dest) #define DO_SYNC membar #Sync; #define STORE_SYNC(dest, fsrc) \ - EX_ST_FP(STORE_BLK(%fsrc, %dest)); \ + EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp); \ add %dest, 0x40, %dest; \ DO_SYNC #define STORE_JUMP(dest, fsrc, target) \ - EX_ST_FP(STORE_BLK(%fsrc, %dest)); \ + EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_40_fp); \ add %dest, 0x40, %dest; \ ba,pt %xcc, target; \ nop; -#define FINISH_VISCHUNK(dest, f0, f1, left) \ - subcc %left, 8, %left;\ - bl,pn %xcc, 95f; \ - faligndata %f0, %f1, %f48; \ - EX_ST_FP(STORE(std, %f48, %dest)); \ +#define FINISH_VISCHUNK(dest, f0, f1) \ + subcc %g3, 8, %g3; \ + bl,pn %xcc, 95f; \ + faligndata %f0, %f1, %f48; \ + EX_ST_FP(STORE(std, %f48, %dest), U1_g3_8_fp); \ add %dest, 8, %dest; -#define UNEVEN_VISCHUNK_LAST(dest, f0, f1, left) \ - subcc %left, 8, %left; \ - bl,pn %xcc, 95f; \ +#define UNEVEN_VISCHUNK_LAST(dest, f0, f1) \ + subcc %g3, 8, %g3; \ + bl,pn %xcc, 95f; \ fsrc2 %f0, %f1; -#define UNEVEN_VISCHUNK(dest, f0, f1, left) \ - UNEVEN_VISCHUNK_LAST(dest, f0, f1, left) \ +#define UNEVEN_VISCHUNK(dest, f0, f1) \ + UNEVEN_VISCHUNK_LAST(dest, f0, f1) \ ba,a,pt %xcc, 93f; .register %g2,#scratch .register %g3,#scratch .text +#ifndef EX_RETVAL +#define EX_RETVAL(x) x +ENTRY(U1_g1_1_fp) + VISExitHalf + add %g1, 1, %g1 + add %g1, %g2, %g1 + retl + add %g1, %o2, %o0 +ENDPROC(U1_g1_1_fp) +ENTRY(U1_g2_0_fp) + VISExitHalf + retl + add %g2, %o2, %o0 +ENDPROC(U1_g2_0_fp) +ENTRY(U1_g2_8_fp) + VISExitHalf + add %g2, 8, %g2 + retl + add %g2, %o2, %o0 +ENDPROC(U1_g2_8_fp) +ENTRY(U1_gs_0_fp) + VISExitHalf + add %GLOBAL_SPARE, %g3, %o0 + retl + add %o0, %o2, %o0 +ENDPROC(U1_gs_0_fp) +ENTRY(U1_gs_80_fp) + VISExitHalf + add %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE + add %GLOBAL_SPARE, %g3, %o0 + retl + add %o0, %o2, %o0 +ENDPROC(U1_gs_80_fp) +ENTRY(U1_gs_40_fp) + VISExitHalf + add %GLOBAL_SPARE, 0x40, %GLOBAL_SPARE + add %GLOBAL_SPARE, %g3, %o0 + retl + add %o0, %o2, %o0 +ENDPROC(U1_gs_40_fp) +ENTRY(U1_g3_0_fp) + VISExitHalf + retl + add %g3, %o2, %o0 +ENDPROC(U1_g3_0_fp) +ENTRY(U1_g3_8_fp) + VISExitHalf + add %g3, 8, %g3 + retl + add %g3, %o2, %o0 +ENDPROC(U1_g3_8_fp) +ENTRY(U1_o2_0_fp) + VISExitHalf + retl + mov %o2, %o0 +ENDPROC(U1_o2_0_fp) +ENTRY(U1_o2_1_fp) + VISExitHalf + retl + add %o2, 1, %o0 +ENDPROC(U1_o2_1_fp) +ENTRY(U1_gs_0) + VISExitHalf + retl + add %GLOBAL_SPARE, %o2, %o0 +ENDPROC(U1_gs_0) +ENTRY(U1_gs_8) + VISExitHalf + add %GLOBAL_SPARE, %o2, %GLOBAL_SPARE + retl + add %GLOBAL_SPARE, 0x8, %o0 +ENDPROC(U1_gs_8) +ENTRY(U1_gs_10) + VISExitHalf + add %GLOBAL_SPARE, %o2, %GLOBAL_SPARE + retl + add %GLOBAL_SPARE, 0x10, %o0 +ENDPROC(U1_gs_10) +ENTRY(U1_o2_0) + retl + mov %o2, %o0 +ENDPROC(U1_o2_0) +ENTRY(U1_o2_8) + retl + add %o2, 8, %o0 +ENDPROC(U1_o2_8) +ENTRY(U1_o2_4) + retl + add %o2, 4, %o0 +ENDPROC(U1_o2_4) +ENTRY(U1_o2_1) + retl + add %o2, 1, %o0 +ENDPROC(U1_o2_1) +ENTRY(U1_g1_0) + retl + add %g1, %o2, %o0 +ENDPROC(U1_g1_0) +ENTRY(U1_g1_1) + add %g1, 1, %g1 + retl + add %g1, %o2, %o0 +ENDPROC(U1_g1_1) +ENTRY(U1_gs_0_o2_adj) + and %o2, 7, %o2 + retl + add %GLOBAL_SPARE, %o2, %o0 +ENDPROC(U1_gs_0_o2_adj) +ENTRY(U1_gs_8_o2_adj) + and %o2, 7, %o2 + add %GLOBAL_SPARE, 8, %GLOBAL_SPARE + retl + add %GLOBAL_SPARE, %o2, %o0 +ENDPROC(U1_gs_8_o2_adj) +#endif + .align 64 .globl FUNC_NAME @@ -166,8 +279,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ and %g2, 0x38, %g2 1: subcc %g1, 0x1, %g1 - EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3)) - EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE)) + EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U1_g1_1_fp) + EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE), U1_g1_1_fp) bgu,pt %XCC, 1b add %o1, 0x1, %o1 @@ -178,20 +291,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ be,pt %icc, 3f alignaddr %o1, %g0, %o1 - EX_LD_FP(LOAD(ldd, %o1, %f4)) -1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6)) + EX_LD_FP(LOAD(ldd, %o1, %f4), U1_g2_0_fp) +1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U1_g2_0_fp) add %o1, 0x8, %o1 subcc %g2, 0x8, %g2 faligndata %f4, %f6, %f0 - EX_ST_FP(STORE(std, %f0, %o0)) + EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp) be,pn %icc, 3f add %o0, 0x8, %o0 - EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4)) + EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U1_g2_0_fp) add %o1, 0x8, %o1 subcc %g2, 0x8, %g2 faligndata %f6, %f4, %f0 - EX_ST_FP(STORE(std, %f0, %o0)) + EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp) bne,pt %icc, 1b add %o0, 0x8, %o0 @@ -214,13 +327,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ add %g1, %GLOBAL_SPARE, %g1 subcc %o2, %g3, %o2 - EX_LD_FP(LOAD_BLK(%o1, %f0)) + EX_LD_FP(LOAD_BLK(%o1, %f0), U1_gs_0_fp) add %o1, 0x40, %o1 add %g1, %g3, %g1 - EX_LD_FP(LOAD_BLK(%o1, %f16)) + EX_LD_FP(LOAD_BLK(%o1, %f16), U1_gs_0_fp) add %o1, 0x40, %o1 sub %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE - EX_LD_FP(LOAD_BLK(%o1, %f32)) + EX_LD_FP(LOAD_BLK(%o1, %f32), U1_gs_80_fp) add %o1, 0x40, %o1 /* There are 8 instances of the unrolled loop, @@ -240,11 +353,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ .align 64 1: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) + LOOP_CHUNK1(o1, o0, 1f) FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) + LOOP_CHUNK2(o1, o0, 2f) FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) + LOOP_CHUNK3(o1, o0, 3f) ba,pt %xcc, 1b+4 faligndata %f0, %f2, %f48 1: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) @@ -261,11 +374,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ STORE_JUMP(o0, f48, 56f) 1: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) + LOOP_CHUNK1(o1, o0, 1f) FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) + LOOP_CHUNK2(o1, o0, 2f) FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) + LOOP_CHUNK3(o1, o0, 3f) ba,pt %xcc, 1b+4 faligndata %f2, %f4, %f48 1: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) @@ -282,11 +395,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ STORE_JUMP(o0, f48, 57f) 1: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) + LOOP_CHUNK1(o1, o0, 1f) FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) + LOOP_CHUNK2(o1, o0, 2f) FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) + LOOP_CHUNK3(o1, o0, 3f) ba,pt %xcc, 1b+4 faligndata %f4, %f6, %f48 1: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) @@ -303,11 +416,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ STORE_JUMP(o0, f48, 58f) 1: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) + LOOP_CHUNK1(o1, o0, 1f) FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) + LOOP_CHUNK2(o1, o0, 2f) FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) + LOOP_CHUNK3(o1, o0, 3f) ba,pt %xcc, 1b+4 faligndata %f6, %f8, %f48 1: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) @@ -324,11 +437,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ STORE_JUMP(o0, f48, 59f) 1: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) + LOOP_CHUNK1(o1, o0, 1f) FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) + LOOP_CHUNK2(o1, o0, 2f) FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) + LOOP_CHUNK3(o1, o0, 3f) ba,pt %xcc, 1b+4 faligndata %f8, %f10, %f48 1: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) @@ -345,11 +458,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ STORE_JUMP(o0, f48, 60f) 1: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) + LOOP_CHUNK1(o1, o0, 1f) FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) + LOOP_CHUNK2(o1, o0, 2f) FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) + LOOP_CHUNK3(o1, o0, 3f) ba,pt %xcc, 1b+4 faligndata %f10, %f12, %f48 1: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) @@ -366,11 +479,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ STORE_JUMP(o0, f48, 61f) 1: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) + LOOP_CHUNK1(o1, o0, 1f) FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) + LOOP_CHUNK2(o1, o0, 2f) FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) + LOOP_CHUNK3(o1, o0, 3f) ba,pt %xcc, 1b+4 faligndata %f12, %f14, %f48 1: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) @@ -387,11 +500,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ STORE_JUMP(o0, f48, 62f) 1: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) + LOOP_CHUNK1(o1, o0, 1f) FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) + LOOP_CHUNK2(o1, o0, 2f) FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) + LOOP_CHUNK3(o1, o0, 3f) ba,pt %xcc, 1b+4 faligndata %f14, %f16, %f48 1: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) @@ -407,53 +520,53 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) STORE_JUMP(o0, f48, 63f) -40: FINISH_VISCHUNK(o0, f0, f2, g3) -41: FINISH_VISCHUNK(o0, f2, f4, g3) -42: FINISH_VISCHUNK(o0, f4, f6, g3) -43: FINISH_VISCHUNK(o0, f6, f8, g3) -44: FINISH_VISCHUNK(o0, f8, f10, g3) -45: FINISH_VISCHUNK(o0, f10, f12, g3) -46: FINISH_VISCHUNK(o0, f12, f14, g3) -47: UNEVEN_VISCHUNK(o0, f14, f0, g3) -48: FINISH_VISCHUNK(o0, f16, f18, g3) -49: FINISH_VISCHUNK(o0, f18, f20, g3) -50: FINISH_VISCHUNK(o0, f20, f22, g3) -51: FINISH_VISCHUNK(o0, f22, f24, g3) -52: FINISH_VISCHUNK(o0, f24, f26, g3) -53: FINISH_VISCHUNK(o0, f26, f28, g3) -54: FINISH_VISCHUNK(o0, f28, f30, g3) -55: UNEVEN_VISCHUNK(o0, f30, f0, g3) -56: FINISH_VISCHUNK(o0, f32, f34, g3) -57: FINISH_VISCHUNK(o0, f34, f36, g3) -58: FINISH_VISCHUNK(o0, f36, f38, g3) -59: FINISH_VISCHUNK(o0, f38, f40, g3) -60: FINISH_VISCHUNK(o0, f40, f42, g3) -61: FINISH_VISCHUNK(o0, f42, f44, g3) -62: FINISH_VISCHUNK(o0, f44, f46, g3) -63: UNEVEN_VISCHUNK_LAST(o0, f46, f0, g3) - -93: EX_LD_FP(LOAD(ldd, %o1, %f2)) +40: FINISH_VISCHUNK(o0, f0, f2) +41: FINISH_VISCHUNK(o0, f2, f4) +42: FINISH_VISCHUNK(o0, f4, f6) +43: FINISH_VISCHUNK(o0, f6, f8) +44: FINISH_VISCHUNK(o0, f8, f10) +45: FINISH_VISCHUNK(o0, f10, f12) +46: FINISH_VISCHUNK(o0, f12, f14) +47: UNEVEN_VISCHUNK(o0, f14, f0) +48: FINISH_VISCHUNK(o0, f16, f18) +49: FINISH_VISCHUNK(o0, f18, f20) +50: FINISH_VISCHUNK(o0, f20, f22) +51: FINISH_VISCHUNK(o0, f22, f24) +52: FINISH_VISCHUNK(o0, f24, f26) +53: FINISH_VISCHUNK(o0, f26, f28) +54: FINISH_VISCHUNK(o0, f28, f30) +55: UNEVEN_VISCHUNK(o0, f30, f0) +56: FINISH_VISCHUNK(o0, f32, f34) +57: FINISH_VISCHUNK(o0, f34, f36) +58: FINISH_VISCHUNK(o0, f36, f38) +59: FINISH_VISCHUNK(o0, f38, f40) +60: FINISH_VISCHUNK(o0, f40, f42) +61: FINISH_VISCHUNK(o0, f42, f44) +62: FINISH_VISCHUNK(o0, f44, f46) +63: UNEVEN_VISCHUNK_LAST(o0, f46, f0) + +93: EX_LD_FP(LOAD(ldd, %o1, %f2), U1_g3_0_fp) add %o1, 8, %o1 subcc %g3, 8, %g3 faligndata %f0, %f2, %f8 - EX_ST_FP(STORE(std, %f8, %o0)) + EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp) bl,pn %xcc, 95f add %o0, 8, %o0 - EX_LD_FP(LOAD(ldd, %o1, %f0)) + EX_LD_FP(LOAD(ldd, %o1, %f0), U1_g3_0_fp) add %o1, 8, %o1 subcc %g3, 8, %g3 faligndata %f2, %f0, %f8 - EX_ST_FP(STORE(std, %f8, %o0)) + EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp) bge,pt %xcc, 93b add %o0, 8, %o0 95: brz,pt %o2, 2f mov %g1, %o1 -1: EX_LD_FP(LOAD(ldub, %o1, %o3)) +1: EX_LD_FP(LOAD(ldub, %o1, %o3), U1_o2_0_fp) add %o1, 1, %o1 subcc %o2, 1, %o2 - EX_ST_FP(STORE(stb, %o3, %o0)) + EX_ST_FP(STORE(stb, %o3, %o0), U1_o2_1_fp) bne,pt %xcc, 1b add %o0, 1, %o0 @@ -469,27 +582,27 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 72: andn %o2, 0xf, %GLOBAL_SPARE and %o2, 0xf, %o2 -1: EX_LD(LOAD(ldx, %o1 + 0x00, %o5)) - EX_LD(LOAD(ldx, %o1 + 0x08, %g1)) +1: EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U1_gs_0) + EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U1_gs_0) subcc %GLOBAL_SPARE, 0x10, %GLOBAL_SPARE - EX_ST(STORE(stx, %o5, %o1 + %o3)) + EX_ST(STORE(stx, %o5, %o1 + %o3), U1_gs_10) add %o1, 0x8, %o1 - EX_ST(STORE(stx, %g1, %o1 + %o3)) + EX_ST(STORE(stx, %g1, %o1 + %o3), U1_gs_8) bgu,pt %XCC, 1b add %o1, 0x8, %o1 73: andcc %o2, 0x8, %g0 be,pt %XCC, 1f nop - EX_LD(LOAD(ldx, %o1, %o5)) + EX_LD(LOAD(ldx, %o1, %o5), U1_o2_0) sub %o2, 0x8, %o2 - EX_ST(STORE(stx, %o5, %o1 + %o3)) + EX_ST(STORE(stx, %o5, %o1 + %o3), U1_o2_8) add %o1, 0x8, %o1 1: andcc %o2, 0x4, %g0 be,pt %XCC, 1f nop - EX_LD(LOAD(lduw, %o1, %o5)) + EX_LD(LOAD(lduw, %o1, %o5), U1_o2_0) sub %o2, 0x4, %o2 - EX_ST(STORE(stw, %o5, %o1 + %o3)) + EX_ST(STORE(stw, %o5, %o1 + %o3), U1_o2_4) add %o1, 0x4, %o1 1: cmp %o2, 0 be,pt %XCC, 85f @@ -503,9 +616,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ sub %g0, %g1, %g1 sub %o2, %g1, %o2 -1: EX_LD(LOAD(ldub, %o1, %o5)) +1: EX_LD(LOAD(ldub, %o1, %o5), U1_g1_0) subcc %g1, 1, %g1 - EX_ST(STORE(stb, %o5, %o1 + %o3)) + EX_ST(STORE(stb, %o5, %o1 + %o3), U1_g1_1) bgu,pt %icc, 1b add %o1, 1, %o1 @@ -521,16 +634,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 8: mov 64, %o3 andn %o1, 0x7, %o1 - EX_LD(LOAD(ldx, %o1, %g2)) + EX_LD(LOAD(ldx, %o1, %g2), U1_o2_0) sub %o3, %g1, %o3 andn %o2, 0x7, %GLOBAL_SPARE sllx %g2, %g1, %g2 -1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3)) +1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U1_gs_0_o2_adj) subcc %GLOBAL_SPARE, 0x8, %GLOBAL_SPARE add %o1, 0x8, %o1 srlx %g3, %o3, %o5 or %o5, %g2, %o5 - EX_ST(STORE(stx, %o5, %o0)) + EX_ST(STORE(stx, %o5, %o0), U1_gs_8_o2_adj) add %o0, 0x8, %o0 bgu,pt %icc, 1b sllx %g3, %g1, %g2 @@ -548,9 +661,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ bne,pn %XCC, 90f sub %o0, %o1, %o3 -1: EX_LD(LOAD(lduw, %o1, %g1)) +1: EX_LD(LOAD(lduw, %o1, %g1), U1_o2_0) subcc %o2, 4, %o2 - EX_ST(STORE(stw, %g1, %o1 + %o3)) + EX_ST(STORE(stw, %g1, %o1 + %o3), U1_o2_4) bgu,pt %XCC, 1b add %o1, 4, %o1 @@ -558,9 +671,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ mov EX_RETVAL(%o4), %o0 .align 32 -90: EX_LD(LOAD(ldub, %o1, %g1)) +90: EX_LD(LOAD(ldub, %o1, %g1), U1_o2_0) subcc %o2, 1, %o2 - EX_ST(STORE(stb, %g1, %o1 + %o3)) + EX_ST(STORE(stb, %g1, %o1 + %o3), U1_o2_1) bgu,pt %XCC, 90b add %o1, 1, %o1 retl diff --git a/arch/sparc/lib/U3copy_from_user.S b/arch/sparc/lib/U3copy_from_user.S index 88ad73d86fe4..db73010a1af8 100644 --- a/arch/sparc/lib/U3copy_from_user.S +++ b/arch/sparc/lib/U3copy_from_user.S @@ -3,19 +3,19 @@ * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com) */ -#define EX_LD(x) \ +#define EX_LD(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one; \ + .word 98b, y; \ .text; \ .align 4; -#define EX_LD_FP(x) \ +#define EX_LD_FP(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_fp;\ + .word 98b, y##_fp; \ .text; \ .align 4; diff --git a/arch/sparc/lib/U3copy_to_user.S b/arch/sparc/lib/U3copy_to_user.S index 845139d75537..c4ee858e352a 100644 --- a/arch/sparc/lib/U3copy_to_user.S +++ b/arch/sparc/lib/U3copy_to_user.S @@ -3,19 +3,19 @@ * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com) */ -#define EX_ST(x) \ +#define EX_ST(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one; \ + .word 98b, y; \ .text; \ .align 4; -#define EX_ST_FP(x) \ +#define EX_ST_FP(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_fp;\ + .word 98b, y##_fp; \ .text; \ .align 4; diff --git a/arch/sparc/lib/U3memcpy.S b/arch/sparc/lib/U3memcpy.S index 491ee69e4995..5a8cb37f0a3b 100644 --- a/arch/sparc/lib/U3memcpy.S +++ b/arch/sparc/lib/U3memcpy.S @@ -4,6 +4,7 @@ */ #ifdef __KERNEL__ +#include <linux/linkage.h> #include <asm/visasm.h> #include <asm/asi.h> #define GLOBAL_SPARE %g7 @@ -22,21 +23,17 @@ #endif #ifndef EX_LD -#define EX_LD(x) x +#define EX_LD(x,y) x #endif #ifndef EX_LD_FP -#define EX_LD_FP(x) x +#define EX_LD_FP(x,y) x #endif #ifndef EX_ST -#define EX_ST(x) x +#define EX_ST(x,y) x #endif #ifndef EX_ST_FP -#define EX_ST_FP(x) x -#endif - -#ifndef EX_RETVAL -#define EX_RETVAL(x) x +#define EX_ST_FP(x,y) x #endif #ifndef LOAD @@ -77,6 +74,87 @@ */ .text +#ifndef EX_RETVAL +#define EX_RETVAL(x) x +__restore_fp: + VISExitHalf + retl + nop +ENTRY(U3_retl_o2_plus_g2_plus_g1_plus_1_fp) + add %g1, 1, %g1 + add %g2, %g1, %g2 + ba,pt %xcc, __restore_fp + add %o2, %g2, %o0 +ENDPROC(U3_retl_o2_plus_g2_plus_g1_plus_1_fp) +ENTRY(U3_retl_o2_plus_g2_fp) + ba,pt %xcc, __restore_fp + add %o2, %g2, %o0 +ENDPROC(U3_retl_o2_plus_g2_fp) +ENTRY(U3_retl_o2_plus_g2_plus_8_fp) + add %g2, 8, %g2 + ba,pt %xcc, __restore_fp + add %o2, %g2, %o0 +ENDPROC(U3_retl_o2_plus_g2_plus_8_fp) +ENTRY(U3_retl_o2) + retl + mov %o2, %o0 +ENDPROC(U3_retl_o2) +ENTRY(U3_retl_o2_plus_1) + retl + add %o2, 1, %o0 +ENDPROC(U3_retl_o2_plus_1) +ENTRY(U3_retl_o2_plus_4) + retl + add %o2, 4, %o0 +ENDPROC(U3_retl_o2_plus_4) +ENTRY(U3_retl_o2_plus_8) + retl + add %o2, 8, %o0 +ENDPROC(U3_retl_o2_plus_8) +ENTRY(U3_retl_o2_plus_g1_plus_1) + add %g1, 1, %g1 + retl + add %o2, %g1, %o0 +ENDPROC(U3_retl_o2_plus_g1_plus_1) +ENTRY(U3_retl_o2_fp) + ba,pt %xcc, __restore_fp + mov %o2, %o0 +ENDPROC(U3_retl_o2_fp) +ENTRY(U3_retl_o2_plus_o3_sll_6_plus_0x80_fp) + sll %o3, 6, %o3 + add %o3, 0x80, %o3 + ba,pt %xcc, __restore_fp + add %o2, %o3, %o0 +ENDPROC(U3_retl_o2_plus_o3_sll_6_plus_0x80_fp) +ENTRY(U3_retl_o2_plus_o3_sll_6_plus_0x40_fp) + sll %o3, 6, %o3 + add %o3, 0x40, %o3 + ba,pt %xcc, __restore_fp + add %o2, %o3, %o0 +ENDPROC(U3_retl_o2_plus_o3_sll_6_plus_0x40_fp) +ENTRY(U3_retl_o2_plus_GS_plus_0x10) + add GLOBAL_SPARE, 0x10, GLOBAL_SPARE + retl + add %o2, GLOBAL_SPARE, %o0 +ENDPROC(U3_retl_o2_plus_GS_plus_0x10) +ENTRY(U3_retl_o2_plus_GS_plus_0x08) + add GLOBAL_SPARE, 0x08, GLOBAL_SPARE + retl + add %o2, GLOBAL_SPARE, %o0 +ENDPROC(U3_retl_o2_plus_GS_plus_0x08) +ENTRY(U3_retl_o2_and_7_plus_GS) + and %o2, 7, %o2 + retl + add %o2, GLOBAL_SPARE, %o0 +ENDPROC(U3_retl_o2_and_7_plus_GS) +ENTRY(U3_retl_o2_and_7_plus_GS_plus_8) + add GLOBAL_SPARE, 8, GLOBAL_SPARE + and %o2, 7, %o2 + retl + add %o2, GLOBAL_SPARE, %o0 +ENDPROC(U3_retl_o2_and_7_plus_GS_plus_8) +#endif + .align 64 /* The cheetah's flexible spine, oversized liver, enlarged heart, @@ -126,8 +204,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ and %g2, 0x38, %g2 1: subcc %g1, 0x1, %g1 - EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3)) - EX_ST_FP(STORE(stb, %o3, %o1 + GLOBAL_SPARE)) + EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U3_retl_o2_plus_g2_plus_g1_plus_1) + EX_ST_FP(STORE(stb, %o3, %o1 + GLOBAL_SPARE), U3_retl_o2_plus_g2_plus_g1_plus_1) bgu,pt %XCC, 1b add %o1, 0x1, %o1 @@ -138,20 +216,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ be,pt %icc, 3f alignaddr %o1, %g0, %o1 - EX_LD_FP(LOAD(ldd, %o1, %f4)) -1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6)) + EX_LD_FP(LOAD(ldd, %o1, %f4), U3_retl_o2_plus_g2) +1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U3_retl_o2_plus_g2) add %o1, 0x8, %o1 subcc %g2, 0x8, %g2 faligndata %f4, %f6, %f0 - EX_ST_FP(STORE(std, %f0, %o0)) + EX_ST_FP(STORE(std, %f0, %o0), U3_retl_o2_plus_g2_plus_8) be,pn %icc, 3f add %o0, 0x8, %o0 - EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4)) + EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U3_retl_o2_plus_g2) add %o1, 0x8, %o1 subcc %g2, 0x8, %g2 faligndata %f6, %f4, %f2 - EX_ST_FP(STORE(std, %f2, %o0)) + EX_ST_FP(STORE(std, %f2, %o0), U3_retl_o2_plus_g2_plus_8) bne,pt %icc, 1b add %o0, 0x8, %o0 @@ -161,25 +239,25 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ LOAD(prefetch, %o1 + 0x080, #one_read) LOAD(prefetch, %o1 + 0x0c0, #one_read) LOAD(prefetch, %o1 + 0x100, #one_read) - EX_LD_FP(LOAD(ldd, %o1 + 0x000, %f0)) + EX_LD_FP(LOAD(ldd, %o1 + 0x000, %f0), U3_retl_o2) LOAD(prefetch, %o1 + 0x140, #one_read) - EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2)) + EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2) LOAD(prefetch, %o1 + 0x180, #one_read) - EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4)) + EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2) LOAD(prefetch, %o1 + 0x1c0, #one_read) faligndata %f0, %f2, %f16 - EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6)) + EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2) faligndata %f2, %f4, %f18 - EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8)) + EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2) faligndata %f4, %f6, %f20 - EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10)) + EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2) faligndata %f6, %f8, %f22 - EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12)) + EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2) faligndata %f8, %f10, %f24 - EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14)) + EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2) faligndata %f10, %f12, %f26 - EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0)) + EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2) subcc GLOBAL_SPARE, 0x80, GLOBAL_SPARE add %o1, 0x40, %o1 @@ -190,26 +268,26 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ .align 64 1: - EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2)) + EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2_plus_o3_sll_6_plus_0x80) faligndata %f12, %f14, %f28 - EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4)) + EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2_plus_o3_sll_6_plus_0x80) faligndata %f14, %f0, %f30 - EX_ST_FP(STORE_BLK(%f16, %o0)) - EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6)) + EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x80) + EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2_plus_o3_sll_6_plus_0x40) faligndata %f0, %f2, %f16 add %o0, 0x40, %o0 - EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8)) + EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2_plus_o3_sll_6_plus_0x40) faligndata %f2, %f4, %f18 - EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10)) + EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2_plus_o3_sll_6_plus_0x40) faligndata %f4, %f6, %f20 - EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12)) + EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2_plus_o3_sll_6_plus_0x40) subcc %o3, 0x01, %o3 faligndata %f6, %f8, %f22 - EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14)) + EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2_plus_o3_sll_6_plus_0x80) faligndata %f8, %f10, %f24 - EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0)) + EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2_plus_o3_sll_6_plus_0x80) LOAD(prefetch, %o1 + 0x1c0, #one_read) faligndata %f10, %f12, %f26 bg,pt %XCC, 1b @@ -217,29 +295,29 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ /* Finally we copy the last full 64-byte block. */ 2: - EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2)) + EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2_plus_o3_sll_6_plus_0x80) faligndata %f12, %f14, %f28 - EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4)) + EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2_plus_o3_sll_6_plus_0x80) faligndata %f14, %f0, %f30 - EX_ST_FP(STORE_BLK(%f16, %o0)) - EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6)) + EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x80) + EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2_plus_o3_sll_6_plus_0x40) faligndata %f0, %f2, %f16 - EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8)) + EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2_plus_o3_sll_6_plus_0x40) faligndata %f2, %f4, %f18 - EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10)) + EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2_plus_o3_sll_6_plus_0x40) faligndata %f4, %f6, %f20 - EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12)) + EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2_plus_o3_sll_6_plus_0x40) faligndata %f6, %f8, %f22 - EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14)) + EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2_plus_o3_sll_6_plus_0x40) faligndata %f8, %f10, %f24 cmp %g1, 0 be,pt %XCC, 1f add %o0, 0x40, %o0 - EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0)) + EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2_plus_o3_sll_6_plus_0x40) 1: faligndata %f10, %f12, %f26 faligndata %f12, %f14, %f28 faligndata %f14, %f0, %f30 - EX_ST_FP(STORE_BLK(%f16, %o0)) + EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x40) add %o0, 0x40, %o0 add %o1, 0x40, %o1 membar #Sync @@ -259,20 +337,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ sub %o2, %g2, %o2 be,a,pt %XCC, 1f - EX_LD_FP(LOAD(ldd, %o1 + 0x00, %f0)) + EX_LD_FP(LOAD(ldd, %o1 + 0x00, %f0), U3_retl_o2_plus_g2) -1: EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f2)) +1: EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f2), U3_retl_o2_plus_g2) add %o1, 0x8, %o1 subcc %g2, 0x8, %g2 faligndata %f0, %f2, %f8 - EX_ST_FP(STORE(std, %f8, %o0)) + EX_ST_FP(STORE(std, %f8, %o0), U3_retl_o2_plus_g2_plus_8) be,pn %XCC, 2f add %o0, 0x8, %o0 - EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f0)) + EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f0), U3_retl_o2_plus_g2) add %o1, 0x8, %o1 subcc %g2, 0x8, %g2 faligndata %f2, %f0, %f8 - EX_ST_FP(STORE(std, %f8, %o0)) + EX_ST_FP(STORE(std, %f8, %o0), U3_retl_o2_plus_g2_plus_8) bne,pn %XCC, 1b add %o0, 0x8, %o0 @@ -292,30 +370,33 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ andcc %o2, 0x8, %g0 be,pt %icc, 1f nop - EX_LD(LOAD(ldx, %o1, %o5)) - EX_ST(STORE(stx, %o5, %o1 + %o3)) + EX_LD(LOAD(ldx, %o1, %o5), U3_retl_o2) + EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2) add %o1, 0x8, %o1 + sub %o2, 8, %o2 1: andcc %o2, 0x4, %g0 be,pt %icc, 1f nop - EX_LD(LOAD(lduw, %o1, %o5)) - EX_ST(STORE(stw, %o5, %o1 + %o3)) + EX_LD(LOAD(lduw, %o1, %o5), U3_retl_o2) + EX_ST(STORE(stw, %o5, %o1 + %o3), U3_retl_o2) add %o1, 0x4, %o1 + sub %o2, 4, %o2 1: andcc %o2, 0x2, %g0 be,pt %icc, 1f nop - EX_LD(LOAD(lduh, %o1, %o5)) - EX_ST(STORE(sth, %o5, %o1 + %o3)) + EX_LD(LOAD(lduh, %o1, %o5), U3_retl_o2) + EX_ST(STORE(sth, %o5, %o1 + %o3), U3_retl_o2) add %o1, 0x2, %o1 + sub %o2, 2, %o2 1: andcc %o2, 0x1, %g0 be,pt %icc, 85f nop - EX_LD(LOAD(ldub, %o1, %o5)) + EX_LD(LOAD(ldub, %o1, %o5), U3_retl_o2) ba,pt %xcc, 85f - EX_ST(STORE(stb, %o5, %o1 + %o3)) + EX_ST(STORE(stb, %o5, %o1 + %o3), U3_retl_o2) .align 64 70: /* 16 < len <= 64 */ @@ -326,26 +407,26 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ andn %o2, 0xf, GLOBAL_SPARE and %o2, 0xf, %o2 1: subcc GLOBAL_SPARE, 0x10, GLOBAL_SPARE - EX_LD(LOAD(ldx, %o1 + 0x00, %o5)) - EX_LD(LOAD(ldx, %o1 + 0x08, %g1)) - EX_ST(STORE(stx, %o5, %o1 + %o3)) + EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U3_retl_o2_plus_GS_plus_0x10) + EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U3_retl_o2_plus_GS_plus_0x10) + EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2_plus_GS_plus_0x10) add %o1, 0x8, %o1 - EX_ST(STORE(stx, %g1, %o1 + %o3)) + EX_ST(STORE(stx, %g1, %o1 + %o3), U3_retl_o2_plus_GS_plus_0x08) bgu,pt %XCC, 1b add %o1, 0x8, %o1 73: andcc %o2, 0x8, %g0 be,pt %XCC, 1f nop sub %o2, 0x8, %o2 - EX_LD(LOAD(ldx, %o1, %o5)) - EX_ST(STORE(stx, %o5, %o1 + %o3)) + EX_LD(LOAD(ldx, %o1, %o5), U3_retl_o2_plus_8) + EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2_plus_8) add %o1, 0x8, %o1 1: andcc %o2, 0x4, %g0 be,pt %XCC, 1f nop sub %o2, 0x4, %o2 - EX_LD(LOAD(lduw, %o1, %o5)) - EX_ST(STORE(stw, %o5, %o1 + %o3)) + EX_LD(LOAD(lduw, %o1, %o5), U3_retl_o2_plus_4) + EX_ST(STORE(stw, %o5, %o1 + %o3), U3_retl_o2_plus_4) add %o1, 0x4, %o1 1: cmp %o2, 0 be,pt %XCC, 85f @@ -361,8 +442,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ sub %o2, %g1, %o2 1: subcc %g1, 1, %g1 - EX_LD(LOAD(ldub, %o1, %o5)) - EX_ST(STORE(stb, %o5, %o1 + %o3)) + EX_LD(LOAD(ldub, %o1, %o5), U3_retl_o2_plus_g1_plus_1) + EX_ST(STORE(stb, %o5, %o1 + %o3), U3_retl_o2_plus_g1_plus_1) bgu,pt %icc, 1b add %o1, 1, %o1 @@ -378,16 +459,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 8: mov 64, %o3 andn %o1, 0x7, %o1 - EX_LD(LOAD(ldx, %o1, %g2)) + EX_LD(LOAD(ldx, %o1, %g2), U3_retl_o2) sub %o3, %g1, %o3 andn %o2, 0x7, GLOBAL_SPARE sllx %g2, %g1, %g2 -1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3)) +1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U3_retl_o2_and_7_plus_GS) subcc GLOBAL_SPARE, 0x8, GLOBAL_SPARE add %o1, 0x8, %o1 srlx %g3, %o3, %o5 or %o5, %g2, %o5 - EX_ST(STORE(stx, %o5, %o0)) + EX_ST(STORE(stx, %o5, %o0), U3_retl_o2_and_7_plus_GS_plus_8) add %o0, 0x8, %o0 bgu,pt %icc, 1b sllx %g3, %g1, %g2 @@ -407,8 +488,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 1: subcc %o2, 4, %o2 - EX_LD(LOAD(lduw, %o1, %g1)) - EX_ST(STORE(stw, %g1, %o1 + %o3)) + EX_LD(LOAD(lduw, %o1, %g1), U3_retl_o2_plus_4) + EX_ST(STORE(stw, %g1, %o1 + %o3), U3_retl_o2_plus_4) bgu,pt %XCC, 1b add %o1, 4, %o1 @@ -418,8 +499,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ .align 32 90: subcc %o2, 1, %o2 - EX_LD(LOAD(ldub, %o1, %g1)) - EX_ST(STORE(stb, %g1, %o1 + %o3)) + EX_LD(LOAD(ldub, %o1, %g1), U3_retl_o2_plus_1) + EX_ST(STORE(stb, %g1, %o1 + %o3), U3_retl_o2_plus_1) bgu,pt %XCC, 90b add %o1, 1, %o1 retl diff --git a/arch/sparc/lib/copy_in_user.S b/arch/sparc/lib/copy_in_user.S index 302c0e60dc2c..4c89b486fa0d 100644 --- a/arch/sparc/lib/copy_in_user.S +++ b/arch/sparc/lib/copy_in_user.S @@ -8,18 +8,33 @@ #define XCC xcc -#define EX(x,y) \ +#define EX(x,y,z) \ 98: x,y; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one; \ + .word 98b, z; \ .text; \ .align 4; +#define EX_O4(x,y) EX(x,y,__retl_o4_plus_8) +#define EX_O2_4(x,y) EX(x,y,__retl_o2_plus_4) +#define EX_O2_1(x,y) EX(x,y,__retl_o2_plus_1) + .register %g2,#scratch .register %g3,#scratch .text +__retl_o4_plus_8: + add %o4, %o2, %o4 + retl + add %o4, 8, %o0 +__retl_o2_plus_4: + retl + add %o2, 4, %o0 +__retl_o2_plus_1: + retl + add %o2, 1, %o0 + .align 32 /* Don't try to get too fancy here, just nice and @@ -44,8 +59,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */ andn %o2, 0x7, %o4 and %o2, 0x7, %o2 1: subcc %o4, 0x8, %o4 - EX(ldxa [%o1] %asi, %o5) - EX(stxa %o5, [%o0] %asi) + EX_O4(ldxa [%o1] %asi, %o5) + EX_O4(stxa %o5, [%o0] %asi) add %o1, 0x8, %o1 bgu,pt %XCC, 1b add %o0, 0x8, %o0 @@ -53,8 +68,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */ be,pt %XCC, 1f nop sub %o2, 0x4, %o2 - EX(lduwa [%o1] %asi, %o5) - EX(stwa %o5, [%o0] %asi) + EX_O2_4(lduwa [%o1] %asi, %o5) + EX_O2_4(stwa %o5, [%o0] %asi) add %o1, 0x4, %o1 add %o0, 0x4, %o0 1: cmp %o2, 0 @@ -70,8 +85,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */ 82: subcc %o2, 4, %o2 - EX(lduwa [%o1] %asi, %g1) - EX(stwa %g1, [%o0] %asi) + EX_O2_4(lduwa [%o1] %asi, %g1) + EX_O2_4(stwa %g1, [%o0] %asi) add %o1, 4, %o1 bgu,pt %XCC, 82b add %o0, 4, %o0 @@ -82,8 +97,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */ .align 32 90: subcc %o2, 1, %o2 - EX(lduba [%o1] %asi, %g1) - EX(stba %g1, [%o0] %asi) + EX_O2_1(lduba [%o1] %asi, %g1) + EX_O2_1(stba %g1, [%o0] %asi) add %o1, 1, %o1 bgu,pt %XCC, 90b add %o0, 1, %o0 diff --git a/arch/sparc/lib/user_fixup.c b/arch/sparc/lib/user_fixup.c deleted file mode 100644 index ac96ae236709..000000000000 --- a/arch/sparc/lib/user_fixup.c +++ /dev/null @@ -1,71 +0,0 @@ -/* user_fixup.c: Fix up user copy faults. - * - * Copyright (C) 2004 David S. Miller <davem@redhat.com> - */ - -#include <linux/compiler.h> -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/errno.h> -#include <linux/module.h> - -#include <asm/uaccess.h> - -/* Calculating the exact fault address when using - * block loads and stores can be very complicated. - * - * Instead of trying to be clever and handling all - * of the cases, just fix things up simply here. - */ - -static unsigned long compute_size(unsigned long start, unsigned long size, unsigned long *offset) -{ - unsigned long fault_addr = current_thread_info()->fault_address; - unsigned long end = start + size; - - if (fault_addr < start || fault_addr >= end) { - *offset = 0; - } else { - *offset = fault_addr - start; - size = end - fault_addr; - } - return size; -} - -unsigned long copy_from_user_fixup(void *to, const void __user *from, unsigned long size) -{ - unsigned long offset; - - size = compute_size((unsigned long) from, size, &offset); - if (likely(size)) - memset(to + offset, 0, size); - - return size; -} -EXPORT_SYMBOL(copy_from_user_fixup); - -unsigned long copy_to_user_fixup(void __user *to, const void *from, unsigned long size) -{ - unsigned long offset; - - return compute_size((unsigned long) to, size, &offset); -} -EXPORT_SYMBOL(copy_to_user_fixup); - -unsigned long copy_in_user_fixup(void __user *to, void __user *from, unsigned long size) -{ - unsigned long fault_addr = current_thread_info()->fault_address; - unsigned long start = (unsigned long) to; - unsigned long end = start + size; - - if (fault_addr >= start && fault_addr < end) - return end - fault_addr; - - start = (unsigned long) from; - end = start + size; - if (fault_addr >= start && fault_addr < end) - return end - fault_addr; - - return size; -} -EXPORT_SYMBOL(copy_in_user_fixup); diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index dbabe5713a15..b01ec72522cb 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -479,14 +479,15 @@ good_area: up_read(&mm->mmap_sem); mm_rss = get_mm_rss(mm); -#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) - mm_rss -= (mm->context.huge_pte_count * (HPAGE_SIZE / PAGE_SIZE)); +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) + mm_rss -= (mm->context.thp_pte_count * (HPAGE_SIZE / PAGE_SIZE)); #endif if (unlikely(mm_rss > mm->context.tsb_block[MM_TSB_BASE].tsb_rss_limit)) tsb_grow(mm, MM_TSB_BASE, mm_rss); #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) - mm_rss = mm->context.huge_pte_count; + mm_rss = mm->context.hugetlb_pte_count + mm->context.thp_pte_count; + mm_rss *= REAL_HPAGE_PER_HPAGE; if (unlikely(mm_rss > mm->context.tsb_block[MM_TSB_HUGE].tsb_rss_limit)) { if (mm->context.tsb_block[MM_TSB_HUGE].tsb) diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c index 2e5c4fc2daa9..150f48303fb0 100644 --- a/arch/sparc/mm/gup.c +++ b/arch/sparc/mm/gup.c @@ -250,7 +250,8 @@ slow: pages += nr; ret = get_user_pages_unlocked(current, mm, start, - (end - start) >> PAGE_SHIFT, write, 0, pages); + (end - start) >> PAGE_SHIFT, pages, + write ? FOLL_WRITE : 0); /* Have to be a bit careful with return values */ if (nr > 0) { diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c index 364d093f46c6..ffa842b4d7d4 100644 --- a/arch/sparc/mm/hugetlbpage.c +++ b/arch/sparc/mm/hugetlbpage.c @@ -115,7 +115,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, addr = ALIGN(addr, HPAGE_SIZE); vma = find_vma(mm, addr); if (task_size - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } if (mm->get_unmapped_area == arch_get_unmapped_area) @@ -180,7 +180,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, unsigned long nptes; if (!pte_present(*ptep) && pte_present(entry)) - mm->context.huge_pte_count++; + mm->context.hugetlb_pte_count++; addr &= HPAGE_MASK; @@ -212,7 +212,7 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, entry = *ptep; if (pte_present(entry)) - mm->context.huge_pte_count--; + mm->context.hugetlb_pte_count--; addr &= HPAGE_MASK; nptes = 1 << HUGETLB_PAGE_ORDER; diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c index eb8287155279..3b7092d9ea8f 100644 --- a/arch/sparc/mm/init_32.c +++ b/arch/sparc/mm/init_32.c @@ -301,7 +301,7 @@ void __init mem_init(void) /* Saves us work later. */ - memset((void *)&empty_zero_page, 0, PAGE_SIZE); + memset((void *)empty_zero_page, 0, PAGE_SIZE); i = last_valid_pfn >> ((20 - PAGE_SHIFT) + 5); i += 1; diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 3c4b8975fa76..c2f376ce443b 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -346,7 +346,8 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t * spin_lock_irqsave(&mm->context.lock, flags); #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) - if (mm->context.huge_pte_count && is_hugetlb_pte(pte)) + if ((mm->context.hugetlb_pte_count || mm->context.thp_pte_count) && + is_hugetlb_pte(pte)) __update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT, address, pte_val(pte)); else @@ -655,10 +656,58 @@ EXPORT_SYMBOL(__flush_dcache_range); /* get_new_mmu_context() uses "cache + 1". */ DEFINE_SPINLOCK(ctx_alloc_lock); -unsigned long tlb_context_cache = CTX_FIRST_VERSION - 1; +unsigned long tlb_context_cache = CTX_FIRST_VERSION; #define MAX_CTX_NR (1UL << CTX_NR_BITS) #define CTX_BMAP_SLOTS BITS_TO_LONGS(MAX_CTX_NR) DECLARE_BITMAP(mmu_context_bmap, MAX_CTX_NR); +DEFINE_PER_CPU(struct mm_struct *, per_cpu_secondary_mm) = {0}; + +static void mmu_context_wrap(void) +{ + unsigned long old_ver = tlb_context_cache & CTX_VERSION_MASK; + unsigned long new_ver, new_ctx, old_ctx; + struct mm_struct *mm; + int cpu; + + bitmap_zero(mmu_context_bmap, 1 << CTX_NR_BITS); + + /* Reserve kernel context */ + set_bit(0, mmu_context_bmap); + + new_ver = (tlb_context_cache & CTX_VERSION_MASK) + CTX_FIRST_VERSION; + if (unlikely(new_ver == 0)) + new_ver = CTX_FIRST_VERSION; + tlb_context_cache = new_ver; + + /* + * Make sure that any new mm that are added into per_cpu_secondary_mm, + * are going to go through get_new_mmu_context() path. + */ + mb(); + + /* + * Updated versions to current on those CPUs that had valid secondary + * contexts + */ + for_each_online_cpu(cpu) { + /* + * If a new mm is stored after we took this mm from the array, + * it will go into get_new_mmu_context() path, because we + * already bumped the version in tlb_context_cache. + */ + mm = per_cpu(per_cpu_secondary_mm, cpu); + + if (unlikely(!mm || mm == &init_mm)) + continue; + + old_ctx = mm->context.sparc64_ctx_val; + if (likely((old_ctx & CTX_VERSION_MASK) == old_ver)) { + new_ctx = (old_ctx & ~CTX_VERSION_MASK) | new_ver; + set_bit(new_ctx & CTX_NR_MASK, mmu_context_bmap); + mm->context.sparc64_ctx_val = new_ctx; + } + } +} /* Caller does TLB context flushing on local CPU if necessary. * The caller also ensures that CTX_VALID(mm->context) is false. @@ -674,48 +723,30 @@ void get_new_mmu_context(struct mm_struct *mm) { unsigned long ctx, new_ctx; unsigned long orig_pgsz_bits; - int new_version; spin_lock(&ctx_alloc_lock); +retry: + /* wrap might have happened, test again if our context became valid */ + if (unlikely(CTX_VALID(mm->context))) + goto out; orig_pgsz_bits = (mm->context.sparc64_ctx_val & CTX_PGSZ_MASK); ctx = (tlb_context_cache + 1) & CTX_NR_MASK; new_ctx = find_next_zero_bit(mmu_context_bmap, 1 << CTX_NR_BITS, ctx); - new_version = 0; if (new_ctx >= (1 << CTX_NR_BITS)) { new_ctx = find_next_zero_bit(mmu_context_bmap, ctx, 1); if (new_ctx >= ctx) { - int i; - new_ctx = (tlb_context_cache & CTX_VERSION_MASK) + - CTX_FIRST_VERSION; - if (new_ctx == 1) - new_ctx = CTX_FIRST_VERSION; - - /* Don't call memset, for 16 entries that's just - * plain silly... - */ - mmu_context_bmap[0] = 3; - mmu_context_bmap[1] = 0; - mmu_context_bmap[2] = 0; - mmu_context_bmap[3] = 0; - for (i = 4; i < CTX_BMAP_SLOTS; i += 4) { - mmu_context_bmap[i + 0] = 0; - mmu_context_bmap[i + 1] = 0; - mmu_context_bmap[i + 2] = 0; - mmu_context_bmap[i + 3] = 0; - } - new_version = 1; - goto out; + mmu_context_wrap(); + goto retry; } } + if (mm->context.sparc64_ctx_val) + cpumask_clear(mm_cpumask(mm)); mmu_context_bmap[new_ctx>>6] |= (1UL << (new_ctx & 63)); new_ctx |= (tlb_context_cache & CTX_VERSION_MASK); -out: tlb_context_cache = new_ctx; mm->context.sparc64_ctx_val = new_ctx | orig_pgsz_bits; +out: spin_unlock(&ctx_alloc_lock); - - if (unlikely(new_version)) - smp_new_mmu_context_version(); } static int numa_enabled = 1; @@ -799,8 +830,10 @@ struct mdesc_mblock { }; static struct mdesc_mblock *mblocks; static int num_mblocks; +static int find_numa_node_for_addr(unsigned long pa, + struct node_mem_mask *pnode_mask); -static unsigned long ra_to_pa(unsigned long addr) +static unsigned long __init ra_to_pa(unsigned long addr) { int i; @@ -816,8 +849,11 @@ static unsigned long ra_to_pa(unsigned long addr) return addr; } -static int find_node(unsigned long addr) +static int __init find_node(unsigned long addr) { + static bool search_mdesc = true; + static struct node_mem_mask last_mem_mask = { ~0UL, ~0UL }; + static int last_index; int i; addr = ra_to_pa(addr); @@ -827,13 +863,30 @@ static int find_node(unsigned long addr) if ((addr & p->mask) == p->val) return i; } - /* The following condition has been observed on LDOM guests.*/ - WARN_ONCE(1, "find_node: A physical address doesn't match a NUMA node" - " rule. Some physical memory will be owned by node 0."); - return 0; + /* The following condition has been observed on LDOM guests because + * node_masks only contains the best latency mask and value. + * LDOM guest's mdesc can contain a single latency group to + * cover multiple address range. Print warning message only if the + * address cannot be found in node_masks nor mdesc. + */ + if ((search_mdesc) && + ((addr & last_mem_mask.mask) != last_mem_mask.val)) { + /* find the available node in the mdesc */ + last_index = find_numa_node_for_addr(addr, &last_mem_mask); + numadbg("find_node: latency group for address 0x%lx is %d\n", + addr, last_index); + if ((last_index < 0) || (last_index >= num_node_masks)) { + /* WARN_ONCE() and use default group 0 */ + WARN_ONCE(1, "find_node: A physical address doesn't match a NUMA node rule. Some physical memory will be owned by node 0."); + search_mdesc = false; + last_index = 0; + } + } + + return last_index; } -static u64 memblock_nid_range(u64 start, u64 end, int *nid) +static u64 __init memblock_nid_range(u64 start, u64 end, int *nid) { *nid = find_node(start); start += PAGE_SIZE; @@ -1157,6 +1210,41 @@ int __node_distance(int from, int to) return numa_latency[from][to]; } +static int find_numa_node_for_addr(unsigned long pa, + struct node_mem_mask *pnode_mask) +{ + struct mdesc_handle *md = mdesc_grab(); + u64 node, arc; + int i = 0; + + node = mdesc_node_by_name(md, MDESC_NODE_NULL, "latency-groups"); + if (node == MDESC_NODE_NULL) + goto out; + + mdesc_for_each_node_by_name(md, node, "group") { + mdesc_for_each_arc(arc, md, node, MDESC_ARC_TYPE_FWD) { + u64 target = mdesc_arc_target(md, arc); + struct mdesc_mlgroup *m = find_mlgroup(target); + + if (!m) + continue; + if ((pa & m->mask) == m->match) { + if (pnode_mask) { + pnode_mask->mask = m->mask; + pnode_mask->val = m->match; + } + mdesc_release(md); + return i; + } + } + i++; + } + +out: + mdesc_release(md); + return -1; +} + static int find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp) { int i; @@ -1435,7 +1523,7 @@ bool kern_addr_valid(unsigned long addr) if ((long)addr < 0L) { unsigned long pa = __pa(addr); - if ((addr >> max_phys_bits) != 0UL) + if ((pa >> max_phys_bits) != 0UL) return false; return pfn_valid(pa >> PAGE_SHIFT); @@ -2314,10 +2402,17 @@ void __init mem_init(void) { high_memory = __va(last_valid_pfn << PAGE_SHIFT); - register_page_bootmem_info(); free_all_bootmem(); /* + * Must be done after boot memory is put on freelist, because here we + * might set fields in deferred struct pages that have not yet been + * initialized, and free_all_bootmem() initializes all the reserved + * deferred pages for us. + */ + register_page_bootmem_info(); + + /* * Set up the zero page, mark it reserved, so that page count * is not manipulated when freeing the page from user ptes. */ diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c index f81cd9736700..c56a195c9071 100644 --- a/arch/sparc/mm/tlb.c +++ b/arch/sparc/mm/tlb.c @@ -174,10 +174,25 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, return; if ((pmd_val(pmd) ^ pmd_val(orig)) & _PAGE_PMD_HUGE) { - if (pmd_val(pmd) & _PAGE_PMD_HUGE) - mm->context.huge_pte_count++; - else - mm->context.huge_pte_count--; + /* + * Note that this routine only sets pmds for THP pages. + * Hugetlb pages are handled elsewhere. We need to check + * for huge zero page. Huge zero pages are like hugetlb + * pages in that there is no RSS, but there is the need + * for TSB entries. So, huge zero page counts go into + * hugetlb_pte_count. + */ + if (pmd_val(pmd) & _PAGE_PMD_HUGE) { + if (is_huge_zero_page(pmd_page(pmd))) + mm->context.hugetlb_pte_count++; + else + mm->context.thp_pte_count++; + } else { + if (is_huge_zero_page(pmd_page(orig))) + mm->context.hugetlb_pte_count--; + else + mm->context.thp_pte_count--; + } /* Do not try to allocate the TSB hash table if we * don't have one already. We have various locks held @@ -204,6 +219,9 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, } } +/* + * This routine is only called when splitting a THP + */ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { @@ -213,6 +231,15 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, set_pmd_at(vma->vm_mm, address, pmdp, entry); flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); + + /* + * set_pmd_at() will not be called in a way to decrement + * thp_pte_count when splitting a THP, so do it now. + * Sanity check pmd before doing the actual decrement. + */ + if ((pmd_val(entry) & _PAGE_PMD_HUGE) && + !is_huge_zero_page(pmd_page(entry))) + (vma->vm_mm)->context.thp_pte_count--; } void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c index a0604a493a36..84cd593117a6 100644 --- a/arch/sparc/mm/tsb.c +++ b/arch/sparc/mm/tsb.c @@ -27,6 +27,20 @@ static inline int tag_compare(unsigned long tag, unsigned long vaddr) return (tag == (vaddr >> 22)); } +static void flush_tsb_kernel_range_scan(unsigned long start, unsigned long end) +{ + unsigned long idx; + + for (idx = 0; idx < KERNEL_TSB_NENTRIES; idx++) { + struct tsb *ent = &swapper_tsb[idx]; + unsigned long match = idx << 13; + + match |= (ent->tag << 22); + if (match >= start && match < end) + ent->tag = (1UL << TSB_TAG_INVALID_BIT); + } +} + /* TSB flushes need only occur on the processor initiating the address * space modification, not on each cpu the address space has run on. * Only the TLB flush needs that treatment. @@ -36,6 +50,9 @@ void flush_tsb_kernel_range(unsigned long start, unsigned long end) { unsigned long v; + if ((end - start) >> PAGE_SHIFT >= 2 * KERNEL_TSB_NENTRIES) + return flush_tsb_kernel_range_scan(start, end); + for (v = start; v < end; v += PAGE_SIZE) { unsigned long hash = tsb_hash(v, PAGE_SHIFT, KERNEL_TSB_NENTRIES); @@ -434,7 +451,8 @@ retry_tsb_alloc: extern void copy_tsb(unsigned long old_tsb_base, unsigned long old_tsb_size, unsigned long new_tsb_base, - unsigned long new_tsb_size); + unsigned long new_tsb_size, + unsigned long page_size_shift); unsigned long old_tsb_base = (unsigned long) old_tsb; unsigned long new_tsb_base = (unsigned long) new_tsb; @@ -442,7 +460,9 @@ retry_tsb_alloc: old_tsb_base = __pa(old_tsb_base); new_tsb_base = __pa(new_tsb_base); } - copy_tsb(old_tsb_base, old_size, new_tsb_base, new_size); + copy_tsb(old_tsb_base, old_size, new_tsb_base, new_size, + tsb_index == MM_TSB_BASE ? + PAGE_SHIFT : REAL_HPAGE_SHIFT); } mm->context.tsb_block[tsb_index].tsb = new_tsb; @@ -469,8 +489,10 @@ retry_tsb_alloc: int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { + unsigned long mm_rss = get_mm_rss(mm); #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) - unsigned long huge_pte_count; + unsigned long saved_hugetlb_pte_count; + unsigned long saved_thp_pte_count; #endif unsigned int i; @@ -479,12 +501,16 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) mm->context.sparc64_ctx_val = 0UL; #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) - /* We reset it to zero because the fork() page copying + /* We reset them to zero because the fork() page copying * will re-increment the counters as the parent PTEs are * copied into the child address space. */ - huge_pte_count = mm->context.huge_pte_count; - mm->context.huge_pte_count = 0; + saved_hugetlb_pte_count = mm->context.hugetlb_pte_count; + saved_thp_pte_count = mm->context.thp_pte_count; + mm->context.hugetlb_pte_count = 0; + mm->context.thp_pte_count = 0; + + mm_rss -= saved_thp_pte_count * (HPAGE_SIZE / PAGE_SIZE); #endif /* copy_mm() copies over the parent's mm_struct before calling @@ -497,11 +523,13 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) /* If this is fork, inherit the parent's TSB size. We would * grow it to that size on the first page fault anyways. */ - tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm)); + tsb_grow(mm, MM_TSB_BASE, mm_rss); #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) - if (unlikely(huge_pte_count)) - tsb_grow(mm, MM_TSB_HUGE, huge_pte_count); + if (unlikely(saved_hugetlb_pte_count + saved_thp_pte_count)) + tsb_grow(mm, MM_TSB_HUGE, + (saved_hugetlb_pte_count + saved_thp_pte_count) * + REAL_HPAGE_PER_HPAGE); #endif if (unlikely(!mm->context.tsb_block[MM_TSB_BASE].tsb)) diff --git a/arch/sparc/mm/ultra.S b/arch/sparc/mm/ultra.S index b4f4733abc6e..fcf4d27a38fb 100644 --- a/arch/sparc/mm/ultra.S +++ b/arch/sparc/mm/ultra.S @@ -30,7 +30,7 @@ .text .align 32 .globl __flush_tlb_mm -__flush_tlb_mm: /* 18 insns */ +__flush_tlb_mm: /* 19 insns */ /* %o0=(ctx & TAG_CONTEXT_BITS), %o1=SECONDARY_CONTEXT */ ldxa [%o1] ASI_DMMU, %g2 cmp %g2, %o0 @@ -81,7 +81,7 @@ __flush_tlb_page: /* 22 insns */ .align 32 .globl __flush_tlb_pending -__flush_tlb_pending: /* 26 insns */ +__flush_tlb_pending: /* 27 insns */ /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ rdpr %pstate, %g7 sllx %o1, 3, %o1 @@ -113,12 +113,14 @@ __flush_tlb_pending: /* 26 insns */ .align 32 .globl __flush_tlb_kernel_range -__flush_tlb_kernel_range: /* 16 insns */ +__flush_tlb_kernel_range: /* 31 insns */ /* %o0=start, %o1=end */ cmp %o0, %o1 be,pn %xcc, 2f + sub %o1, %o0, %o3 + srlx %o3, 18, %o4 + brnz,pn %o4, __spitfire_flush_tlb_kernel_range_slow sethi %hi(PAGE_SIZE), %o4 - sub %o1, %o0, %o3 sub %o3, %o4, %o3 or %o0, 0x20, %o0 ! Nucleus 1: stxa %g0, [%o0 + %o3] ASI_DMMU_DEMAP @@ -131,6 +133,41 @@ __flush_tlb_kernel_range: /* 16 insns */ retl nop nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + +__spitfire_flush_tlb_kernel_range_slow: + mov 63 * 8, %o4 +1: ldxa [%o4] ASI_ITLB_DATA_ACCESS, %o3 + andcc %o3, 0x40, %g0 /* _PAGE_L_4U */ + bne,pn %xcc, 2f + mov TLB_TAG_ACCESS, %o3 + stxa %g0, [%o3] ASI_IMMU + stxa %g0, [%o4] ASI_ITLB_DATA_ACCESS + membar #Sync +2: ldxa [%o4] ASI_DTLB_DATA_ACCESS, %o3 + andcc %o3, 0x40, %g0 + bne,pn %xcc, 2f + mov TLB_TAG_ACCESS, %o3 + stxa %g0, [%o3] ASI_DMMU + stxa %g0, [%o4] ASI_DTLB_DATA_ACCESS + membar #Sync +2: sub %o4, 8, %o4 + brgez,pt %o4, 1b + nop + retl + nop __spitfire_flush_tlb_mm_slow: rdpr %pstate, %g1 @@ -285,6 +322,40 @@ __cheetah_flush_tlb_pending: /* 27 insns */ retl wrpr %g7, 0x0, %pstate +__cheetah_flush_tlb_kernel_range: /* 31 insns */ + /* %o0=start, %o1=end */ + cmp %o0, %o1 + be,pn %xcc, 2f + sub %o1, %o0, %o3 + srlx %o3, 18, %o4 + brnz,pn %o4, 3f + sethi %hi(PAGE_SIZE), %o4 + sub %o3, %o4, %o3 + or %o0, 0x20, %o0 ! Nucleus +1: stxa %g0, [%o0 + %o3] ASI_DMMU_DEMAP + stxa %g0, [%o0 + %o3] ASI_IMMU_DEMAP + membar #Sync + brnz,pt %o3, 1b + sub %o3, %o4, %o3 +2: sethi %hi(KERNBASE), %o3 + flush %o3 + retl + nop +3: mov 0x80, %o4 + stxa %g0, [%o4] ASI_DMMU_DEMAP + membar #Sync + stxa %g0, [%o4] ASI_IMMU_DEMAP + membar #Sync + retl + nop + nop + nop + nop + nop + nop + nop + nop + #ifdef DCACHE_ALIASING_POSSIBLE __cheetah_flush_dcache_page: /* 11 insns */ sethi %hi(PAGE_OFFSET), %g1 @@ -309,19 +380,28 @@ __hypervisor_tlb_tl0_error: ret restore -__hypervisor_flush_tlb_mm: /* 10 insns */ +__hypervisor_flush_tlb_mm: /* 19 insns */ mov %o0, %o2 /* ARG2: mmu context */ mov 0, %o0 /* ARG0: CPU lists unimplemented */ mov 0, %o1 /* ARG1: CPU lists unimplemented */ mov HV_MMU_ALL, %o3 /* ARG3: flags */ mov HV_FAST_MMU_DEMAP_CTX, %o5 ta HV_FAST_TRAP - brnz,pn %o0, __hypervisor_tlb_tl0_error + brnz,pn %o0, 1f mov HV_FAST_MMU_DEMAP_CTX, %o1 retl nop +1: sethi %hi(__hypervisor_tlb_tl0_error), %o5 + jmpl %o5 + %lo(__hypervisor_tlb_tl0_error), %g0 + nop + nop + nop + nop + nop + nop + nop -__hypervisor_flush_tlb_page: /* 11 insns */ +__hypervisor_flush_tlb_page: /* 22 insns */ /* %o0 = context, %o1 = vaddr */ mov %o0, %g2 mov %o1, %o0 /* ARG0: vaddr + IMMU-bit */ @@ -330,12 +410,23 @@ __hypervisor_flush_tlb_page: /* 11 insns */ srlx %o0, PAGE_SHIFT, %o0 sllx %o0, PAGE_SHIFT, %o0 ta HV_MMU_UNMAP_ADDR_TRAP - brnz,pn %o0, __hypervisor_tlb_tl0_error + brnz,pn %o0, 1f mov HV_MMU_UNMAP_ADDR_TRAP, %o1 retl nop +1: sethi %hi(__hypervisor_tlb_tl0_error), %o2 + jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0 + nop + nop + nop + nop + nop + nop + nop + nop + nop -__hypervisor_flush_tlb_pending: /* 16 insns */ +__hypervisor_flush_tlb_pending: /* 27 insns */ /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ sllx %o1, 3, %g1 mov %o2, %g2 @@ -347,31 +438,57 @@ __hypervisor_flush_tlb_pending: /* 16 insns */ srlx %o0, PAGE_SHIFT, %o0 sllx %o0, PAGE_SHIFT, %o0 ta HV_MMU_UNMAP_ADDR_TRAP - brnz,pn %o0, __hypervisor_tlb_tl0_error + brnz,pn %o0, 1f mov HV_MMU_UNMAP_ADDR_TRAP, %o1 brnz,pt %g1, 1b nop retl nop +1: sethi %hi(__hypervisor_tlb_tl0_error), %o2 + jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0 + nop + nop + nop + nop + nop + nop + nop + nop + nop -__hypervisor_flush_tlb_kernel_range: /* 16 insns */ +__hypervisor_flush_tlb_kernel_range: /* 31 insns */ /* %o0=start, %o1=end */ cmp %o0, %o1 be,pn %xcc, 2f - sethi %hi(PAGE_SIZE), %g3 - mov %o0, %g1 - sub %o1, %g1, %g2 + sub %o1, %o0, %g2 + srlx %g2, 18, %g3 + brnz,pn %g3, 4f + mov %o0, %g1 + sethi %hi(PAGE_SIZE), %g3 sub %g2, %g3, %g2 1: add %g1, %g2, %o0 /* ARG0: virtual address */ mov 0, %o1 /* ARG1: mmu context */ mov HV_MMU_ALL, %o2 /* ARG2: flags */ ta HV_MMU_UNMAP_ADDR_TRAP - brnz,pn %o0, __hypervisor_tlb_tl0_error + brnz,pn %o0, 3f mov HV_MMU_UNMAP_ADDR_TRAP, %o1 brnz,pt %g2, 1b sub %g2, %g3, %g2 2: retl nop +3: sethi %hi(__hypervisor_tlb_tl0_error), %o2 + jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0 + nop +4: mov 0, %o0 /* ARG0: CPU lists unimplemented */ + mov 0, %o1 /* ARG1: CPU lists unimplemented */ + mov 0, %o2 /* ARG2: mmu context == nucleus */ + mov HV_MMU_ALL, %o3 /* ARG3: flags */ + mov HV_FAST_MMU_DEMAP_CTX, %o5 + ta HV_FAST_TRAP + brnz,pn %o0, 3b + mov HV_FAST_MMU_DEMAP_CTX, %o1 + retl + nop #ifdef DCACHE_ALIASING_POSSIBLE /* XXX Niagara and friends have an 8K cache, so no aliasing is @@ -394,43 +511,6 @@ tlb_patch_one: retl nop - .globl cheetah_patch_cachetlbops -cheetah_patch_cachetlbops: - save %sp, -128, %sp - - sethi %hi(__flush_tlb_mm), %o0 - or %o0, %lo(__flush_tlb_mm), %o0 - sethi %hi(__cheetah_flush_tlb_mm), %o1 - or %o1, %lo(__cheetah_flush_tlb_mm), %o1 - call tlb_patch_one - mov 19, %o2 - - sethi %hi(__flush_tlb_page), %o0 - or %o0, %lo(__flush_tlb_page), %o0 - sethi %hi(__cheetah_flush_tlb_page), %o1 - or %o1, %lo(__cheetah_flush_tlb_page), %o1 - call tlb_patch_one - mov 22, %o2 - - sethi %hi(__flush_tlb_pending), %o0 - or %o0, %lo(__flush_tlb_pending), %o0 - sethi %hi(__cheetah_flush_tlb_pending), %o1 - or %o1, %lo(__cheetah_flush_tlb_pending), %o1 - call tlb_patch_one - mov 27, %o2 - -#ifdef DCACHE_ALIASING_POSSIBLE - sethi %hi(__flush_dcache_page), %o0 - or %o0, %lo(__flush_dcache_page), %o0 - sethi %hi(__cheetah_flush_dcache_page), %o1 - or %o1, %lo(__cheetah_flush_dcache_page), %o1 - call tlb_patch_one - mov 11, %o2 -#endif /* DCACHE_ALIASING_POSSIBLE */ - - ret - restore - #ifdef CONFIG_SMP /* These are all called by the slaves of a cross call, at * trap level 1, with interrupts fully disabled. @@ -447,7 +527,7 @@ cheetah_patch_cachetlbops: */ .align 32 .globl xcall_flush_tlb_mm -xcall_flush_tlb_mm: /* 21 insns */ +xcall_flush_tlb_mm: /* 24 insns */ mov PRIMARY_CONTEXT, %g2 ldxa [%g2] ASI_DMMU, %g3 srlx %g3, CTX_PGSZ1_NUC_SHIFT, %g4 @@ -469,9 +549,12 @@ xcall_flush_tlb_mm: /* 21 insns */ nop nop nop + nop + nop + nop .globl xcall_flush_tlb_page -xcall_flush_tlb_page: /* 17 insns */ +xcall_flush_tlb_page: /* 20 insns */ /* %g5=context, %g1=vaddr */ mov PRIMARY_CONTEXT, %g4 ldxa [%g4] ASI_DMMU, %g2 @@ -490,15 +573,20 @@ xcall_flush_tlb_page: /* 17 insns */ retry nop nop + nop + nop + nop .globl xcall_flush_tlb_kernel_range -xcall_flush_tlb_kernel_range: /* 25 insns */ +xcall_flush_tlb_kernel_range: /* 44 insns */ sethi %hi(PAGE_SIZE - 1), %g2 or %g2, %lo(PAGE_SIZE - 1), %g2 andn %g1, %g2, %g1 andn %g7, %g2, %g7 sub %g7, %g1, %g3 - add %g2, 1, %g2 + srlx %g3, 18, %g2 + brnz,pn %g2, 2f + add %g2, 1, %g2 sub %g3, %g2, %g3 or %g1, 0x20, %g1 ! Nucleus 1: stxa %g0, [%g1 + %g3] ASI_DMMU_DEMAP @@ -507,8 +595,25 @@ xcall_flush_tlb_kernel_range: /* 25 insns */ brnz,pt %g3, 1b sub %g3, %g2, %g3 retry - nop - nop +2: mov 63 * 8, %g1 +1: ldxa [%g1] ASI_ITLB_DATA_ACCESS, %g2 + andcc %g2, 0x40, %g0 /* _PAGE_L_4U */ + bne,pn %xcc, 2f + mov TLB_TAG_ACCESS, %g2 + stxa %g0, [%g2] ASI_IMMU + stxa %g0, [%g1] ASI_ITLB_DATA_ACCESS + membar #Sync +2: ldxa [%g1] ASI_DTLB_DATA_ACCESS, %g2 + andcc %g2, 0x40, %g0 + bne,pn %xcc, 2f + mov TLB_TAG_ACCESS, %g2 + stxa %g0, [%g2] ASI_DMMU + stxa %g0, [%g1] ASI_DTLB_DATA_ACCESS + membar #Sync +2: sub %g1, 8, %g1 + brgez,pt %g1, 1b + nop + retry nop nop nop @@ -637,6 +742,52 @@ xcall_fetch_glob_pmu_n4: retry +__cheetah_xcall_flush_tlb_kernel_range: /* 44 insns */ + sethi %hi(PAGE_SIZE - 1), %g2 + or %g2, %lo(PAGE_SIZE - 1), %g2 + andn %g1, %g2, %g1 + andn %g7, %g2, %g7 + sub %g7, %g1, %g3 + srlx %g3, 18, %g2 + brnz,pn %g2, 2f + add %g2, 1, %g2 + sub %g3, %g2, %g3 + or %g1, 0x20, %g1 ! Nucleus +1: stxa %g0, [%g1 + %g3] ASI_DMMU_DEMAP + stxa %g0, [%g1 + %g3] ASI_IMMU_DEMAP + membar #Sync + brnz,pt %g3, 1b + sub %g3, %g2, %g3 + retry +2: mov 0x80, %g2 + stxa %g0, [%g2] ASI_DMMU_DEMAP + membar #Sync + stxa %g0, [%g2] ASI_IMMU_DEMAP + membar #Sync + retry + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + #ifdef DCACHE_ALIASING_POSSIBLE .align 32 .globl xcall_flush_dcache_page_cheetah @@ -700,7 +851,7 @@ __hypervisor_tlb_xcall_error: ba,a,pt %xcc, rtrap .globl __hypervisor_xcall_flush_tlb_mm -__hypervisor_xcall_flush_tlb_mm: /* 21 insns */ +__hypervisor_xcall_flush_tlb_mm: /* 24 insns */ /* %g5=ctx, g1,g2,g3,g4,g7=scratch, %g6=unusable */ mov %o0, %g2 mov %o1, %g3 @@ -714,7 +865,7 @@ __hypervisor_xcall_flush_tlb_mm: /* 21 insns */ mov HV_FAST_MMU_DEMAP_CTX, %o5 ta HV_FAST_TRAP mov HV_FAST_MMU_DEMAP_CTX, %g6 - brnz,pn %o0, __hypervisor_tlb_xcall_error + brnz,pn %o0, 1f mov %o0, %g5 mov %g2, %o0 mov %g3, %o1 @@ -723,9 +874,12 @@ __hypervisor_xcall_flush_tlb_mm: /* 21 insns */ mov %g7, %o5 membar #Sync retry +1: sethi %hi(__hypervisor_tlb_xcall_error), %g4 + jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0 + nop .globl __hypervisor_xcall_flush_tlb_page -__hypervisor_xcall_flush_tlb_page: /* 17 insns */ +__hypervisor_xcall_flush_tlb_page: /* 20 insns */ /* %g5=ctx, %g1=vaddr */ mov %o0, %g2 mov %o1, %g3 @@ -737,42 +891,64 @@ __hypervisor_xcall_flush_tlb_page: /* 17 insns */ sllx %o0, PAGE_SHIFT, %o0 ta HV_MMU_UNMAP_ADDR_TRAP mov HV_MMU_UNMAP_ADDR_TRAP, %g6 - brnz,a,pn %o0, __hypervisor_tlb_xcall_error + brnz,a,pn %o0, 1f mov %o0, %g5 mov %g2, %o0 mov %g3, %o1 mov %g4, %o2 membar #Sync retry +1: sethi %hi(__hypervisor_tlb_xcall_error), %g4 + jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0 + nop .globl __hypervisor_xcall_flush_tlb_kernel_range -__hypervisor_xcall_flush_tlb_kernel_range: /* 25 insns */ +__hypervisor_xcall_flush_tlb_kernel_range: /* 44 insns */ /* %g1=start, %g7=end, g2,g3,g4,g5,g6=scratch */ sethi %hi(PAGE_SIZE - 1), %g2 or %g2, %lo(PAGE_SIZE - 1), %g2 andn %g1, %g2, %g1 andn %g7, %g2, %g7 sub %g7, %g1, %g3 + srlx %g3, 18, %g7 add %g2, 1, %g2 sub %g3, %g2, %g3 mov %o0, %g2 mov %o1, %g4 - mov %o2, %g7 + brnz,pn %g7, 2f + mov %o2, %g7 1: add %g1, %g3, %o0 /* ARG0: virtual address */ mov 0, %o1 /* ARG1: mmu context */ mov HV_MMU_ALL, %o2 /* ARG2: flags */ ta HV_MMU_UNMAP_ADDR_TRAP mov HV_MMU_UNMAP_ADDR_TRAP, %g6 - brnz,pn %o0, __hypervisor_tlb_xcall_error + brnz,pn %o0, 1f mov %o0, %g5 sethi %hi(PAGE_SIZE), %o2 brnz,pt %g3, 1b sub %g3, %o2, %g3 - mov %g2, %o0 +5: mov %g2, %o0 mov %g4, %o1 mov %g7, %o2 membar #Sync retry +1: sethi %hi(__hypervisor_tlb_xcall_error), %g4 + jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0 + nop +2: mov %o3, %g1 + mov %o5, %g3 + mov 0, %o0 /* ARG0: CPU lists unimplemented */ + mov 0, %o1 /* ARG1: CPU lists unimplemented */ + mov 0, %o2 /* ARG2: mmu context == nucleus */ + mov HV_MMU_ALL, %o3 /* ARG3: flags */ + mov HV_FAST_MMU_DEMAP_CTX, %o5 + ta HV_FAST_TRAP + mov %g1, %o3 + brz,pt %o0, 5b + mov %g3, %o5 + mov HV_FAST_MMU_DEMAP_CTX, %g6 + ba,pt %xcc, 1b + clr %g5 /* These just get rescheduled to PIL vectors. */ .globl xcall_call_function @@ -795,11 +971,6 @@ xcall_capture: wr %g0, (1 << PIL_SMP_CAPTURE), %set_softint retry - .globl xcall_new_mmu_context_version -xcall_new_mmu_context_version: - wr %g0, (1 << PIL_SMP_CTX_NEW_VERSION), %set_softint - retry - #ifdef CONFIG_KGDB .globl xcall_kgdb_capture xcall_kgdb_capture: @@ -809,6 +980,58 @@ xcall_kgdb_capture: #endif /* CONFIG_SMP */ + .globl cheetah_patch_cachetlbops +cheetah_patch_cachetlbops: + save %sp, -128, %sp + + sethi %hi(__flush_tlb_mm), %o0 + or %o0, %lo(__flush_tlb_mm), %o0 + sethi %hi(__cheetah_flush_tlb_mm), %o1 + or %o1, %lo(__cheetah_flush_tlb_mm), %o1 + call tlb_patch_one + mov 19, %o2 + + sethi %hi(__flush_tlb_page), %o0 + or %o0, %lo(__flush_tlb_page), %o0 + sethi %hi(__cheetah_flush_tlb_page), %o1 + or %o1, %lo(__cheetah_flush_tlb_page), %o1 + call tlb_patch_one + mov 22, %o2 + + sethi %hi(__flush_tlb_pending), %o0 + or %o0, %lo(__flush_tlb_pending), %o0 + sethi %hi(__cheetah_flush_tlb_pending), %o1 + or %o1, %lo(__cheetah_flush_tlb_pending), %o1 + call tlb_patch_one + mov 27, %o2 + + sethi %hi(__flush_tlb_kernel_range), %o0 + or %o0, %lo(__flush_tlb_kernel_range), %o0 + sethi %hi(__cheetah_flush_tlb_kernel_range), %o1 + or %o1, %lo(__cheetah_flush_tlb_kernel_range), %o1 + call tlb_patch_one + mov 31, %o2 + +#ifdef DCACHE_ALIASING_POSSIBLE + sethi %hi(__flush_dcache_page), %o0 + or %o0, %lo(__flush_dcache_page), %o0 + sethi %hi(__cheetah_flush_dcache_page), %o1 + or %o1, %lo(__cheetah_flush_dcache_page), %o1 + call tlb_patch_one + mov 11, %o2 +#endif /* DCACHE_ALIASING_POSSIBLE */ + +#ifdef CONFIG_SMP + sethi %hi(xcall_flush_tlb_kernel_range), %o0 + or %o0, %lo(xcall_flush_tlb_kernel_range), %o0 + sethi %hi(__cheetah_xcall_flush_tlb_kernel_range), %o1 + or %o1, %lo(__cheetah_xcall_flush_tlb_kernel_range), %o1 + call tlb_patch_one + mov 44, %o2 +#endif /* CONFIG_SMP */ + + ret + restore .globl hypervisor_patch_cachetlbops hypervisor_patch_cachetlbops: @@ -819,28 +1042,28 @@ hypervisor_patch_cachetlbops: sethi %hi(__hypervisor_flush_tlb_mm), %o1 or %o1, %lo(__hypervisor_flush_tlb_mm), %o1 call tlb_patch_one - mov 10, %o2 + mov 19, %o2 sethi %hi(__flush_tlb_page), %o0 or %o0, %lo(__flush_tlb_page), %o0 sethi %hi(__hypervisor_flush_tlb_page), %o1 or %o1, %lo(__hypervisor_flush_tlb_page), %o1 call tlb_patch_one - mov 11, %o2 + mov 22, %o2 sethi %hi(__flush_tlb_pending), %o0 or %o0, %lo(__flush_tlb_pending), %o0 sethi %hi(__hypervisor_flush_tlb_pending), %o1 or %o1, %lo(__hypervisor_flush_tlb_pending), %o1 call tlb_patch_one - mov 16, %o2 + mov 27, %o2 sethi %hi(__flush_tlb_kernel_range), %o0 or %o0, %lo(__flush_tlb_kernel_range), %o0 sethi %hi(__hypervisor_flush_tlb_kernel_range), %o1 or %o1, %lo(__hypervisor_flush_tlb_kernel_range), %o1 call tlb_patch_one - mov 16, %o2 + mov 31, %o2 #ifdef DCACHE_ALIASING_POSSIBLE sethi %hi(__flush_dcache_page), %o0 @@ -857,21 +1080,21 @@ hypervisor_patch_cachetlbops: sethi %hi(__hypervisor_xcall_flush_tlb_mm), %o1 or %o1, %lo(__hypervisor_xcall_flush_tlb_mm), %o1 call tlb_patch_one - mov 21, %o2 + mov 24, %o2 sethi %hi(xcall_flush_tlb_page), %o0 or %o0, %lo(xcall_flush_tlb_page), %o0 sethi %hi(__hypervisor_xcall_flush_tlb_page), %o1 or %o1, %lo(__hypervisor_xcall_flush_tlb_page), %o1 call tlb_patch_one - mov 17, %o2 + mov 20, %o2 sethi %hi(xcall_flush_tlb_kernel_range), %o0 or %o0, %lo(xcall_flush_tlb_kernel_range), %o0 sethi %hi(__hypervisor_xcall_flush_tlb_kernel_range), %o1 or %o1, %lo(__hypervisor_xcall_flush_tlb_kernel_range), %o1 call tlb_patch_one - mov 25, %o2 + mov 44, %o2 #endif /* CONFIG_SMP */ ret diff --git a/arch/sparc/power/hibernate.c b/arch/sparc/power/hibernate.c index 17bd2e167e07..df707a8ad311 100644 --- a/arch/sparc/power/hibernate.c +++ b/arch/sparc/power/hibernate.c @@ -35,6 +35,5 @@ void restore_processor_state(void) { struct mm_struct *mm = current->active_mm; - load_secondary_context(mm); - tsb_context_switch(mm); + tsb_context_switch_ctx(mm, CTX_HWBITS(mm->context)); } |
