summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
authorLinux Build Service Account <lnxbuild@quicinc.com>2017-09-12 14:41:59 -0700
committerGerrit - the friendly Code Review server <code-review@localhost>2017-09-12 14:41:58 -0700
commit14f6bfeeebb97c02cbf0c43818f7998e3bdb3cae (patch)
tree055fbddd1e98f289dc9c65cb65a8e37fc2dbc151 /arch
parent043f52c20a63011ae14175e56926076965d59353 (diff)
parent03f50f905f9f7cadbb1b3cd82c078806bfd77703 (diff)
Merge "Merge android-4.4@610af85 (v4.4.85) into msm-4.4"
Diffstat (limited to 'arch')
-rw-r--r--arch/arc/include/asm/cache.h2
-rw-r--r--arch/arc/mm/cache.c13
-rw-r--r--arch/arm64/include/asm/current.h10
-rw-r--r--arch/arm64/include/asm/elf.h4
-rw-r--r--arch/arm64/include/asm/smp.h4
-rw-r--r--arch/arm64/kernel/setup.c2
-rw-r--r--arch/x86/crypto/sha1_avx2_x86_64_asm.S67
-rw-r--r--arch/x86/crypto/sha1_ssse3_glue.c2
-rw-r--r--arch/x86/entry/entry_64.S2
-rw-r--r--arch/x86/include/asm/elf.h4
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_lbr.c8
11 files changed, 74 insertions, 44 deletions
diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h
index 210ef3e72332..0ddd7144c492 100644
--- a/arch/arc/include/asm/cache.h
+++ b/arch/arc/include/asm/cache.h
@@ -88,7 +88,9 @@ extern int ioc_exists;
#define ARC_REG_SLC_FLUSH 0x904
#define ARC_REG_SLC_INVALIDATE 0x905
#define ARC_REG_SLC_RGN_START 0x914
+#define ARC_REG_SLC_RGN_START1 0x915
#define ARC_REG_SLC_RGN_END 0x916
+#define ARC_REG_SLC_RGN_END1 0x917
/* Bit val in SLC_CONTROL */
#define SLC_CTRL_IM 0x040
diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
index d81b6d7e11e7..9a84cbdd44b0 100644
--- a/arch/arc/mm/cache.c
+++ b/arch/arc/mm/cache.c
@@ -543,6 +543,7 @@ noinline void slc_op(phys_addr_t paddr, unsigned long sz, const int op)
static DEFINE_SPINLOCK(lock);
unsigned long flags;
unsigned int ctrl;
+ phys_addr_t end;
spin_lock_irqsave(&lock, flags);
@@ -572,8 +573,16 @@ noinline void slc_op(phys_addr_t paddr, unsigned long sz, const int op)
* END needs to be setup before START (latter triggers the operation)
* END can't be same as START, so add (l2_line_sz - 1) to sz
*/
- write_aux_reg(ARC_REG_SLC_RGN_END, (paddr + sz + l2_line_sz - 1));
- write_aux_reg(ARC_REG_SLC_RGN_START, paddr);
+ end = paddr + sz + l2_line_sz - 1;
+ if (is_pae40_enabled())
+ write_aux_reg(ARC_REG_SLC_RGN_END1, upper_32_bits(end));
+
+ write_aux_reg(ARC_REG_SLC_RGN_END, lower_32_bits(end));
+
+ if (is_pae40_enabled())
+ write_aux_reg(ARC_REG_SLC_RGN_START1, upper_32_bits(paddr));
+
+ write_aux_reg(ARC_REG_SLC_RGN_START, lower_32_bits(paddr));
while (read_aux_reg(ARC_REG_SLC_CTRL) & SLC_CTRL_BUSY);
diff --git a/arch/arm64/include/asm/current.h b/arch/arm64/include/asm/current.h
index 2e61d21294ba..483a6c9d3e10 100644
--- a/arch/arm64/include/asm/current.h
+++ b/arch/arm64/include/asm/current.h
@@ -10,9 +10,17 @@
#ifdef CONFIG_THREAD_INFO_IN_TASK
struct task_struct;
+/*
+ * We don't use read_sysreg() as we want the compiler to cache the value where
+ * possible.
+ */
static __always_inline struct task_struct *get_current(void)
{
- return (struct task_struct *)read_sysreg(sp_el0);
+ unsigned long sp_el0;
+
+ asm ("mrs %0, sp_el0" : "=r" (sp_el0));
+
+ return (struct task_struct *)sp_el0;
}
#define current get_current()
#else
diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index b98332269462..9d9287277201 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -115,10 +115,10 @@
/*
* This is the base location for PIE (ET_DYN with INTERP) loads. On
- * 64-bit, this is raised to 4GB to leave the entire 32-bit address
+ * 64-bit, this is above 4GB to leave the entire 32-bit address
* space open for things that want to use the area for 32-bit pointers.
*/
-#define ELF_ET_DYN_BASE 0x100000000UL
+#define ELF_ET_DYN_BASE (2 * TASK_SIZE_64 / 3)
#ifndef __ASSEMBLY__
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index a05033beb2a2..4325b3622a92 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -28,8 +28,10 @@ DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number);
* We don't use this_cpu_read(cpu_number) as that has implicit writes to
* preempt_count, and associated (compiler) barriers, that we'd like to avoid
* the expense of. If we're preemptible, the value can be stale at use anyway.
+ * And we can't use this_cpu_ptr() either, as that winds up recursing back
+ * here under CONFIG_DEBUG_PREEMPT=y.
*/
-#define raw_smp_processor_id() (*this_cpu_ptr(&cpu_number))
+#define raw_smp_processor_id() (*raw_cpu_ptr(&cpu_number))
struct seq_file;
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index f592e7406fc5..b8b40d95ebef 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -369,7 +369,7 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_THREAD_INFO_IN_TASK
init_task.thread_info.ttbr0 = virt_to_phys(empty_zero_page);
#else
- init_thread_info.ttbr0 = (init_thread_union.thread_info);
+ init_thread_info.ttbr0 = virt_to_phys(empty_zero_page);
#endif
#endif
diff --git a/arch/x86/crypto/sha1_avx2_x86_64_asm.S b/arch/x86/crypto/sha1_avx2_x86_64_asm.S
index 1cd792db15ef..1eab79c9ac48 100644
--- a/arch/x86/crypto/sha1_avx2_x86_64_asm.S
+++ b/arch/x86/crypto/sha1_avx2_x86_64_asm.S
@@ -117,11 +117,10 @@
.set T1, REG_T1
.endm
-#define K_BASE %r8
#define HASH_PTR %r9
+#define BLOCKS_CTR %r8
#define BUFFER_PTR %r10
#define BUFFER_PTR2 %r13
-#define BUFFER_END %r11
#define PRECALC_BUF %r14
#define WK_BUF %r15
@@ -205,14 +204,14 @@
* blended AVX2 and ALU instruction scheduling
* 1 vector iteration per 8 rounds
*/
- vmovdqu ((i * 2) + PRECALC_OFFSET)(BUFFER_PTR), W_TMP
+ vmovdqu (i * 2)(BUFFER_PTR), W_TMP
.elseif ((i & 7) == 1)
- vinsertf128 $1, (((i-1) * 2)+PRECALC_OFFSET)(BUFFER_PTR2),\
+ vinsertf128 $1, ((i-1) * 2)(BUFFER_PTR2),\
WY_TMP, WY_TMP
.elseif ((i & 7) == 2)
vpshufb YMM_SHUFB_BSWAP, WY_TMP, WY
.elseif ((i & 7) == 4)
- vpaddd K_XMM(K_BASE), WY, WY_TMP
+ vpaddd K_XMM + K_XMM_AR(%rip), WY, WY_TMP
.elseif ((i & 7) == 7)
vmovdqu WY_TMP, PRECALC_WK(i&~7)
@@ -255,7 +254,7 @@
vpxor WY, WY_TMP, WY_TMP
.elseif ((i & 7) == 7)
vpxor WY_TMP2, WY_TMP, WY
- vpaddd K_XMM(K_BASE), WY, WY_TMP
+ vpaddd K_XMM + K_XMM_AR(%rip), WY, WY_TMP
vmovdqu WY_TMP, PRECALC_WK(i&~7)
PRECALC_ROTATE_WY
@@ -291,7 +290,7 @@
vpsrld $30, WY, WY
vpor WY, WY_TMP, WY
.elseif ((i & 7) == 7)
- vpaddd K_XMM(K_BASE), WY, WY_TMP
+ vpaddd K_XMM + K_XMM_AR(%rip), WY, WY_TMP
vmovdqu WY_TMP, PRECALC_WK(i&~7)
PRECALC_ROTATE_WY
@@ -446,6 +445,16 @@
.endm
+/* Add constant only if (%2 > %3) condition met (uses RTA as temp)
+ * %1 + %2 >= %3 ? %4 : 0
+ */
+.macro ADD_IF_GE a, b, c, d
+ mov \a, RTA
+ add $\d, RTA
+ cmp $\c, \b
+ cmovge RTA, \a
+.endm
+
/*
* macro implements 80 rounds of SHA-1, for multiple blocks with s/w pipelining
*/
@@ -463,13 +472,16 @@
lea (2*4*80+32)(%rsp), WK_BUF
# Precalc WK for first 2 blocks
- PRECALC_OFFSET = 0
+ ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 2, 64
.set i, 0
.rept 160
PRECALC i
.set i, i + 1
.endr
- PRECALC_OFFSET = 128
+
+ /* Go to next block if needed */
+ ADD_IF_GE BUFFER_PTR, BLOCKS_CTR, 3, 128
+ ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 4, 128
xchg WK_BUF, PRECALC_BUF
.align 32
@@ -479,8 +491,8 @@ _loop:
* we use K_BASE value as a signal of a last block,
* it is set below by: cmovae BUFFER_PTR, K_BASE
*/
- cmp K_BASE, BUFFER_PTR
- jne _begin
+ test BLOCKS_CTR, BLOCKS_CTR
+ jnz _begin
.align 32
jmp _end
.align 32
@@ -512,10 +524,10 @@ _loop0:
.set j, j+2
.endr
- add $(2*64), BUFFER_PTR /* move to next odd-64-byte block */
- cmp BUFFER_END, BUFFER_PTR /* is current block the last one? */
- cmovae K_BASE, BUFFER_PTR /* signal the last iteration smartly */
-
+ /* Update Counter */
+ sub $1, BLOCKS_CTR
+ /* Move to the next block only if needed*/
+ ADD_IF_GE BUFFER_PTR, BLOCKS_CTR, 4, 128
/*
* rounds
* 60,62,64,66,68
@@ -532,8 +544,8 @@ _loop0:
UPDATE_HASH 12(HASH_PTR), D
UPDATE_HASH 16(HASH_PTR), E
- cmp K_BASE, BUFFER_PTR /* is current block the last one? */
- je _loop
+ test BLOCKS_CTR, BLOCKS_CTR
+ jz _loop
mov TB, B
@@ -575,10 +587,10 @@ _loop2:
.set j, j+2
.endr
- add $(2*64), BUFFER_PTR2 /* move to next even-64-byte block */
-
- cmp BUFFER_END, BUFFER_PTR2 /* is current block the last one */
- cmovae K_BASE, BUFFER_PTR /* signal the last iteration smartly */
+ /* update counter */
+ sub $1, BLOCKS_CTR
+ /* Move to the next block only if needed*/
+ ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 4, 128
jmp _loop3
_loop3:
@@ -641,19 +653,12 @@ _loop3:
avx2_zeroupper
- lea K_XMM_AR(%rip), K_BASE
-
+ /* Setup initial values */
mov CTX, HASH_PTR
mov BUF, BUFFER_PTR
- lea 64(BUF), BUFFER_PTR2
-
- shl $6, CNT /* mul by 64 */
- add BUF, CNT
- add $64, CNT
- mov CNT, BUFFER_END
- cmp BUFFER_END, BUFFER_PTR2
- cmovae K_BASE, BUFFER_PTR2
+ mov BUF, BUFFER_PTR2
+ mov CNT, BLOCKS_CTR
xmm_mov BSWAP_SHUFB_CTL(%rip), YMM_SHUFB_BSWAP
diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c
index 7de207a11014..dd14616b7739 100644
--- a/arch/x86/crypto/sha1_ssse3_glue.c
+++ b/arch/x86/crypto/sha1_ssse3_glue.c
@@ -201,7 +201,7 @@ asmlinkage void sha1_transform_avx2(u32 *digest, const char *data,
static bool avx2_usable(void)
{
- if (false && avx_usable() && boot_cpu_has(X86_FEATURE_AVX2)
+ if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2)
&& boot_cpu_has(X86_FEATURE_BMI1)
&& boot_cpu_has(X86_FEATURE_BMI2))
return true;
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index a55697d19824..cc0f2f5da19b 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1190,6 +1190,8 @@ ENTRY(nmi)
* other IST entries.
*/
+ ASM_CLAC
+
/* Use %rdx as our temp variable throughout */
pushq %rdx
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 07cf288b692e..bcd3d6199464 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -247,11 +247,11 @@ extern int force_personality32;
/*
* This is the base location for PIE (ET_DYN with INTERP) loads. On
- * 64-bit, this is raised to 4GB to leave the entire 32-bit address
+ * 64-bit, this is above 4GB to leave the entire 32-bit address
* space open for things that want to use the area for 32-bit pointers.
*/
#define ELF_ET_DYN_BASE (mmap_is_ia32() ? 0x000400000UL : \
- 0x100000000UL)
+ (TASK_SIZE / 3 * 2))
/* This yields a mask that user programs can use to figure out what
instruction set this CPU supports. This could be done in user space,
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index 8900400230c6..2cdae69d7e0b 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -153,7 +153,7 @@ static void __intel_pmu_lbr_enable(bool pmi)
*/
if (cpuc->lbr_sel)
lbr_select = cpuc->lbr_sel->config;
- if (!pmi)
+ if (!pmi && cpuc->lbr_sel)
wrmsrl(MSR_LBR_SELECT, lbr_select);
rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
@@ -432,8 +432,10 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
int out = 0;
int num = x86_pmu.lbr_nr;
- if (cpuc->lbr_sel->config & LBR_CALL_STACK)
- num = tos;
+ if (cpuc->lbr_sel) {
+ if (cpuc->lbr_sel->config & LBR_CALL_STACK)
+ num = tos;
+ }
for (i = 0; i < num; i++) {
unsigned long lbr_idx = (tos - i) & mask;