summaryrefslogtreecommitdiff
path: root/arch/mips/math-emu
diff options
context:
space:
mode:
Diffstat (limited to 'arch/mips/math-emu')
-rw-r--r--arch/mips/math-emu/Makefile6
-rw-r--r--arch/mips/math-emu/cp1emu.c389
-rw-r--r--arch/mips/math-emu/dp_add.c3
-rw-r--r--arch/mips/math-emu/dp_div.c1
-rw-r--r--arch/mips/math-emu/dp_fmax.c2
-rw-r--r--arch/mips/math-emu/dp_fmin.c2
-rw-r--r--arch/mips/math-emu/dp_maddf.c280
-rw-r--r--arch/mips/math-emu/dp_msubf.c269
-rw-r--r--arch/mips/math-emu/dp_mul.c16
-rw-r--r--arch/mips/math-emu/dp_rint.c89
-rw-r--r--arch/mips/math-emu/dp_simple.c38
-rw-r--r--arch/mips/math-emu/dp_sqrt.c12
-rw-r--r--arch/mips/math-emu/dp_sub.c2
-rw-r--r--arch/mips/math-emu/dp_tint.c9
-rw-r--r--arch/mips/math-emu/dp_tlong.c9
-rw-r--r--arch/mips/math-emu/dsemul.c367
-rw-r--r--arch/mips/math-emu/ieee754.c6
-rw-r--r--arch/mips/math-emu/ieee754.h59
-rw-r--r--arch/mips/math-emu/ieee754dp.c21
-rw-r--r--arch/mips/math-emu/ieee754dp.h4
-rw-r--r--arch/mips/math-emu/ieee754int.h30
-rw-r--r--arch/mips/math-emu/ieee754sp.c28
-rw-r--r--arch/mips/math-emu/ieee754sp.h23
-rw-r--r--arch/mips/math-emu/me-debugfs.c318
-rw-r--r--arch/mips/math-emu/sp_add.c9
-rw-r--r--arch/mips/math-emu/sp_div.c5
-rw-r--r--arch/mips/math-emu/sp_fdp.c12
-rw-r--r--arch/mips/math-emu/sp_fint.c2
-rw-r--r--arch/mips/math-emu/sp_fmax.c2
-rw-r--r--arch/mips/math-emu/sp_fmin.c2
-rw-r--r--arch/mips/math-emu/sp_maddf.c256
-rw-r--r--arch/mips/math-emu/sp_msubf.c258
-rw-r--r--arch/mips/math-emu/sp_mul.c11
-rw-r--r--arch/mips/math-emu/sp_rint.c90
-rw-r--r--arch/mips/math-emu/sp_simple.c38
-rw-r--r--arch/mips/math-emu/sp_sqrt.c3
-rw-r--r--arch/mips/math-emu/sp_sub.c7
-rw-r--r--arch/mips/math-emu/sp_tint.c9
-rw-r--r--arch/mips/math-emu/sp_tlong.c10
39 files changed, 1656 insertions, 1041 deletions
diff --git a/arch/mips/math-emu/Makefile b/arch/mips/math-emu/Makefile
index a19641d3ac23..e9f10b88b695 100644
--- a/arch/mips/math-emu/Makefile
+++ b/arch/mips/math-emu/Makefile
@@ -4,9 +4,11 @@
obj-y += cp1emu.o ieee754dp.o ieee754sp.o ieee754.o \
dp_div.o dp_mul.o dp_sub.o dp_add.o dp_fsp.o dp_cmp.o dp_simple.o \
- dp_tint.o dp_fint.o dp_maddf.o dp_msubf.o dp_2008class.o dp_fmin.o dp_fmax.o \
+ dp_tint.o dp_fint.o dp_rint.o dp_maddf.o dp_2008class.o dp_fmin.o \
+ dp_fmax.o \
sp_div.o sp_mul.o sp_sub.o sp_add.o sp_fdp.o sp_cmp.o sp_simple.o \
- sp_tint.o sp_fint.o sp_maddf.o sp_msubf.o sp_2008class.o sp_fmin.o sp_fmax.o \
+ sp_tint.o sp_fint.o sp_rint.o sp_maddf.o sp_2008class.o sp_fmin.o \
+ sp_fmax.o \
dsemul.o
lib-y += ieee754d.o \
diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
index 89d05de8040a..ebb5e3bfe12a 100644
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c
@@ -434,12 +434,14 @@ static int microMIPS32_to_MIPS32(union mips_instruction *insn_ptr)
* a single subroutine should be used across both
* modules.
*/
-static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
- unsigned long *contpc)
+int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
+ unsigned long *contpc)
{
union mips_instruction insn = (union mips_instruction)dec_insn.insn;
unsigned int fcr31;
unsigned int bit = 0;
+ unsigned int bit0;
+ union fpureg *fpr;
switch (insn.i_format.opcode) {
case spec_op:
@@ -450,7 +452,7 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
regs->cp0_epc + dec_insn.pc_inc +
dec_insn.next_pc_inc;
}
- /* Fall through */
+ /* fall through */
case jr_op:
/* For R6, JR already emulated in jalr_op */
if (NO_R6EMU && insn.r_format.func == jr_op)
@@ -470,10 +472,11 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
regs->regs[31] = regs->cp0_epc +
dec_insn.pc_inc +
dec_insn.next_pc_inc;
- /* Fall through */
+ /* fall through */
case bltzl_op:
if (NO_R6EMU)
break;
+ /* fall through */
case bltz_op:
if ((long)regs->regs[insn.i_format.rs] < 0)
*contpc = regs->cp0_epc +
@@ -493,10 +496,11 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
regs->regs[31] = regs->cp0_epc +
dec_insn.pc_inc +
dec_insn.next_pc_inc;
- /* Fall through */
+ /* fall through */
case bgezl_op:
if (NO_R6EMU)
break;
+ /* fall through */
case bgez_op:
if ((long)regs->regs[insn.i_format.rs] >= 0)
*contpc = regs->cp0_epc +
@@ -511,11 +515,12 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
break;
case jalx_op:
set_isa16_mode(bit);
+ /* fall through */
case jal_op:
regs->regs[31] = regs->cp0_epc +
dec_insn.pc_inc +
dec_insn.next_pc_inc;
- /* Fall through */
+ /* fall through */
case j_op:
*contpc = regs->cp0_epc + dec_insn.pc_inc;
*contpc >>= 28;
@@ -527,6 +532,7 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
case beql_op:
if (NO_R6EMU)
break;
+ /* fall through */
case beq_op:
if (regs->regs[insn.i_format.rs] ==
regs->regs[insn.i_format.rt])
@@ -541,6 +547,7 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
case bnel_op:
if (NO_R6EMU)
break;
+ /* fall through */
case bne_op:
if (regs->regs[insn.i_format.rs] !=
regs->regs[insn.i_format.rt])
@@ -555,6 +562,7 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
case blezl_op:
if (!insn.i_format.rt && NO_R6EMU)
break;
+ /* fall through */
case blez_op:
/*
@@ -592,6 +600,7 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
case bgtzl_op:
if (!insn.i_format.rt && NO_R6EMU)
break;
+ /* fall through */
case bgtz_op:
/*
* Compact branches for R6 for the
@@ -627,8 +636,8 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
dec_insn.pc_inc +
dec_insn.next_pc_inc;
return 1;
- case cbcond0_op:
- case cbcond1_op:
+ case pop10_op:
+ case pop30_op:
if (!cpu_has_mips_r6)
break;
if (insn.i_format.rt && !insn.i_format.rs)
@@ -683,14 +692,14 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
dec_insn.next_pc_inc;
return 1;
- case beqzcjic_op:
+ case pop66_op:
if (!cpu_has_mips_r6)
break;
*contpc = regs->cp0_epc + dec_insn.pc_inc +
dec_insn.next_pc_inc;
return 1;
- case bnezcjialc_op:
+ case pop76_op:
if (!cpu_has_mips_r6)
break;
if (!insn.i_format.rs)
@@ -707,14 +716,14 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
((insn.i_format.rs == bc1eqz_op) ||
(insn.i_format.rs == bc1nez_op))) {
bit = 0;
+ fpr = &current->thread.fpu.fpr[insn.i_format.rt];
+ bit0 = get_fpr32(fpr, 0) & 0x1;
switch (insn.i_format.rs) {
case bc1eqz_op:
- if (get_fpr32(&current->thread.fpu.fpr[insn.i_format.rt], 0) & 0x1)
- bit = 1;
+ bit = bit0 == 0;
break;
case bc1nez_op:
- if (!(get_fpr32(&current->thread.fpu.fpr[insn.i_format.rt], 0) & 0x1))
- bit = 1;
+ bit = bit0 != 0;
break;
}
if (bit)
@@ -728,7 +737,8 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
return 1;
}
- /* R2/R6 compatible cop1 instruction. Fall through */
+ /* R2/R6 compatible cop1 instruction */
+ /* fall through */
case cop2_op:
case cop1x_op:
if (insn.i_format.rs == bc_op) {
@@ -809,7 +819,7 @@ do { \
#define SITOREG(si, x) \
do { \
if (cop1_64bit(xcp) && !hybrid_fprs()) { \
- unsigned i; \
+ unsigned int i; \
set_fpr32(&ctx->fpr[x], 0, si); \
for (i = 1; i < ARRAY_SIZE(ctx->fpr[x].val32); i++) \
set_fpr32(&ctx->fpr[x], i, 0); \
@@ -822,19 +832,19 @@ do { \
#define SITOHREG(si, x) \
do { \
- unsigned i; \
+ unsigned int i; \
set_fpr32(&ctx->fpr[x], 1, si); \
for (i = 2; i < ARRAY_SIZE(ctx->fpr[x].val32); i++) \
set_fpr32(&ctx->fpr[x], i, 0); \
} while (0)
#define DIFROMREG(di, x) \
- ((di) = get_fpr64(&ctx->fpr[(x) & ~(cop1_64bit(xcp) == 0)], 0))
+ ((di) = get_fpr64(&ctx->fpr[(x) & ~(cop1_64bit(xcp) ^ 1)], 0))
#define DITOREG(di, x) \
do { \
- unsigned fpr, i; \
- fpr = (x) & ~(cop1_64bit(xcp) == 0); \
+ unsigned int fpr, i; \
+ fpr = (x) & ~(cop1_64bit(xcp) ^ 1); \
set_fpr64(&ctx->fpr[fpr], 0, di); \
for (i = 1; i < ARRAY_SIZE(ctx->fpr[x].val64); i++) \
set_fpr64(&ctx->fpr[fpr], i, 0); \
@@ -975,9 +985,10 @@ static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
struct mm_decoded_insn dec_insn, void *__user *fault_addr)
{
unsigned long contpc = xcp->cp0_epc + dec_insn.pc_inc;
- unsigned int cond, cbit;
+ unsigned int cond, cbit, bit0;
mips_instruction ir;
int likely, pc_inc;
+ union fpureg *fpr;
u32 __user *wva;
u64 __user *dva;
u32 wval;
@@ -1188,15 +1199,18 @@ emul:
if (!cpu_has_mips_r6 || delay_slot(xcp))
return SIGILL;
- cond = likely = 0;
+ likely = 0;
+ cond = 0;
+ fpr = &current->thread.fpu.fpr[MIPSInst_RT(ir)];
+ bit0 = get_fpr32(fpr, 0) & 0x1;
switch (MIPSInst_RS(ir)) {
case bc1eqz_op:
- if (get_fpr32(&current->thread.fpu.fpr[MIPSInst_RT(ir)], 0) & 0x1)
- cond = 1;
+ MIPS_FPU_EMU_INC_STATS(bc1eqz);
+ cond = bit0 == 0;
break;
case bc1nez_op:
- if (!(get_fpr32(&current->thread.fpu.fpr[MIPSInst_RT(ir)], 0) & 0x1))
- cond = 1;
+ MIPS_FPU_EMU_INC_STATS(bc1nez);
+ cond = bit0 != 0;
break;
}
goto branch_common;
@@ -1216,18 +1230,19 @@ emul:
case bcfl_op:
if (cpu_has_mips_2_3_4_5_r)
likely = 1;
- /* Fall through */
+ /* fall through */
case bcf_op:
cond = !cond;
break;
case bctl_op:
if (cpu_has_mips_2_3_4_5_r)
likely = 1;
- /* Fall through */
+ /* fall through */
case bct_op:
break;
}
branch_common:
+ MIPS_FPU_EMU_INC_STATS(branches);
set_delay_slot(xcp);
if (cond) {
/*
@@ -1267,7 +1282,9 @@ branch_common:
* instruction in the dslot.
*/
sig = mips_dsemul(xcp, ir,
- contpc);
+ bcpc, contpc);
+ if (sig < 0)
+ break;
if (sig)
xcp->cp0_epc = bcpc;
/*
@@ -1320,7 +1337,9 @@ branch_common:
* Single step the non-cp1
* instruction in the dslot
*/
- sig = mips_dsemul(xcp, ir, contpc);
+ sig = mips_dsemul(xcp, ir, bcpc, contpc);
+ if (sig < 0)
+ break;
if (sig)
xcp->cp0_epc = bcpc;
/* SIGILL forces out of the emulation loop. */
@@ -1344,7 +1363,8 @@ branch_common:
return SIGILL;
/* a real fpu computation instruction */
- if ((sig = fpu_emu(xcp, ctx, ir)))
+ sig = fpu_emu(xcp, ctx, ir);
+ if (sig)
return sig;
}
break;
@@ -1457,7 +1477,7 @@ DEF3OP(nmsub, dp, ieee754dp_mul, ieee754dp_sub, ieee754dp_neg);
static int fpux_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
mips_instruction ir, void *__user *fault_addr)
{
- unsigned rcsr = 0; /* resulting csr */
+ unsigned int rcsr = 0; /* resulting csr */
MIPS_FPU_EMU_INC_STATS(cp1xops);
@@ -1653,10 +1673,10 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
mips_instruction ir)
{
int rfmt; /* resulting format */
- unsigned rcsr = 0; /* resulting csr */
+ unsigned int rcsr = 0; /* resulting csr */
unsigned int oldrm;
unsigned int cbit;
- unsigned cond;
+ unsigned int cond;
union {
union ieee754dp d;
union ieee754sp s;
@@ -1672,20 +1692,24 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
union ieee754sp(*b) (union ieee754sp, union ieee754sp);
union ieee754sp(*u) (union ieee754sp);
} handler;
- union ieee754sp fs, ft;
+ union ieee754sp fd, fs, ft;
switch (MIPSInst_FUNC(ir)) {
/* binary ops */
case fadd_op:
+ MIPS_FPU_EMU_INC_STATS(add_s);
handler.b = ieee754sp_add;
goto scopbop;
case fsub_op:
+ MIPS_FPU_EMU_INC_STATS(sub_s);
handler.b = ieee754sp_sub;
goto scopbop;
case fmul_op:
+ MIPS_FPU_EMU_INC_STATS(mul_s);
handler.b = ieee754sp_mul;
goto scopbop;
case fdiv_op:
+ MIPS_FPU_EMU_INC_STATS(div_s);
handler.b = ieee754sp_div;
goto scopbop;
@@ -1694,6 +1718,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
if (!cpu_has_mips_2_3_4_5_r)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(sqrt_s);
handler.u = ieee754sp_sqrt;
goto scopuop;
@@ -1706,6 +1731,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
if (!cpu_has_mips_4_5_64_r2_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(rsqrt_s);
handler.u = fpemu_sp_rsqrt;
goto scopuop;
@@ -1713,6 +1739,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
if (!cpu_has_mips_4_5_64_r2_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(recip_s);
handler.u = fpemu_sp_recip;
goto scopuop;
@@ -1749,6 +1776,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
if (!cpu_has_mips_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(seleqz_s);
SPFROMREG(rv.s, MIPSInst_FT(ir));
if (rv.w & 0x1)
rv.w = 0;
@@ -1760,6 +1788,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
if (!cpu_has_mips_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(selnez_s);
SPFROMREG(rv.s, MIPSInst_FT(ir));
if (rv.w & 0x1)
SPFROMREG(rv.s, MIPSInst_FS(ir));
@@ -1773,6 +1802,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
if (!cpu_has_mips_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(maddf_s);
SPFROMREG(ft, MIPSInst_FT(ir));
SPFROMREG(fs, MIPSInst_FS(ir));
SPFROMREG(fd, MIPSInst_FD(ir));
@@ -1786,6 +1816,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
if (!cpu_has_mips_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(msubf_s);
SPFROMREG(ft, MIPSInst_FT(ir));
SPFROMREG(fs, MIPSInst_FS(ir));
SPFROMREG(fd, MIPSInst_FD(ir));
@@ -1799,9 +1830,9 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
if (!cpu_has_mips_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(rint_s);
SPFROMREG(fs, MIPSInst_FS(ir));
- rv.l = ieee754sp_tlong(fs);
- rv.s = ieee754sp_flong(rv.l);
+ rv.s = ieee754sp_rint(fs);
goto copcsr;
}
@@ -1811,6 +1842,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
if (!cpu_has_mips_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(class_s);
SPFROMREG(fs, MIPSInst_FS(ir));
rv.w = ieee754sp_2008class(fs);
rfmt = w_fmt;
@@ -1823,6 +1855,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
if (!cpu_has_mips_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(min_s);
SPFROMREG(ft, MIPSInst_FT(ir));
SPFROMREG(fs, MIPSInst_FS(ir));
rv.s = ieee754sp_fmin(fs, ft);
@@ -1835,6 +1868,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
if (!cpu_has_mips_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(mina_s);
SPFROMREG(ft, MIPSInst_FT(ir));
SPFROMREG(fs, MIPSInst_FS(ir));
rv.s = ieee754sp_fmina(fs, ft);
@@ -1847,6 +1881,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
if (!cpu_has_mips_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(max_s);
SPFROMREG(ft, MIPSInst_FT(ir));
SPFROMREG(fs, MIPSInst_FS(ir));
rv.s = ieee754sp_fmax(fs, ft);
@@ -1859,6 +1894,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
if (!cpu_has_mips_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(maxa_s);
SPFROMREG(ft, MIPSInst_FT(ir));
SPFROMREG(fs, MIPSInst_FS(ir));
rv.s = ieee754sp_fmaxa(fs, ft);
@@ -1866,15 +1902,18 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
}
case fabs_op:
+ MIPS_FPU_EMU_INC_STATS(abs_s);
handler.u = ieee754sp_abs;
goto scopuop;
case fneg_op:
+ MIPS_FPU_EMU_INC_STATS(neg_s);
handler.u = ieee754sp_neg;
goto scopuop;
case fmov_op:
/* an easy one */
+ MIPS_FPU_EMU_INC_STATS(mov_s);
SPFROMREG(rv.s, MIPSInst_FS(ir));
goto copcsr;
@@ -1917,12 +1956,14 @@ copcsr:
return SIGILL; /* not defined */
case fcvtd_op:
+ MIPS_FPU_EMU_INC_STATS(cvt_d_s);
SPFROMREG(fs, MIPSInst_FS(ir));
rv.d = ieee754dp_fsp(fs);
rfmt = d_fmt;
goto copcsr;
case fcvtw_op:
+ MIPS_FPU_EMU_INC_STATS(cvt_w_s);
SPFROMREG(fs, MIPSInst_FS(ir));
rv.w = ieee754sp_tint(fs);
rfmt = w_fmt;
@@ -1935,6 +1976,15 @@ copcsr:
if (!cpu_has_mips_2_3_4_5_r)
return SIGILL;
+ if (MIPSInst_FUNC(ir) == fceil_op)
+ MIPS_FPU_EMU_INC_STATS(ceil_w_s);
+ if (MIPSInst_FUNC(ir) == ffloor_op)
+ MIPS_FPU_EMU_INC_STATS(floor_w_s);
+ if (MIPSInst_FUNC(ir) == fround_op)
+ MIPS_FPU_EMU_INC_STATS(round_w_s);
+ if (MIPSInst_FUNC(ir) == ftrunc_op)
+ MIPS_FPU_EMU_INC_STATS(trunc_w_s);
+
oldrm = ieee754_csr.rm;
SPFROMREG(fs, MIPSInst_FS(ir));
ieee754_csr.rm = MIPSInst_FUNC(ir);
@@ -1943,10 +1993,23 @@ copcsr:
rfmt = w_fmt;
goto copcsr;
+ case fsel_op:
+ if (!cpu_has_mips_r6)
+ return SIGILL;
+
+ MIPS_FPU_EMU_INC_STATS(sel_s);
+ SPFROMREG(fd, MIPSInst_FD(ir));
+ if (fd.bits & 0x1)
+ SPFROMREG(rv.s, MIPSInst_FT(ir));
+ else
+ SPFROMREG(rv.s, MIPSInst_FS(ir));
+ break;
+
case fcvtl_op:
if (!cpu_has_mips_3_4_5_64_r2_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(cvt_l_s);
SPFROMREG(fs, MIPSInst_FS(ir));
rv.l = ieee754sp_tlong(fs);
rfmt = l_fmt;
@@ -1959,6 +2022,15 @@ copcsr:
if (!cpu_has_mips_3_4_5_64_r2_r6)
return SIGILL;
+ if (MIPSInst_FUNC(ir) == fceill_op)
+ MIPS_FPU_EMU_INC_STATS(ceil_l_s);
+ if (MIPSInst_FUNC(ir) == ffloorl_op)
+ MIPS_FPU_EMU_INC_STATS(floor_l_s);
+ if (MIPSInst_FUNC(ir) == froundl_op)
+ MIPS_FPU_EMU_INC_STATS(round_l_s);
+ if (MIPSInst_FUNC(ir) == ftruncl_op)
+ MIPS_FPU_EMU_INC_STATS(trunc_l_s);
+
oldrm = ieee754_csr.rm;
SPFROMREG(fs, MIPSInst_FS(ir));
ieee754_csr.rm = MIPSInst_FUNC(ir);
@@ -1969,9 +2041,10 @@ copcsr:
default:
if (!NO_R6EMU && MIPSInst_FUNC(ir) >= fcmp_op) {
- unsigned cmpop = MIPSInst_FUNC(ir) - fcmp_op;
+ unsigned int cmpop;
union ieee754sp fs, ft;
+ cmpop = MIPSInst_FUNC(ir) - fcmp_op;
SPFROMREG(fs, MIPSInst_FS(ir));
SPFROMREG(ft, MIPSInst_FT(ir));
rv.w = ieee754sp_cmp(fs, ft,
@@ -1991,7 +2064,7 @@ copcsr:
}
case d_fmt: {
- union ieee754dp fs, ft;
+ union ieee754dp fd, fs, ft;
union {
union ieee754dp(*b) (union ieee754dp, union ieee754dp);
union ieee754dp(*u) (union ieee754dp);
@@ -2000,15 +2073,19 @@ copcsr:
switch (MIPSInst_FUNC(ir)) {
/* binary ops */
case fadd_op:
+ MIPS_FPU_EMU_INC_STATS(add_d);
handler.b = ieee754dp_add;
goto dcopbop;
case fsub_op:
+ MIPS_FPU_EMU_INC_STATS(sub_d);
handler.b = ieee754dp_sub;
goto dcopbop;
case fmul_op:
+ MIPS_FPU_EMU_INC_STATS(mul_d);
handler.b = ieee754dp_mul;
goto dcopbop;
case fdiv_op:
+ MIPS_FPU_EMU_INC_STATS(div_d);
handler.b = ieee754dp_div;
goto dcopbop;
@@ -2017,6 +2094,7 @@ copcsr:
if (!cpu_has_mips_2_3_4_5_r)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(sqrt_d);
handler.u = ieee754dp_sqrt;
goto dcopuop;
/*
@@ -2028,12 +2106,14 @@ copcsr:
if (!cpu_has_mips_4_5_64_r2_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(rsqrt_d);
handler.u = fpemu_dp_rsqrt;
goto dcopuop;
case frecip_op:
if (!cpu_has_mips_4_5_64_r2_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(recip_d);
handler.u = fpemu_dp_recip;
goto dcopuop;
case fmovc_op:
@@ -2067,6 +2147,7 @@ copcsr:
if (!cpu_has_mips_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(seleqz_d);
DPFROMREG(rv.d, MIPSInst_FT(ir));
if (rv.l & 0x1)
rv.l = 0;
@@ -2078,6 +2159,7 @@ copcsr:
if (!cpu_has_mips_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(selnez_d);
DPFROMREG(rv.d, MIPSInst_FT(ir));
if (rv.l & 0x1)
DPFROMREG(rv.d, MIPSInst_FS(ir));
@@ -2091,6 +2173,7 @@ copcsr:
if (!cpu_has_mips_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(maddf_d);
DPFROMREG(ft, MIPSInst_FT(ir));
DPFROMREG(fs, MIPSInst_FS(ir));
DPFROMREG(fd, MIPSInst_FD(ir));
@@ -2104,6 +2187,7 @@ copcsr:
if (!cpu_has_mips_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(msubf_d);
DPFROMREG(ft, MIPSInst_FT(ir));
DPFROMREG(fs, MIPSInst_FS(ir));
DPFROMREG(fd, MIPSInst_FD(ir));
@@ -2117,9 +2201,9 @@ copcsr:
if (!cpu_has_mips_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(rint_d);
DPFROMREG(fs, MIPSInst_FS(ir));
- rv.l = ieee754dp_tlong(fs);
- rv.d = ieee754dp_flong(rv.l);
+ rv.d = ieee754dp_rint(fs);
goto copcsr;
}
@@ -2129,9 +2213,10 @@ copcsr:
if (!cpu_has_mips_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(class_d);
DPFROMREG(fs, MIPSInst_FS(ir));
- rv.w = ieee754dp_2008class(fs);
- rfmt = w_fmt;
+ rv.l = ieee754dp_2008class(fs);
+ rfmt = l_fmt;
goto copcsr;
}
@@ -2141,6 +2226,7 @@ copcsr:
if (!cpu_has_mips_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(min_d);
DPFROMREG(ft, MIPSInst_FT(ir));
DPFROMREG(fs, MIPSInst_FS(ir));
rv.d = ieee754dp_fmin(fs, ft);
@@ -2153,6 +2239,7 @@ copcsr:
if (!cpu_has_mips_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(mina_d);
DPFROMREG(ft, MIPSInst_FT(ir));
DPFROMREG(fs, MIPSInst_FS(ir));
rv.d = ieee754dp_fmina(fs, ft);
@@ -2165,6 +2252,7 @@ copcsr:
if (!cpu_has_mips_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(max_d);
DPFROMREG(ft, MIPSInst_FT(ir));
DPFROMREG(fs, MIPSInst_FS(ir));
rv.d = ieee754dp_fmax(fs, ft);
@@ -2177,6 +2265,7 @@ copcsr:
if (!cpu_has_mips_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(maxa_d);
DPFROMREG(ft, MIPSInst_FT(ir));
DPFROMREG(fs, MIPSInst_FS(ir));
rv.d = ieee754dp_fmaxa(fs, ft);
@@ -2184,15 +2273,18 @@ copcsr:
}
case fabs_op:
+ MIPS_FPU_EMU_INC_STATS(abs_d);
handler.u = ieee754dp_abs;
goto dcopuop;
case fneg_op:
+ MIPS_FPU_EMU_INC_STATS(neg_d);
handler.u = ieee754dp_neg;
goto dcopuop;
case fmov_op:
/* an easy one */
+ MIPS_FPU_EMU_INC_STATS(mov_d);
DPFROMREG(rv.d, MIPSInst_FS(ir));
goto copcsr;
@@ -2212,6 +2304,7 @@ dcopuop:
* unary conv ops
*/
case fcvts_op:
+ MIPS_FPU_EMU_INC_STATS(cvt_s_d);
DPFROMREG(fs, MIPSInst_FS(ir));
rv.s = ieee754sp_fdp(fs);
rfmt = s_fmt;
@@ -2221,6 +2314,7 @@ dcopuop:
return SIGILL; /* not defined */
case fcvtw_op:
+ MIPS_FPU_EMU_INC_STATS(cvt_w_d);
DPFROMREG(fs, MIPSInst_FS(ir));
rv.w = ieee754dp_tint(fs); /* wrong */
rfmt = w_fmt;
@@ -2233,6 +2327,15 @@ dcopuop:
if (!cpu_has_mips_2_3_4_5_r)
return SIGILL;
+ if (MIPSInst_FUNC(ir) == fceil_op)
+ MIPS_FPU_EMU_INC_STATS(ceil_w_d);
+ if (MIPSInst_FUNC(ir) == ffloor_op)
+ MIPS_FPU_EMU_INC_STATS(floor_w_d);
+ if (MIPSInst_FUNC(ir) == fround_op)
+ MIPS_FPU_EMU_INC_STATS(round_w_d);
+ if (MIPSInst_FUNC(ir) == ftrunc_op)
+ MIPS_FPU_EMU_INC_STATS(trunc_w_d);
+
oldrm = ieee754_csr.rm;
DPFROMREG(fs, MIPSInst_FS(ir));
ieee754_csr.rm = MIPSInst_FUNC(ir);
@@ -2241,10 +2344,23 @@ dcopuop:
rfmt = w_fmt;
goto copcsr;
+ case fsel_op:
+ if (!cpu_has_mips_r6)
+ return SIGILL;
+
+ MIPS_FPU_EMU_INC_STATS(sel_d);
+ DPFROMREG(fd, MIPSInst_FD(ir));
+ if (fd.bits & 0x1)
+ DPFROMREG(rv.d, MIPSInst_FT(ir));
+ else
+ DPFROMREG(rv.d, MIPSInst_FS(ir));
+ break;
+
case fcvtl_op:
if (!cpu_has_mips_3_4_5_64_r2_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(cvt_l_d);
DPFROMREG(fs, MIPSInst_FS(ir));
rv.l = ieee754dp_tlong(fs);
rfmt = l_fmt;
@@ -2257,6 +2373,15 @@ dcopuop:
if (!cpu_has_mips_3_4_5_64_r2_r6)
return SIGILL;
+ if (MIPSInst_FUNC(ir) == fceill_op)
+ MIPS_FPU_EMU_INC_STATS(ceil_l_d);
+ if (MIPSInst_FUNC(ir) == ffloorl_op)
+ MIPS_FPU_EMU_INC_STATS(floor_l_d);
+ if (MIPSInst_FUNC(ir) == froundl_op)
+ MIPS_FPU_EMU_INC_STATS(round_l_d);
+ if (MIPSInst_FUNC(ir) == ftruncl_op)
+ MIPS_FPU_EMU_INC_STATS(trunc_l_d);
+
oldrm = ieee754_csr.rm;
DPFROMREG(fs, MIPSInst_FS(ir));
ieee754_csr.rm = MIPSInst_FUNC(ir);
@@ -2267,9 +2392,10 @@ dcopuop:
default:
if (!NO_R6EMU && MIPSInst_FUNC(ir) >= fcmp_op) {
- unsigned cmpop = MIPSInst_FUNC(ir) - fcmp_op;
+ unsigned int cmpop;
union ieee754dp fs, ft;
+ cmpop = MIPSInst_FUNC(ir) - fcmp_op;
DPFROMREG(fs, MIPSInst_FS(ir));
DPFROMREG(ft, MIPSInst_FT(ir));
rv.w = ieee754dp_cmp(fs, ft,
@@ -2298,12 +2424,14 @@ dcopuop:
switch (MIPSInst_FUNC(ir)) {
case fcvts_op:
/* convert word to single precision real */
+ MIPS_FPU_EMU_INC_STATS(cvt_s_w);
SPFROMREG(fs, MIPSInst_FS(ir));
rv.s = ieee754sp_fint(fs.bits);
rfmt = s_fmt;
goto copcsr;
case fcvtd_op:
/* convert word to double precision real */
+ MIPS_FPU_EMU_INC_STATS(cvt_d_w);
SPFROMREG(fs, MIPSInst_FS(ir));
rv.d = ieee754dp_fint(fs.bits);
rfmt = d_fmt;
@@ -2323,6 +2451,90 @@ dcopuop:
(MIPSInst_FUNC(ir) & 0x20))
return SIGILL;
+ if (!sig) {
+ if (!(MIPSInst_FUNC(ir) & PREDICATE_BIT)) {
+ switch (cmpop) {
+ case 0:
+ MIPS_FPU_EMU_INC_STATS(cmp_af_s);
+ break;
+ case 1:
+ MIPS_FPU_EMU_INC_STATS(cmp_un_s);
+ break;
+ case 2:
+ MIPS_FPU_EMU_INC_STATS(cmp_eq_s);
+ break;
+ case 3:
+ MIPS_FPU_EMU_INC_STATS(cmp_ueq_s);
+ break;
+ case 4:
+ MIPS_FPU_EMU_INC_STATS(cmp_lt_s);
+ break;
+ case 5:
+ MIPS_FPU_EMU_INC_STATS(cmp_ult_s);
+ break;
+ case 6:
+ MIPS_FPU_EMU_INC_STATS(cmp_le_s);
+ break;
+ case 7:
+ MIPS_FPU_EMU_INC_STATS(cmp_ule_s);
+ break;
+ }
+ } else {
+ switch (cmpop) {
+ case 1:
+ MIPS_FPU_EMU_INC_STATS(cmp_or_s);
+ break;
+ case 2:
+ MIPS_FPU_EMU_INC_STATS(cmp_une_s);
+ break;
+ case 3:
+ MIPS_FPU_EMU_INC_STATS(cmp_ne_s);
+ break;
+ }
+ }
+ } else {
+ if (!(MIPSInst_FUNC(ir) & PREDICATE_BIT)) {
+ switch (cmpop) {
+ case 0:
+ MIPS_FPU_EMU_INC_STATS(cmp_saf_s);
+ break;
+ case 1:
+ MIPS_FPU_EMU_INC_STATS(cmp_sun_s);
+ break;
+ case 2:
+ MIPS_FPU_EMU_INC_STATS(cmp_seq_s);
+ break;
+ case 3:
+ MIPS_FPU_EMU_INC_STATS(cmp_sueq_s);
+ break;
+ case 4:
+ MIPS_FPU_EMU_INC_STATS(cmp_slt_s);
+ break;
+ case 5:
+ MIPS_FPU_EMU_INC_STATS(cmp_sult_s);
+ break;
+ case 6:
+ MIPS_FPU_EMU_INC_STATS(cmp_sle_s);
+ break;
+ case 7:
+ MIPS_FPU_EMU_INC_STATS(cmp_sule_s);
+ break;
+ }
+ } else {
+ switch (cmpop) {
+ case 1:
+ MIPS_FPU_EMU_INC_STATS(cmp_sor_s);
+ break;
+ case 2:
+ MIPS_FPU_EMU_INC_STATS(cmp_sune_s);
+ break;
+ case 3:
+ MIPS_FPU_EMU_INC_STATS(cmp_sne_s);
+ break;
+ }
+ }
+ }
+
/* fmt is w_fmt for single precision so fix it */
rfmt = s_fmt;
/* default to false */
@@ -2366,6 +2578,7 @@ dcopuop:
break;
}
}
+ break;
}
case l_fmt:
@@ -2378,11 +2591,13 @@ dcopuop:
switch (MIPSInst_FUNC(ir)) {
case fcvts_op:
/* convert long to single precision real */
+ MIPS_FPU_EMU_INC_STATS(cvt_s_l);
rv.s = ieee754sp_flong(bits);
rfmt = s_fmt;
goto copcsr;
case fcvtd_op:
/* convert long to double precision real */
+ MIPS_FPU_EMU_INC_STATS(cvt_d_l);
rv.d = ieee754dp_flong(bits);
rfmt = d_fmt;
goto copcsr;
@@ -2396,6 +2611,90 @@ dcopuop:
(MIPSInst_FUNC(ir) & 0x20))
return SIGILL;
+ if (!sig) {
+ if (!(MIPSInst_FUNC(ir) & PREDICATE_BIT)) {
+ switch (cmpop) {
+ case 0:
+ MIPS_FPU_EMU_INC_STATS(cmp_af_d);
+ break;
+ case 1:
+ MIPS_FPU_EMU_INC_STATS(cmp_un_d);
+ break;
+ case 2:
+ MIPS_FPU_EMU_INC_STATS(cmp_eq_d);
+ break;
+ case 3:
+ MIPS_FPU_EMU_INC_STATS(cmp_ueq_d);
+ break;
+ case 4:
+ MIPS_FPU_EMU_INC_STATS(cmp_lt_d);
+ break;
+ case 5:
+ MIPS_FPU_EMU_INC_STATS(cmp_ult_d);
+ break;
+ case 6:
+ MIPS_FPU_EMU_INC_STATS(cmp_le_d);
+ break;
+ case 7:
+ MIPS_FPU_EMU_INC_STATS(cmp_ule_d);
+ break;
+ }
+ } else {
+ switch (cmpop) {
+ case 1:
+ MIPS_FPU_EMU_INC_STATS(cmp_or_d);
+ break;
+ case 2:
+ MIPS_FPU_EMU_INC_STATS(cmp_une_d);
+ break;
+ case 3:
+ MIPS_FPU_EMU_INC_STATS(cmp_ne_d);
+ break;
+ }
+ }
+ } else {
+ if (!(MIPSInst_FUNC(ir) & PREDICATE_BIT)) {
+ switch (cmpop) {
+ case 0:
+ MIPS_FPU_EMU_INC_STATS(cmp_saf_d);
+ break;
+ case 1:
+ MIPS_FPU_EMU_INC_STATS(cmp_sun_d);
+ break;
+ case 2:
+ MIPS_FPU_EMU_INC_STATS(cmp_seq_d);
+ break;
+ case 3:
+ MIPS_FPU_EMU_INC_STATS(cmp_sueq_d);
+ break;
+ case 4:
+ MIPS_FPU_EMU_INC_STATS(cmp_slt_d);
+ break;
+ case 5:
+ MIPS_FPU_EMU_INC_STATS(cmp_sult_d);
+ break;
+ case 6:
+ MIPS_FPU_EMU_INC_STATS(cmp_sle_d);
+ break;
+ case 7:
+ MIPS_FPU_EMU_INC_STATS(cmp_sule_d);
+ break;
+ }
+ } else {
+ switch (cmpop) {
+ case 1:
+ MIPS_FPU_EMU_INC_STATS(cmp_sor_d);
+ break;
+ case 2:
+ MIPS_FPU_EMU_INC_STATS(cmp_sune_d);
+ break;
+ case 3:
+ MIPS_FPU_EMU_INC_STATS(cmp_sne_d);
+ break;
+ }
+ }
+ }
+
/* fmt is l_fmt for double precision so fix it */
rfmt = d_fmt;
/* default to false */
@@ -2439,6 +2738,8 @@ dcopuop:
break;
}
}
+ break;
+
default:
return SIGILL;
}
diff --git a/arch/mips/math-emu/dp_add.c b/arch/mips/math-emu/dp_add.c
index 8954ef031f84..678de20e4cb1 100644
--- a/arch/mips/math-emu/dp_add.c
+++ b/arch/mips/math-emu/dp_add.c
@@ -104,8 +104,7 @@ union ieee754dp ieee754dp_add(union ieee754dp x, union ieee754dp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
DPDNORMX;
-
- /* FALL THROUGH */
+ /* fall through */
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
DPDNORMY;
diff --git a/arch/mips/math-emu/dp_div.c b/arch/mips/math-emu/dp_div.c
index f4746f7c5f63..3063ae3ab3b9 100644
--- a/arch/mips/math-emu/dp_div.c
+++ b/arch/mips/math-emu/dp_div.c
@@ -103,6 +103,7 @@ union ieee754dp ieee754dp_div(union ieee754dp x, union ieee754dp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
DPDNORMX;
+ /* fall through */
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
DPDNORMY;
diff --git a/arch/mips/math-emu/dp_fmax.c b/arch/mips/math-emu/dp_fmax.c
index 5bec64f2884e..d1f984b40344 100644
--- a/arch/mips/math-emu/dp_fmax.c
+++ b/arch/mips/math-emu/dp_fmax.c
@@ -96,6 +96,7 @@ union ieee754dp ieee754dp_fmax(union ieee754dp x, union ieee754dp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
DPDNORMX;
+ /* fall through */
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
DPDNORMY;
@@ -224,6 +225,7 @@ union ieee754dp ieee754dp_fmaxa(union ieee754dp x, union ieee754dp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
DPDNORMX;
+ /* fall through */
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
DPDNORMY;
diff --git a/arch/mips/math-emu/dp_fmin.c b/arch/mips/math-emu/dp_fmin.c
index a287b23818d8..f98b96135c8d 100644
--- a/arch/mips/math-emu/dp_fmin.c
+++ b/arch/mips/math-emu/dp_fmin.c
@@ -96,6 +96,7 @@ union ieee754dp ieee754dp_fmin(union ieee754dp x, union ieee754dp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
DPDNORMX;
+ /* fall through */
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
DPDNORMY;
@@ -224,6 +225,7 @@ union ieee754dp ieee754dp_fmina(union ieee754dp x, union ieee754dp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
DPDNORMX;
+ /* fall through */
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
DPDNORMY;
diff --git a/arch/mips/math-emu/dp_maddf.c b/arch/mips/math-emu/dp_maddf.c
index 119eda9fa1ea..7ea2f8222026 100644
--- a/arch/mips/math-emu/dp_maddf.c
+++ b/arch/mips/math-emu/dp_maddf.c
@@ -14,83 +14,91 @@
#include "ieee754dp.h"
-union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x,
- union ieee754dp y)
+
+/* 128 bits shift right logical with rounding. */
+static void srl128(u64 *hptr, u64 *lptr, int count)
+{
+ u64 low;
+
+ if (count >= 128) {
+ *lptr = *hptr != 0 || *lptr != 0;
+ *hptr = 0;
+ } else if (count >= 64) {
+ if (count == 64) {
+ *lptr = *hptr | (*lptr != 0);
+ } else {
+ low = *lptr;
+ *lptr = *hptr >> (count - 64);
+ *lptr |= (*hptr << (128 - count)) != 0 || low != 0;
+ }
+ *hptr = 0;
+ } else {
+ low = *lptr;
+ *lptr = low >> count | *hptr << (64 - count);
+ *lptr |= (low << (64 - count)) != 0;
+ *hptr = *hptr >> count;
+ }
+}
+
+static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
+ union ieee754dp y, enum maddf_flags flags)
{
int re;
int rs;
- u64 rm;
- unsigned lxm;
- unsigned hxm;
- unsigned lym;
- unsigned hym;
+ unsigned int lxm;
+ unsigned int hxm;
+ unsigned int lym;
+ unsigned int hym;
u64 lrm;
u64 hrm;
+ u64 lzm;
+ u64 hzm;
u64 t;
u64 at;
int s;
COMPXDP;
COMPYDP;
-
- u64 zm; int ze; int zs __maybe_unused; int zc;
+ COMPZDP;
EXPLODEXDP;
EXPLODEYDP;
- EXPLODEDP(z, zc, zs, ze, zm)
+ EXPLODEZDP;
FLUSHXDP;
FLUSHYDP;
- FLUSHDP(z, zc, zs, ze, zm);
+ FLUSHZDP;
ieee754_clearcx();
- switch (zc) {
- case IEEE754_CLASS_SNAN:
- ieee754_setcx(IEEE754_INVALID_OPERATION);
+ /*
+ * Handle the cases when at least one of x, y or z is a NaN.
+ * Order of precedence is sNaN, qNaN and z, x, y.
+ */
+ if (zc == IEEE754_CLASS_SNAN)
return ieee754dp_nanxcpt(z);
- case IEEE754_CLASS_DNORM:
- DPDNORMx(zm, ze);
- /* QNAN is handled separately below */
- }
-
- switch (CLPAIR(xc, yc)) {
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN):
- case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN):
- case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN):
- case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN):
- case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN):
- return ieee754dp_nanxcpt(y);
-
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN):
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN):
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO):
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM):
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM):
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
+ if (xc == IEEE754_CLASS_SNAN)
return ieee754dp_nanxcpt(x);
-
- case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
- case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
- case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
- case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
+ if (yc == IEEE754_CLASS_SNAN)
+ return ieee754dp_nanxcpt(y);
+ if (zc == IEEE754_CLASS_QNAN)
+ return z;
+ if (xc == IEEE754_CLASS_QNAN)
+ return x;
+ if (yc == IEEE754_CLASS_QNAN)
return y;
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF):
- return x;
+ if (zc == IEEE754_CLASS_DNORM)
+ DPDNORMZ;
+ /* ZERO z cases are handled separately below */
+ switch (CLPAIR(xc, yc)) {
/*
* Infinity handling
*/
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
- if (zc == IEEE754_CLASS_QNAN)
- return z;
ieee754_setcx(IEEE754_INVALID_OPERATION);
return ieee754dp_indef();
@@ -99,9 +107,27 @@ union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x,
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
- if (zc == IEEE754_CLASS_QNAN)
- return z;
- return ieee754dp_inf(xs ^ ys);
+ if ((zc == IEEE754_CLASS_INF) &&
+ ((!(flags & MADDF_NEGATE_PRODUCT) && (zs != (xs ^ ys))) ||
+ ((flags & MADDF_NEGATE_PRODUCT) && (zs == (xs ^ ys))))) {
+ /*
+ * Cases of addition of infinities with opposite signs
+ * or subtraction of infinities with same signs.
+ */
+ ieee754_setcx(IEEE754_INVALID_OPERATION);
+ return ieee754dp_indef();
+ }
+ /*
+ * z is here either not an infinity, or an infinity having the
+ * same sign as product (x*y) (in case of MADDF.D instruction)
+ * or product -(x*y) (in MSUBF.D case). The result must be an
+ * infinity, and its sign is determined only by the value of
+ * (flags & MADDF_NEGATE_PRODUCT) and the signs of x and y.
+ */
+ if (flags & MADDF_NEGATE_PRODUCT)
+ return ieee754dp_inf(1 ^ (xs ^ ys));
+ else
+ return ieee754dp_inf(xs ^ ys);
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
@@ -110,34 +136,45 @@ union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x,
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
if (zc == IEEE754_CLASS_INF)
return ieee754dp_inf(zs);
- /* Multiplication is 0 so just return z */
+ if (zc == IEEE754_CLASS_ZERO) {
+ /* Handle cases +0 + (-0) and similar ones. */
+ if ((!(flags & MADDF_NEGATE_PRODUCT)
+ && (zs == (xs ^ ys))) ||
+ ((flags & MADDF_NEGATE_PRODUCT)
+ && (zs != (xs ^ ys))))
+ /*
+ * Cases of addition of zeros of equal signs
+ * or subtraction of zeroes of opposite signs.
+ * The sign of the resulting zero is in any
+ * such case determined only by the sign of z.
+ */
+ return z;
+
+ return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD);
+ }
+ /* x*y is here 0, and z is not 0, so just return z */
return z;
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
DPDNORMX;
+ /* fall through */
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
- if (zc == IEEE754_CLASS_QNAN)
- return z;
- else if (zc == IEEE754_CLASS_INF)
+ if (zc == IEEE754_CLASS_INF)
return ieee754dp_inf(zs);
DPDNORMY;
break;
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
- if (zc == IEEE754_CLASS_QNAN)
- return z;
- else if (zc == IEEE754_CLASS_INF)
+ if (zc == IEEE754_CLASS_INF)
return ieee754dp_inf(zs);
DPDNORMX;
break;
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
- if (zc == IEEE754_CLASS_QNAN)
- return z;
- else if (zc == IEEE754_CLASS_INF)
+ if (zc == IEEE754_CLASS_INF)
return ieee754dp_inf(zs);
- /* fall through to real computations */
+ /* continue to real computations */
}
/* Finally get to do some computation */
@@ -154,18 +191,17 @@ union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x,
re = xe + ye;
rs = xs ^ ys;
+ if (flags & MADDF_NEGATE_PRODUCT)
+ rs ^= 1;
/* shunt to top of word */
xm <<= 64 - (DP_FBITS + 1);
ym <<= 64 - (DP_FBITS + 1);
/*
- * Multiply 32 bits xm, ym to give high 32 bits rm with stickness.
+ * Multiply 64 bits xm and ym to give 128 bits result in hrm:lrm.
*/
- /* 32 * 32 => 64 */
-#define DPXMULT(x, y) ((u64)(x) * (u64)y)
-
lxm = xm;
hxm = xm >> 32;
lym = ym;
@@ -190,76 +226,120 @@ union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x,
hrm = hrm + (t >> 32);
- rm = hrm | (lrm != 0);
-
- /*
- * Sticky shift down to normal rounding precision.
- */
- if ((s64) rm < 0) {
- rm = (rm >> (64 - (DP_FBITS + 1 + 3))) |
- ((rm << (DP_FBITS + 1 + 3)) != 0);
- re++;
- } else {
- rm = (rm >> (64 - (DP_FBITS + 1 + 3 + 1))) |
- ((rm << (DP_FBITS + 1 + 3 + 1)) != 0);
+ /* Put explicit bit at bit 126 if necessary */
+ if ((int64_t)hrm < 0) {
+ lrm = (hrm << 63) | (lrm >> 1);
+ hrm = hrm >> 1;
+ re++;
}
- assert(rm & (DP_HIDDEN_BIT << 3));
- /* And now the addition */
- assert(zm & DP_HIDDEN_BIT);
+ assert(hrm & (1 << 62));
- /*
- * Provide guard,round and stick bit space.
- */
- zm <<= 3;
+ if (zc == IEEE754_CLASS_ZERO) {
+ /*
+ * Move explicit bit from bit 126 to bit 55 since the
+ * ieee754dp_format code expects the mantissa to be
+ * 56 bits wide (53 + 3 rounding bits).
+ */
+ srl128(&hrm, &lrm, (126 - 55));
+ return ieee754dp_format(rs, re, lrm);
+ }
+
+ /* Move explicit bit from bit 52 to bit 126 */
+ lzm = 0;
+ hzm = zm << 10;
+ assert(hzm & (1 << 62));
+ /* Make the exponents the same */
if (ze > re) {
/*
* Have to shift y fraction right to align.
*/
s = ze - re;
- rm = XDPSRS(rm, s);
+ srl128(&hrm, &lrm, s);
re += s;
} else if (re > ze) {
/*
* Have to shift x fraction right to align.
*/
s = re - ze;
- zm = XDPSRS(zm, s);
+ srl128(&hzm, &lzm, s);
ze += s;
}
assert(ze == re);
assert(ze <= DP_EMAX);
+ /* Do the addition */
if (zs == rs) {
/*
- * Generate 28 bit result of adding two 27 bit numbers
- * leaving result in xm, xs and xe.
+ * Generate 128 bit result by adding two 127 bit numbers
+ * leaving result in hzm:lzm, zs and ze.
*/
- zm = zm + rm;
-
- if (zm >> (DP_FBITS + 1 + 3)) { /* carry out */
- zm = XDPSRS1(zm);
+ hzm = hzm + hrm + (lzm > (lzm + lrm));
+ lzm = lzm + lrm;
+ if ((int64_t)hzm < 0) { /* carry out */
+ srl128(&hzm, &lzm, 1);
ze++;
}
} else {
- if (zm >= rm) {
- zm = zm - rm;
+ if (hzm > hrm || (hzm == hrm && lzm >= lrm)) {
+ hzm = hzm - hrm - (lzm < lrm);
+ lzm = lzm - lrm;
} else {
- zm = rm - zm;
+ hzm = hrm - hzm - (lrm < lzm);
+ lzm = lrm - lzm;
zs = rs;
}
- if (zm == 0)
+ if (lzm == 0 && hzm == 0)
return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD);
/*
- * Normalize to rounding precision.
+ * Put explicit bit at bit 126 if necessary.
*/
- while ((zm >> (DP_FBITS + 3)) == 0) {
- zm <<= 1;
- ze--;
+ if (hzm == 0) {
+ /* left shift by 63 or 64 bits */
+ if ((int64_t)lzm < 0) {
+ /* MSB of lzm is the explicit bit */
+ hzm = lzm >> 1;
+ lzm = lzm << 63;
+ ze -= 63;
+ } else {
+ hzm = lzm;
+ lzm = 0;
+ ze -= 64;
+ }
+ }
+
+ t = 0;
+ while ((hzm >> (62 - t)) == 0)
+ t++;
+
+ assert(t <= 62);
+ if (t) {
+ hzm = hzm << t | lzm >> (64 - t);
+ lzm = lzm << t;
+ ze -= t;
}
}
- return ieee754dp_format(zs, ze, zm);
+ /*
+ * Move explicit bit from bit 126 to bit 55 since the
+ * ieee754dp_format code expects the mantissa to be
+ * 56 bits wide (53 + 3 rounding bits).
+ */
+ srl128(&hzm, &lzm, (126 - 55));
+
+ return ieee754dp_format(zs, ze, lzm);
+}
+
+union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x,
+ union ieee754dp y)
+{
+ return _dp_maddf(z, x, y, 0);
+}
+
+union ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x,
+ union ieee754dp y)
+{
+ return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
}
diff --git a/arch/mips/math-emu/dp_msubf.c b/arch/mips/math-emu/dp_msubf.c
deleted file mode 100644
index 12241262f856..000000000000
--- a/arch/mips/math-emu/dp_msubf.c
+++ /dev/null
@@ -1,269 +0,0 @@
-/*
- * IEEE754 floating point arithmetic
- * double precision: MSUB.f (Fused Multiply Subtract)
- * MSUBF.fmt: FPR[fd] = FPR[fd] - (FPR[fs] x FPR[ft])
- *
- * MIPS floating point support
- * Copyright (C) 2015 Imagination Technologies, Ltd.
- * Author: Markos Chandras <markos.chandras@imgtec.com>
- *
- * This program is free software; you can distribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; version 2 of the License.
- */
-
-#include "ieee754dp.h"
-
-union ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x,
- union ieee754dp y)
-{
- int re;
- int rs;
- u64 rm;
- unsigned lxm;
- unsigned hxm;
- unsigned lym;
- unsigned hym;
- u64 lrm;
- u64 hrm;
- u64 t;
- u64 at;
- int s;
-
- COMPXDP;
- COMPYDP;
-
- u64 zm; int ze; int zs __maybe_unused; int zc;
-
- EXPLODEXDP;
- EXPLODEYDP;
- EXPLODEDP(z, zc, zs, ze, zm)
-
- FLUSHXDP;
- FLUSHYDP;
- FLUSHDP(z, zc, zs, ze, zm);
-
- ieee754_clearcx();
-
- switch (zc) {
- case IEEE754_CLASS_SNAN:
- ieee754_setcx(IEEE754_INVALID_OPERATION);
- return ieee754dp_nanxcpt(z);
- case IEEE754_CLASS_DNORM:
- DPDNORMx(zm, ze);
- /* QNAN is handled separately below */
- }
-
- switch (CLPAIR(xc, yc)) {
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN):
- case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN):
- case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN):
- case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN):
- case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN):
- return ieee754dp_nanxcpt(y);
-
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN):
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN):
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO):
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM):
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM):
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
- return ieee754dp_nanxcpt(x);
-
- case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
- case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
- case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
- case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
- return y;
-
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF):
- return x;
-
-
- /*
- * Infinity handling
- */
- case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
- case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
- if (zc == IEEE754_CLASS_QNAN)
- return z;
- ieee754_setcx(IEEE754_INVALID_OPERATION);
- return ieee754dp_indef();
-
- case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
- case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
- case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
- case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
- case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
- if (zc == IEEE754_CLASS_QNAN)
- return z;
- return ieee754dp_inf(xs ^ ys);
-
- case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
- case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
- case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
- case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
- case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
- if (zc == IEEE754_CLASS_INF)
- return ieee754dp_inf(zs);
- /* Multiplication is 0 so just return z */
- return z;
-
- case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
- DPDNORMX;
-
- case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
- if (zc == IEEE754_CLASS_QNAN)
- return z;
- else if (zc == IEEE754_CLASS_INF)
- return ieee754dp_inf(zs);
- DPDNORMY;
- break;
-
- case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
- if (zc == IEEE754_CLASS_QNAN)
- return z;
- else if (zc == IEEE754_CLASS_INF)
- return ieee754dp_inf(zs);
- DPDNORMX;
- break;
-
- case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
- if (zc == IEEE754_CLASS_QNAN)
- return z;
- else if (zc == IEEE754_CLASS_INF)
- return ieee754dp_inf(zs);
- /* fall through to real computations */
- }
-
- /* Finally get to do some computation */
-
- /*
- * Do the multiplication bit first
- *
- * rm = xm * ym, re = xe + ye basically
- *
- * At this point xm and ym should have been normalized.
- */
- assert(xm & DP_HIDDEN_BIT);
- assert(ym & DP_HIDDEN_BIT);
-
- re = xe + ye;
- rs = xs ^ ys;
-
- /* shunt to top of word */
- xm <<= 64 - (DP_FBITS + 1);
- ym <<= 64 - (DP_FBITS + 1);
-
- /*
- * Multiply 32 bits xm, ym to give high 32 bits rm with stickness.
- */
-
- /* 32 * 32 => 64 */
-#define DPXMULT(x, y) ((u64)(x) * (u64)y)
-
- lxm = xm;
- hxm = xm >> 32;
- lym = ym;
- hym = ym >> 32;
-
- lrm = DPXMULT(lxm, lym);
- hrm = DPXMULT(hxm, hym);
-
- t = DPXMULT(lxm, hym);
-
- at = lrm + (t << 32);
- hrm += at < lrm;
- lrm = at;
-
- hrm = hrm + (t >> 32);
-
- t = DPXMULT(hxm, lym);
-
- at = lrm + (t << 32);
- hrm += at < lrm;
- lrm = at;
-
- hrm = hrm + (t >> 32);
-
- rm = hrm | (lrm != 0);
-
- /*
- * Sticky shift down to normal rounding precision.
- */
- if ((s64) rm < 0) {
- rm = (rm >> (64 - (DP_FBITS + 1 + 3))) |
- ((rm << (DP_FBITS + 1 + 3)) != 0);
- re++;
- } else {
- rm = (rm >> (64 - (DP_FBITS + 1 + 3 + 1))) |
- ((rm << (DP_FBITS + 1 + 3 + 1)) != 0);
- }
- assert(rm & (DP_HIDDEN_BIT << 3));
-
- /* And now the subtraction */
-
- /* flip sign of r and handle as add */
- rs ^= 1;
-
- assert(zm & DP_HIDDEN_BIT);
-
- /*
- * Provide guard,round and stick bit space.
- */
- zm <<= 3;
-
- if (ze > re) {
- /*
- * Have to shift y fraction right to align.
- */
- s = ze - re;
- rm = XDPSRS(rm, s);
- re += s;
- } else if (re > ze) {
- /*
- * Have to shift x fraction right to align.
- */
- s = re - ze;
- zm = XDPSRS(zm, s);
- ze += s;
- }
- assert(ze == re);
- assert(ze <= DP_EMAX);
-
- if (zs == rs) {
- /*
- * Generate 28 bit result of adding two 27 bit numbers
- * leaving result in xm, xs and xe.
- */
- zm = zm + rm;
-
- if (zm >> (DP_FBITS + 1 + 3)) { /* carry out */
- zm = XDPSRS1(zm);
- ze++;
- }
- } else {
- if (zm >= rm) {
- zm = zm - rm;
- } else {
- zm = rm - zm;
- zs = rs;
- }
- if (zm == 0)
- return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD);
-
- /*
- * Normalize to rounding precision.
- */
- while ((zm >> (DP_FBITS + 3)) == 0) {
- zm <<= 1;
- ze--;
- }
- }
-
- return ieee754dp_format(zs, ze, zm);
-}
diff --git a/arch/mips/math-emu/dp_mul.c b/arch/mips/math-emu/dp_mul.c
index d0901f03fa19..c34a6cdf1b25 100644
--- a/arch/mips/math-emu/dp_mul.c
+++ b/arch/mips/math-emu/dp_mul.c
@@ -26,10 +26,10 @@ union ieee754dp ieee754dp_mul(union ieee754dp x, union ieee754dp y)
int re;
int rs;
u64 rm;
- unsigned lxm;
- unsigned hxm;
- unsigned lym;
- unsigned hym;
+ unsigned int lxm;
+ unsigned int hxm;
+ unsigned int lym;
+ unsigned int hym;
u64 lrm;
u64 hrm;
u64 t;
@@ -101,6 +101,7 @@ union ieee754dp ieee754dp_mul(union ieee754dp x, union ieee754dp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
DPDNORMX;
+ /* fall through */
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
DPDNORMY;
@@ -125,12 +126,9 @@ union ieee754dp ieee754dp_mul(union ieee754dp x, union ieee754dp y)
ym <<= 64 - (DP_FBITS + 1);
/*
- * Multiply 32 bits xm, ym to give high 32 bits rm with stickness.
+ * Multiply 64 bits xm, ym to give high 64 bits rm with stickness.
*/
- /* 32 * 32 => 64 */
-#define DPXMULT(x, y) ((u64)(x) * (u64)y)
-
lxm = xm;
hxm = xm >> 32;
lym = ym;
@@ -163,7 +161,7 @@ union ieee754dp ieee754dp_mul(union ieee754dp x, union ieee754dp y)
if ((s64) rm < 0) {
rm = (rm >> (64 - (DP_FBITS + 1 + 3))) |
((rm << (DP_FBITS + 1 + 3)) != 0);
- re++;
+ re++;
} else {
rm = (rm >> (64 - (DP_FBITS + 1 + 3 + 1))) |
((rm << (DP_FBITS + 1 + 3 + 1)) != 0);
diff --git a/arch/mips/math-emu/dp_rint.c b/arch/mips/math-emu/dp_rint.c
new file mode 100644
index 000000000000..c3b9077ff357
--- /dev/null
+++ b/arch/mips/math-emu/dp_rint.c
@@ -0,0 +1,89 @@
+/* IEEE754 floating point arithmetic
+ * double precision: common utilities
+ */
+/*
+ * MIPS floating point support
+ * Copyright (C) 1994-2000 Algorithmics Ltd.
+ * Copyright (C) 2017 Imagination Technologies, Ltd.
+ * Author: Aleksandar Markovic <aleksandar.markovic@imgtec.com>
+ *
+ * This program is free software; you can distribute it and/or modify it
+ * under the terms of the GNU General Public License (Version 2) as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program.
+ */
+
+#include "ieee754dp.h"
+
+union ieee754dp ieee754dp_rint(union ieee754dp x)
+{
+ union ieee754dp ret;
+ u64 residue;
+ int sticky;
+ int round;
+ int odd;
+
+ COMPXDP;
+
+ ieee754_clearcx();
+
+ EXPLODEXDP;
+ FLUSHXDP;
+
+ if (xc == IEEE754_CLASS_SNAN)
+ return ieee754dp_nanxcpt(x);
+
+ if ((xc == IEEE754_CLASS_QNAN) ||
+ (xc == IEEE754_CLASS_INF) ||
+ (xc == IEEE754_CLASS_ZERO))
+ return x;
+
+ if (xe >= DP_FBITS)
+ return x;
+
+ if (xe < -1) {
+ residue = xm;
+ round = 0;
+ sticky = residue != 0;
+ xm = 0;
+ } else {
+ residue = xm << (64 - DP_FBITS + xe);
+ round = (residue >> 63) != 0;
+ sticky = (residue << 1) != 0;
+ xm >>= DP_FBITS - xe;
+ }
+
+ odd = (xm & 0x1) != 0x0;
+
+ switch (ieee754_csr.rm) {
+ case FPU_CSR_RN: /* toward nearest */
+ if (round && (sticky || odd))
+ xm++;
+ break;
+ case FPU_CSR_RZ: /* toward zero */
+ break;
+ case FPU_CSR_RU: /* toward +infinity */
+ if ((round || sticky) && !xs)
+ xm++;
+ break;
+ case FPU_CSR_RD: /* toward -infinity */
+ if ((round || sticky) && xs)
+ xm++;
+ break;
+ }
+
+ if (round || sticky)
+ ieee754_setcx(IEEE754_INEXACT);
+
+ ret = ieee754dp_flong(xm);
+ DPSIGN(ret) = xs;
+
+ return ret;
+}
diff --git a/arch/mips/math-emu/dp_simple.c b/arch/mips/math-emu/dp_simple.c
index 926d56bf37f2..eb96485ed939 100644
--- a/arch/mips/math-emu/dp_simple.c
+++ b/arch/mips/math-emu/dp_simple.c
@@ -23,27 +23,39 @@
union ieee754dp ieee754dp_neg(union ieee754dp x)
{
- unsigned int oldrm;
union ieee754dp y;
- oldrm = ieee754_csr.rm;
- ieee754_csr.rm = FPU_CSR_RD;
- y = ieee754dp_sub(ieee754dp_zero(0), x);
- ieee754_csr.rm = oldrm;
+ if (ieee754_csr.abs2008) {
+ y = x;
+ DPSIGN(y) = !DPSIGN(x);
+ } else {
+ unsigned int oldrm;
+
+ oldrm = ieee754_csr.rm;
+ ieee754_csr.rm = FPU_CSR_RD;
+ y = ieee754dp_sub(ieee754dp_zero(0), x);
+ ieee754_csr.rm = oldrm;
+ }
return y;
}
union ieee754dp ieee754dp_abs(union ieee754dp x)
{
- unsigned int oldrm;
union ieee754dp y;
- oldrm = ieee754_csr.rm;
- ieee754_csr.rm = FPU_CSR_RD;
- if (DPSIGN(x))
- y = ieee754dp_sub(ieee754dp_zero(0), x);
- else
- y = ieee754dp_add(ieee754dp_zero(0), x);
- ieee754_csr.rm = oldrm;
+ if (ieee754_csr.abs2008) {
+ y = x;
+ DPSIGN(y) = 0;
+ } else {
+ unsigned int oldrm;
+
+ oldrm = ieee754_csr.rm;
+ ieee754_csr.rm = FPU_CSR_RD;
+ if (DPSIGN(x))
+ y = ieee754dp_sub(ieee754dp_zero(0), x);
+ else
+ y = ieee754dp_add(ieee754dp_zero(0), x);
+ ieee754_csr.rm = oldrm;
+ }
return y;
}
diff --git a/arch/mips/math-emu/dp_sqrt.c b/arch/mips/math-emu/dp_sqrt.c
index cd5bc083001e..1d26c92e5295 100644
--- a/arch/mips/math-emu/dp_sqrt.c
+++ b/arch/mips/math-emu/dp_sqrt.c
@@ -21,7 +21,7 @@
#include "ieee754dp.h"
-static const unsigned table[] = {
+static const unsigned int table[] = {
0, 1204, 3062, 5746, 9193, 13348, 18162, 23592,
29598, 36145, 43202, 50740, 58733, 67158, 75992,
85215, 83599, 71378, 60428, 50647, 41945, 34246,
@@ -33,7 +33,7 @@ union ieee754dp ieee754dp_sqrt(union ieee754dp x)
{
struct _ieee754_csr oldcsr;
union ieee754dp y, z, t;
- unsigned scalx, yh;
+ unsigned int scalx, yh;
COMPXDP;
EXPLODEXDP;
@@ -91,7 +91,8 @@ union ieee754dp ieee754dp_sqrt(union ieee754dp x)
scalx -= 256;
}
- y = x = builddp(0, xe + DP_EBIAS, xm & ~DP_HIDDEN_BIT);
+ x = builddp(0, xe + DP_EBIAS, xm & ~DP_HIDDEN_BIT);
+ y = x;
/* magic initial approximation to almost 8 sig. bits */
yh = y.bits >> 32;
@@ -108,7 +109,8 @@ union ieee754dp ieee754dp_sqrt(union ieee754dp x)
/* triple to almost 56 sig. bits: y ~= sqrt(x) to within 1 ulp */
/* t=y*y; z=t; pt[n0]+=0x00100000; t+=z; z=(x-z)*y; */
- z = t = ieee754dp_mul(y, y);
+ t = ieee754dp_mul(y, y);
+ z = t;
t.bexp += 0x001;
t = ieee754dp_add(t, z);
z = ieee754dp_mul(ieee754dp_sub(x, z), y);
@@ -140,7 +142,7 @@ union ieee754dp ieee754dp_sqrt(union ieee754dp x)
switch (oldcsr.rm) {
case FPU_CSR_RU:
y.bits += 1;
- /* drop through */
+ /* fall through */
case FPU_CSR_RN:
t.bits += 1;
break;
diff --git a/arch/mips/math-emu/dp_sub.c b/arch/mips/math-emu/dp_sub.c
index fc17a781b9ae..3cc48b86519b 100644
--- a/arch/mips/math-emu/dp_sub.c
+++ b/arch/mips/math-emu/dp_sub.c
@@ -106,7 +106,7 @@ union ieee754dp ieee754dp_sub(union ieee754dp x, union ieee754dp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
DPDNORMX;
- /* FALL THROUGH */
+ /* fall through */
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
/* normalize ym,ye */
diff --git a/arch/mips/math-emu/dp_tint.c b/arch/mips/math-emu/dp_tint.c
index 6ffc336c530e..f3985617ce31 100644
--- a/arch/mips/math-emu/dp_tint.c
+++ b/arch/mips/math-emu/dp_tint.c
@@ -38,10 +38,13 @@ int ieee754dp_tint(union ieee754dp x)
switch (xc) {
case IEEE754_CLASS_SNAN:
case IEEE754_CLASS_QNAN:
- case IEEE754_CLASS_INF:
ieee754_setcx(IEEE754_INVALID_OPERATION);
return ieee754si_indef();
+ case IEEE754_CLASS_INF:
+ ieee754_setcx(IEEE754_INVALID_OPERATION);
+ return ieee754si_overflow(xs);
+
case IEEE754_CLASS_ZERO:
return 0;
@@ -53,7 +56,7 @@ int ieee754dp_tint(union ieee754dp x)
/* Set invalid. We will only use overflow for floating
point overflow */
ieee754_setcx(IEEE754_INVALID_OPERATION);
- return ieee754si_indef();
+ return ieee754si_overflow(xs);
}
/* oh gawd */
if (xe > DP_FBITS) {
@@ -93,7 +96,7 @@ int ieee754dp_tint(union ieee754dp x)
if ((xm >> 31) != 0 && (xs == 0 || xm != 0x80000000)) {
/* This can happen after rounding */
ieee754_setcx(IEEE754_INVALID_OPERATION);
- return ieee754si_indef();
+ return ieee754si_overflow(xs);
}
if (round || sticky)
ieee754_setcx(IEEE754_INEXACT);
diff --git a/arch/mips/math-emu/dp_tlong.c b/arch/mips/math-emu/dp_tlong.c
index 9cdc145b75e0..748fa10ed4cf 100644
--- a/arch/mips/math-emu/dp_tlong.c
+++ b/arch/mips/math-emu/dp_tlong.c
@@ -38,10 +38,13 @@ s64 ieee754dp_tlong(union ieee754dp x)
switch (xc) {
case IEEE754_CLASS_SNAN:
case IEEE754_CLASS_QNAN:
- case IEEE754_CLASS_INF:
ieee754_setcx(IEEE754_INVALID_OPERATION);
return ieee754di_indef();
+ case IEEE754_CLASS_INF:
+ ieee754_setcx(IEEE754_INVALID_OPERATION);
+ return ieee754di_overflow(xs);
+
case IEEE754_CLASS_ZERO:
return 0;
@@ -56,7 +59,7 @@ s64 ieee754dp_tlong(union ieee754dp x)
/* Set invalid. We will only use overflow for floating
point overflow */
ieee754_setcx(IEEE754_INVALID_OPERATION);
- return ieee754di_indef();
+ return ieee754di_overflow(xs);
}
/* oh gawd */
if (xe > DP_FBITS) {
@@ -97,7 +100,7 @@ s64 ieee754dp_tlong(union ieee754dp x)
if ((xm >> 63) != 0) {
/* This can happen after rounding */
ieee754_setcx(IEEE754_INVALID_OPERATION);
- return ieee754di_indef();
+ return ieee754di_overflow(xs);
}
if (round || sticky)
ieee754_setcx(IEEE754_INEXACT);
diff --git a/arch/mips/math-emu/dsemul.c b/arch/mips/math-emu/dsemul.c
index cbb36c14b155..4a094f7acb3d 100644
--- a/arch/mips/math-emu/dsemul.c
+++ b/arch/mips/math-emu/dsemul.c
@@ -1,3 +1,6 @@
+#include <linux/err.h>
+#include <linux/slab.h>
+
#include <asm/branch.h>
#include <asm/cacheflush.h>
#include <asm/fpu_emulator.h>
@@ -5,158 +8,296 @@
#include <asm/mipsregs.h>
#include <asm/uaccess.h>
-#include "ieee754.h"
-
-/*
- * Emulate the arbritrary instruction ir at xcp->cp0_epc. Required when
- * we have to emulate the instruction in a COP1 branch delay slot. Do
- * not change cp0_epc due to the instruction
+/**
+ * struct emuframe - The 'emulation' frame structure
+ * @emul: The instruction to 'emulate'.
+ * @badinst: A break instruction to cause a return to the kernel.
*
- * According to the spec:
- * 1) it shouldn't be a branch :-)
- * 2) it can be a COP instruction :-(
- * 3) if we are tring to run a protected memory space we must take
- * special care on memory access instructions :-(
- */
-
-/*
- * "Trampoline" return routine to catch exception following
- * execution of delay-slot instruction execution.
+ * This structure defines the frames placed within the delay slot emulation
+ * page in response to a call to mips_dsemul(). Each thread may be allocated
+ * only one frame at any given time. The kernel stores within it the
+ * instruction to be 'emulated' followed by a break instruction, then
+ * executes the frame in user mode. The break causes a trap to the kernel
+ * which leads to do_dsemulret() being called unless the instruction in
+ * @emul causes a trap itself, is a branch, or a signal is delivered to
+ * the thread. In these cases the allocated frame will either be reused by
+ * a subsequent delay slot 'emulation', or be freed during signal delivery or
+ * upon thread exit.
+ *
+ * This approach is used because:
+ *
+ * - Actually emulating all instructions isn't feasible. We would need to
+ * be able to handle instructions from all revisions of the MIPS ISA,
+ * all ASEs & all vendor instruction set extensions. This would be a
+ * whole lot of work & continual maintenance burden as new instructions
+ * are introduced, and in the case of some vendor extensions may not
+ * even be possible. Thus we need to take the approach of actually
+ * executing the instruction.
+ *
+ * - We must execute the instruction within user context. If we were to
+ * execute the instruction in kernel mode then it would have access to
+ * kernel resources without very careful checks, leaving us with a
+ * high potential for security or stability issues to arise.
+ *
+ * - We used to place the frame on the users stack, but this requires
+ * that the stack be executable. This is bad for security so the
+ * per-process page is now used instead.
+ *
+ * - The instruction in @emul may be something entirely invalid for a
+ * delay slot. The user may (intentionally or otherwise) place a branch
+ * in a delay slot, or a kernel mode instruction, or something else
+ * which generates an exception. Thus we can't rely upon the break in
+ * @badinst always being hit. For this reason we track the index of the
+ * frame allocated to each thread, allowing us to clean it up at later
+ * points such as signal delivery or thread exit.
+ *
+ * - The user may generate a fake struct emuframe if they wish, invoking
+ * the BRK_MEMU break instruction themselves. We must therefore not
+ * trust that BRK_MEMU means there's actually a valid frame allocated
+ * to the thread, and must not allow the user to do anything they
+ * couldn't already.
*/
-
struct emuframe {
mips_instruction emul;
mips_instruction badinst;
- mips_instruction cookie;
- unsigned long epc;
};
-int mips_dsemul(struct pt_regs *regs, mips_instruction ir, unsigned long cpc)
-{
- struct emuframe __user *fr;
- int err;
-
- if ((get_isa16_mode(regs->cp0_epc) && ((ir >> 16) == MM_NOP16)) ||
- (ir == 0)) {
- /* NOP is easy */
- regs->cp0_epc = cpc;
- clear_delay_slot(regs);
- return 0;
- }
+static const int emupage_frame_count = PAGE_SIZE / sizeof(struct emuframe);
- pr_debug("dsemul %lx %lx\n", regs->cp0_epc, cpc);
+static inline __user struct emuframe *dsemul_page(void)
+{
+ return (__user struct emuframe *)STACK_TOP;
+}
- /*
- * The strategy is to push the instruction onto the user stack
- * and put a trap after it which we can catch and jump to
- * the required address any alternative apart from full
- * instruction emulation!!.
- *
- * Algorithmics used a system call instruction, and
- * borrowed that vector. MIPS/Linux version is a bit
- * more heavyweight in the interests of portability and
- * multiprocessor support. For Linux we generate a
- * an unaligned access and force an address error exception.
- *
- * For embedded systems (stand-alone) we prefer to use a
- * non-existing CP1 instruction. This prevents us from emulating
- * branches, but gives us a cleaner interface to the exception
- * handler (single entry point).
- */
+static int alloc_emuframe(void)
+{
+ mm_context_t *mm_ctx = &current->mm->context;
+ int idx;
- /* Ensure that the two instructions are in the same cache line */
- fr = (struct emuframe __user *)
- ((regs->regs[29] - sizeof(struct emuframe)) & ~0x7);
+retry:
+ spin_lock(&mm_ctx->bd_emupage_lock);
- /* Verify that the stack pointer is not competely insane */
- if (unlikely(!access_ok(VERIFY_WRITE, fr, sizeof(struct emuframe))))
- return SIGBUS;
+ /* Ensure we have an allocation bitmap */
+ if (!mm_ctx->bd_emupage_allocmap) {
+ mm_ctx->bd_emupage_allocmap =
+ kcalloc(BITS_TO_LONGS(emupage_frame_count),
+ sizeof(unsigned long),
+ GFP_ATOMIC);
- if (get_isa16_mode(regs->cp0_epc)) {
- err = __put_user(ir >> 16, (u16 __user *)(&fr->emul));
- err |= __put_user(ir & 0xffff, (u16 __user *)((long)(&fr->emul) + 2));
- err |= __put_user(BREAK_MATH >> 16, (u16 __user *)(&fr->badinst));
- err |= __put_user(BREAK_MATH & 0xffff, (u16 __user *)((long)(&fr->badinst) + 2));
- } else {
- err = __put_user(ir, &fr->emul);
- err |= __put_user((mips_instruction)BREAK_MATH, &fr->badinst);
+ if (!mm_ctx->bd_emupage_allocmap) {
+ idx = BD_EMUFRAME_NONE;
+ goto out_unlock;
+ }
}
- err |= __put_user((mips_instruction)BD_COOKIE, &fr->cookie);
- err |= __put_user(cpc, &fr->epc);
+ /* Attempt to allocate a single bit/frame */
+ idx = bitmap_find_free_region(mm_ctx->bd_emupage_allocmap,
+ emupage_frame_count, 0);
+ if (idx < 0) {
+ /*
+ * Failed to allocate a frame. We'll wait until one becomes
+ * available. We unlock the page so that other threads actually
+ * get the opportunity to free their frames, which means
+ * technically the result of bitmap_full may be incorrect.
+ * However the worst case is that we repeat all this and end up
+ * back here again.
+ */
+ spin_unlock(&mm_ctx->bd_emupage_lock);
+ if (!wait_event_killable(mm_ctx->bd_emupage_queue,
+ !bitmap_full(mm_ctx->bd_emupage_allocmap,
+ emupage_frame_count)))
+ goto retry;
- if (unlikely(err)) {
- MIPS_FPU_EMU_INC_STATS(errors);
- return SIGBUS;
+ /* Received a fatal signal - just give in */
+ return BD_EMUFRAME_NONE;
}
- regs->cp0_epc = ((unsigned long) &fr->emul) |
- get_isa16_mode(regs->cp0_epc);
+ /* Success! */
+ pr_debug("allocate emuframe %d to %d\n", idx, current->pid);
+out_unlock:
+ spin_unlock(&mm_ctx->bd_emupage_lock);
+ return idx;
+}
- flush_cache_sigtramp((unsigned long)&fr->emul);
+static void free_emuframe(int idx, struct mm_struct *mm)
+{
+ mm_context_t *mm_ctx = &mm->context;
- return 0;
+ spin_lock(&mm_ctx->bd_emupage_lock);
+
+ pr_debug("free emuframe %d from %d\n", idx, current->pid);
+ bitmap_clear(mm_ctx->bd_emupage_allocmap, idx, 1);
+
+ /* If some thread is waiting for a frame, now's its chance */
+ wake_up(&mm_ctx->bd_emupage_queue);
+
+ spin_unlock(&mm_ctx->bd_emupage_lock);
+}
+
+static bool within_emuframe(struct pt_regs *regs)
+{
+ unsigned long base = (unsigned long)dsemul_page();
+
+ if (regs->cp0_epc < base)
+ return false;
+ if (regs->cp0_epc >= (base + PAGE_SIZE))
+ return false;
+
+ return true;
+}
+
+bool dsemul_thread_cleanup(struct task_struct *tsk)
+{
+ int fr_idx;
+
+ /* Clear any allocated frame, retrieving its index */
+ fr_idx = atomic_xchg(&tsk->thread.bd_emu_frame, BD_EMUFRAME_NONE);
+
+ /* If no frame was allocated, we're done */
+ if (fr_idx == BD_EMUFRAME_NONE)
+ return false;
+
+ task_lock(tsk);
+
+ /* Free the frame that this thread had allocated */
+ if (tsk->mm)
+ free_emuframe(fr_idx, tsk->mm);
+
+ task_unlock(tsk);
+ return true;
}
-int do_dsemulret(struct pt_regs *xcp)
+bool dsemul_thread_rollback(struct pt_regs *regs)
{
struct emuframe __user *fr;
- unsigned long epc;
- u32 insn, cookie;
- int err = 0;
- u16 instr[2];
+ int fr_idx;
- fr = (struct emuframe __user *)
- (msk_isa16_mode(xcp->cp0_epc) - sizeof(mips_instruction));
+ /* Do nothing if we're not executing from a frame */
+ if (!within_emuframe(regs))
+ return false;
- /*
- * If we can't even access the area, something is very wrong, but we'll
- * leave that to the default handling
- */
- if (!access_ok(VERIFY_READ, fr, sizeof(struct emuframe)))
- return 0;
+ /* Find the frame being executed */
+ fr_idx = atomic_read(&current->thread.bd_emu_frame);
+ if (fr_idx == BD_EMUFRAME_NONE)
+ return false;
+ fr = &dsemul_page()[fr_idx];
/*
- * Do some sanity checking on the stackframe:
- *
- * - Is the instruction pointed to by the EPC an BREAK_MATH?
- * - Is the following memory word the BD_COOKIE?
+ * If the PC is at the emul instruction, roll back to the branch. If
+ * PC is at the badinst (break) instruction, we've already emulated the
+ * instruction so progress to the continue PC. If it's anything else
+ * then something is amiss & the user has branched into some other area
+ * of the emupage - we'll free the allocated frame anyway.
*/
- if (get_isa16_mode(xcp->cp0_epc)) {
- err = __get_user(instr[0], (u16 __user *)(&fr->badinst));
- err |= __get_user(instr[1], (u16 __user *)((long)(&fr->badinst) + 2));
- insn = (instr[0] << 16) | instr[1];
+ if (msk_isa16_mode(regs->cp0_epc) == (unsigned long)&fr->emul)
+ regs->cp0_epc = current->thread.bd_emu_branch_pc;
+ else if (msk_isa16_mode(regs->cp0_epc) == (unsigned long)&fr->badinst)
+ regs->cp0_epc = current->thread.bd_emu_cont_pc;
+
+ atomic_set(&current->thread.bd_emu_frame, BD_EMUFRAME_NONE);
+ free_emuframe(fr_idx, current->mm);
+ return true;
+}
+
+void dsemul_mm_cleanup(struct mm_struct *mm)
+{
+ mm_context_t *mm_ctx = &mm->context;
+
+ kfree(mm_ctx->bd_emupage_allocmap);
+}
+
+int mips_dsemul(struct pt_regs *regs, mips_instruction ir,
+ unsigned long branch_pc, unsigned long cont_pc)
+{
+ int isa16 = get_isa16_mode(regs->cp0_epc);
+ mips_instruction break_math;
+ struct emuframe __user *fr;
+ int err, fr_idx;
+
+ /* NOP is easy */
+ if (ir == 0)
+ return -1;
+
+ /* microMIPS instructions */
+ if (isa16) {
+ union mips_instruction insn = { .word = ir };
+
+ /* NOP16 aka MOVE16 $0, $0 */
+ if ((ir >> 16) == MM_NOP16)
+ return -1;
+
+ /* ADDIUPC */
+ if (insn.mm_a_format.opcode == mm_addiupc_op) {
+ unsigned int rs;
+ s32 v;
+
+ rs = (((insn.mm_a_format.rs + 0xe) & 0xf) + 2);
+ v = regs->cp0_epc & ~3;
+ v += insn.mm_a_format.simmediate << 2;
+ regs->regs[rs] = (long)v;
+ return -1;
+ }
+ }
+
+ pr_debug("dsemul 0x%08lx cont at 0x%08lx\n", regs->cp0_epc, cont_pc);
+
+ /* Allocate a frame if we don't already have one */
+ fr_idx = atomic_read(&current->thread.bd_emu_frame);
+ if (fr_idx == BD_EMUFRAME_NONE)
+ fr_idx = alloc_emuframe();
+ if (fr_idx == BD_EMUFRAME_NONE)
+ return SIGBUS;
+ fr = &dsemul_page()[fr_idx];
+
+ /* Retrieve the appropriately encoded break instruction */
+ break_math = BREAK_MATH(isa16);
+
+ /* Write the instructions to the frame */
+ if (isa16) {
+ err = __put_user(ir >> 16,
+ (u16 __user *)(&fr->emul));
+ err |= __put_user(ir & 0xffff,
+ (u16 __user *)((long)(&fr->emul) + 2));
+ err |= __put_user(break_math >> 16,
+ (u16 __user *)(&fr->badinst));
+ err |= __put_user(break_math & 0xffff,
+ (u16 __user *)((long)(&fr->badinst) + 2));
} else {
- err = __get_user(insn, &fr->badinst);
+ err = __put_user(ir, &fr->emul);
+ err |= __put_user(break_math, &fr->badinst);
}
- err |= __get_user(cookie, &fr->cookie);
- if (unlikely(err || (insn != BREAK_MATH) || (cookie != BD_COOKIE))) {
+ if (unlikely(err)) {
MIPS_FPU_EMU_INC_STATS(errors);
- return 0;
+ free_emuframe(fr_idx, current->mm);
+ return SIGBUS;
}
- /*
- * At this point, we are satisfied that it's a BD emulation trap. Yes,
- * a user might have deliberately put two malformed and useless
- * instructions in a row in his program, in which case he's in for a
- * nasty surprise - the next instruction will be treated as a
- * continuation address! Alas, this seems to be the only way that we
- * can handle signals, recursion, and longjmps() in the context of
- * emulating the branch delay instruction.
- */
+ /* Record the PC of the branch, PC to continue from & frame index */
+ current->thread.bd_emu_branch_pc = branch_pc;
+ current->thread.bd_emu_cont_pc = cont_pc;
+ atomic_set(&current->thread.bd_emu_frame, fr_idx);
- pr_debug("dsemulret\n");
+ /* Change user register context to execute the frame */
+ regs->cp0_epc = (unsigned long)&fr->emul | isa16;
- if (__get_user(epc, &fr->epc)) { /* Saved EPC */
- /* This is not a good situation to be in */
- force_sig(SIGBUS, current);
+ /* Ensure the icache observes our newly written frame */
+ flush_cache_sigtramp((unsigned long)&fr->emul);
- return 0;
+ return 0;
+}
+
+bool do_dsemulret(struct pt_regs *xcp)
+{
+ /* Cleanup the allocated frame, returning if there wasn't one */
+ if (!dsemul_thread_cleanup(current)) {
+ MIPS_FPU_EMU_INC_STATS(errors);
+ return false;
}
/* Set EPC to return to post-branch instruction */
- xcp->cp0_epc = epc;
+ xcp->cp0_epc = current->thread.bd_emu_cont_pc;
+ pr_debug("dsemulret to 0x%08lx\n", xcp->cp0_epc);
MIPS_FPU_EMU_INC_STATS(ds_emul);
- return 1;
+ return true;
}
diff --git a/arch/mips/math-emu/ieee754.c b/arch/mips/math-emu/ieee754.c
index 8e97acbbe22c..e16ae7b75dbb 100644
--- a/arch/mips/math-emu/ieee754.c
+++ b/arch/mips/math-emu/ieee754.c
@@ -59,7 +59,8 @@ const union ieee754dp __ieee754dp_spcvals[] = {
DPCNST(1, 3, 0x4000000000000ULL), /* - 10.0 */
DPCNST(0, DP_EMAX + 1, 0x0000000000000ULL), /* + infinity */
DPCNST(1, DP_EMAX + 1, 0x0000000000000ULL), /* - infinity */
- DPCNST(0, DP_EMAX + 1, 0x7FFFFFFFFFFFFULL), /* + indef quiet Nan */
+ DPCNST(0, DP_EMAX + 1, 0x7FFFFFFFFFFFFULL), /* + ind legacy qNaN */
+ DPCNST(0, DP_EMAX + 1, 0x8000000000000ULL), /* + indef 2008 qNaN */
DPCNST(0, DP_EMAX, 0xFFFFFFFFFFFFFULL), /* + max */
DPCNST(1, DP_EMAX, 0xFFFFFFFFFFFFFULL), /* - max */
DPCNST(0, DP_EMIN, 0x0000000000000ULL), /* + min normal */
@@ -82,7 +83,8 @@ const union ieee754sp __ieee754sp_spcvals[] = {
SPCNST(1, 3, 0x200000), /* - 10.0 */
SPCNST(0, SP_EMAX + 1, 0x000000), /* + infinity */
SPCNST(1, SP_EMAX + 1, 0x000000), /* - infinity */
- SPCNST(0, SP_EMAX + 1, 0x3FFFFF), /* + indef quiet Nan */
+ SPCNST(0, SP_EMAX + 1, 0x3FFFFF), /* + indef legacy quiet NaN */
+ SPCNST(0, SP_EMAX + 1, 0x400000), /* + indef 2008 quiet NaN */
SPCNST(0, SP_EMAX, 0x7FFFFF), /* + max normal */
SPCNST(1, SP_EMAX, 0x7FFFFF), /* - max normal */
SPCNST(0, SP_EMIN, 0x000000), /* + min normal */
diff --git a/arch/mips/math-emu/ieee754.h b/arch/mips/math-emu/ieee754.h
index df94720714c7..e0eb7a965fdf 100644
--- a/arch/mips/math-emu/ieee754.h
+++ b/arch/mips/math-emu/ieee754.h
@@ -67,6 +67,7 @@ union ieee754sp ieee754sp_div(union ieee754sp x, union ieee754sp y);
union ieee754sp ieee754sp_fint(int x);
union ieee754sp ieee754sp_flong(s64 x);
union ieee754sp ieee754sp_fdp(union ieee754dp x);
+union ieee754sp ieee754sp_rint(union ieee754sp x);
int ieee754sp_tint(union ieee754sp x);
s64 ieee754sp_tlong(union ieee754sp x);
@@ -101,6 +102,7 @@ union ieee754dp ieee754dp_neg(union ieee754dp x);
union ieee754dp ieee754dp_fint(int x);
union ieee754dp ieee754dp_flong(s64 x);
union ieee754dp ieee754dp_fsp(union ieee754sp x);
+union ieee754dp ieee754dp_rint(union ieee754dp x);
int ieee754dp_tint(union ieee754dp x);
s64 ieee754dp_tlong(union ieee754dp x);
@@ -163,11 +165,12 @@ struct _ieee754_csr {
};
#define ieee754_csr (*(struct _ieee754_csr *)(&current->thread.fpu.fcr31))
-static inline unsigned ieee754_getrm(void)
+static inline unsigned int ieee754_getrm(void)
{
return (ieee754_csr.rm);
}
-static inline unsigned ieee754_setrm(unsigned rm)
+
+static inline unsigned int ieee754_setrm(unsigned int rm)
{
return (ieee754_csr.rm = rm);
}
@@ -175,14 +178,14 @@ static inline unsigned ieee754_setrm(unsigned rm)
/*
* get current exceptions
*/
-static inline unsigned ieee754_getcx(void)
+static inline unsigned int ieee754_getcx(void)
{
return (ieee754_csr.cx);
}
/* test for current exception condition
*/
-static inline int ieee754_cxtest(unsigned n)
+static inline int ieee754_cxtest(unsigned int n)
{
return (ieee754_csr.cx & n);
}
@@ -190,21 +193,21 @@ static inline int ieee754_cxtest(unsigned n)
/*
* get sticky exceptions
*/
-static inline unsigned ieee754_getsx(void)
+static inline unsigned int ieee754_getsx(void)
{
return (ieee754_csr.sx);
}
/* clear sticky conditions
*/
-static inline unsigned ieee754_clrsx(void)
+static inline unsigned int ieee754_clrsx(void)
{
return (ieee754_csr.sx = 0);
}
/* test for sticky exception condition
*/
-static inline int ieee754_sxtest(unsigned n)
+static inline int ieee754_sxtest(unsigned int n)
{
return (ieee754_csr.sx & n);
}
@@ -221,15 +224,16 @@ union ieee754dp ieee754dp_dump(char *s, union ieee754dp x);
#define IEEE754_SPCVAL_NTEN 5 /* -10.0 */
#define IEEE754_SPCVAL_PINFINITY 6 /* +inf */
#define IEEE754_SPCVAL_NINFINITY 7 /* -inf */
-#define IEEE754_SPCVAL_INDEF 8 /* quiet NaN */
-#define IEEE754_SPCVAL_PMAX 9 /* +max norm */
-#define IEEE754_SPCVAL_NMAX 10 /* -max norm */
-#define IEEE754_SPCVAL_PMIN 11 /* +min norm */
-#define IEEE754_SPCVAL_NMIN 12 /* -min norm */
-#define IEEE754_SPCVAL_PMIND 13 /* +min denorm */
-#define IEEE754_SPCVAL_NMIND 14 /* -min denorm */
-#define IEEE754_SPCVAL_P1E31 15 /* + 1.0e31 */
-#define IEEE754_SPCVAL_P1E63 16 /* + 1.0e63 */
+#define IEEE754_SPCVAL_INDEF_LEG 8 /* legacy quiet NaN */
+#define IEEE754_SPCVAL_INDEF_2008 9 /* IEEE 754-2008 quiet NaN */
+#define IEEE754_SPCVAL_PMAX 10 /* +max norm */
+#define IEEE754_SPCVAL_NMAX 11 /* -max norm */
+#define IEEE754_SPCVAL_PMIN 12 /* +min norm */
+#define IEEE754_SPCVAL_NMIN 13 /* -min norm */
+#define IEEE754_SPCVAL_PMIND 14 /* +min denorm */
+#define IEEE754_SPCVAL_NMIND 15 /* -min denorm */
+#define IEEE754_SPCVAL_P1E31 16 /* + 1.0e31 */
+#define IEEE754_SPCVAL_P1E63 17 /* + 1.0e63 */
extern const union ieee754dp __ieee754dp_spcvals[];
extern const union ieee754sp __ieee754sp_spcvals[];
@@ -243,7 +247,8 @@ extern const union ieee754sp __ieee754sp_spcvals[];
#define ieee754dp_zero(sn) (ieee754dp_spcvals[IEEE754_SPCVAL_PZERO+(sn)])
#define ieee754dp_one(sn) (ieee754dp_spcvals[IEEE754_SPCVAL_PONE+(sn)])
#define ieee754dp_ten(sn) (ieee754dp_spcvals[IEEE754_SPCVAL_PTEN+(sn)])
-#define ieee754dp_indef() (ieee754dp_spcvals[IEEE754_SPCVAL_INDEF])
+#define ieee754dp_indef() (ieee754dp_spcvals[IEEE754_SPCVAL_INDEF_LEG + \
+ ieee754_csr.nan2008])
#define ieee754dp_max(sn) (ieee754dp_spcvals[IEEE754_SPCVAL_PMAX+(sn)])
#define ieee754dp_min(sn) (ieee754dp_spcvals[IEEE754_SPCVAL_PMIN+(sn)])
#define ieee754dp_mind(sn) (ieee754dp_spcvals[IEEE754_SPCVAL_PMIND+(sn)])
@@ -254,7 +259,8 @@ extern const union ieee754sp __ieee754sp_spcvals[];
#define ieee754sp_zero(sn) (ieee754sp_spcvals[IEEE754_SPCVAL_PZERO+(sn)])
#define ieee754sp_one(sn) (ieee754sp_spcvals[IEEE754_SPCVAL_PONE+(sn)])
#define ieee754sp_ten(sn) (ieee754sp_spcvals[IEEE754_SPCVAL_PTEN+(sn)])
-#define ieee754sp_indef() (ieee754sp_spcvals[IEEE754_SPCVAL_INDEF])
+#define ieee754sp_indef() (ieee754sp_spcvals[IEEE754_SPCVAL_INDEF_LEG + \
+ ieee754_csr.nan2008])
#define ieee754sp_max(sn) (ieee754sp_spcvals[IEEE754_SPCVAL_PMAX+(sn)])
#define ieee754sp_min(sn) (ieee754sp_spcvals[IEEE754_SPCVAL_PMIN+(sn)])
#define ieee754sp_mind(sn) (ieee754sp_spcvals[IEEE754_SPCVAL_PMIND+(sn)])
@@ -266,12 +272,25 @@ extern const union ieee754sp __ieee754sp_spcvals[];
*/
static inline int ieee754si_indef(void)
{
- return INT_MAX;
+ return ieee754_csr.nan2008 ? 0 : INT_MAX;
}
static inline s64 ieee754di_indef(void)
{
- return S64_MAX;
+ return ieee754_csr.nan2008 ? 0 : S64_MAX;
+}
+
+/*
+ * Overflow integer value
+ */
+static inline int ieee754si_overflow(int xs)
+{
+ return ieee754_csr.nan2008 && xs ? INT_MIN : INT_MAX;
+}
+
+static inline s64 ieee754di_overflow(int xs)
+{
+ return ieee754_csr.nan2008 && xs ? S64_MIN : S64_MAX;
}
/* result types for xctx.rt */
diff --git a/arch/mips/math-emu/ieee754dp.c b/arch/mips/math-emu/ieee754dp.c
index 522d843f2ffd..465a0342ed4c 100644
--- a/arch/mips/math-emu/ieee754dp.c
+++ b/arch/mips/math-emu/ieee754dp.c
@@ -37,8 +37,11 @@ static inline int ieee754dp_isnan(union ieee754dp x)
static inline int ieee754dp_issnan(union ieee754dp x)
{
+ int qbit;
+
assert(ieee754dp_isnan(x));
- return (DPMANT(x) & DP_MBIT(DP_FBITS - 1)) == DP_MBIT(DP_FBITS - 1);
+ qbit = (DPMANT(x) & DP_MBIT(DP_FBITS - 1)) == DP_MBIT(DP_FBITS - 1);
+ return ieee754_csr.nan2008 ^ qbit;
}
@@ -51,7 +54,15 @@ union ieee754dp __cold ieee754dp_nanxcpt(union ieee754dp r)
assert(ieee754dp_issnan(r));
ieee754_setcx(IEEE754_INVALID_OPERATION);
- return ieee754dp_indef();
+ if (ieee754_csr.nan2008) {
+ DPMANT(r) |= DP_MBIT(DP_FBITS - 1);
+ } else {
+ DPMANT(r) &= ~DP_MBIT(DP_FBITS - 1);
+ if (!ieee754dp_isnan(r))
+ DPMANT(r) |= DP_MBIT(DP_FBITS - 2);
+ }
+
+ return r;
}
static u64 ieee754dp_get_rounding(int sn, u64 xm)
@@ -89,7 +100,7 @@ union ieee754dp ieee754dp_format(int sn, int xe, u64 xm)
{
assert(xm); /* we don't gen exact zeros (probably should) */
- assert((xm >> (DP_FBITS + 1 + 3)) == 0); /* no execess */
+ assert((xm >> (DP_FBITS + 1 + 3)) == 0); /* no excess */
assert(xm & (DP_HIDDEN_BIT << 3));
if (xe < DP_EMIN) {
@@ -157,7 +168,7 @@ union ieee754dp ieee754dp_format(int sn, int xe, u64 xm)
/* strip grs bits */
xm >>= 3;
- assert((xm >> (DP_FBITS + 1)) == 0); /* no execess */
+ assert((xm >> (DP_FBITS + 1)) == 0); /* no excess */
assert(xe >= DP_EMIN);
if (xe > DP_EMAX) {
@@ -190,7 +201,7 @@ union ieee754dp ieee754dp_format(int sn, int xe, u64 xm)
ieee754_setcx(IEEE754_UNDERFLOW);
return builddp(sn, DP_EMIN - 1 + DP_EBIAS, xm);
} else {
- assert((xm >> (DP_FBITS + 1)) == 0); /* no execess */
+ assert((xm >> (DP_FBITS + 1)) == 0); /* no excess */
assert(xm & DP_HIDDEN_BIT);
return builddp(sn, xe + DP_EBIAS, xm & ~DP_HIDDEN_BIT);
diff --git a/arch/mips/math-emu/ieee754dp.h b/arch/mips/math-emu/ieee754dp.h
index e2babd98fee3..a56707b75282 100644
--- a/arch/mips/math-emu/ieee754dp.h
+++ b/arch/mips/math-emu/ieee754dp.h
@@ -55,11 +55,15 @@ static inline int ieee754dp_finite(union ieee754dp x)
#define XDPSRS1(v) \
(((v) >> 1) | ((v) & 1))
+/* 32bit * 32bit => 64bit unsigned integer multiplication */
+#define DPXMULT(x, y) ((u64)(x) * (u64)y)
+
/* convert denormal to normalized with extended exponent */
#define DPDNORMx(m,e) \
while ((m >> DP_FBITS) == 0) { m <<= 1; e--; }
#define DPDNORMX DPDNORMx(xm, xe)
#define DPDNORMY DPDNORMx(ym, ye)
+#define DPDNORMZ DPDNORMx(zm, ze)
static inline union ieee754dp builddp(int s, int bx, u64 m)
{
diff --git a/arch/mips/math-emu/ieee754int.h b/arch/mips/math-emu/ieee754int.h
index 6383e2c5c1ad..06ac0e2ac7ac 100644
--- a/arch/mips/math-emu/ieee754int.h
+++ b/arch/mips/math-emu/ieee754int.h
@@ -26,6 +26,10 @@
#define CLPAIR(x, y) ((x)*6+(y))
+enum maddf_flags {
+ MADDF_NEGATE_PRODUCT = 1 << 0,
+};
+
static inline void ieee754_clearcx(void)
{
ieee754_csr.cx = 0;
@@ -50,10 +54,13 @@ static inline int ieee754_class_nan(int xc)
}
#define COMPXSP \
- unsigned xm; int xe; int xs __maybe_unused; int xc
+ unsigned int xm; int xe; int xs __maybe_unused; int xc
#define COMPYSP \
- unsigned ym; int ye; int ys; int yc
+ unsigned int ym; int ye; int ys; int yc
+
+#define COMPZSP \
+ unsigned int zm; int ze; int zs; int zc
#define EXPLODESP(v, vc, vs, ve, vm) \
{ \
@@ -63,10 +70,10 @@ static inline int ieee754_class_nan(int xc)
if (ve == SP_EMAX+1+SP_EBIAS) { \
if (vm == 0) \
vc = IEEE754_CLASS_INF; \
- else if (vm & SP_MBIT(SP_FBITS-1)) \
- vc = IEEE754_CLASS_SNAN; \
- else \
+ else if (ieee754_csr.nan2008 ^ !(vm & SP_MBIT(SP_FBITS - 1))) \
vc = IEEE754_CLASS_QNAN; \
+ else \
+ vc = IEEE754_CLASS_SNAN; \
} else if (ve == SP_EMIN-1+SP_EBIAS) { \
if (vm) { \
ve = SP_EMIN; \
@@ -81,6 +88,7 @@ static inline int ieee754_class_nan(int xc)
}
#define EXPLODEXSP EXPLODESP(x, xc, xs, xe, xm)
#define EXPLODEYSP EXPLODESP(y, yc, ys, ye, ym)
+#define EXPLODEZSP EXPLODESP(z, zc, zs, ze, zm)
#define COMPXDP \
@@ -89,6 +97,9 @@ static inline int ieee754_class_nan(int xc)
#define COMPYDP \
u64 ym; int ye; int ys; int yc
+#define COMPZDP \
+ u64 zm; int ze; int zs; int zc
+
#define EXPLODEDP(v, vc, vs, ve, vm) \
{ \
vm = DPMANT(v); \
@@ -97,10 +108,10 @@ static inline int ieee754_class_nan(int xc)
if (ve == DP_EMAX+1+DP_EBIAS) { \
if (vm == 0) \
vc = IEEE754_CLASS_INF; \
- else if (vm & DP_MBIT(DP_FBITS-1)) \
- vc = IEEE754_CLASS_SNAN; \
- else \
+ else if (ieee754_csr.nan2008 ^ !(vm & DP_MBIT(DP_FBITS - 1))) \
vc = IEEE754_CLASS_QNAN; \
+ else \
+ vc = IEEE754_CLASS_SNAN; \
} else if (ve == DP_EMIN-1+DP_EBIAS) { \
if (vm) { \
ve = DP_EMIN; \
@@ -115,6 +126,7 @@ static inline int ieee754_class_nan(int xc)
}
#define EXPLODEXDP EXPLODEDP(x, xc, xs, xe, xm)
#define EXPLODEYDP EXPLODEDP(y, yc, ys, ye, ym)
+#define EXPLODEZDP EXPLODEDP(z, zc, zs, ze, zm)
#define FLUSHDP(v, vc, vs, ve, vm) \
if (vc==IEEE754_CLASS_DNORM) { \
@@ -140,7 +152,9 @@ static inline int ieee754_class_nan(int xc)
#define FLUSHXDP FLUSHDP(x, xc, xs, xe, xm)
#define FLUSHYDP FLUSHDP(y, yc, ys, ye, ym)
+#define FLUSHZDP FLUSHDP(z, zc, zs, ze, zm)
#define FLUSHXSP FLUSHSP(x, xc, xs, xe, xm)
#define FLUSHYSP FLUSHSP(y, yc, ys, ye, ym)
+#define FLUSHZSP FLUSHSP(z, zc, zs, ze, zm)
#endif /* __IEEE754INT_H */
diff --git a/arch/mips/math-emu/ieee754sp.c b/arch/mips/math-emu/ieee754sp.c
index ca8e35e33bf7..8423e4c5e415 100644
--- a/arch/mips/math-emu/ieee754sp.c
+++ b/arch/mips/math-emu/ieee754sp.c
@@ -37,8 +37,11 @@ static inline int ieee754sp_isnan(union ieee754sp x)
static inline int ieee754sp_issnan(union ieee754sp x)
{
+ int qbit;
+
assert(ieee754sp_isnan(x));
- return SPMANT(x) & SP_MBIT(SP_FBITS - 1);
+ qbit = (SPMANT(x) & SP_MBIT(SP_FBITS - 1)) == SP_MBIT(SP_FBITS - 1);
+ return ieee754_csr.nan2008 ^ qbit;
}
@@ -51,10 +54,18 @@ union ieee754sp __cold ieee754sp_nanxcpt(union ieee754sp r)
assert(ieee754sp_issnan(r));
ieee754_setcx(IEEE754_INVALID_OPERATION);
- return ieee754sp_indef();
+ if (ieee754_csr.nan2008) {
+ SPMANT(r) |= SP_MBIT(SP_FBITS - 1);
+ } else {
+ SPMANT(r) &= ~SP_MBIT(SP_FBITS - 1);
+ if (!ieee754sp_isnan(r))
+ SPMANT(r) |= SP_MBIT(SP_FBITS - 2);
+ }
+
+ return r;
}
-static unsigned ieee754sp_get_rounding(int sn, unsigned xm)
+static unsigned int ieee754sp_get_rounding(int sn, unsigned int xm)
{
/* inexact must round of 3 bits
*/
@@ -85,11 +96,11 @@ static unsigned ieee754sp_get_rounding(int sn, unsigned xm)
* xe is an unbiased exponent
* xm is 3bit extended precision value.
*/
-union ieee754sp ieee754sp_format(int sn, int xe, unsigned xm)
+union ieee754sp ieee754sp_format(int sn, int xe, unsigned int xm)
{
assert(xm); /* we don't gen exact zeros (probably should) */
- assert((xm >> (SP_FBITS + 1 + 3)) == 0); /* no execess */
+ assert((xm >> (SP_FBITS + 1 + 3)) == 0); /* no excess */
assert(xm & (SP_HIDDEN_BIT << 3));
if (xe < SP_EMIN) {
@@ -130,7 +141,8 @@ union ieee754sp ieee754sp_format(int sn, int xe, unsigned xm)
} else {
/* sticky right shift es bits
*/
- SPXSRSXn(es);
+ xm = XSPSRS(xm, es);
+ xe += es;
assert((xm & (SP_HIDDEN_BIT << 3)) == 0);
assert(xe == SP_EMIN);
}
@@ -155,7 +167,7 @@ union ieee754sp ieee754sp_format(int sn, int xe, unsigned xm)
/* strip grs bits */
xm >>= 3;
- assert((xm >> (SP_FBITS + 1)) == 0); /* no execess */
+ assert((xm >> (SP_FBITS + 1)) == 0); /* no excess */
assert(xe >= SP_EMIN);
if (xe > SP_EMAX) {
@@ -188,7 +200,7 @@ union ieee754sp ieee754sp_format(int sn, int xe, unsigned xm)
ieee754_setcx(IEEE754_UNDERFLOW);
return buildsp(sn, SP_EMIN - 1 + SP_EBIAS, xm);
} else {
- assert((xm >> (SP_FBITS + 1)) == 0); /* no execess */
+ assert((xm >> (SP_FBITS + 1)) == 0); /* no excess */
assert(xm & SP_HIDDEN_BIT);
return buildsp(sn, xe + SP_EBIAS, xm & ~SP_HIDDEN_BIT);
diff --git a/arch/mips/math-emu/ieee754sp.h b/arch/mips/math-emu/ieee754sp.h
index 374a3f00a589..8c5a63804873 100644
--- a/arch/mips/math-emu/ieee754sp.h
+++ b/arch/mips/math-emu/ieee754sp.h
@@ -45,28 +45,31 @@ static inline int ieee754sp_finite(union ieee754sp x)
return SPBEXP(x) != SP_EMAX + 1 + SP_EBIAS;
}
+/* 64 bit right shift with rounding */
+#define XSPSRS64(v, rs) \
+ (((rs) >= 64) ? ((v) != 0) : ((v) >> (rs)) | ((v) << (64-(rs)) != 0))
+
/* 3bit extended single precision sticky right shift */
-#define SPXSRSXn(rs) \
- (xe += rs, \
- xm = (rs > (SP_FBITS+3))?1:((xm) >> (rs)) | ((xm) << (32-(rs)) != 0))
+#define XSPSRS(v, rs) \
+ ((rs > (SP_FBITS+3))?1:((v) >> (rs)) | ((v) << (32-(rs)) != 0))
-#define SPXSRSX1() \
- (xe++, (xm = (xm >> 1) | (xm & 1)))
+#define XSPSRS1(m) \
+ ((m >> 1) | (m & 1))
-#define SPXSRSYn(rs) \
- (ye+=rs, \
- ym = (rs > (SP_FBITS+3))?1:((ym) >> (rs)) | ((ym) << (32-(rs)) != 0))
+#define SPXSRSX1() \
+ (xe++, (xm = XSPSRS1(xm)))
#define SPXSRSY1() \
- (ye++, (ym = (ym >> 1) | (ym & 1)))
+ (ye++, (ym = XSPSRS1(ym)))
/* convert denormal to normalized with extended exponent */
#define SPDNORMx(m,e) \
while ((m >> SP_FBITS) == 0) { m <<= 1; e--; }
#define SPDNORMX SPDNORMx(xm, xe)
#define SPDNORMY SPDNORMx(ym, ye)
+#define SPDNORMZ SPDNORMx(zm, ze)
-static inline union ieee754sp buildsp(int s, int bx, unsigned m)
+static inline union ieee754sp buildsp(int s, int bx, unsigned int m)
{
union ieee754sp r;
diff --git a/arch/mips/math-emu/me-debugfs.c b/arch/mips/math-emu/me-debugfs.c
index be650ed7db59..8c0ec154aecc 100644
--- a/arch/mips/math-emu/me-debugfs.c
+++ b/arch/mips/math-emu/me-debugfs.c
@@ -28,14 +28,190 @@ static int fpuemu_stat_get(void *data, u64 *val)
}
DEFINE_SIMPLE_ATTRIBUTE(fops_fpuemu_stat, fpuemu_stat_get, NULL, "%llu\n");
+/*
+ * Used to obtain names for a debugfs instruction counter, given field name
+ * in fpuemustats structure. For example, for input "cmp_sueq_d", the output
+ * would be "cmp.sueq.d". This is needed since dots are not allowed to be
+ * used in structure field names, and are, on the other hand, desired to be
+ * used in debugfs item names to be clearly associated to corresponding
+ * MIPS FPU instructions.
+ */
+static void adjust_instruction_counter_name(char *out_name, char *in_name)
+{
+ int i = 0;
+
+ strcpy(out_name, in_name);
+ while (in_name[i] != '\0') {
+ if (out_name[i] == '_')
+ out_name[i] = '.';
+ i++;
+ }
+}
+
+static int fpuemustats_clear_show(struct seq_file *s, void *unused)
+{
+ __this_cpu_write((fpuemustats).emulated, 0);
+ __this_cpu_write((fpuemustats).loads, 0);
+ __this_cpu_write((fpuemustats).stores, 0);
+ __this_cpu_write((fpuemustats).branches, 0);
+ __this_cpu_write((fpuemustats).cp1ops, 0);
+ __this_cpu_write((fpuemustats).cp1xops, 0);
+ __this_cpu_write((fpuemustats).errors, 0);
+ __this_cpu_write((fpuemustats).ieee754_inexact, 0);
+ __this_cpu_write((fpuemustats).ieee754_underflow, 0);
+ __this_cpu_write((fpuemustats).ieee754_overflow, 0);
+ __this_cpu_write((fpuemustats).ieee754_zerodiv, 0);
+ __this_cpu_write((fpuemustats).ieee754_invalidop, 0);
+ __this_cpu_write((fpuemustats).ds_emul, 0);
+
+ __this_cpu_write((fpuemustats).abs_s, 0);
+ __this_cpu_write((fpuemustats).abs_d, 0);
+ __this_cpu_write((fpuemustats).add_s, 0);
+ __this_cpu_write((fpuemustats).add_d, 0);
+ __this_cpu_write((fpuemustats).bc1eqz, 0);
+ __this_cpu_write((fpuemustats).bc1nez, 0);
+ __this_cpu_write((fpuemustats).ceil_w_s, 0);
+ __this_cpu_write((fpuemustats).ceil_w_d, 0);
+ __this_cpu_write((fpuemustats).ceil_l_s, 0);
+ __this_cpu_write((fpuemustats).ceil_l_d, 0);
+ __this_cpu_write((fpuemustats).class_s, 0);
+ __this_cpu_write((fpuemustats).class_d, 0);
+ __this_cpu_write((fpuemustats).cmp_af_s, 0);
+ __this_cpu_write((fpuemustats).cmp_af_d, 0);
+ __this_cpu_write((fpuemustats).cmp_eq_s, 0);
+ __this_cpu_write((fpuemustats).cmp_eq_d, 0);
+ __this_cpu_write((fpuemustats).cmp_le_s, 0);
+ __this_cpu_write((fpuemustats).cmp_le_d, 0);
+ __this_cpu_write((fpuemustats).cmp_lt_s, 0);
+ __this_cpu_write((fpuemustats).cmp_lt_d, 0);
+ __this_cpu_write((fpuemustats).cmp_ne_s, 0);
+ __this_cpu_write((fpuemustats).cmp_ne_d, 0);
+ __this_cpu_write((fpuemustats).cmp_or_s, 0);
+ __this_cpu_write((fpuemustats).cmp_or_d, 0);
+ __this_cpu_write((fpuemustats).cmp_ueq_s, 0);
+ __this_cpu_write((fpuemustats).cmp_ueq_d, 0);
+ __this_cpu_write((fpuemustats).cmp_ule_s, 0);
+ __this_cpu_write((fpuemustats).cmp_ule_d, 0);
+ __this_cpu_write((fpuemustats).cmp_ult_s, 0);
+ __this_cpu_write((fpuemustats).cmp_ult_d, 0);
+ __this_cpu_write((fpuemustats).cmp_un_s, 0);
+ __this_cpu_write((fpuemustats).cmp_un_d, 0);
+ __this_cpu_write((fpuemustats).cmp_une_s, 0);
+ __this_cpu_write((fpuemustats).cmp_une_d, 0);
+ __this_cpu_write((fpuemustats).cmp_saf_s, 0);
+ __this_cpu_write((fpuemustats).cmp_saf_d, 0);
+ __this_cpu_write((fpuemustats).cmp_seq_s, 0);
+ __this_cpu_write((fpuemustats).cmp_seq_d, 0);
+ __this_cpu_write((fpuemustats).cmp_sle_s, 0);
+ __this_cpu_write((fpuemustats).cmp_sle_d, 0);
+ __this_cpu_write((fpuemustats).cmp_slt_s, 0);
+ __this_cpu_write((fpuemustats).cmp_slt_d, 0);
+ __this_cpu_write((fpuemustats).cmp_sne_s, 0);
+ __this_cpu_write((fpuemustats).cmp_sne_d, 0);
+ __this_cpu_write((fpuemustats).cmp_sor_s, 0);
+ __this_cpu_write((fpuemustats).cmp_sor_d, 0);
+ __this_cpu_write((fpuemustats).cmp_sueq_s, 0);
+ __this_cpu_write((fpuemustats).cmp_sueq_d, 0);
+ __this_cpu_write((fpuemustats).cmp_sule_s, 0);
+ __this_cpu_write((fpuemustats).cmp_sule_d, 0);
+ __this_cpu_write((fpuemustats).cmp_sult_s, 0);
+ __this_cpu_write((fpuemustats).cmp_sult_d, 0);
+ __this_cpu_write((fpuemustats).cmp_sun_s, 0);
+ __this_cpu_write((fpuemustats).cmp_sun_d, 0);
+ __this_cpu_write((fpuemustats).cmp_sune_s, 0);
+ __this_cpu_write((fpuemustats).cmp_sune_d, 0);
+ __this_cpu_write((fpuemustats).cvt_d_l, 0);
+ __this_cpu_write((fpuemustats).cvt_d_s, 0);
+ __this_cpu_write((fpuemustats).cvt_d_w, 0);
+ __this_cpu_write((fpuemustats).cvt_l_s, 0);
+ __this_cpu_write((fpuemustats).cvt_l_d, 0);
+ __this_cpu_write((fpuemustats).cvt_s_d, 0);
+ __this_cpu_write((fpuemustats).cvt_s_l, 0);
+ __this_cpu_write((fpuemustats).cvt_s_w, 0);
+ __this_cpu_write((fpuemustats).cvt_w_s, 0);
+ __this_cpu_write((fpuemustats).cvt_w_d, 0);
+ __this_cpu_write((fpuemustats).div_s, 0);
+ __this_cpu_write((fpuemustats).div_d, 0);
+ __this_cpu_write((fpuemustats).floor_w_s, 0);
+ __this_cpu_write((fpuemustats).floor_w_d, 0);
+ __this_cpu_write((fpuemustats).floor_l_s, 0);
+ __this_cpu_write((fpuemustats).floor_l_d, 0);
+ __this_cpu_write((fpuemustats).maddf_s, 0);
+ __this_cpu_write((fpuemustats).maddf_d, 0);
+ __this_cpu_write((fpuemustats).max_s, 0);
+ __this_cpu_write((fpuemustats).max_d, 0);
+ __this_cpu_write((fpuemustats).maxa_s, 0);
+ __this_cpu_write((fpuemustats).maxa_d, 0);
+ __this_cpu_write((fpuemustats).min_s, 0);
+ __this_cpu_write((fpuemustats).min_d, 0);
+ __this_cpu_write((fpuemustats).mina_s, 0);
+ __this_cpu_write((fpuemustats).mina_d, 0);
+ __this_cpu_write((fpuemustats).mov_s, 0);
+ __this_cpu_write((fpuemustats).mov_d, 0);
+ __this_cpu_write((fpuemustats).msubf_s, 0);
+ __this_cpu_write((fpuemustats).msubf_d, 0);
+ __this_cpu_write((fpuemustats).mul_s, 0);
+ __this_cpu_write((fpuemustats).mul_d, 0);
+ __this_cpu_write((fpuemustats).neg_s, 0);
+ __this_cpu_write((fpuemustats).neg_d, 0);
+ __this_cpu_write((fpuemustats).recip_s, 0);
+ __this_cpu_write((fpuemustats).recip_d, 0);
+ __this_cpu_write((fpuemustats).rint_s, 0);
+ __this_cpu_write((fpuemustats).rint_d, 0);
+ __this_cpu_write((fpuemustats).round_w_s, 0);
+ __this_cpu_write((fpuemustats).round_w_d, 0);
+ __this_cpu_write((fpuemustats).round_l_s, 0);
+ __this_cpu_write((fpuemustats).round_l_d, 0);
+ __this_cpu_write((fpuemustats).rsqrt_s, 0);
+ __this_cpu_write((fpuemustats).rsqrt_d, 0);
+ __this_cpu_write((fpuemustats).sel_s, 0);
+ __this_cpu_write((fpuemustats).sel_d, 0);
+ __this_cpu_write((fpuemustats).seleqz_s, 0);
+ __this_cpu_write((fpuemustats).seleqz_d, 0);
+ __this_cpu_write((fpuemustats).selnez_s, 0);
+ __this_cpu_write((fpuemustats).selnez_d, 0);
+ __this_cpu_write((fpuemustats).sqrt_s, 0);
+ __this_cpu_write((fpuemustats).sqrt_d, 0);
+ __this_cpu_write((fpuemustats).sub_s, 0);
+ __this_cpu_write((fpuemustats).sub_d, 0);
+ __this_cpu_write((fpuemustats).trunc_w_s, 0);
+ __this_cpu_write((fpuemustats).trunc_w_d, 0);
+ __this_cpu_write((fpuemustats).trunc_l_s, 0);
+ __this_cpu_write((fpuemustats).trunc_l_d, 0);
+
+ return 0;
+}
+
+static int fpuemustats_clear_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, fpuemustats_clear_show, inode->i_private);
+}
+
+static const struct file_operations fpuemustats_clear_fops = {
+ .open = fpuemustats_clear_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
static int __init debugfs_fpuemu(void)
{
- struct dentry *d, *dir;
+ struct dentry *fpuemu_debugfs_base_dir;
+ struct dentry *fpuemu_debugfs_inst_dir;
+ struct dentry *d, *reset_file;
if (!mips_debugfs_dir)
return -ENODEV;
- dir = debugfs_create_dir("fpuemustats", mips_debugfs_dir);
- if (!dir)
+
+ fpuemu_debugfs_base_dir = debugfs_create_dir("fpuemustats",
+ mips_debugfs_dir);
+ if (!fpuemu_debugfs_base_dir)
+ return -ENOMEM;
+
+ reset_file = debugfs_create_file("fpuemustats_clear", 0444,
+ mips_debugfs_dir, NULL,
+ &fpuemustats_clear_fops);
+ if (!reset_file)
return -ENOMEM;
#define FPU_EMU_STAT_OFFSET(m) \
@@ -43,7 +219,7 @@ static int __init debugfs_fpuemu(void)
#define FPU_STAT_CREATE(m) \
do { \
- d = debugfs_create_file(#m , S_IRUGO, dir, \
+ d = debugfs_create_file(#m, 0444, fpuemu_debugfs_base_dir, \
(void *)FPU_EMU_STAT_OFFSET(m), \
&fops_fpuemu_stat); \
if (!d) \
@@ -53,6 +229,7 @@ do { \
FPU_STAT_CREATE(emulated);
FPU_STAT_CREATE(loads);
FPU_STAT_CREATE(stores);
+ FPU_STAT_CREATE(branches);
FPU_STAT_CREATE(cp1ops);
FPU_STAT_CREATE(cp1xops);
FPU_STAT_CREATE(errors);
@@ -63,6 +240,139 @@ do { \
FPU_STAT_CREATE(ieee754_invalidop);
FPU_STAT_CREATE(ds_emul);
+ fpuemu_debugfs_inst_dir = debugfs_create_dir("instructions",
+ fpuemu_debugfs_base_dir);
+ if (!fpuemu_debugfs_inst_dir)
+ return -ENOMEM;
+
+#define FPU_STAT_CREATE_EX(m) \
+do { \
+ char name[32]; \
+ \
+ adjust_instruction_counter_name(name, #m); \
+ \
+ d = debugfs_create_file(name, 0444, fpuemu_debugfs_inst_dir, \
+ (void *)FPU_EMU_STAT_OFFSET(m), \
+ &fops_fpuemu_stat); \
+ if (!d) \
+ return -ENOMEM; \
+} while (0)
+
+ FPU_STAT_CREATE_EX(abs_s);
+ FPU_STAT_CREATE_EX(abs_d);
+ FPU_STAT_CREATE_EX(add_s);
+ FPU_STAT_CREATE_EX(add_d);
+ FPU_STAT_CREATE_EX(bc1eqz);
+ FPU_STAT_CREATE_EX(bc1nez);
+ FPU_STAT_CREATE_EX(ceil_w_s);
+ FPU_STAT_CREATE_EX(ceil_w_d);
+ FPU_STAT_CREATE_EX(ceil_l_s);
+ FPU_STAT_CREATE_EX(ceil_l_d);
+ FPU_STAT_CREATE_EX(class_s);
+ FPU_STAT_CREATE_EX(class_d);
+ FPU_STAT_CREATE_EX(cmp_af_s);
+ FPU_STAT_CREATE_EX(cmp_af_d);
+ FPU_STAT_CREATE_EX(cmp_eq_s);
+ FPU_STAT_CREATE_EX(cmp_eq_d);
+ FPU_STAT_CREATE_EX(cmp_le_s);
+ FPU_STAT_CREATE_EX(cmp_le_d);
+ FPU_STAT_CREATE_EX(cmp_lt_s);
+ FPU_STAT_CREATE_EX(cmp_lt_d);
+ FPU_STAT_CREATE_EX(cmp_ne_s);
+ FPU_STAT_CREATE_EX(cmp_ne_d);
+ FPU_STAT_CREATE_EX(cmp_or_s);
+ FPU_STAT_CREATE_EX(cmp_or_d);
+ FPU_STAT_CREATE_EX(cmp_ueq_s);
+ FPU_STAT_CREATE_EX(cmp_ueq_d);
+ FPU_STAT_CREATE_EX(cmp_ule_s);
+ FPU_STAT_CREATE_EX(cmp_ule_d);
+ FPU_STAT_CREATE_EX(cmp_ult_s);
+ FPU_STAT_CREATE_EX(cmp_ult_d);
+ FPU_STAT_CREATE_EX(cmp_un_s);
+ FPU_STAT_CREATE_EX(cmp_un_d);
+ FPU_STAT_CREATE_EX(cmp_une_s);
+ FPU_STAT_CREATE_EX(cmp_une_d);
+ FPU_STAT_CREATE_EX(cmp_saf_s);
+ FPU_STAT_CREATE_EX(cmp_saf_d);
+ FPU_STAT_CREATE_EX(cmp_seq_s);
+ FPU_STAT_CREATE_EX(cmp_seq_d);
+ FPU_STAT_CREATE_EX(cmp_sle_s);
+ FPU_STAT_CREATE_EX(cmp_sle_d);
+ FPU_STAT_CREATE_EX(cmp_slt_s);
+ FPU_STAT_CREATE_EX(cmp_slt_d);
+ FPU_STAT_CREATE_EX(cmp_sne_s);
+ FPU_STAT_CREATE_EX(cmp_sne_d);
+ FPU_STAT_CREATE_EX(cmp_sor_s);
+ FPU_STAT_CREATE_EX(cmp_sor_d);
+ FPU_STAT_CREATE_EX(cmp_sueq_s);
+ FPU_STAT_CREATE_EX(cmp_sueq_d);
+ FPU_STAT_CREATE_EX(cmp_sule_s);
+ FPU_STAT_CREATE_EX(cmp_sule_d);
+ FPU_STAT_CREATE_EX(cmp_sult_s);
+ FPU_STAT_CREATE_EX(cmp_sult_d);
+ FPU_STAT_CREATE_EX(cmp_sun_s);
+ FPU_STAT_CREATE_EX(cmp_sun_d);
+ FPU_STAT_CREATE_EX(cmp_sune_s);
+ FPU_STAT_CREATE_EX(cmp_sune_d);
+ FPU_STAT_CREATE_EX(cvt_d_l);
+ FPU_STAT_CREATE_EX(cvt_d_s);
+ FPU_STAT_CREATE_EX(cvt_d_w);
+ FPU_STAT_CREATE_EX(cvt_l_s);
+ FPU_STAT_CREATE_EX(cvt_l_d);
+ FPU_STAT_CREATE_EX(cvt_s_d);
+ FPU_STAT_CREATE_EX(cvt_s_l);
+ FPU_STAT_CREATE_EX(cvt_s_w);
+ FPU_STAT_CREATE_EX(cvt_w_s);
+ FPU_STAT_CREATE_EX(cvt_w_d);
+ FPU_STAT_CREATE_EX(div_s);
+ FPU_STAT_CREATE_EX(div_d);
+ FPU_STAT_CREATE_EX(floor_w_s);
+ FPU_STAT_CREATE_EX(floor_w_d);
+ FPU_STAT_CREATE_EX(floor_l_s);
+ FPU_STAT_CREATE_EX(floor_l_d);
+ FPU_STAT_CREATE_EX(maddf_s);
+ FPU_STAT_CREATE_EX(maddf_d);
+ FPU_STAT_CREATE_EX(max_s);
+ FPU_STAT_CREATE_EX(max_d);
+ FPU_STAT_CREATE_EX(maxa_s);
+ FPU_STAT_CREATE_EX(maxa_d);
+ FPU_STAT_CREATE_EX(min_s);
+ FPU_STAT_CREATE_EX(min_d);
+ FPU_STAT_CREATE_EX(mina_s);
+ FPU_STAT_CREATE_EX(mina_d);
+ FPU_STAT_CREATE_EX(mov_s);
+ FPU_STAT_CREATE_EX(mov_d);
+ FPU_STAT_CREATE_EX(msubf_s);
+ FPU_STAT_CREATE_EX(msubf_d);
+ FPU_STAT_CREATE_EX(mul_s);
+ FPU_STAT_CREATE_EX(mul_d);
+ FPU_STAT_CREATE_EX(neg_s);
+ FPU_STAT_CREATE_EX(neg_d);
+ FPU_STAT_CREATE_EX(recip_s);
+ FPU_STAT_CREATE_EX(recip_d);
+ FPU_STAT_CREATE_EX(rint_s);
+ FPU_STAT_CREATE_EX(rint_d);
+ FPU_STAT_CREATE_EX(round_w_s);
+ FPU_STAT_CREATE_EX(round_w_d);
+ FPU_STAT_CREATE_EX(round_l_s);
+ FPU_STAT_CREATE_EX(round_l_d);
+ FPU_STAT_CREATE_EX(rsqrt_s);
+ FPU_STAT_CREATE_EX(rsqrt_d);
+ FPU_STAT_CREATE_EX(sel_s);
+ FPU_STAT_CREATE_EX(sel_d);
+ FPU_STAT_CREATE_EX(seleqz_s);
+ FPU_STAT_CREATE_EX(seleqz_d);
+ FPU_STAT_CREATE_EX(selnez_s);
+ FPU_STAT_CREATE_EX(selnez_d);
+ FPU_STAT_CREATE_EX(sqrt_s);
+ FPU_STAT_CREATE_EX(sqrt_d);
+ FPU_STAT_CREATE_EX(sub_s);
+ FPU_STAT_CREATE_EX(sub_d);
+ FPU_STAT_CREATE_EX(trunc_w_s);
+ FPU_STAT_CREATE_EX(trunc_w_d);
+ FPU_STAT_CREATE_EX(trunc_l_s);
+ FPU_STAT_CREATE_EX(trunc_l_d);
+
return 0;
}
arch_initcall(debugfs_fpuemu);
diff --git a/arch/mips/math-emu/sp_add.c b/arch/mips/math-emu/sp_add.c
index f1c87b07d3b4..51dced9fbdaf 100644
--- a/arch/mips/math-emu/sp_add.c
+++ b/arch/mips/math-emu/sp_add.c
@@ -104,8 +104,7 @@ union ieee754sp ieee754sp_add(union ieee754sp x, union ieee754sp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
SPDNORMX;
-
- /* FALL THROUGH */
+ /* fall through */
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
SPDNORMY;
@@ -132,13 +131,15 @@ union ieee754sp ieee754sp_add(union ieee754sp x, union ieee754sp y)
* Have to shift y fraction right to align.
*/
s = xe - ye;
- SPXSRSYn(s);
+ ym = XSPSRS(ym, s);
+ ye += s;
} else if (ye > xe) {
/*
* Have to shift x fraction right to align.
*/
s = ye - xe;
- SPXSRSXn(s);
+ xm = XSPSRS(xm, s);
+ xe += s;
}
assert(xe == ye);
assert(xe <= SP_EMAX);
diff --git a/arch/mips/math-emu/sp_div.c b/arch/mips/math-emu/sp_div.c
index 27f6db3a0a4c..5d2904960eb8 100644
--- a/arch/mips/math-emu/sp_div.c
+++ b/arch/mips/math-emu/sp_div.c
@@ -23,9 +23,9 @@
union ieee754sp ieee754sp_div(union ieee754sp x, union ieee754sp y)
{
- unsigned rm;
+ unsigned int rm;
int re;
- unsigned bm;
+ unsigned int bm;
COMPXSP;
COMPYSP;
@@ -103,6 +103,7 @@ union ieee754sp ieee754sp_div(union ieee754sp x, union ieee754sp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
SPDNORMX;
+ /* fall through */
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
SPDNORMY;
diff --git a/arch/mips/math-emu/sp_fdp.c b/arch/mips/math-emu/sp_fdp.c
index 3797148893ad..36a50f9082d1 100644
--- a/arch/mips/math-emu/sp_fdp.c
+++ b/arch/mips/math-emu/sp_fdp.c
@@ -44,13 +44,17 @@ union ieee754sp ieee754sp_fdp(union ieee754dp x)
switch (xc) {
case IEEE754_CLASS_SNAN:
- return ieee754sp_nanxcpt(ieee754sp_nan_fdp(xs, xm));
+ x = ieee754dp_nanxcpt(x);
+ EXPLODEXDP;
+ /* fall through */
case IEEE754_CLASS_QNAN:
y = ieee754sp_nan_fdp(xs, xm);
- EXPLODEYSP;
- if (!ieee754_class_nan(yc))
- y = ieee754sp_indef();
+ if (!ieee754_csr.nan2008) {
+ EXPLODEYSP;
+ if (!ieee754_class_nan(yc))
+ y = ieee754sp_indef();
+ }
return y;
case IEEE754_CLASS_INF:
diff --git a/arch/mips/math-emu/sp_fint.c b/arch/mips/math-emu/sp_fint.c
index d5d8495b2cc4..1a35d12b6fc8 100644
--- a/arch/mips/math-emu/sp_fint.c
+++ b/arch/mips/math-emu/sp_fint.c
@@ -23,7 +23,7 @@
union ieee754sp ieee754sp_fint(int x)
{
- unsigned xm;
+ unsigned int xm;
int xe;
int xs;
diff --git a/arch/mips/math-emu/sp_fmax.c b/arch/mips/math-emu/sp_fmax.c
index 74a5a00d2f22..22019ed691df 100644
--- a/arch/mips/math-emu/sp_fmax.c
+++ b/arch/mips/math-emu/sp_fmax.c
@@ -96,6 +96,7 @@ union ieee754sp ieee754sp_fmax(union ieee754sp x, union ieee754sp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
SPDNORMX;
+ /* fall through */
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
SPDNORMY;
@@ -224,6 +225,7 @@ union ieee754sp ieee754sp_fmaxa(union ieee754sp x, union ieee754sp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
SPDNORMX;
+ /* fall through */
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
SPDNORMY;
diff --git a/arch/mips/math-emu/sp_fmin.c b/arch/mips/math-emu/sp_fmin.c
index c51385f46b09..feaec3985cca 100644
--- a/arch/mips/math-emu/sp_fmin.c
+++ b/arch/mips/math-emu/sp_fmin.c
@@ -96,6 +96,7 @@ union ieee754sp ieee754sp_fmin(union ieee754sp x, union ieee754sp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
SPDNORMX;
+ /* fall through */
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
SPDNORMY;
@@ -224,6 +225,7 @@ union ieee754sp ieee754sp_fmina(union ieee754sp x, union ieee754sp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
SPDNORMX;
+ /* fall through */
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
SPDNORMY;
diff --git a/arch/mips/math-emu/sp_maddf.c b/arch/mips/math-emu/sp_maddf.c
index dd1dd83e34eb..07ba675401e2 100644
--- a/arch/mips/math-emu/sp_maddf.c
+++ b/arch/mips/math-emu/sp_maddf.c
@@ -14,81 +14,60 @@
#include "ieee754sp.h"
-union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x,
- union ieee754sp y)
+
+static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
+ union ieee754sp y, enum maddf_flags flags)
{
int re;
int rs;
- unsigned rm;
- unsigned short lxm;
- unsigned short hxm;
- unsigned short lym;
- unsigned short hym;
- unsigned lrm;
- unsigned hrm;
- unsigned t;
- unsigned at;
+ unsigned int rm;
+ u64 rm64;
+ u64 zm64;
int s;
COMPXSP;
COMPYSP;
- u32 zm; int ze; int zs __maybe_unused; int zc;
+ COMPZSP;
EXPLODEXSP;
EXPLODEYSP;
- EXPLODESP(z, zc, zs, ze, zm)
+ EXPLODEZSP;
FLUSHXSP;
FLUSHYSP;
- FLUSHSP(z, zc, zs, ze, zm);
+ FLUSHZSP;
ieee754_clearcx();
- switch (zc) {
- case IEEE754_CLASS_SNAN:
- ieee754_setcx(IEEE754_INVALID_OPERATION);
+ /*
+ * Handle the cases when at least one of x, y or z is a NaN.
+ * Order of precedence is sNaN, qNaN and z, x, y.
+ */
+ if (zc == IEEE754_CLASS_SNAN)
return ieee754sp_nanxcpt(z);
- case IEEE754_CLASS_DNORM:
- SPDNORMx(zm, ze);
- /* QNAN is handled separately below */
- }
-
- switch (CLPAIR(xc, yc)) {
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN):
- case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN):
- case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN):
- case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN):
- case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN):
+ if (xc == IEEE754_CLASS_SNAN)
+ return ieee754sp_nanxcpt(x);
+ if (yc == IEEE754_CLASS_SNAN)
return ieee754sp_nanxcpt(y);
+ if (zc == IEEE754_CLASS_QNAN)
+ return z;
+ if (xc == IEEE754_CLASS_QNAN)
+ return x;
+ if (yc == IEEE754_CLASS_QNAN)
+ return y;
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN):
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN):
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO):
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM):
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM):
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
- return ieee754sp_nanxcpt(x);
+ if (zc == IEEE754_CLASS_DNORM)
+ SPDNORMZ;
+ /* ZERO z cases are handled separately below */
- case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
- case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
- case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
- case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
- return y;
+ switch (CLPAIR(xc, yc)) {
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF):
- return x;
/*
* Infinity handling
*/
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
- if (zc == IEEE754_CLASS_QNAN)
- return z;
ieee754_setcx(IEEE754_INVALID_OPERATION);
return ieee754sp_indef();
@@ -97,9 +76,27 @@ union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x,
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
- if (zc == IEEE754_CLASS_QNAN)
- return z;
- return ieee754sp_inf(xs ^ ys);
+ if ((zc == IEEE754_CLASS_INF) &&
+ ((!(flags & MADDF_NEGATE_PRODUCT) && (zs != (xs ^ ys))) ||
+ ((flags & MADDF_NEGATE_PRODUCT) && (zs == (xs ^ ys))))) {
+ /*
+ * Cases of addition of infinities with opposite signs
+ * or subtraction of infinities with same signs.
+ */
+ ieee754_setcx(IEEE754_INVALID_OPERATION);
+ return ieee754sp_indef();
+ }
+ /*
+ * z is here either not an infinity, or an infinity having the
+ * same sign as product (x*y) (in case of MADDF.D instruction)
+ * or product -(x*y) (in MSUBF.D case). The result must be an
+ * infinity, and its sign is determined only by the value of
+ * (flags & MADDF_NEGATE_PRODUCT) and the signs of x and y.
+ */
+ if (flags & MADDF_NEGATE_PRODUCT)
+ return ieee754sp_inf(1 ^ (xs ^ ys));
+ else
+ return ieee754sp_inf(xs ^ ys);
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
@@ -108,34 +105,45 @@ union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x,
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
if (zc == IEEE754_CLASS_INF)
return ieee754sp_inf(zs);
- /* Multiplication is 0 so just return z */
+ if (zc == IEEE754_CLASS_ZERO) {
+ /* Handle cases +0 + (-0) and similar ones. */
+ if ((!(flags & MADDF_NEGATE_PRODUCT)
+ && (zs == (xs ^ ys))) ||
+ ((flags & MADDF_NEGATE_PRODUCT)
+ && (zs != (xs ^ ys))))
+ /*
+ * Cases of addition of zeros of equal signs
+ * or subtraction of zeroes of opposite signs.
+ * The sign of the resulting zero is in any
+ * such case determined only by the sign of z.
+ */
+ return z;
+
+ return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
+ }
+ /* x*y is here 0, and z is not 0, so just return z */
return z;
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
SPDNORMX;
+ /* fall through */
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
- if (zc == IEEE754_CLASS_QNAN)
- return z;
- else if (zc == IEEE754_CLASS_INF)
+ if (zc == IEEE754_CLASS_INF)
return ieee754sp_inf(zs);
SPDNORMY;
break;
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
- if (zc == IEEE754_CLASS_QNAN)
- return z;
- else if (zc == IEEE754_CLASS_INF)
+ if (zc == IEEE754_CLASS_INF)
return ieee754sp_inf(zs);
SPDNORMX;
break;
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
- if (zc == IEEE754_CLASS_QNAN)
- return z;
- else if (zc == IEEE754_CLASS_INF)
+ if (zc == IEEE754_CLASS_INF)
return ieee754sp_inf(zs);
- /* fall through to real computations */
+ /* continue to real computations */
}
/* Finally get to do some computation */
@@ -154,102 +162,104 @@ union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x,
re = xe + ye;
rs = xs ^ ys;
+ if (flags & MADDF_NEGATE_PRODUCT)
+ rs ^= 1;
- /* shunt to top of word */
- xm <<= 32 - (SP_FBITS + 1);
- ym <<= 32 - (SP_FBITS + 1);
+ /* Multiple 24 bit xm and ym to give 48 bit results */
+ rm64 = (uint64_t)xm * ym;
- /*
- * Multiply 32 bits xm, ym to give high 32 bits rm with stickness.
- */
- lxm = xm & 0xffff;
- hxm = xm >> 16;
- lym = ym & 0xffff;
- hym = ym >> 16;
+ /* Shunt to top of word */
+ rm64 = rm64 << 16;
- lrm = lxm * lym; /* 16 * 16 => 32 */
- hrm = hxm * hym; /* 16 * 16 => 32 */
-
- t = lxm * hym; /* 16 * 16 => 32 */
- at = lrm + (t << 16);
- hrm += at < lrm;
- lrm = at;
- hrm = hrm + (t >> 16);
-
- t = hxm * lym; /* 16 * 16 => 32 */
- at = lrm + (t << 16);
- hrm += at < lrm;
- lrm = at;
- hrm = hrm + (t >> 16);
-
- rm = hrm | (lrm != 0);
-
- /*
- * Sticky shift down to normal rounding precision.
- */
- if ((int) rm < 0) {
- rm = (rm >> (32 - (SP_FBITS + 1 + 3))) |
- ((rm << (SP_FBITS + 1 + 3)) != 0);
+ /* Put explicit bit at bit 62 if necessary */
+ if ((int64_t) rm64 < 0) {
+ rm64 = rm64 >> 1;
re++;
- } else {
- rm = (rm >> (32 - (SP_FBITS + 1 + 3 + 1))) |
- ((rm << (SP_FBITS + 1 + 3 + 1)) != 0);
}
- assert(rm & (SP_HIDDEN_BIT << 3));
- /* And now the addition */
+ assert(rm64 & (1 << 62));
- assert(zm & SP_HIDDEN_BIT);
+ if (zc == IEEE754_CLASS_ZERO) {
+ /*
+ * Move explicit bit from bit 62 to bit 26 since the
+ * ieee754sp_format code expects the mantissa to be
+ * 27 bits wide (24 + 3 rounding bits).
+ */
+ rm = XSPSRS64(rm64, (62 - 26));
+ return ieee754sp_format(rs, re, rm);
+ }
- /*
- * Provide guard,round and stick bit space.
- */
- zm <<= 3;
+ /* Move explicit bit from bit 23 to bit 62 */
+ zm64 = (uint64_t)zm << (62 - 23);
+ assert(zm64 & (1 << 62));
+ /* Make the exponents the same */
if (ze > re) {
/*
- * Have to shift y fraction right to align.
+ * Have to shift r fraction right to align.
*/
s = ze - re;
- SPXSRSYn(s);
+ rm64 = XSPSRS64(rm64, s);
+ re += s;
} else if (re > ze) {
/*
- * Have to shift x fraction right to align.
+ * Have to shift z fraction right to align.
*/
s = re - ze;
- SPXSRSYn(s);
+ zm64 = XSPSRS64(zm64, s);
+ ze += s;
}
assert(ze == re);
assert(ze <= SP_EMAX);
+ /* Do the addition */
if (zs == rs) {
/*
- * Generate 28 bit result of adding two 27 bit numbers
- * leaving result in zm, zs and ze.
+ * Generate 64 bit result by adding two 63 bit numbers
+ * leaving result in zm64, zs and ze.
*/
- zm = zm + rm;
-
- if (zm >> (SP_FBITS + 1 + 3)) { /* carry out */
- SPXSRSX1();
+ zm64 = zm64 + rm64;
+ if ((int64_t)zm64 < 0) { /* carry out */
+ zm64 = XSPSRS1(zm64);
+ ze++;
}
} else {
- if (zm >= rm) {
- zm = zm - rm;
+ if (zm64 >= rm64) {
+ zm64 = zm64 - rm64;
} else {
- zm = rm - zm;
+ zm64 = rm64 - zm64;
zs = rs;
}
- if (zm == 0)
+ if (zm64 == 0)
return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
/*
- * Normalize in extended single precision
+ * Put explicit bit at bit 62 if necessary.
*/
- while ((zm >> (SP_MBITS + 3)) == 0) {
- zm <<= 1;
+ while ((zm64 >> 62) == 0) {
+ zm64 <<= 1;
ze--;
}
-
}
+
+ /*
+ * Move explicit bit from bit 62 to bit 26 since the
+ * ieee754sp_format code expects the mantissa to be
+ * 27 bits wide (24 + 3 rounding bits).
+ */
+ zm = XSPSRS64(zm64, (62 - 26));
+
return ieee754sp_format(zs, ze, zm);
}
+
+union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x,
+ union ieee754sp y)
+{
+ return _sp_maddf(z, x, y, 0);
+}
+
+union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x,
+ union ieee754sp y)
+{
+ return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
+}
diff --git a/arch/mips/math-emu/sp_msubf.c b/arch/mips/math-emu/sp_msubf.c
deleted file mode 100644
index 81c38b980d69..000000000000
--- a/arch/mips/math-emu/sp_msubf.c
+++ /dev/null
@@ -1,258 +0,0 @@
-/*
- * IEEE754 floating point arithmetic
- * single precision: MSUB.f (Fused Multiply Subtract)
- * MSUBF.fmt: FPR[fd] = FPR[fd] - (FPR[fs] x FPR[ft])
- *
- * MIPS floating point support
- * Copyright (C) 2015 Imagination Technologies, Ltd.
- * Author: Markos Chandras <markos.chandras@imgtec.com>
- *
- * This program is free software; you can distribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; version 2 of the License.
- */
-
-#include "ieee754sp.h"
-
-union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x,
- union ieee754sp y)
-{
- int re;
- int rs;
- unsigned rm;
- unsigned short lxm;
- unsigned short hxm;
- unsigned short lym;
- unsigned short hym;
- unsigned lrm;
- unsigned hrm;
- unsigned t;
- unsigned at;
- int s;
-
- COMPXSP;
- COMPYSP;
- u32 zm; int ze; int zs __maybe_unused; int zc;
-
- EXPLODEXSP;
- EXPLODEYSP;
- EXPLODESP(z, zc, zs, ze, zm)
-
- FLUSHXSP;
- FLUSHYSP;
- FLUSHSP(z, zc, zs, ze, zm);
-
- ieee754_clearcx();
-
- switch (zc) {
- case IEEE754_CLASS_SNAN:
- ieee754_setcx(IEEE754_INVALID_OPERATION);
- return ieee754sp_nanxcpt(z);
- case IEEE754_CLASS_DNORM:
- SPDNORMx(zm, ze);
- /* QNAN is handled separately below */
- }
-
- switch (CLPAIR(xc, yc)) {
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN):
- case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN):
- case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN):
- case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN):
- case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN):
- return ieee754sp_nanxcpt(y);
-
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN):
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN):
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO):
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM):
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM):
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
- return ieee754sp_nanxcpt(x);
-
- case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
- case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
- case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
- case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
- return y;
-
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF):
- return x;
-
- /*
- * Infinity handling
- */
- case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
- case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
- if (zc == IEEE754_CLASS_QNAN)
- return z;
- ieee754_setcx(IEEE754_INVALID_OPERATION);
- return ieee754sp_indef();
-
- case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
- case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
- case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
- case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
- case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
- if (zc == IEEE754_CLASS_QNAN)
- return z;
- return ieee754sp_inf(xs ^ ys);
-
- case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
- case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
- case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
- case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
- case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
- if (zc == IEEE754_CLASS_INF)
- return ieee754sp_inf(zs);
- /* Multiplication is 0 so just return z */
- return z;
-
- case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
- SPDNORMX;
-
- case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
- if (zc == IEEE754_CLASS_QNAN)
- return z;
- else if (zc == IEEE754_CLASS_INF)
- return ieee754sp_inf(zs);
- SPDNORMY;
- break;
-
- case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
- if (zc == IEEE754_CLASS_QNAN)
- return z;
- else if (zc == IEEE754_CLASS_INF)
- return ieee754sp_inf(zs);
- SPDNORMX;
- break;
-
- case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
- if (zc == IEEE754_CLASS_QNAN)
- return z;
- else if (zc == IEEE754_CLASS_INF)
- return ieee754sp_inf(zs);
- /* fall through to real compuation */
- }
-
- /* Finally get to do some computation */
-
- /*
- * Do the multiplication bit first
- *
- * rm = xm * ym, re = xe + ye basically
- *
- * At this point xm and ym should have been normalized.
- */
-
- /* rm = xm * ym, re = xe+ye basically */
- assert(xm & SP_HIDDEN_BIT);
- assert(ym & SP_HIDDEN_BIT);
-
- re = xe + ye;
- rs = xs ^ ys;
-
- /* shunt to top of word */
- xm <<= 32 - (SP_FBITS + 1);
- ym <<= 32 - (SP_FBITS + 1);
-
- /*
- * Multiply 32 bits xm, ym to give high 32 bits rm with stickness.
- */
- lxm = xm & 0xffff;
- hxm = xm >> 16;
- lym = ym & 0xffff;
- hym = ym >> 16;
-
- lrm = lxm * lym; /* 16 * 16 => 32 */
- hrm = hxm * hym; /* 16 * 16 => 32 */
-
- t = lxm * hym; /* 16 * 16 => 32 */
- at = lrm + (t << 16);
- hrm += at < lrm;
- lrm = at;
- hrm = hrm + (t >> 16);
-
- t = hxm * lym; /* 16 * 16 => 32 */
- at = lrm + (t << 16);
- hrm += at < lrm;
- lrm = at;
- hrm = hrm + (t >> 16);
-
- rm = hrm | (lrm != 0);
-
- /*
- * Sticky shift down to normal rounding precision.
- */
- if ((int) rm < 0) {
- rm = (rm >> (32 - (SP_FBITS + 1 + 3))) |
- ((rm << (SP_FBITS + 1 + 3)) != 0);
- re++;
- } else {
- rm = (rm >> (32 - (SP_FBITS + 1 + 3 + 1))) |
- ((rm << (SP_FBITS + 1 + 3 + 1)) != 0);
- }
- assert(rm & (SP_HIDDEN_BIT << 3));
-
- /* And now the subtraction */
-
- /* Flip sign of r and handle as add */
- rs ^= 1;
-
- assert(zm & SP_HIDDEN_BIT);
-
- /*
- * Provide guard,round and stick bit space.
- */
- zm <<= 3;
-
- if (ze > re) {
- /*
- * Have to shift y fraction right to align.
- */
- s = ze - re;
- SPXSRSYn(s);
- } else if (re > ze) {
- /*
- * Have to shift x fraction right to align.
- */
- s = re - ze;
- SPXSRSYn(s);
- }
- assert(ze == re);
- assert(ze <= SP_EMAX);
-
- if (zs == rs) {
- /*
- * Generate 28 bit result of adding two 27 bit numbers
- * leaving result in zm, zs and ze.
- */
- zm = zm + rm;
-
- if (zm >> (SP_FBITS + 1 + 3)) { /* carry out */
- SPXSRSX1(); /* shift preserving sticky */
- }
- } else {
- if (zm >= rm) {
- zm = zm - rm;
- } else {
- zm = rm - zm;
- zs = rs;
- }
- if (zm == 0)
- return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
-
- /*
- * Normalize in extended single precision
- */
- while ((zm >> (SP_MBITS + 3)) == 0) {
- zm <<= 1;
- ze--;
- }
-
- }
- return ieee754sp_format(zs, ze, zm);
-}
diff --git a/arch/mips/math-emu/sp_mul.c b/arch/mips/math-emu/sp_mul.c
index d910c43a6f30..fde71e293ec4 100644
--- a/arch/mips/math-emu/sp_mul.c
+++ b/arch/mips/math-emu/sp_mul.c
@@ -25,15 +25,15 @@ union ieee754sp ieee754sp_mul(union ieee754sp x, union ieee754sp y)
{
int re;
int rs;
- unsigned rm;
+ unsigned int rm;
unsigned short lxm;
unsigned short hxm;
unsigned short lym;
unsigned short hym;
- unsigned lrm;
- unsigned hrm;
- unsigned t;
- unsigned at;
+ unsigned int lrm;
+ unsigned int hrm;
+ unsigned int t;
+ unsigned int at;
COMPXSP;
COMPYSP;
@@ -101,6 +101,7 @@ union ieee754sp ieee754sp_mul(union ieee754sp x, union ieee754sp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
SPDNORMX;
+ /* fall through */
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
SPDNORMY;
diff --git a/arch/mips/math-emu/sp_rint.c b/arch/mips/math-emu/sp_rint.c
new file mode 100644
index 000000000000..70765b17e196
--- /dev/null
+++ b/arch/mips/math-emu/sp_rint.c
@@ -0,0 +1,90 @@
+/* IEEE754 floating point arithmetic
+ * single precision
+ */
+/*
+ * MIPS floating point support
+ * Copyright (C) 1994-2000 Algorithmics Ltd.
+ * Copyright (C) 2017 Imagination Technologies, Ltd.
+ * Author: Aleksandar Markovic <aleksandar.markovic@imgtec.com>
+ *
+ * This program is free software; you can distribute it and/or modify it
+ * under the terms of the GNU General Public License (Version 2) as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program.
+ */
+
+#include "ieee754sp.h"
+
+union ieee754sp ieee754sp_rint(union ieee754sp x)
+{
+ union ieee754sp ret;
+ u32 residue;
+ int sticky;
+ int round;
+ int odd;
+
+ COMPXDP; /* <-- DP needed for 64-bit mantissa tmp */
+
+ ieee754_clearcx();
+
+ EXPLODEXSP;
+ FLUSHXSP;
+
+ if (xc == IEEE754_CLASS_SNAN)
+ return ieee754sp_nanxcpt(x);
+
+ if ((xc == IEEE754_CLASS_QNAN) ||
+ (xc == IEEE754_CLASS_INF) ||
+ (xc == IEEE754_CLASS_ZERO))
+ return x;
+
+ if (xe >= SP_FBITS)
+ return x;
+
+ if (xe < -1) {
+ residue = xm;
+ round = 0;
+ sticky = residue != 0;
+ xm = 0;
+ } else {
+ residue = xm << (xe + 1);
+ residue <<= 31 - SP_FBITS;
+ round = (residue >> 31) != 0;
+ sticky = (residue << 1) != 0;
+ xm >>= SP_FBITS - xe;
+ }
+
+ odd = (xm & 0x1) != 0x0;
+
+ switch (ieee754_csr.rm) {
+ case FPU_CSR_RN: /* toward nearest */
+ if (round && (sticky || odd))
+ xm++;
+ break;
+ case FPU_CSR_RZ: /* toward zero */
+ break;
+ case FPU_CSR_RU: /* toward +infinity */
+ if ((round || sticky) && !xs)
+ xm++;
+ break;
+ case FPU_CSR_RD: /* toward -infinity */
+ if ((round || sticky) && xs)
+ xm++;
+ break;
+ }
+
+ if (round || sticky)
+ ieee754_setcx(IEEE754_INEXACT);
+
+ ret = ieee754sp_flong(xm);
+ SPSIGN(ret) = xs;
+
+ return ret;
+}
diff --git a/arch/mips/math-emu/sp_simple.c b/arch/mips/math-emu/sp_simple.c
index c50e9451f2d2..756c9cf2dfd2 100644
--- a/arch/mips/math-emu/sp_simple.c
+++ b/arch/mips/math-emu/sp_simple.c
@@ -23,27 +23,39 @@
union ieee754sp ieee754sp_neg(union ieee754sp x)
{
- unsigned int oldrm;
union ieee754sp y;
- oldrm = ieee754_csr.rm;
- ieee754_csr.rm = FPU_CSR_RD;
- y = ieee754sp_sub(ieee754sp_zero(0), x);
- ieee754_csr.rm = oldrm;
+ if (ieee754_csr.abs2008) {
+ y = x;
+ SPSIGN(y) = !SPSIGN(x);
+ } else {
+ unsigned int oldrm;
+
+ oldrm = ieee754_csr.rm;
+ ieee754_csr.rm = FPU_CSR_RD;
+ y = ieee754sp_sub(ieee754sp_zero(0), x);
+ ieee754_csr.rm = oldrm;
+ }
return y;
}
union ieee754sp ieee754sp_abs(union ieee754sp x)
{
- unsigned int oldrm;
union ieee754sp y;
- oldrm = ieee754_csr.rm;
- ieee754_csr.rm = FPU_CSR_RD;
- if (SPSIGN(x))
- y = ieee754sp_sub(ieee754sp_zero(0), x);
- else
- y = ieee754sp_add(ieee754sp_zero(0), x);
- ieee754_csr.rm = oldrm;
+ if (ieee754_csr.abs2008) {
+ y = x;
+ SPSIGN(y) = 0;
+ } else {
+ unsigned int oldrm;
+
+ oldrm = ieee754_csr.rm;
+ ieee754_csr.rm = FPU_CSR_RD;
+ if (SPSIGN(x))
+ y = ieee754sp_sub(ieee754sp_zero(0), x);
+ else
+ y = ieee754sp_add(ieee754sp_zero(0), x);
+ ieee754_csr.rm = oldrm;
+ }
return y;
}
diff --git a/arch/mips/math-emu/sp_sqrt.c b/arch/mips/math-emu/sp_sqrt.c
index 67059c33a250..9cc83f012342 100644
--- a/arch/mips/math-emu/sp_sqrt.c
+++ b/arch/mips/math-emu/sp_sqrt.c
@@ -82,7 +82,8 @@ union ieee754sp ieee754sp_sqrt(union ieee754sp x)
/* generate sqrt(x) bit by bit */
ix += ix;
- q = s = 0; /* q = sqrt(x) */
+ s = 0;
+ q = 0; /* q = sqrt(x) */
r = 0x01000000; /* r = moving bit from right to left */
while (r != 0) {
diff --git a/arch/mips/math-emu/sp_sub.c b/arch/mips/math-emu/sp_sub.c
index ec5f937a8b3e..9f2ff72c3d6b 100644
--- a/arch/mips/math-emu/sp_sub.c
+++ b/arch/mips/math-emu/sp_sub.c
@@ -106,6 +106,7 @@ union ieee754sp ieee754sp_sub(union ieee754sp x, union ieee754sp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
SPDNORMX;
+ /* fall through */
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
SPDNORMY;
@@ -134,13 +135,15 @@ union ieee754sp ieee754sp_sub(union ieee754sp x, union ieee754sp y)
* have to shift y fraction right to align
*/
s = xe - ye;
- SPXSRSYn(s);
+ ym = XSPSRS(ym, s);
+ ye += s;
} else if (ye > xe) {
/*
* have to shift x fraction right to align
*/
s = ye - xe;
- SPXSRSXn(s);
+ xm = XSPSRS(xm, s);
+ xe += s;
}
assert(xe == ye);
assert(xe <= SP_EMAX);
diff --git a/arch/mips/math-emu/sp_tint.c b/arch/mips/math-emu/sp_tint.c
index 091299a31798..f4b4cabfe2e1 100644
--- a/arch/mips/math-emu/sp_tint.c
+++ b/arch/mips/math-emu/sp_tint.c
@@ -38,10 +38,13 @@ int ieee754sp_tint(union ieee754sp x)
switch (xc) {
case IEEE754_CLASS_SNAN:
case IEEE754_CLASS_QNAN:
- case IEEE754_CLASS_INF:
ieee754_setcx(IEEE754_INVALID_OPERATION);
return ieee754si_indef();
+ case IEEE754_CLASS_INF:
+ ieee754_setcx(IEEE754_INVALID_OPERATION);
+ return ieee754si_overflow(xs);
+
case IEEE754_CLASS_ZERO:
return 0;
@@ -56,7 +59,7 @@ int ieee754sp_tint(union ieee754sp x)
/* Set invalid. We will only use overflow for floating
point overflow */
ieee754_setcx(IEEE754_INVALID_OPERATION);
- return ieee754si_indef();
+ return ieee754si_overflow(xs);
}
/* oh gawd */
if (xe > SP_FBITS) {
@@ -97,7 +100,7 @@ int ieee754sp_tint(union ieee754sp x)
if ((xm >> 31) != 0) {
/* This can happen after rounding */
ieee754_setcx(IEEE754_INVALID_OPERATION);
- return ieee754si_indef();
+ return ieee754si_overflow(xs);
}
if (round || sticky)
ieee754_setcx(IEEE754_INEXACT);
diff --git a/arch/mips/math-emu/sp_tlong.c b/arch/mips/math-emu/sp_tlong.c
index 9f3c742c1cea..bca5ac995801 100644
--- a/arch/mips/math-emu/sp_tlong.c
+++ b/arch/mips/math-emu/sp_tlong.c
@@ -20,7 +20,6 @@
*/
#include "ieee754sp.h"
-#include "ieee754dp.h"
s64 ieee754sp_tlong(union ieee754sp x)
{
@@ -39,10 +38,13 @@ s64 ieee754sp_tlong(union ieee754sp x)
switch (xc) {
case IEEE754_CLASS_SNAN:
case IEEE754_CLASS_QNAN:
- case IEEE754_CLASS_INF:
ieee754_setcx(IEEE754_INVALID_OPERATION);
return ieee754di_indef();
+ case IEEE754_CLASS_INF:
+ ieee754_setcx(IEEE754_INVALID_OPERATION);
+ return ieee754di_overflow(xs);
+
case IEEE754_CLASS_ZERO:
return 0;
@@ -57,7 +59,7 @@ s64 ieee754sp_tlong(union ieee754sp x)
/* Set invalid. We will only use overflow for floating
point overflow */
ieee754_setcx(IEEE754_INVALID_OPERATION);
- return ieee754di_indef();
+ return ieee754di_overflow(xs);
}
/* oh gawd */
if (xe > SP_FBITS) {
@@ -94,7 +96,7 @@ s64 ieee754sp_tlong(union ieee754sp x)
if ((xm >> 63) != 0) {
/* This can happen after rounding */
ieee754_setcx(IEEE754_INVALID_OPERATION);
- return ieee754di_indef();
+ return ieee754di_overflow(xs);
}
if (round || sticky)
ieee754_setcx(IEEE754_INEXACT);