diff options
Diffstat (limited to 'arch/mips/math-emu')
39 files changed, 1656 insertions, 1041 deletions
diff --git a/arch/mips/math-emu/Makefile b/arch/mips/math-emu/Makefile index a19641d3ac23..e9f10b88b695 100644 --- a/arch/mips/math-emu/Makefile +++ b/arch/mips/math-emu/Makefile @@ -4,9 +4,11 @@ obj-y += cp1emu.o ieee754dp.o ieee754sp.o ieee754.o \ dp_div.o dp_mul.o dp_sub.o dp_add.o dp_fsp.o dp_cmp.o dp_simple.o \ - dp_tint.o dp_fint.o dp_maddf.o dp_msubf.o dp_2008class.o dp_fmin.o dp_fmax.o \ + dp_tint.o dp_fint.o dp_rint.o dp_maddf.o dp_2008class.o dp_fmin.o \ + dp_fmax.o \ sp_div.o sp_mul.o sp_sub.o sp_add.o sp_fdp.o sp_cmp.o sp_simple.o \ - sp_tint.o sp_fint.o sp_maddf.o sp_msubf.o sp_2008class.o sp_fmin.o sp_fmax.o \ + sp_tint.o sp_fint.o sp_rint.o sp_maddf.o sp_2008class.o sp_fmin.o \ + sp_fmax.o \ dsemul.o lib-y += ieee754d.o \ diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c index 89d05de8040a..ebb5e3bfe12a 100644 --- a/arch/mips/math-emu/cp1emu.c +++ b/arch/mips/math-emu/cp1emu.c @@ -434,12 +434,14 @@ static int microMIPS32_to_MIPS32(union mips_instruction *insn_ptr) * a single subroutine should be used across both * modules. 
*/ -static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, - unsigned long *contpc) +int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, + unsigned long *contpc) { union mips_instruction insn = (union mips_instruction)dec_insn.insn; unsigned int fcr31; unsigned int bit = 0; + unsigned int bit0; + union fpureg *fpr; switch (insn.i_format.opcode) { case spec_op: @@ -450,7 +452,7 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, regs->cp0_epc + dec_insn.pc_inc + dec_insn.next_pc_inc; } - /* Fall through */ + /* fall through */ case jr_op: /* For R6, JR already emulated in jalr_op */ if (NO_R6EMU && insn.r_format.func == jr_op) @@ -470,10 +472,11 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, regs->regs[31] = regs->cp0_epc + dec_insn.pc_inc + dec_insn.next_pc_inc; - /* Fall through */ + /* fall through */ case bltzl_op: if (NO_R6EMU) break; + /* fall through */ case bltz_op: if ((long)regs->regs[insn.i_format.rs] < 0) *contpc = regs->cp0_epc + @@ -493,10 +496,11 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, regs->regs[31] = regs->cp0_epc + dec_insn.pc_inc + dec_insn.next_pc_inc; - /* Fall through */ + /* fall through */ case bgezl_op: if (NO_R6EMU) break; + /* fall through */ case bgez_op: if ((long)regs->regs[insn.i_format.rs] >= 0) *contpc = regs->cp0_epc + @@ -511,11 +515,12 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, break; case jalx_op: set_isa16_mode(bit); + /* fall through */ case jal_op: regs->regs[31] = regs->cp0_epc + dec_insn.pc_inc + dec_insn.next_pc_inc; - /* Fall through */ + /* fall through */ case j_op: *contpc = regs->cp0_epc + dec_insn.pc_inc; *contpc >>= 28; @@ -527,6 +532,7 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, case beql_op: if (NO_R6EMU) break; + /* fall through */ case beq_op: if (regs->regs[insn.i_format.rs] == 
regs->regs[insn.i_format.rt]) @@ -541,6 +547,7 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, case bnel_op: if (NO_R6EMU) break; + /* fall through */ case bne_op: if (regs->regs[insn.i_format.rs] != regs->regs[insn.i_format.rt]) @@ -555,6 +562,7 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, case blezl_op: if (!insn.i_format.rt && NO_R6EMU) break; + /* fall through */ case blez_op: /* @@ -592,6 +600,7 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, case bgtzl_op: if (!insn.i_format.rt && NO_R6EMU) break; + /* fall through */ case bgtz_op: /* * Compact branches for R6 for the @@ -627,8 +636,8 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, dec_insn.pc_inc + dec_insn.next_pc_inc; return 1; - case cbcond0_op: - case cbcond1_op: + case pop10_op: + case pop30_op: if (!cpu_has_mips_r6) break; if (insn.i_format.rt && !insn.i_format.rs) @@ -683,14 +692,14 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, dec_insn.next_pc_inc; return 1; - case beqzcjic_op: + case pop66_op: if (!cpu_has_mips_r6) break; *contpc = regs->cp0_epc + dec_insn.pc_inc + dec_insn.next_pc_inc; return 1; - case bnezcjialc_op: + case pop76_op: if (!cpu_has_mips_r6) break; if (!insn.i_format.rs) @@ -707,14 +716,14 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, ((insn.i_format.rs == bc1eqz_op) || (insn.i_format.rs == bc1nez_op))) { bit = 0; + fpr = &current->thread.fpu.fpr[insn.i_format.rt]; + bit0 = get_fpr32(fpr, 0) & 0x1; switch (insn.i_format.rs) { case bc1eqz_op: - if (get_fpr32(&current->thread.fpu.fpr[insn.i_format.rt], 0) & 0x1) - bit = 1; + bit = bit0 == 0; break; case bc1nez_op: - if (!(get_fpr32(&current->thread.fpu.fpr[insn.i_format.rt], 0) & 0x1)) - bit = 1; + bit = bit0 != 0; break; } if (bit) @@ -728,7 +737,8 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, return 1; 
} - /* R2/R6 compatible cop1 instruction. Fall through */ + /* R2/R6 compatible cop1 instruction */ + /* fall through */ case cop2_op: case cop1x_op: if (insn.i_format.rs == bc_op) { @@ -809,7 +819,7 @@ do { \ #define SITOREG(si, x) \ do { \ if (cop1_64bit(xcp) && !hybrid_fprs()) { \ - unsigned i; \ + unsigned int i; \ set_fpr32(&ctx->fpr[x], 0, si); \ for (i = 1; i < ARRAY_SIZE(ctx->fpr[x].val32); i++) \ set_fpr32(&ctx->fpr[x], i, 0); \ @@ -822,19 +832,19 @@ do { \ #define SITOHREG(si, x) \ do { \ - unsigned i; \ + unsigned int i; \ set_fpr32(&ctx->fpr[x], 1, si); \ for (i = 2; i < ARRAY_SIZE(ctx->fpr[x].val32); i++) \ set_fpr32(&ctx->fpr[x], i, 0); \ } while (0) #define DIFROMREG(di, x) \ - ((di) = get_fpr64(&ctx->fpr[(x) & ~(cop1_64bit(xcp) == 0)], 0)) + ((di) = get_fpr64(&ctx->fpr[(x) & ~(cop1_64bit(xcp) ^ 1)], 0)) #define DITOREG(di, x) \ do { \ - unsigned fpr, i; \ - fpr = (x) & ~(cop1_64bit(xcp) == 0); \ + unsigned int fpr, i; \ + fpr = (x) & ~(cop1_64bit(xcp) ^ 1); \ set_fpr64(&ctx->fpr[fpr], 0, di); \ for (i = 1; i < ARRAY_SIZE(ctx->fpr[x].val64); i++) \ set_fpr64(&ctx->fpr[fpr], i, 0); \ @@ -975,9 +985,10 @@ static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx, struct mm_decoded_insn dec_insn, void *__user *fault_addr) { unsigned long contpc = xcp->cp0_epc + dec_insn.pc_inc; - unsigned int cond, cbit; + unsigned int cond, cbit, bit0; mips_instruction ir; int likely, pc_inc; + union fpureg *fpr; u32 __user *wva; u64 __user *dva; u32 wval; @@ -1188,15 +1199,18 @@ emul: if (!cpu_has_mips_r6 || delay_slot(xcp)) return SIGILL; - cond = likely = 0; + likely = 0; + cond = 0; + fpr = &current->thread.fpu.fpr[MIPSInst_RT(ir)]; + bit0 = get_fpr32(fpr, 0) & 0x1; switch (MIPSInst_RS(ir)) { case bc1eqz_op: - if (get_fpr32(&current->thread.fpu.fpr[MIPSInst_RT(ir)], 0) & 0x1) - cond = 1; + MIPS_FPU_EMU_INC_STATS(bc1eqz); + cond = bit0 == 0; break; case bc1nez_op: - if (!(get_fpr32(&current->thread.fpu.fpr[MIPSInst_RT(ir)], 0) & 0x1)) - cond = 1; + 
MIPS_FPU_EMU_INC_STATS(bc1nez); + cond = bit0 != 0; break; } goto branch_common; @@ -1216,18 +1230,19 @@ emul: case bcfl_op: if (cpu_has_mips_2_3_4_5_r) likely = 1; - /* Fall through */ + /* fall through */ case bcf_op: cond = !cond; break; case bctl_op: if (cpu_has_mips_2_3_4_5_r) likely = 1; - /* Fall through */ + /* fall through */ case bct_op: break; } branch_common: + MIPS_FPU_EMU_INC_STATS(branches); set_delay_slot(xcp); if (cond) { /* @@ -1267,7 +1282,9 @@ branch_common: * instruction in the dslot. */ sig = mips_dsemul(xcp, ir, - contpc); + bcpc, contpc); + if (sig < 0) + break; if (sig) xcp->cp0_epc = bcpc; /* @@ -1320,7 +1337,9 @@ branch_common: * Single step the non-cp1 * instruction in the dslot */ - sig = mips_dsemul(xcp, ir, contpc); + sig = mips_dsemul(xcp, ir, bcpc, contpc); + if (sig < 0) + break; if (sig) xcp->cp0_epc = bcpc; /* SIGILL forces out of the emulation loop. */ @@ -1344,7 +1363,8 @@ branch_common: return SIGILL; /* a real fpu computation instruction */ - if ((sig = fpu_emu(xcp, ctx, ir))) + sig = fpu_emu(xcp, ctx, ir); + if (sig) return sig; } break; @@ -1457,7 +1477,7 @@ DEF3OP(nmsub, dp, ieee754dp_mul, ieee754dp_sub, ieee754dp_neg); static int fpux_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, mips_instruction ir, void *__user *fault_addr) { - unsigned rcsr = 0; /* resulting csr */ + unsigned int rcsr = 0; /* resulting csr */ MIPS_FPU_EMU_INC_STATS(cp1xops); @@ -1653,10 +1673,10 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, mips_instruction ir) { int rfmt; /* resulting format */ - unsigned rcsr = 0; /* resulting csr */ + unsigned int rcsr = 0; /* resulting csr */ unsigned int oldrm; unsigned int cbit; - unsigned cond; + unsigned int cond; union { union ieee754dp d; union ieee754sp s; @@ -1672,20 +1692,24 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, union ieee754sp(*b) (union ieee754sp, union ieee754sp); union ieee754sp(*u) (union ieee754sp); } handler; - union ieee754sp fs, 
ft; + union ieee754sp fd, fs, ft; switch (MIPSInst_FUNC(ir)) { /* binary ops */ case fadd_op: + MIPS_FPU_EMU_INC_STATS(add_s); handler.b = ieee754sp_add; goto scopbop; case fsub_op: + MIPS_FPU_EMU_INC_STATS(sub_s); handler.b = ieee754sp_sub; goto scopbop; case fmul_op: + MIPS_FPU_EMU_INC_STATS(mul_s); handler.b = ieee754sp_mul; goto scopbop; case fdiv_op: + MIPS_FPU_EMU_INC_STATS(div_s); handler.b = ieee754sp_div; goto scopbop; @@ -1694,6 +1718,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, if (!cpu_has_mips_2_3_4_5_r) return SIGILL; + MIPS_FPU_EMU_INC_STATS(sqrt_s); handler.u = ieee754sp_sqrt; goto scopuop; @@ -1706,6 +1731,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, if (!cpu_has_mips_4_5_64_r2_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(rsqrt_s); handler.u = fpemu_sp_rsqrt; goto scopuop; @@ -1713,6 +1739,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, if (!cpu_has_mips_4_5_64_r2_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(recip_s); handler.u = fpemu_sp_recip; goto scopuop; @@ -1749,6 +1776,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, if (!cpu_has_mips_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(seleqz_s); SPFROMREG(rv.s, MIPSInst_FT(ir)); if (rv.w & 0x1) rv.w = 0; @@ -1760,6 +1788,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, if (!cpu_has_mips_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(selnez_s); SPFROMREG(rv.s, MIPSInst_FT(ir)); if (rv.w & 0x1) SPFROMREG(rv.s, MIPSInst_FS(ir)); @@ -1773,6 +1802,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, if (!cpu_has_mips_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(maddf_s); SPFROMREG(ft, MIPSInst_FT(ir)); SPFROMREG(fs, MIPSInst_FS(ir)); SPFROMREG(fd, MIPSInst_FD(ir)); @@ -1786,6 +1816,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, if (!cpu_has_mips_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(msubf_s); SPFROMREG(ft, MIPSInst_FT(ir)); 
SPFROMREG(fs, MIPSInst_FS(ir)); SPFROMREG(fd, MIPSInst_FD(ir)); @@ -1799,9 +1830,9 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, if (!cpu_has_mips_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(rint_s); SPFROMREG(fs, MIPSInst_FS(ir)); - rv.l = ieee754sp_tlong(fs); - rv.s = ieee754sp_flong(rv.l); + rv.s = ieee754sp_rint(fs); goto copcsr; } @@ -1811,6 +1842,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, if (!cpu_has_mips_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(class_s); SPFROMREG(fs, MIPSInst_FS(ir)); rv.w = ieee754sp_2008class(fs); rfmt = w_fmt; @@ -1823,6 +1855,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, if (!cpu_has_mips_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(min_s); SPFROMREG(ft, MIPSInst_FT(ir)); SPFROMREG(fs, MIPSInst_FS(ir)); rv.s = ieee754sp_fmin(fs, ft); @@ -1835,6 +1868,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, if (!cpu_has_mips_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(mina_s); SPFROMREG(ft, MIPSInst_FT(ir)); SPFROMREG(fs, MIPSInst_FS(ir)); rv.s = ieee754sp_fmina(fs, ft); @@ -1847,6 +1881,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, if (!cpu_has_mips_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(max_s); SPFROMREG(ft, MIPSInst_FT(ir)); SPFROMREG(fs, MIPSInst_FS(ir)); rv.s = ieee754sp_fmax(fs, ft); @@ -1859,6 +1894,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, if (!cpu_has_mips_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(maxa_s); SPFROMREG(ft, MIPSInst_FT(ir)); SPFROMREG(fs, MIPSInst_FS(ir)); rv.s = ieee754sp_fmaxa(fs, ft); @@ -1866,15 +1902,18 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, } case fabs_op: + MIPS_FPU_EMU_INC_STATS(abs_s); handler.u = ieee754sp_abs; goto scopuop; case fneg_op: + MIPS_FPU_EMU_INC_STATS(neg_s); handler.u = ieee754sp_neg; goto scopuop; case fmov_op: /* an easy one */ + MIPS_FPU_EMU_INC_STATS(mov_s); SPFROMREG(rv.s, 
MIPSInst_FS(ir)); goto copcsr; @@ -1917,12 +1956,14 @@ copcsr: return SIGILL; /* not defined */ case fcvtd_op: + MIPS_FPU_EMU_INC_STATS(cvt_d_s); SPFROMREG(fs, MIPSInst_FS(ir)); rv.d = ieee754dp_fsp(fs); rfmt = d_fmt; goto copcsr; case fcvtw_op: + MIPS_FPU_EMU_INC_STATS(cvt_w_s); SPFROMREG(fs, MIPSInst_FS(ir)); rv.w = ieee754sp_tint(fs); rfmt = w_fmt; @@ -1935,6 +1976,15 @@ copcsr: if (!cpu_has_mips_2_3_4_5_r) return SIGILL; + if (MIPSInst_FUNC(ir) == fceil_op) + MIPS_FPU_EMU_INC_STATS(ceil_w_s); + if (MIPSInst_FUNC(ir) == ffloor_op) + MIPS_FPU_EMU_INC_STATS(floor_w_s); + if (MIPSInst_FUNC(ir) == fround_op) + MIPS_FPU_EMU_INC_STATS(round_w_s); + if (MIPSInst_FUNC(ir) == ftrunc_op) + MIPS_FPU_EMU_INC_STATS(trunc_w_s); + oldrm = ieee754_csr.rm; SPFROMREG(fs, MIPSInst_FS(ir)); ieee754_csr.rm = MIPSInst_FUNC(ir); @@ -1943,10 +1993,23 @@ copcsr: rfmt = w_fmt; goto copcsr; + case fsel_op: + if (!cpu_has_mips_r6) + return SIGILL; + + MIPS_FPU_EMU_INC_STATS(sel_s); + SPFROMREG(fd, MIPSInst_FD(ir)); + if (fd.bits & 0x1) + SPFROMREG(rv.s, MIPSInst_FT(ir)); + else + SPFROMREG(rv.s, MIPSInst_FS(ir)); + break; + case fcvtl_op: if (!cpu_has_mips_3_4_5_64_r2_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(cvt_l_s); SPFROMREG(fs, MIPSInst_FS(ir)); rv.l = ieee754sp_tlong(fs); rfmt = l_fmt; @@ -1959,6 +2022,15 @@ copcsr: if (!cpu_has_mips_3_4_5_64_r2_r6) return SIGILL; + if (MIPSInst_FUNC(ir) == fceill_op) + MIPS_FPU_EMU_INC_STATS(ceil_l_s); + if (MIPSInst_FUNC(ir) == ffloorl_op) + MIPS_FPU_EMU_INC_STATS(floor_l_s); + if (MIPSInst_FUNC(ir) == froundl_op) + MIPS_FPU_EMU_INC_STATS(round_l_s); + if (MIPSInst_FUNC(ir) == ftruncl_op) + MIPS_FPU_EMU_INC_STATS(trunc_l_s); + oldrm = ieee754_csr.rm; SPFROMREG(fs, MIPSInst_FS(ir)); ieee754_csr.rm = MIPSInst_FUNC(ir); @@ -1969,9 +2041,10 @@ copcsr: default: if (!NO_R6EMU && MIPSInst_FUNC(ir) >= fcmp_op) { - unsigned cmpop = MIPSInst_FUNC(ir) - fcmp_op; + unsigned int cmpop; union ieee754sp fs, ft; + cmpop = MIPSInst_FUNC(ir) - fcmp_op; 
SPFROMREG(fs, MIPSInst_FS(ir)); SPFROMREG(ft, MIPSInst_FT(ir)); rv.w = ieee754sp_cmp(fs, ft, @@ -1991,7 +2064,7 @@ copcsr: } case d_fmt: { - union ieee754dp fs, ft; + union ieee754dp fd, fs, ft; union { union ieee754dp(*b) (union ieee754dp, union ieee754dp); union ieee754dp(*u) (union ieee754dp); @@ -2000,15 +2073,19 @@ copcsr: switch (MIPSInst_FUNC(ir)) { /* binary ops */ case fadd_op: + MIPS_FPU_EMU_INC_STATS(add_d); handler.b = ieee754dp_add; goto dcopbop; case fsub_op: + MIPS_FPU_EMU_INC_STATS(sub_d); handler.b = ieee754dp_sub; goto dcopbop; case fmul_op: + MIPS_FPU_EMU_INC_STATS(mul_d); handler.b = ieee754dp_mul; goto dcopbop; case fdiv_op: + MIPS_FPU_EMU_INC_STATS(div_d); handler.b = ieee754dp_div; goto dcopbop; @@ -2017,6 +2094,7 @@ copcsr: if (!cpu_has_mips_2_3_4_5_r) return SIGILL; + MIPS_FPU_EMU_INC_STATS(sqrt_d); handler.u = ieee754dp_sqrt; goto dcopuop; /* @@ -2028,12 +2106,14 @@ copcsr: if (!cpu_has_mips_4_5_64_r2_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(rsqrt_d); handler.u = fpemu_dp_rsqrt; goto dcopuop; case frecip_op: if (!cpu_has_mips_4_5_64_r2_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(recip_d); handler.u = fpemu_dp_recip; goto dcopuop; case fmovc_op: @@ -2067,6 +2147,7 @@ copcsr: if (!cpu_has_mips_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(seleqz_d); DPFROMREG(rv.d, MIPSInst_FT(ir)); if (rv.l & 0x1) rv.l = 0; @@ -2078,6 +2159,7 @@ copcsr: if (!cpu_has_mips_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(selnez_d); DPFROMREG(rv.d, MIPSInst_FT(ir)); if (rv.l & 0x1) DPFROMREG(rv.d, MIPSInst_FS(ir)); @@ -2091,6 +2173,7 @@ copcsr: if (!cpu_has_mips_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(maddf_d); DPFROMREG(ft, MIPSInst_FT(ir)); DPFROMREG(fs, MIPSInst_FS(ir)); DPFROMREG(fd, MIPSInst_FD(ir)); @@ -2104,6 +2187,7 @@ copcsr: if (!cpu_has_mips_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(msubf_d); DPFROMREG(ft, MIPSInst_FT(ir)); DPFROMREG(fs, MIPSInst_FS(ir)); DPFROMREG(fd, MIPSInst_FD(ir)); @@ -2117,9 +2201,9 @@ copcsr: if (!cpu_has_mips_r6) return 
SIGILL; + MIPS_FPU_EMU_INC_STATS(rint_d); DPFROMREG(fs, MIPSInst_FS(ir)); - rv.l = ieee754dp_tlong(fs); - rv.d = ieee754dp_flong(rv.l); + rv.d = ieee754dp_rint(fs); goto copcsr; } @@ -2129,9 +2213,10 @@ copcsr: if (!cpu_has_mips_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(class_d); DPFROMREG(fs, MIPSInst_FS(ir)); - rv.w = ieee754dp_2008class(fs); - rfmt = w_fmt; + rv.l = ieee754dp_2008class(fs); + rfmt = l_fmt; goto copcsr; } @@ -2141,6 +2226,7 @@ copcsr: if (!cpu_has_mips_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(min_d); DPFROMREG(ft, MIPSInst_FT(ir)); DPFROMREG(fs, MIPSInst_FS(ir)); rv.d = ieee754dp_fmin(fs, ft); @@ -2153,6 +2239,7 @@ copcsr: if (!cpu_has_mips_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(mina_d); DPFROMREG(ft, MIPSInst_FT(ir)); DPFROMREG(fs, MIPSInst_FS(ir)); rv.d = ieee754dp_fmina(fs, ft); @@ -2165,6 +2252,7 @@ copcsr: if (!cpu_has_mips_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(max_d); DPFROMREG(ft, MIPSInst_FT(ir)); DPFROMREG(fs, MIPSInst_FS(ir)); rv.d = ieee754dp_fmax(fs, ft); @@ -2177,6 +2265,7 @@ copcsr: if (!cpu_has_mips_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(maxa_d); DPFROMREG(ft, MIPSInst_FT(ir)); DPFROMREG(fs, MIPSInst_FS(ir)); rv.d = ieee754dp_fmaxa(fs, ft); @@ -2184,15 +2273,18 @@ copcsr: } case fabs_op: + MIPS_FPU_EMU_INC_STATS(abs_d); handler.u = ieee754dp_abs; goto dcopuop; case fneg_op: + MIPS_FPU_EMU_INC_STATS(neg_d); handler.u = ieee754dp_neg; goto dcopuop; case fmov_op: /* an easy one */ + MIPS_FPU_EMU_INC_STATS(mov_d); DPFROMREG(rv.d, MIPSInst_FS(ir)); goto copcsr; @@ -2212,6 +2304,7 @@ dcopuop: * unary conv ops */ case fcvts_op: + MIPS_FPU_EMU_INC_STATS(cvt_s_d); DPFROMREG(fs, MIPSInst_FS(ir)); rv.s = ieee754sp_fdp(fs); rfmt = s_fmt; @@ -2221,6 +2314,7 @@ dcopuop: return SIGILL; /* not defined */ case fcvtw_op: + MIPS_FPU_EMU_INC_STATS(cvt_w_d); DPFROMREG(fs, MIPSInst_FS(ir)); rv.w = ieee754dp_tint(fs); /* wrong */ rfmt = w_fmt; @@ -2233,6 +2327,15 @@ dcopuop: if (!cpu_has_mips_2_3_4_5_r) return SIGILL; + if 
(MIPSInst_FUNC(ir) == fceil_op) + MIPS_FPU_EMU_INC_STATS(ceil_w_d); + if (MIPSInst_FUNC(ir) == ffloor_op) + MIPS_FPU_EMU_INC_STATS(floor_w_d); + if (MIPSInst_FUNC(ir) == fround_op) + MIPS_FPU_EMU_INC_STATS(round_w_d); + if (MIPSInst_FUNC(ir) == ftrunc_op) + MIPS_FPU_EMU_INC_STATS(trunc_w_d); + oldrm = ieee754_csr.rm; DPFROMREG(fs, MIPSInst_FS(ir)); ieee754_csr.rm = MIPSInst_FUNC(ir); @@ -2241,10 +2344,23 @@ dcopuop: rfmt = w_fmt; goto copcsr; + case fsel_op: + if (!cpu_has_mips_r6) + return SIGILL; + + MIPS_FPU_EMU_INC_STATS(sel_d); + DPFROMREG(fd, MIPSInst_FD(ir)); + if (fd.bits & 0x1) + DPFROMREG(rv.d, MIPSInst_FT(ir)); + else + DPFROMREG(rv.d, MIPSInst_FS(ir)); + break; + case fcvtl_op: if (!cpu_has_mips_3_4_5_64_r2_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(cvt_l_d); DPFROMREG(fs, MIPSInst_FS(ir)); rv.l = ieee754dp_tlong(fs); rfmt = l_fmt; @@ -2257,6 +2373,15 @@ dcopuop: if (!cpu_has_mips_3_4_5_64_r2_r6) return SIGILL; + if (MIPSInst_FUNC(ir) == fceill_op) + MIPS_FPU_EMU_INC_STATS(ceil_l_d); + if (MIPSInst_FUNC(ir) == ffloorl_op) + MIPS_FPU_EMU_INC_STATS(floor_l_d); + if (MIPSInst_FUNC(ir) == froundl_op) + MIPS_FPU_EMU_INC_STATS(round_l_d); + if (MIPSInst_FUNC(ir) == ftruncl_op) + MIPS_FPU_EMU_INC_STATS(trunc_l_d); + oldrm = ieee754_csr.rm; DPFROMREG(fs, MIPSInst_FS(ir)); ieee754_csr.rm = MIPSInst_FUNC(ir); @@ -2267,9 +2392,10 @@ dcopuop: default: if (!NO_R6EMU && MIPSInst_FUNC(ir) >= fcmp_op) { - unsigned cmpop = MIPSInst_FUNC(ir) - fcmp_op; + unsigned int cmpop; union ieee754dp fs, ft; + cmpop = MIPSInst_FUNC(ir) - fcmp_op; DPFROMREG(fs, MIPSInst_FS(ir)); DPFROMREG(ft, MIPSInst_FT(ir)); rv.w = ieee754dp_cmp(fs, ft, @@ -2298,12 +2424,14 @@ dcopuop: switch (MIPSInst_FUNC(ir)) { case fcvts_op: /* convert word to single precision real */ + MIPS_FPU_EMU_INC_STATS(cvt_s_w); SPFROMREG(fs, MIPSInst_FS(ir)); rv.s = ieee754sp_fint(fs.bits); rfmt = s_fmt; goto copcsr; case fcvtd_op: /* convert word to double precision real */ + MIPS_FPU_EMU_INC_STATS(cvt_d_w); 
SPFROMREG(fs, MIPSInst_FS(ir)); rv.d = ieee754dp_fint(fs.bits); rfmt = d_fmt; @@ -2323,6 +2451,90 @@ dcopuop: (MIPSInst_FUNC(ir) & 0x20)) return SIGILL; + if (!sig) { + if (!(MIPSInst_FUNC(ir) & PREDICATE_BIT)) { + switch (cmpop) { + case 0: + MIPS_FPU_EMU_INC_STATS(cmp_af_s); + break; + case 1: + MIPS_FPU_EMU_INC_STATS(cmp_un_s); + break; + case 2: + MIPS_FPU_EMU_INC_STATS(cmp_eq_s); + break; + case 3: + MIPS_FPU_EMU_INC_STATS(cmp_ueq_s); + break; + case 4: + MIPS_FPU_EMU_INC_STATS(cmp_lt_s); + break; + case 5: + MIPS_FPU_EMU_INC_STATS(cmp_ult_s); + break; + case 6: + MIPS_FPU_EMU_INC_STATS(cmp_le_s); + break; + case 7: + MIPS_FPU_EMU_INC_STATS(cmp_ule_s); + break; + } + } else { + switch (cmpop) { + case 1: + MIPS_FPU_EMU_INC_STATS(cmp_or_s); + break; + case 2: + MIPS_FPU_EMU_INC_STATS(cmp_une_s); + break; + case 3: + MIPS_FPU_EMU_INC_STATS(cmp_ne_s); + break; + } + } + } else { + if (!(MIPSInst_FUNC(ir) & PREDICATE_BIT)) { + switch (cmpop) { + case 0: + MIPS_FPU_EMU_INC_STATS(cmp_saf_s); + break; + case 1: + MIPS_FPU_EMU_INC_STATS(cmp_sun_s); + break; + case 2: + MIPS_FPU_EMU_INC_STATS(cmp_seq_s); + break; + case 3: + MIPS_FPU_EMU_INC_STATS(cmp_sueq_s); + break; + case 4: + MIPS_FPU_EMU_INC_STATS(cmp_slt_s); + break; + case 5: + MIPS_FPU_EMU_INC_STATS(cmp_sult_s); + break; + case 6: + MIPS_FPU_EMU_INC_STATS(cmp_sle_s); + break; + case 7: + MIPS_FPU_EMU_INC_STATS(cmp_sule_s); + break; + } + } else { + switch (cmpop) { + case 1: + MIPS_FPU_EMU_INC_STATS(cmp_sor_s); + break; + case 2: + MIPS_FPU_EMU_INC_STATS(cmp_sune_s); + break; + case 3: + MIPS_FPU_EMU_INC_STATS(cmp_sne_s); + break; + } + } + } + /* fmt is w_fmt for single precision so fix it */ rfmt = s_fmt; /* default to false */ @@ -2366,6 +2578,7 @@ dcopuop: break; } } + break; } case l_fmt: @@ -2378,11 +2591,13 @@ dcopuop: switch (MIPSInst_FUNC(ir)) { case fcvts_op: /* convert long to single precision real */ + MIPS_FPU_EMU_INC_STATS(cvt_s_l); rv.s = ieee754sp_flong(bits); rfmt = s_fmt; goto copcsr; case 
fcvtd_op: /* convert long to double precision real */ + MIPS_FPU_EMU_INC_STATS(cvt_d_l); rv.d = ieee754dp_flong(bits); rfmt = d_fmt; goto copcsr; @@ -2396,6 +2611,90 @@ dcopuop: (MIPSInst_FUNC(ir) & 0x20)) return SIGILL; + if (!sig) { + if (!(MIPSInst_FUNC(ir) & PREDICATE_BIT)) { + switch (cmpop) { + case 0: + MIPS_FPU_EMU_INC_STATS(cmp_af_d); + break; + case 1: + MIPS_FPU_EMU_INC_STATS(cmp_un_d); + break; + case 2: + MIPS_FPU_EMU_INC_STATS(cmp_eq_d); + break; + case 3: + MIPS_FPU_EMU_INC_STATS(cmp_ueq_d); + break; + case 4: + MIPS_FPU_EMU_INC_STATS(cmp_lt_d); + break; + case 5: + MIPS_FPU_EMU_INC_STATS(cmp_ult_d); + break; + case 6: + MIPS_FPU_EMU_INC_STATS(cmp_le_d); + break; + case 7: + MIPS_FPU_EMU_INC_STATS(cmp_ule_d); + break; + } + } else { + switch (cmpop) { + case 1: + MIPS_FPU_EMU_INC_STATS(cmp_or_d); + break; + case 2: + MIPS_FPU_EMU_INC_STATS(cmp_une_d); + break; + case 3: + MIPS_FPU_EMU_INC_STATS(cmp_ne_d); + break; + } + } + } else { + if (!(MIPSInst_FUNC(ir) & PREDICATE_BIT)) { + switch (cmpop) { + case 0: + MIPS_FPU_EMU_INC_STATS(cmp_saf_d); + break; + case 1: + MIPS_FPU_EMU_INC_STATS(cmp_sun_d); + break; + case 2: + MIPS_FPU_EMU_INC_STATS(cmp_seq_d); + break; + case 3: + MIPS_FPU_EMU_INC_STATS(cmp_sueq_d); + break; + case 4: + MIPS_FPU_EMU_INC_STATS(cmp_slt_d); + break; + case 5: + MIPS_FPU_EMU_INC_STATS(cmp_sult_d); + break; + case 6: + MIPS_FPU_EMU_INC_STATS(cmp_sle_d); + break; + case 7: + MIPS_FPU_EMU_INC_STATS(cmp_sule_d); + break; + } + } else { + switch (cmpop) { + case 1: + MIPS_FPU_EMU_INC_STATS(cmp_sor_d); + break; + case 2: + MIPS_FPU_EMU_INC_STATS(cmp_sune_d); + break; + case 3: + MIPS_FPU_EMU_INC_STATS(cmp_sne_d); + break; + } + } + } + /* fmt is l_fmt for double precision so fix it */ rfmt = d_fmt; /* default to false */ @@ -2439,6 +2738,8 @@ dcopuop: break; } } + break; + default: return SIGILL; } diff --git a/arch/mips/math-emu/dp_add.c b/arch/mips/math-emu/dp_add.c index 8954ef031f84..678de20e4cb1 100644 --- 
a/arch/mips/math-emu/dp_add.c +++ b/arch/mips/math-emu/dp_add.c @@ -104,8 +104,7 @@ union ieee754dp ieee754dp_add(union ieee754dp x, union ieee754dp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; - - /* FALL THROUGH */ + /* fall through */ case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): DPDNORMY; diff --git a/arch/mips/math-emu/dp_div.c b/arch/mips/math-emu/dp_div.c index f4746f7c5f63..3063ae3ab3b9 100644 --- a/arch/mips/math-emu/dp_div.c +++ b/arch/mips/math-emu/dp_div.c @@ -103,6 +103,7 @@ union ieee754dp ieee754dp_div(union ieee754dp x, union ieee754dp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; + /* fall through */ case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): DPDNORMY; diff --git a/arch/mips/math-emu/dp_fmax.c b/arch/mips/math-emu/dp_fmax.c index 5bec64f2884e..d1f984b40344 100644 --- a/arch/mips/math-emu/dp_fmax.c +++ b/arch/mips/math-emu/dp_fmax.c @@ -96,6 +96,7 @@ union ieee754dp ieee754dp_fmax(union ieee754dp x, union ieee754dp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; + /* fall through */ case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): DPDNORMY; @@ -224,6 +225,7 @@ union ieee754dp ieee754dp_fmaxa(union ieee754dp x, union ieee754dp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; + /* fall through */ case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): DPDNORMY; diff --git a/arch/mips/math-emu/dp_fmin.c b/arch/mips/math-emu/dp_fmin.c index a287b23818d8..f98b96135c8d 100644 --- a/arch/mips/math-emu/dp_fmin.c +++ b/arch/mips/math-emu/dp_fmin.c @@ -96,6 +96,7 @@ union ieee754dp ieee754dp_fmin(union ieee754dp x, union ieee754dp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; + /* fall through */ case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): DPDNORMY; @@ -224,6 +225,7 @@ union ieee754dp ieee754dp_fmina(union ieee754dp x, union ieee754dp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; + /* fall through */ 
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): DPDNORMY; diff --git a/arch/mips/math-emu/dp_maddf.c b/arch/mips/math-emu/dp_maddf.c index 119eda9fa1ea..7ea2f8222026 100644 --- a/arch/mips/math-emu/dp_maddf.c +++ b/arch/mips/math-emu/dp_maddf.c @@ -14,83 +14,91 @@ #include "ieee754dp.h" -union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x, - union ieee754dp y) + +/* 128 bits shift right logical with rounding. */ +static void srl128(u64 *hptr, u64 *lptr, int count) +{ + u64 low; + + if (count >= 128) { + *lptr = *hptr != 0 || *lptr != 0; + *hptr = 0; + } else if (count >= 64) { + if (count == 64) { + *lptr = *hptr | (*lptr != 0); + } else { + low = *lptr; + *lptr = *hptr >> (count - 64); + *lptr |= (*hptr << (128 - count)) != 0 || low != 0; + } + *hptr = 0; + } else { + low = *lptr; + *lptr = low >> count | *hptr << (64 - count); + *lptr |= (low << (64 - count)) != 0; + *hptr = *hptr >> count; + } +} + +static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x, + union ieee754dp y, enum maddf_flags flags) { int re; int rs; - u64 rm; - unsigned lxm; - unsigned hxm; - unsigned lym; - unsigned hym; + unsigned int lxm; + unsigned int hxm; + unsigned int lym; + unsigned int hym; u64 lrm; u64 hrm; + u64 lzm; + u64 hzm; u64 t; u64 at; int s; COMPXDP; COMPYDP; - - u64 zm; int ze; int zs __maybe_unused; int zc; + COMPZDP; EXPLODEXDP; EXPLODEYDP; - EXPLODEDP(z, zc, zs, ze, zm) + EXPLODEZDP; FLUSHXDP; FLUSHYDP; - FLUSHDP(z, zc, zs, ze, zm); + FLUSHZDP; ieee754_clearcx(); - switch (zc) { - case IEEE754_CLASS_SNAN: - ieee754_setcx(IEEE754_INVALID_OPERATION); + /* + * Handle the cases when at least one of x, y or z is a NaN. + * Order of precedence is sNaN, qNaN and z, x, y. 
+ */ + if (zc == IEEE754_CLASS_SNAN) return ieee754dp_nanxcpt(z); - case IEEE754_CLASS_DNORM: - DPDNORMx(zm, ze); - /* QNAN is handled separately below */ - } - - switch (CLPAIR(xc, yc)) { - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN): - return ieee754dp_nanxcpt(y); - - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO): - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM): - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM): - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): + if (xc == IEEE754_CLASS_SNAN) return ieee754dp_nanxcpt(x); - - case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): + if (yc == IEEE754_CLASS_SNAN) + return ieee754dp_nanxcpt(y); + if (zc == IEEE754_CLASS_QNAN) + return z; + if (xc == IEEE754_CLASS_QNAN) + return x; + if (yc == IEEE754_CLASS_QNAN) return y; - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF): - return x; + if (zc == IEEE754_CLASS_DNORM) + DPDNORMZ; + /* ZERO z cases are handled separately below */ + switch (CLPAIR(xc, yc)) { /* * Infinity handling */ case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): - if (zc == IEEE754_CLASS_QNAN) - return z; ieee754_setcx(IEEE754_INVALID_OPERATION); return ieee754dp_indef(); @@ -99,9 +107,27 @@ union ieee754dp 
ieee754dp_maddf(union ieee754dp z, union ieee754dp x, case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): - if (zc == IEEE754_CLASS_QNAN) - return z; - return ieee754dp_inf(xs ^ ys); + if ((zc == IEEE754_CLASS_INF) && + ((!(flags & MADDF_NEGATE_PRODUCT) && (zs != (xs ^ ys))) || + ((flags & MADDF_NEGATE_PRODUCT) && (zs == (xs ^ ys))))) { + /* + * Cases of addition of infinities with opposite signs + * or subtraction of infinities with same signs. + */ + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754dp_indef(); + } + /* + * z is here either not an infinity, or an infinity having the + * same sign as product (x*y) (in case of MADDF.D instruction) + * or product -(x*y) (in MSUBF.D case). The result must be an + * infinity, and its sign is determined only by the value of + * (flags & MADDF_NEGATE_PRODUCT) and the signs of x and y. + */ + if (flags & MADDF_NEGATE_PRODUCT) + return ieee754dp_inf(1 ^ (xs ^ ys)); + else + return ieee754dp_inf(xs ^ ys); case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): @@ -110,34 +136,45 @@ union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x, case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): if (zc == IEEE754_CLASS_INF) return ieee754dp_inf(zs); - /* Multiplication is 0 so just return z */ + if (zc == IEEE754_CLASS_ZERO) { + /* Handle cases +0 + (-0) and similar ones. */ + if ((!(flags & MADDF_NEGATE_PRODUCT) + && (zs == (xs ^ ys))) || + ((flags & MADDF_NEGATE_PRODUCT) + && (zs != (xs ^ ys)))) + /* + * Cases of addition of zeros of equal signs + * or subtraction of zeroes of opposite signs. + * The sign of the resulting zero is in any + * such case determined only by the sign of z. 
+ */ + return z; + + return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD); + } + /* x*y is here 0, and z is not 0, so just return z */ return z; case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; + /* fall through */ case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): - if (zc == IEEE754_CLASS_QNAN) - return z; - else if (zc == IEEE754_CLASS_INF) + if (zc == IEEE754_CLASS_INF) return ieee754dp_inf(zs); DPDNORMY; break; case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): - if (zc == IEEE754_CLASS_QNAN) - return z; - else if (zc == IEEE754_CLASS_INF) + if (zc == IEEE754_CLASS_INF) return ieee754dp_inf(zs); DPDNORMX; break; case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): - if (zc == IEEE754_CLASS_QNAN) - return z; - else if (zc == IEEE754_CLASS_INF) + if (zc == IEEE754_CLASS_INF) return ieee754dp_inf(zs); - /* fall through to real computations */ + /* continue to real computations */ } /* Finally get to do some computation */ @@ -154,18 +191,17 @@ union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x, re = xe + ye; rs = xs ^ ys; + if (flags & MADDF_NEGATE_PRODUCT) + rs ^= 1; /* shunt to top of word */ xm <<= 64 - (DP_FBITS + 1); ym <<= 64 - (DP_FBITS + 1); /* - * Multiply 32 bits xm, ym to give high 32 bits rm with stickness. + * Multiply 64 bits xm and ym to give 128 bits result in hrm:lrm. */ - /* 32 * 32 => 64 */ -#define DPXMULT(x, y) ((u64)(x) * (u64)y) - lxm = xm; hxm = xm >> 32; lym = ym; @@ -190,76 +226,120 @@ union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x, hrm = hrm + (t >> 32); - rm = hrm | (lrm != 0); - - /* - * Sticky shift down to normal rounding precision. 
- */ - if ((s64) rm < 0) { - rm = (rm >> (64 - (DP_FBITS + 1 + 3))) | - ((rm << (DP_FBITS + 1 + 3)) != 0); - re++; - } else { - rm = (rm >> (64 - (DP_FBITS + 1 + 3 + 1))) | - ((rm << (DP_FBITS + 1 + 3 + 1)) != 0); + /* Put explicit bit at bit 126 if necessary */ + if ((int64_t)hrm < 0) { + lrm = (hrm << 63) | (lrm >> 1); + hrm = hrm >> 1; + re++; } - assert(rm & (DP_HIDDEN_BIT << 3)); - /* And now the addition */ - assert(zm & DP_HIDDEN_BIT); + assert(hrm & (1 << 62)); - /* - * Provide guard,round and stick bit space. - */ - zm <<= 3; + if (zc == IEEE754_CLASS_ZERO) { + /* + * Move explicit bit from bit 126 to bit 55 since the + * ieee754dp_format code expects the mantissa to be + * 56 bits wide (53 + 3 rounding bits). + */ + srl128(&hrm, &lrm, (126 - 55)); + return ieee754dp_format(rs, re, lrm); + } + + /* Move explicit bit from bit 52 to bit 126 */ + lzm = 0; + hzm = zm << 10; + assert(hzm & (1 << 62)); + /* Make the exponents the same */ if (ze > re) { /* * Have to shift y fraction right to align. */ s = ze - re; - rm = XDPSRS(rm, s); + srl128(&hrm, &lrm, s); re += s; } else if (re > ze) { /* * Have to shift x fraction right to align. */ s = re - ze; - zm = XDPSRS(zm, s); + srl128(&hzm, &lzm, s); ze += s; } assert(ze == re); assert(ze <= DP_EMAX); + /* Do the addition */ if (zs == rs) { /* - * Generate 28 bit result of adding two 27 bit numbers - * leaving result in xm, xs and xe. + * Generate 128 bit result by adding two 127 bit numbers + * leaving result in hzm:lzm, zs and ze. 
*/ - zm = zm + rm; - - if (zm >> (DP_FBITS + 1 + 3)) { /* carry out */ - zm = XDPSRS1(zm); + hzm = hzm + hrm + (lzm > (lzm + lrm)); + lzm = lzm + lrm; + if ((int64_t)hzm < 0) { /* carry out */ + srl128(&hzm, &lzm, 1); ze++; } } else { - if (zm >= rm) { - zm = zm - rm; + if (hzm > hrm || (hzm == hrm && lzm >= lrm)) { + hzm = hzm - hrm - (lzm < lrm); + lzm = lzm - lrm; } else { - zm = rm - zm; + hzm = hrm - hzm - (lrm < lzm); + lzm = lrm - lzm; zs = rs; } - if (zm == 0) + if (lzm == 0 && hzm == 0) return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD); /* - * Normalize to rounding precision. + * Put explicit bit at bit 126 if necessary. */ - while ((zm >> (DP_FBITS + 3)) == 0) { - zm <<= 1; - ze--; + if (hzm == 0) { + /* left shift by 63 or 64 bits */ + if ((int64_t)lzm < 0) { + /* MSB of lzm is the explicit bit */ + hzm = lzm >> 1; + lzm = lzm << 63; + ze -= 63; + } else { + hzm = lzm; + lzm = 0; + ze -= 64; + } + } + + t = 0; + while ((hzm >> (62 - t)) == 0) + t++; + + assert(t <= 62); + if (t) { + hzm = hzm << t | lzm >> (64 - t); + lzm = lzm << t; + ze -= t; } } - return ieee754dp_format(zs, ze, zm); + /* + * Move explicit bit from bit 126 to bit 55 since the + * ieee754dp_format code expects the mantissa to be + * 56 bits wide (53 + 3 rounding bits). 
+ */ + srl128(&hzm, &lzm, (126 - 55)); + + return ieee754dp_format(zs, ze, lzm); +} + +union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x, + union ieee754dp y) +{ + return _dp_maddf(z, x, y, 0); +} + +union ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x, + union ieee754dp y) +{ + return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT); } diff --git a/arch/mips/math-emu/dp_msubf.c b/arch/mips/math-emu/dp_msubf.c deleted file mode 100644 index 12241262f856..000000000000 --- a/arch/mips/math-emu/dp_msubf.c +++ /dev/null @@ -1,269 +0,0 @@ -/* - * IEEE754 floating point arithmetic - * double precision: MSUB.f (Fused Multiply Subtract) - * MSUBF.fmt: FPR[fd] = FPR[fd] - (FPR[fs] x FPR[ft]) - * - * MIPS floating point support - * Copyright (C) 2015 Imagination Technologies, Ltd. - * Author: Markos Chandras <markos.chandras@imgtec.com> - * - * This program is free software; you can distribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; version 2 of the License. 
- */ - -#include "ieee754dp.h" - -union ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x, - union ieee754dp y) -{ - int re; - int rs; - u64 rm; - unsigned lxm; - unsigned hxm; - unsigned lym; - unsigned hym; - u64 lrm; - u64 hrm; - u64 t; - u64 at; - int s; - - COMPXDP; - COMPYDP; - - u64 zm; int ze; int zs __maybe_unused; int zc; - - EXPLODEXDP; - EXPLODEYDP; - EXPLODEDP(z, zc, zs, ze, zm) - - FLUSHXDP; - FLUSHYDP; - FLUSHDP(z, zc, zs, ze, zm); - - ieee754_clearcx(); - - switch (zc) { - case IEEE754_CLASS_SNAN: - ieee754_setcx(IEEE754_INVALID_OPERATION); - return ieee754dp_nanxcpt(z); - case IEEE754_CLASS_DNORM: - DPDNORMx(zm, ze); - /* QNAN is handled separately below */ - } - - switch (CLPAIR(xc, yc)) { - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN): - return ieee754dp_nanxcpt(y); - - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO): - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM): - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM): - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): - return ieee754dp_nanxcpt(x); - - case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): - return y; - - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF): - return x; - - - /* - * Infinity handling - */ - case CLPAIR(IEEE754_CLASS_INF, 
IEEE754_CLASS_ZERO): - case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): - if (zc == IEEE754_CLASS_QNAN) - return z; - ieee754_setcx(IEEE754_INVALID_OPERATION); - return ieee754dp_indef(); - - case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): - case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): - case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): - case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): - case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): - if (zc == IEEE754_CLASS_QNAN) - return z; - return ieee754dp_inf(xs ^ ys); - - case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): - case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): - case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): - case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): - case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): - if (zc == IEEE754_CLASS_INF) - return ieee754dp_inf(zs); - /* Multiplication is 0 so just return z */ - return z; - - case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): - DPDNORMX; - - case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): - if (zc == IEEE754_CLASS_QNAN) - return z; - else if (zc == IEEE754_CLASS_INF) - return ieee754dp_inf(zs); - DPDNORMY; - break; - - case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): - if (zc == IEEE754_CLASS_QNAN) - return z; - else if (zc == IEEE754_CLASS_INF) - return ieee754dp_inf(zs); - DPDNORMX; - break; - - case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): - if (zc == IEEE754_CLASS_QNAN) - return z; - else if (zc == IEEE754_CLASS_INF) - return ieee754dp_inf(zs); - /* fall through to real computations */ - } - - /* Finally get to do some computation */ - - /* - * Do the multiplication bit first - * - * rm = xm * ym, re = xe + ye basically - * - * At this point xm and ym should have been normalized. 
- */ - assert(xm & DP_HIDDEN_BIT); - assert(ym & DP_HIDDEN_BIT); - - re = xe + ye; - rs = xs ^ ys; - - /* shunt to top of word */ - xm <<= 64 - (DP_FBITS + 1); - ym <<= 64 - (DP_FBITS + 1); - - /* - * Multiply 32 bits xm, ym to give high 32 bits rm with stickness. - */ - - /* 32 * 32 => 64 */ -#define DPXMULT(x, y) ((u64)(x) * (u64)y) - - lxm = xm; - hxm = xm >> 32; - lym = ym; - hym = ym >> 32; - - lrm = DPXMULT(lxm, lym); - hrm = DPXMULT(hxm, hym); - - t = DPXMULT(lxm, hym); - - at = lrm + (t << 32); - hrm += at < lrm; - lrm = at; - - hrm = hrm + (t >> 32); - - t = DPXMULT(hxm, lym); - - at = lrm + (t << 32); - hrm += at < lrm; - lrm = at; - - hrm = hrm + (t >> 32); - - rm = hrm | (lrm != 0); - - /* - * Sticky shift down to normal rounding precision. - */ - if ((s64) rm < 0) { - rm = (rm >> (64 - (DP_FBITS + 1 + 3))) | - ((rm << (DP_FBITS + 1 + 3)) != 0); - re++; - } else { - rm = (rm >> (64 - (DP_FBITS + 1 + 3 + 1))) | - ((rm << (DP_FBITS + 1 + 3 + 1)) != 0); - } - assert(rm & (DP_HIDDEN_BIT << 3)); - - /* And now the subtraction */ - - /* flip sign of r and handle as add */ - rs ^= 1; - - assert(zm & DP_HIDDEN_BIT); - - /* - * Provide guard,round and stick bit space. - */ - zm <<= 3; - - if (ze > re) { - /* - * Have to shift y fraction right to align. - */ - s = ze - re; - rm = XDPSRS(rm, s); - re += s; - } else if (re > ze) { - /* - * Have to shift x fraction right to align. - */ - s = re - ze; - zm = XDPSRS(zm, s); - ze += s; - } - assert(ze == re); - assert(ze <= DP_EMAX); - - if (zs == rs) { - /* - * Generate 28 bit result of adding two 27 bit numbers - * leaving result in xm, xs and xe. - */ - zm = zm + rm; - - if (zm >> (DP_FBITS + 1 + 3)) { /* carry out */ - zm = XDPSRS1(zm); - ze++; - } - } else { - if (zm >= rm) { - zm = zm - rm; - } else { - zm = rm - zm; - zs = rs; - } - if (zm == 0) - return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD); - - /* - * Normalize to rounding precision. 
- */ - while ((zm >> (DP_FBITS + 3)) == 0) { - zm <<= 1; - ze--; - } - } - - return ieee754dp_format(zs, ze, zm); -} diff --git a/arch/mips/math-emu/dp_mul.c b/arch/mips/math-emu/dp_mul.c index d0901f03fa19..c34a6cdf1b25 100644 --- a/arch/mips/math-emu/dp_mul.c +++ b/arch/mips/math-emu/dp_mul.c @@ -26,10 +26,10 @@ union ieee754dp ieee754dp_mul(union ieee754dp x, union ieee754dp y) int re; int rs; u64 rm; - unsigned lxm; - unsigned hxm; - unsigned lym; - unsigned hym; + unsigned int lxm; + unsigned int hxm; + unsigned int lym; + unsigned int hym; u64 lrm; u64 hrm; u64 t; @@ -101,6 +101,7 @@ union ieee754dp ieee754dp_mul(union ieee754dp x, union ieee754dp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; + /* fall through */ case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): DPDNORMY; @@ -125,12 +126,9 @@ union ieee754dp ieee754dp_mul(union ieee754dp x, union ieee754dp y) ym <<= 64 - (DP_FBITS + 1); /* - * Multiply 32 bits xm, ym to give high 32 bits rm with stickness. + * Multiply 64 bits xm, ym to give high 64 bits rm with stickness. */ - /* 32 * 32 => 64 */ -#define DPXMULT(x, y) ((u64)(x) * (u64)y) - lxm = xm; hxm = xm >> 32; lym = ym; @@ -163,7 +161,7 @@ union ieee754dp ieee754dp_mul(union ieee754dp x, union ieee754dp y) if ((s64) rm < 0) { rm = (rm >> (64 - (DP_FBITS + 1 + 3))) | ((rm << (DP_FBITS + 1 + 3)) != 0); - re++; + re++; } else { rm = (rm >> (64 - (DP_FBITS + 1 + 3 + 1))) | ((rm << (DP_FBITS + 1 + 3 + 1)) != 0); diff --git a/arch/mips/math-emu/dp_rint.c b/arch/mips/math-emu/dp_rint.c new file mode 100644 index 000000000000..c3b9077ff357 --- /dev/null +++ b/arch/mips/math-emu/dp_rint.c @@ -0,0 +1,89 @@ +/* IEEE754 floating point arithmetic + * double precision: common utilities + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + * Copyright (C) 2017 Imagination Technologies, Ltd. 
+ * Author: Aleksandar Markovic <aleksandar.markovic@imgtec.com> + * + * This program is free software; you can distribute it and/or modify it + * under the terms of the GNU General Public License (Version 2) as + * published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program. + */ + +#include "ieee754dp.h" + +union ieee754dp ieee754dp_rint(union ieee754dp x) +{ + union ieee754dp ret; + u64 residue; + int sticky; + int round; + int odd; + + COMPXDP; + + ieee754_clearcx(); + + EXPLODEXDP; + FLUSHXDP; + + if (xc == IEEE754_CLASS_SNAN) + return ieee754dp_nanxcpt(x); + + if ((xc == IEEE754_CLASS_QNAN) || + (xc == IEEE754_CLASS_INF) || + (xc == IEEE754_CLASS_ZERO)) + return x; + + if (xe >= DP_FBITS) + return x; + + if (xe < -1) { + residue = xm; + round = 0; + sticky = residue != 0; + xm = 0; + } else { + residue = xm << (64 - DP_FBITS + xe); + round = (residue >> 63) != 0; + sticky = (residue << 1) != 0; + xm >>= DP_FBITS - xe; + } + + odd = (xm & 0x1) != 0x0; + + switch (ieee754_csr.rm) { + case FPU_CSR_RN: /* toward nearest */ + if (round && (sticky || odd)) + xm++; + break; + case FPU_CSR_RZ: /* toward zero */ + break; + case FPU_CSR_RU: /* toward +infinity */ + if ((round || sticky) && !xs) + xm++; + break; + case FPU_CSR_RD: /* toward -infinity */ + if ((round || sticky) && xs) + xm++; + break; + } + + if (round || sticky) + ieee754_setcx(IEEE754_INEXACT); + + ret = ieee754dp_flong(xm); + DPSIGN(ret) = xs; + + return ret; +} diff --git a/arch/mips/math-emu/dp_simple.c b/arch/mips/math-emu/dp_simple.c index 926d56bf37f2..eb96485ed939 100644 --- a/arch/mips/math-emu/dp_simple.c +++ b/arch/mips/math-emu/dp_simple.c @@ -23,27 +23,39 @@ union 
ieee754dp ieee754dp_neg(union ieee754dp x) { - unsigned int oldrm; union ieee754dp y; - oldrm = ieee754_csr.rm; - ieee754_csr.rm = FPU_CSR_RD; - y = ieee754dp_sub(ieee754dp_zero(0), x); - ieee754_csr.rm = oldrm; + if (ieee754_csr.abs2008) { + y = x; + DPSIGN(y) = !DPSIGN(x); + } else { + unsigned int oldrm; + + oldrm = ieee754_csr.rm; + ieee754_csr.rm = FPU_CSR_RD; + y = ieee754dp_sub(ieee754dp_zero(0), x); + ieee754_csr.rm = oldrm; + } return y; } union ieee754dp ieee754dp_abs(union ieee754dp x) { - unsigned int oldrm; union ieee754dp y; - oldrm = ieee754_csr.rm; - ieee754_csr.rm = FPU_CSR_RD; - if (DPSIGN(x)) - y = ieee754dp_sub(ieee754dp_zero(0), x); - else - y = ieee754dp_add(ieee754dp_zero(0), x); - ieee754_csr.rm = oldrm; + if (ieee754_csr.abs2008) { + y = x; + DPSIGN(y) = 0; + } else { + unsigned int oldrm; + + oldrm = ieee754_csr.rm; + ieee754_csr.rm = FPU_CSR_RD; + if (DPSIGN(x)) + y = ieee754dp_sub(ieee754dp_zero(0), x); + else + y = ieee754dp_add(ieee754dp_zero(0), x); + ieee754_csr.rm = oldrm; + } return y; } diff --git a/arch/mips/math-emu/dp_sqrt.c b/arch/mips/math-emu/dp_sqrt.c index cd5bc083001e..1d26c92e5295 100644 --- a/arch/mips/math-emu/dp_sqrt.c +++ b/arch/mips/math-emu/dp_sqrt.c @@ -21,7 +21,7 @@ #include "ieee754dp.h" -static const unsigned table[] = { +static const unsigned int table[] = { 0, 1204, 3062, 5746, 9193, 13348, 18162, 23592, 29598, 36145, 43202, 50740, 58733, 67158, 75992, 85215, 83599, 71378, 60428, 50647, 41945, 34246, @@ -33,7 +33,7 @@ union ieee754dp ieee754dp_sqrt(union ieee754dp x) { struct _ieee754_csr oldcsr; union ieee754dp y, z, t; - unsigned scalx, yh; + unsigned int scalx, yh; COMPXDP; EXPLODEXDP; @@ -91,7 +91,8 @@ union ieee754dp ieee754dp_sqrt(union ieee754dp x) scalx -= 256; } - y = x = builddp(0, xe + DP_EBIAS, xm & ~DP_HIDDEN_BIT); + x = builddp(0, xe + DP_EBIAS, xm & ~DP_HIDDEN_BIT); + y = x; /* magic initial approximation to almost 8 sig. 
bits */ yh = y.bits >> 32; @@ -108,7 +109,8 @@ union ieee754dp ieee754dp_sqrt(union ieee754dp x) /* triple to almost 56 sig. bits: y ~= sqrt(x) to within 1 ulp */ /* t=y*y; z=t; pt[n0]+=0x00100000; t+=z; z=(x-z)*y; */ - z = t = ieee754dp_mul(y, y); + t = ieee754dp_mul(y, y); + z = t; t.bexp += 0x001; t = ieee754dp_add(t, z); z = ieee754dp_mul(ieee754dp_sub(x, z), y); @@ -140,7 +142,7 @@ union ieee754dp ieee754dp_sqrt(union ieee754dp x) switch (oldcsr.rm) { case FPU_CSR_RU: y.bits += 1; - /* drop through */ + /* fall through */ case FPU_CSR_RN: t.bits += 1; break; diff --git a/arch/mips/math-emu/dp_sub.c b/arch/mips/math-emu/dp_sub.c index fc17a781b9ae..3cc48b86519b 100644 --- a/arch/mips/math-emu/dp_sub.c +++ b/arch/mips/math-emu/dp_sub.c @@ -106,7 +106,7 @@ union ieee754dp ieee754dp_sub(union ieee754dp x, union ieee754dp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; - /* FALL THROUGH */ + /* fall through */ case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): /* normalize ym,ye */ diff --git a/arch/mips/math-emu/dp_tint.c b/arch/mips/math-emu/dp_tint.c index 6ffc336c530e..f3985617ce31 100644 --- a/arch/mips/math-emu/dp_tint.c +++ b/arch/mips/math-emu/dp_tint.c @@ -38,10 +38,13 @@ int ieee754dp_tint(union ieee754dp x) switch (xc) { case IEEE754_CLASS_SNAN: case IEEE754_CLASS_QNAN: - case IEEE754_CLASS_INF: ieee754_setcx(IEEE754_INVALID_OPERATION); return ieee754si_indef(); + case IEEE754_CLASS_INF: + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754si_overflow(xs); + case IEEE754_CLASS_ZERO: return 0; @@ -53,7 +56,7 @@ int ieee754dp_tint(union ieee754dp x) /* Set invalid. 
We will only use overflow for floating point overflow */ ieee754_setcx(IEEE754_INVALID_OPERATION); - return ieee754si_indef(); + return ieee754si_overflow(xs); } /* oh gawd */ if (xe > DP_FBITS) { @@ -93,7 +96,7 @@ int ieee754dp_tint(union ieee754dp x) if ((xm >> 31) != 0 && (xs == 0 || xm != 0x80000000)) { /* This can happen after rounding */ ieee754_setcx(IEEE754_INVALID_OPERATION); - return ieee754si_indef(); + return ieee754si_overflow(xs); } if (round || sticky) ieee754_setcx(IEEE754_INEXACT); diff --git a/arch/mips/math-emu/dp_tlong.c b/arch/mips/math-emu/dp_tlong.c index 9cdc145b75e0..748fa10ed4cf 100644 --- a/arch/mips/math-emu/dp_tlong.c +++ b/arch/mips/math-emu/dp_tlong.c @@ -38,10 +38,13 @@ s64 ieee754dp_tlong(union ieee754dp x) switch (xc) { case IEEE754_CLASS_SNAN: case IEEE754_CLASS_QNAN: - case IEEE754_CLASS_INF: ieee754_setcx(IEEE754_INVALID_OPERATION); return ieee754di_indef(); + case IEEE754_CLASS_INF: + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754di_overflow(xs); + case IEEE754_CLASS_ZERO: return 0; @@ -56,7 +59,7 @@ s64 ieee754dp_tlong(union ieee754dp x) /* Set invalid. 
We will only use overflow for floating point overflow */ ieee754_setcx(IEEE754_INVALID_OPERATION); - return ieee754di_indef(); + return ieee754di_overflow(xs); } /* oh gawd */ if (xe > DP_FBITS) { @@ -97,7 +100,7 @@ s64 ieee754dp_tlong(union ieee754dp x) if ((xm >> 63) != 0) { /* This can happen after rounding */ ieee754_setcx(IEEE754_INVALID_OPERATION); - return ieee754di_indef(); + return ieee754di_overflow(xs); } if (round || sticky) ieee754_setcx(IEEE754_INEXACT); diff --git a/arch/mips/math-emu/dsemul.c b/arch/mips/math-emu/dsemul.c index cbb36c14b155..4a094f7acb3d 100644 --- a/arch/mips/math-emu/dsemul.c +++ b/arch/mips/math-emu/dsemul.c @@ -1,3 +1,6 @@ +#include <linux/err.h> +#include <linux/slab.h> + #include <asm/branch.h> #include <asm/cacheflush.h> #include <asm/fpu_emulator.h> @@ -5,158 +8,296 @@ #include <asm/mipsregs.h> #include <asm/uaccess.h> -#include "ieee754.h" - -/* - * Emulate the arbritrary instruction ir at xcp->cp0_epc. Required when - * we have to emulate the instruction in a COP1 branch delay slot. Do - * not change cp0_epc due to the instruction +/** + * struct emuframe - The 'emulation' frame structure + * @emul: The instruction to 'emulate'. + * @badinst: A break instruction to cause a return to the kernel. * - * According to the spec: - * 1) it shouldn't be a branch :-) - * 2) it can be a COP instruction :-( - * 3) if we are tring to run a protected memory space we must take - * special care on memory access instructions :-( - */ - -/* - * "Trampoline" return routine to catch exception following - * execution of delay-slot instruction execution. + * This structure defines the frames placed within the delay slot emulation + * page in response to a call to mips_dsemul(). Each thread may be allocated + * only one frame at any given time. The kernel stores within it the + * instruction to be 'emulated' followed by a break instruction, then + * executes the frame in user mode. 
The break causes a trap to the kernel + * which leads to do_dsemulret() being called unless the instruction in + * @emul causes a trap itself, is a branch, or a signal is delivered to + * the thread. In these cases the allocated frame will either be reused by + * a subsequent delay slot 'emulation', or be freed during signal delivery or + * upon thread exit. + * + * This approach is used because: + * + * - Actually emulating all instructions isn't feasible. We would need to + * be able to handle instructions from all revisions of the MIPS ISA, + * all ASEs & all vendor instruction set extensions. This would be a + * whole lot of work & continual maintenance burden as new instructions + * are introduced, and in the case of some vendor extensions may not + * even be possible. Thus we need to take the approach of actually + * executing the instruction. + * + * - We must execute the instruction within user context. If we were to + * execute the instruction in kernel mode then it would have access to + * kernel resources without very careful checks, leaving us with a + * high potential for security or stability issues to arise. + * + * - We used to place the frame on the users stack, but this requires + * that the stack be executable. This is bad for security so the + * per-process page is now used instead. + * + * - The instruction in @emul may be something entirely invalid for a + * delay slot. The user may (intentionally or otherwise) place a branch + * in a delay slot, or a kernel mode instruction, or something else + * which generates an exception. Thus we can't rely upon the break in + * @badinst always being hit. For this reason we track the index of the + * frame allocated to each thread, allowing us to clean it up at later + * points such as signal delivery or thread exit. + * + * - The user may generate a fake struct emuframe if they wish, invoking + * the BRK_MEMU break instruction themselves. 
We must therefore not + * trust that BRK_MEMU means there's actually a valid frame allocated + * to the thread, and must not allow the user to do anything they + * couldn't already. */ - struct emuframe { mips_instruction emul; mips_instruction badinst; - mips_instruction cookie; - unsigned long epc; }; -int mips_dsemul(struct pt_regs *regs, mips_instruction ir, unsigned long cpc) -{ - struct emuframe __user *fr; - int err; - - if ((get_isa16_mode(regs->cp0_epc) && ((ir >> 16) == MM_NOP16)) || - (ir == 0)) { - /* NOP is easy */ - regs->cp0_epc = cpc; - clear_delay_slot(regs); - return 0; - } +static const int emupage_frame_count = PAGE_SIZE / sizeof(struct emuframe); - pr_debug("dsemul %lx %lx\n", regs->cp0_epc, cpc); +static inline __user struct emuframe *dsemul_page(void) +{ + return (__user struct emuframe *)STACK_TOP; +} - /* - * The strategy is to push the instruction onto the user stack - * and put a trap after it which we can catch and jump to - * the required address any alternative apart from full - * instruction emulation!!. - * - * Algorithmics used a system call instruction, and - * borrowed that vector. MIPS/Linux version is a bit - * more heavyweight in the interests of portability and - * multiprocessor support. For Linux we generate a - * an unaligned access and force an address error exception. - * - * For embedded systems (stand-alone) we prefer to use a - * non-existing CP1 instruction. This prevents us from emulating - * branches, but gives us a cleaner interface to the exception - * handler (single entry point). 
- */ +static int alloc_emuframe(void) +{ + mm_context_t *mm_ctx = &current->mm->context; + int idx; - /* Ensure that the two instructions are in the same cache line */ - fr = (struct emuframe __user *) - ((regs->regs[29] - sizeof(struct emuframe)) & ~0x7); +retry: + spin_lock(&mm_ctx->bd_emupage_lock); - /* Verify that the stack pointer is not competely insane */ - if (unlikely(!access_ok(VERIFY_WRITE, fr, sizeof(struct emuframe)))) - return SIGBUS; + /* Ensure we have an allocation bitmap */ + if (!mm_ctx->bd_emupage_allocmap) { + mm_ctx->bd_emupage_allocmap = + kcalloc(BITS_TO_LONGS(emupage_frame_count), + sizeof(unsigned long), + GFP_ATOMIC); - if (get_isa16_mode(regs->cp0_epc)) { - err = __put_user(ir >> 16, (u16 __user *)(&fr->emul)); - err |= __put_user(ir & 0xffff, (u16 __user *)((long)(&fr->emul) + 2)); - err |= __put_user(BREAK_MATH >> 16, (u16 __user *)(&fr->badinst)); - err |= __put_user(BREAK_MATH & 0xffff, (u16 __user *)((long)(&fr->badinst) + 2)); - } else { - err = __put_user(ir, &fr->emul); - err |= __put_user((mips_instruction)BREAK_MATH, &fr->badinst); + if (!mm_ctx->bd_emupage_allocmap) { + idx = BD_EMUFRAME_NONE; + goto out_unlock; + } } - err |= __put_user((mips_instruction)BD_COOKIE, &fr->cookie); - err |= __put_user(cpc, &fr->epc); + /* Attempt to allocate a single bit/frame */ + idx = bitmap_find_free_region(mm_ctx->bd_emupage_allocmap, + emupage_frame_count, 0); + if (idx < 0) { + /* + * Failed to allocate a frame. We'll wait until one becomes + * available. We unlock the page so that other threads actually + * get the opportunity to free their frames, which means + * technically the result of bitmap_full may be incorrect. + * However the worst case is that we repeat all this and end up + * back here again.
+ */ + spin_unlock(&mm_ctx->bd_emupage_lock); + if (!wait_event_killable(mm_ctx->bd_emupage_queue, + !bitmap_full(mm_ctx->bd_emupage_allocmap, + emupage_frame_count))) + goto retry; - if (unlikely(err)) { - MIPS_FPU_EMU_INC_STATS(errors); - return SIGBUS; + /* Received a fatal signal - just give in */ + return BD_EMUFRAME_NONE; } - regs->cp0_epc = ((unsigned long) &fr->emul) | - get_isa16_mode(regs->cp0_epc); + /* Success! */ + pr_debug("allocate emuframe %d to %d\n", idx, current->pid); +out_unlock: + spin_unlock(&mm_ctx->bd_emupage_lock); + return idx; +} - flush_cache_sigtramp((unsigned long)&fr->emul); +static void free_emuframe(int idx, struct mm_struct *mm) +{ + mm_context_t *mm_ctx = &mm->context; - return 0; + spin_lock(&mm_ctx->bd_emupage_lock); + + pr_debug("free emuframe %d from %d\n", idx, current->pid); + bitmap_clear(mm_ctx->bd_emupage_allocmap, idx, 1); + + /* If some thread is waiting for a frame, now's its chance */ + wake_up(&mm_ctx->bd_emupage_queue); + + spin_unlock(&mm_ctx->bd_emupage_lock); +} + +static bool within_emuframe(struct pt_regs *regs) +{ + unsigned long base = (unsigned long)dsemul_page(); + + if (regs->cp0_epc < base) + return false; + if (regs->cp0_epc >= (base + PAGE_SIZE)) + return false; + + return true; +} + +bool dsemul_thread_cleanup(struct task_struct *tsk) +{ + int fr_idx; + + /* Clear any allocated frame, retrieving its index */ + fr_idx = atomic_xchg(&tsk->thread.bd_emu_frame, BD_EMUFRAME_NONE); + + /* If no frame was allocated, we're done */ + if (fr_idx == BD_EMUFRAME_NONE) + return false; + + task_lock(tsk); + + /* Free the frame that this thread had allocated */ + if (tsk->mm) + free_emuframe(fr_idx, tsk->mm); + + task_unlock(tsk); + return true; } -int do_dsemulret(struct pt_regs *xcp) +bool dsemul_thread_rollback(struct pt_regs *regs) { struct emuframe __user *fr; - unsigned long epc; - u32 insn, cookie; - int err = 0; - u16 instr[2]; + int fr_idx; - fr = (struct emuframe __user *) - (msk_isa16_mode(xcp->cp0_epc) - 
sizeof(mips_instruction)); + /* Do nothing if we're not executing from a frame */ + if (!within_emuframe(regs)) + return false; - /* - * If we can't even access the area, something is very wrong, but we'll - * leave that to the default handling - */ - if (!access_ok(VERIFY_READ, fr, sizeof(struct emuframe))) - return 0; + /* Find the frame being executed */ + fr_idx = atomic_read(&current->thread.bd_emu_frame); + if (fr_idx == BD_EMUFRAME_NONE) + return false; + fr = &dsemul_page()[fr_idx]; /* - * Do some sanity checking on the stackframe: - * - * - Is the instruction pointed to by the EPC an BREAK_MATH? - * - Is the following memory word the BD_COOKIE? + * If the PC is at the emul instruction, roll back to the branch. If + * PC is at the badinst (break) instruction, we've already emulated the + * instruction so progress to the continue PC. If it's anything else + * then something is amiss & the user has branched into some other area + * of the emupage - we'll free the allocated frame anyway.
*/ - if (get_isa16_mode(xcp->cp0_epc)) { - err = __get_user(instr[0], (u16 __user *)(&fr->badinst)); - err |= __get_user(instr[1], (u16 __user *)((long)(&fr->badinst) + 2)); - insn = (instr[0] << 16) | instr[1]; + if (msk_isa16_mode(regs->cp0_epc) == (unsigned long)&fr->emul) + regs->cp0_epc = current->thread.bd_emu_branch_pc; + else if (msk_isa16_mode(regs->cp0_epc) == (unsigned long)&fr->badinst) + regs->cp0_epc = current->thread.bd_emu_cont_pc; + + atomic_set(&current->thread.bd_emu_frame, BD_EMUFRAME_NONE); + free_emuframe(fr_idx, current->mm); + return true; +} + +void dsemul_mm_cleanup(struct mm_struct *mm) +{ + mm_context_t *mm_ctx = &mm->context; + + kfree(mm_ctx->bd_emupage_allocmap); +} + +int mips_dsemul(struct pt_regs *regs, mips_instruction ir, + unsigned long branch_pc, unsigned long cont_pc) +{ + int isa16 = get_isa16_mode(regs->cp0_epc); + mips_instruction break_math; + struct emuframe __user *fr; + int err, fr_idx; + + /* NOP is easy */ + if (ir == 0) + return -1; + + /* microMIPS instructions */ + if (isa16) { + union mips_instruction insn = { .word = ir }; + + /* NOP16 aka MOVE16 $0, $0 */ + if ((ir >> 16) == MM_NOP16) + return -1; + + /* ADDIUPC */ + if (insn.mm_a_format.opcode == mm_addiupc_op) { + unsigned int rs; + s32 v; + + rs = (((insn.mm_a_format.rs + 0xe) & 0xf) + 2); + v = regs->cp0_epc & ~3; + v += insn.mm_a_format.simmediate << 2; + regs->regs[rs] = (long)v; + return -1; + } + } + + pr_debug("dsemul 0x%08lx cont at 0x%08lx\n", regs->cp0_epc, cont_pc); + + /* Allocate a frame if we don't already have one */ + fr_idx = atomic_read(&current->thread.bd_emu_frame); + if (fr_idx == BD_EMUFRAME_NONE) + fr_idx = alloc_emuframe(); + if (fr_idx == BD_EMUFRAME_NONE) + return SIGBUS; + fr = &dsemul_page()[fr_idx]; + + /* Retrieve the appropriately encoded break instruction */ + break_math = BREAK_MATH(isa16); + + /* Write the instructions to the frame */ + if (isa16) { + err = __put_user(ir >> 16, + (u16 __user *)(&fr->emul)); + err |= __put_user(ir &
0xffff, + (u16 __user *)((long)(&fr->emul) + 2)); + err |= __put_user(break_math >> 16, + (u16 __user *)(&fr->badinst)); + err |= __put_user(break_math & 0xffff, + (u16 __user *)((long)(&fr->badinst) + 2)); } else { - err = __get_user(insn, &fr->badinst); + err = __put_user(ir, &fr->emul); + err |= __put_user(break_math, &fr->badinst); } - err |= __get_user(cookie, &fr->cookie); - if (unlikely(err || (insn != BREAK_MATH) || (cookie != BD_COOKIE))) { + if (unlikely(err)) { MIPS_FPU_EMU_INC_STATS(errors); - return 0; + free_emuframe(fr_idx, current->mm); + return SIGBUS; } - /* - * At this point, we are satisfied that it's a BD emulation trap. Yes, - * a user might have deliberately put two malformed and useless - * instructions in a row in his program, in which case he's in for a - * nasty surprise - the next instruction will be treated as a - * continuation address! Alas, this seems to be the only way that we - * can handle signals, recursion, and longjmps() in the context of - * emulating the branch delay instruction. 
- */ + /* Record the PC of the branch, PC to continue from & frame index */ + current->thread.bd_emu_branch_pc = branch_pc; + current->thread.bd_emu_cont_pc = cont_pc; + atomic_set(&current->thread.bd_emu_frame, fr_idx); - pr_debug("dsemulret\n"); + /* Change user register context to execute the frame */ + regs->cp0_epc = (unsigned long)&fr->emul | isa16; - if (__get_user(epc, &fr->epc)) { /* Saved EPC */ - /* This is not a good situation to be in */ - force_sig(SIGBUS, current); + /* Ensure the icache observes our newly written frame */ + flush_cache_sigtramp((unsigned long)&fr->emul); - return 0; + return 0; +} + +bool do_dsemulret(struct pt_regs *xcp) +{ + /* Cleanup the allocated frame, returning if there wasn't one */ + if (!dsemul_thread_cleanup(current)) { + MIPS_FPU_EMU_INC_STATS(errors); + return false; } /* Set EPC to return to post-branch instruction */ - xcp->cp0_epc = epc; + xcp->cp0_epc = current->thread.bd_emu_cont_pc; + pr_debug("dsemulret to 0x%08lx\n", xcp->cp0_epc); MIPS_FPU_EMU_INC_STATS(ds_emul); - return 1; + return true; } diff --git a/arch/mips/math-emu/ieee754.c b/arch/mips/math-emu/ieee754.c index 8e97acbbe22c..e16ae7b75dbb 100644 --- a/arch/mips/math-emu/ieee754.c +++ b/arch/mips/math-emu/ieee754.c @@ -59,7 +59,8 @@ const union ieee754dp __ieee754dp_spcvals[] = { DPCNST(1, 3, 0x4000000000000ULL), /* - 10.0 */ DPCNST(0, DP_EMAX + 1, 0x0000000000000ULL), /* + infinity */ DPCNST(1, DP_EMAX + 1, 0x0000000000000ULL), /* - infinity */ - DPCNST(0, DP_EMAX + 1, 0x7FFFFFFFFFFFFULL), /* + indef quiet Nan */ + DPCNST(0, DP_EMAX + 1, 0x7FFFFFFFFFFFFULL), /* + ind legacy qNaN */ + DPCNST(0, DP_EMAX + 1, 0x8000000000000ULL), /* + indef 2008 qNaN */ DPCNST(0, DP_EMAX, 0xFFFFFFFFFFFFFULL), /* + max */ DPCNST(1, DP_EMAX, 0xFFFFFFFFFFFFFULL), /* - max */ DPCNST(0, DP_EMIN, 0x0000000000000ULL), /* + min normal */ @@ -82,7 +83,8 @@ const union ieee754sp __ieee754sp_spcvals[] = { SPCNST(1, 3, 0x200000), /* - 10.0 */ SPCNST(0, SP_EMAX + 1, 0x000000), /* + infinity
*/ SPCNST(1, SP_EMAX + 1, 0x000000), /* - infinity */ - SPCNST(0, SP_EMAX + 1, 0x3FFFFF), /* + indef quiet Nan */ + SPCNST(0, SP_EMAX + 1, 0x3FFFFF), /* + indef legacy quiet NaN */ + SPCNST(0, SP_EMAX + 1, 0x400000), /* + indef 2008 quiet NaN */ SPCNST(0, SP_EMAX, 0x7FFFFF), /* + max normal */ SPCNST(1, SP_EMAX, 0x7FFFFF), /* - max normal */ SPCNST(0, SP_EMIN, 0x000000), /* + min normal */ diff --git a/arch/mips/math-emu/ieee754.h b/arch/mips/math-emu/ieee754.h index df94720714c7..e0eb7a965fdf 100644 --- a/arch/mips/math-emu/ieee754.h +++ b/arch/mips/math-emu/ieee754.h @@ -67,6 +67,7 @@ union ieee754sp ieee754sp_div(union ieee754sp x, union ieee754sp y); union ieee754sp ieee754sp_fint(int x); union ieee754sp ieee754sp_flong(s64 x); union ieee754sp ieee754sp_fdp(union ieee754dp x); +union ieee754sp ieee754sp_rint(union ieee754sp x); int ieee754sp_tint(union ieee754sp x); s64 ieee754sp_tlong(union ieee754sp x); @@ -101,6 +102,7 @@ union ieee754dp ieee754dp_neg(union ieee754dp x); union ieee754dp ieee754dp_fint(int x); union ieee754dp ieee754dp_flong(s64 x); union ieee754dp ieee754dp_fsp(union ieee754sp x); +union ieee754dp ieee754dp_rint(union ieee754dp x); int ieee754dp_tint(union ieee754dp x); s64 ieee754dp_tlong(union ieee754dp x); @@ -163,11 +165,12 @@ struct _ieee754_csr { }; #define ieee754_csr (*(struct _ieee754_csr *)(&current->thread.fpu.fcr31)) -static inline unsigned ieee754_getrm(void) +static inline unsigned int ieee754_getrm(void) { return (ieee754_csr.rm); } -static inline unsigned ieee754_setrm(unsigned rm) + +static inline unsigned int ieee754_setrm(unsigned int rm) { return (ieee754_csr.rm = rm); } @@ -175,14 +178,14 @@ static inline unsigned ieee754_setrm(unsigned rm) /* * get current exceptions */ -static inline unsigned ieee754_getcx(void) +static inline unsigned int ieee754_getcx(void) { return (ieee754_csr.cx); } /* test for current exception condition */ -static inline int ieee754_cxtest(unsigned n) +static inline int ieee754_cxtest(unsigned int n)
{ return (ieee754_csr.cx & n); } @@ -190,21 +193,21 @@ static inline int ieee754_cxtest(unsigned n) /* * get sticky exceptions */ -static inline unsigned ieee754_getsx(void) +static inline unsigned int ieee754_getsx(void) { return (ieee754_csr.sx); } /* clear sticky conditions */ -static inline unsigned ieee754_clrsx(void) +static inline unsigned int ieee754_clrsx(void) { return (ieee754_csr.sx = 0); } /* test for sticky exception condition */ -static inline int ieee754_sxtest(unsigned n) +static inline int ieee754_sxtest(unsigned int n) { return (ieee754_csr.sx & n); } @@ -221,15 +224,16 @@ union ieee754dp ieee754dp_dump(char *s, union ieee754dp x); #define IEEE754_SPCVAL_NTEN 5 /* -10.0 */ #define IEEE754_SPCVAL_PINFINITY 6 /* +inf */ #define IEEE754_SPCVAL_NINFINITY 7 /* -inf */ -#define IEEE754_SPCVAL_INDEF 8 /* quiet NaN */ -#define IEEE754_SPCVAL_PMAX 9 /* +max norm */ -#define IEEE754_SPCVAL_NMAX 10 /* -max norm */ -#define IEEE754_SPCVAL_PMIN 11 /* +min norm */ -#define IEEE754_SPCVAL_NMIN 12 /* -min norm */ -#define IEEE754_SPCVAL_PMIND 13 /* +min denorm */ -#define IEEE754_SPCVAL_NMIND 14 /* -min denorm */ -#define IEEE754_SPCVAL_P1E31 15 /* + 1.0e31 */ -#define IEEE754_SPCVAL_P1E63 16 /* + 1.0e63 */ +#define IEEE754_SPCVAL_INDEF_LEG 8 /* legacy quiet NaN */ +#define IEEE754_SPCVAL_INDEF_2008 9 /* IEEE 754-2008 quiet NaN */ +#define IEEE754_SPCVAL_PMAX 10 /* +max norm */ +#define IEEE754_SPCVAL_NMAX 11 /* -max norm */ +#define IEEE754_SPCVAL_PMIN 12 /* +min norm */ +#define IEEE754_SPCVAL_NMIN 13 /* -min norm */ +#define IEEE754_SPCVAL_PMIND 14 /* +min denorm */ +#define IEEE754_SPCVAL_NMIND 15 /* -min denorm */ +#define IEEE754_SPCVAL_P1E31 16 /* + 1.0e31 */ +#define IEEE754_SPCVAL_P1E63 17 /* + 1.0e63 */ extern const union ieee754dp __ieee754dp_spcvals[]; extern const union ieee754sp __ieee754sp_spcvals[]; @@ -243,7 +247,8 @@ extern const union ieee754sp __ieee754sp_spcvals[]; #define ieee754dp_zero(sn) (ieee754dp_spcvals[IEEE754_SPCVAL_PZERO+(sn)]) 
#define ieee754dp_one(sn) (ieee754dp_spcvals[IEEE754_SPCVAL_PONE+(sn)]) #define ieee754dp_ten(sn) (ieee754dp_spcvals[IEEE754_SPCVAL_PTEN+(sn)]) -#define ieee754dp_indef() (ieee754dp_spcvals[IEEE754_SPCVAL_INDEF]) +#define ieee754dp_indef() (ieee754dp_spcvals[IEEE754_SPCVAL_INDEF_LEG + \ + ieee754_csr.nan2008]) #define ieee754dp_max(sn) (ieee754dp_spcvals[IEEE754_SPCVAL_PMAX+(sn)]) #define ieee754dp_min(sn) (ieee754dp_spcvals[IEEE754_SPCVAL_PMIN+(sn)]) #define ieee754dp_mind(sn) (ieee754dp_spcvals[IEEE754_SPCVAL_PMIND+(sn)]) @@ -254,7 +259,8 @@ extern const union ieee754sp __ieee754sp_spcvals[]; #define ieee754sp_zero(sn) (ieee754sp_spcvals[IEEE754_SPCVAL_PZERO+(sn)]) #define ieee754sp_one(sn) (ieee754sp_spcvals[IEEE754_SPCVAL_PONE+(sn)]) #define ieee754sp_ten(sn) (ieee754sp_spcvals[IEEE754_SPCVAL_PTEN+(sn)]) -#define ieee754sp_indef() (ieee754sp_spcvals[IEEE754_SPCVAL_INDEF]) +#define ieee754sp_indef() (ieee754sp_spcvals[IEEE754_SPCVAL_INDEF_LEG + \ + ieee754_csr.nan2008]) #define ieee754sp_max(sn) (ieee754sp_spcvals[IEEE754_SPCVAL_PMAX+(sn)]) #define ieee754sp_min(sn) (ieee754sp_spcvals[IEEE754_SPCVAL_PMIN+(sn)]) #define ieee754sp_mind(sn) (ieee754sp_spcvals[IEEE754_SPCVAL_PMIND+(sn)]) @@ -266,12 +272,25 @@ extern const union ieee754sp __ieee754sp_spcvals[]; */ static inline int ieee754si_indef(void) { - return INT_MAX; + return ieee754_csr.nan2008 ? 0 : INT_MAX; } static inline s64 ieee754di_indef(void) { - return S64_MAX; + return ieee754_csr.nan2008 ? 0 : S64_MAX; +} + +/* + * Overflow integer value + */ +static inline int ieee754si_overflow(int xs) +{ + return ieee754_csr.nan2008 && xs ? INT_MIN : INT_MAX; +} + +static inline s64 ieee754di_overflow(int xs) +{ + return ieee754_csr.nan2008 && xs ? 
S64_MIN : S64_MAX; } /* result types for xctx.rt */ diff --git a/arch/mips/math-emu/ieee754dp.c b/arch/mips/math-emu/ieee754dp.c index 522d843f2ffd..465a0342ed4c 100644 --- a/arch/mips/math-emu/ieee754dp.c +++ b/arch/mips/math-emu/ieee754dp.c @@ -37,8 +37,11 @@ static inline int ieee754dp_isnan(union ieee754dp x) static inline int ieee754dp_issnan(union ieee754dp x) { + int qbit; + assert(ieee754dp_isnan(x)); - return (DPMANT(x) & DP_MBIT(DP_FBITS - 1)) == DP_MBIT(DP_FBITS - 1); + qbit = (DPMANT(x) & DP_MBIT(DP_FBITS - 1)) == DP_MBIT(DP_FBITS - 1); + return ieee754_csr.nan2008 ^ qbit; } @@ -51,7 +54,15 @@ union ieee754dp __cold ieee754dp_nanxcpt(union ieee754dp r) assert(ieee754dp_issnan(r)); ieee754_setcx(IEEE754_INVALID_OPERATION); - return ieee754dp_indef(); + if (ieee754_csr.nan2008) { + DPMANT(r) |= DP_MBIT(DP_FBITS - 1); + } else { + DPMANT(r) &= ~DP_MBIT(DP_FBITS - 1); + if (!ieee754dp_isnan(r)) + DPMANT(r) |= DP_MBIT(DP_FBITS - 2); + } + + return r; } static u64 ieee754dp_get_rounding(int sn, u64 xm) @@ -89,7 +100,7 @@ union ieee754dp ieee754dp_format(int sn, int xe, u64 xm) { assert(xm); /* we don't gen exact zeros (probably should) */ - assert((xm >> (DP_FBITS + 1 + 3)) == 0); /* no execess */ + assert((xm >> (DP_FBITS + 1 + 3)) == 0); /* no excess */ assert(xm & (DP_HIDDEN_BIT << 3)); if (xe < DP_EMIN) { @@ -157,7 +168,7 @@ union ieee754dp ieee754dp_format(int sn, int xe, u64 xm) /* strip grs bits */ xm >>= 3; - assert((xm >> (DP_FBITS + 1)) == 0); /* no execess */ + assert((xm >> (DP_FBITS + 1)) == 0); /* no excess */ assert(xe >= DP_EMIN); if (xe > DP_EMAX) { @@ -190,7 +201,7 @@ union ieee754dp ieee754dp_format(int sn, int xe, u64 xm) ieee754_setcx(IEEE754_UNDERFLOW); return builddp(sn, DP_EMIN - 1 + DP_EBIAS, xm); } else { - assert((xm >> (DP_FBITS + 1)) == 0); /* no execess */ + assert((xm >> (DP_FBITS + 1)) == 0); /* no excess */ assert(xm & DP_HIDDEN_BIT); return builddp(sn, xe + DP_EBIAS, xm & ~DP_HIDDEN_BIT); diff --git 
a/arch/mips/math-emu/ieee754dp.h b/arch/mips/math-emu/ieee754dp.h index e2babd98fee3..a56707b75282 100644 --- a/arch/mips/math-emu/ieee754dp.h +++ b/arch/mips/math-emu/ieee754dp.h @@ -55,11 +55,15 @@ static inline int ieee754dp_finite(union ieee754dp x) #define XDPSRS1(v) \ (((v) >> 1) | ((v) & 1)) +/* 32bit * 32bit => 64bit unsigned integer multiplication */ +#define DPXMULT(x, y) ((u64)(x) * (u64)y) + /* convert denormal to normalized with extended exponent */ #define DPDNORMx(m,e) \ while ((m >> DP_FBITS) == 0) { m <<= 1; e--; } #define DPDNORMX DPDNORMx(xm, xe) #define DPDNORMY DPDNORMx(ym, ye) +#define DPDNORMZ DPDNORMx(zm, ze) static inline union ieee754dp builddp(int s, int bx, u64 m) { diff --git a/arch/mips/math-emu/ieee754int.h b/arch/mips/math-emu/ieee754int.h index 6383e2c5c1ad..06ac0e2ac7ac 100644 --- a/arch/mips/math-emu/ieee754int.h +++ b/arch/mips/math-emu/ieee754int.h @@ -26,6 +26,10 @@ #define CLPAIR(x, y) ((x)*6+(y)) +enum maddf_flags { + MADDF_NEGATE_PRODUCT = 1 << 0, +}; + static inline void ieee754_clearcx(void) { ieee754_csr.cx = 0; @@ -50,10 +54,13 @@ static inline int ieee754_class_nan(int xc) } #define COMPXSP \ - unsigned xm; int xe; int xs __maybe_unused; int xc + unsigned int xm; int xe; int xs __maybe_unused; int xc #define COMPYSP \ - unsigned ym; int ye; int ys; int yc + unsigned int ym; int ye; int ys; int yc + +#define COMPZSP \ + unsigned int zm; int ze; int zs; int zc #define EXPLODESP(v, vc, vs, ve, vm) \ { \ @@ -63,10 +70,10 @@ static inline int ieee754_class_nan(int xc) if (ve == SP_EMAX+1+SP_EBIAS) { \ if (vm == 0) \ vc = IEEE754_CLASS_INF; \ - else if (vm & SP_MBIT(SP_FBITS-1)) \ - vc = IEEE754_CLASS_SNAN; \ - else \ + else if (ieee754_csr.nan2008 ^ !(vm & SP_MBIT(SP_FBITS - 1))) \ vc = IEEE754_CLASS_QNAN; \ + else \ + vc = IEEE754_CLASS_SNAN; \ } else if (ve == SP_EMIN-1+SP_EBIAS) { \ if (vm) { \ ve = SP_EMIN; \ @@ -81,6 +88,7 @@ static inline int ieee754_class_nan(int xc) } #define EXPLODEXSP EXPLODESP(x, xc, xs, xe, xm) 
#define EXPLODEYSP EXPLODESP(y, yc, ys, ye, ym) +#define EXPLODEZSP EXPLODESP(z, zc, zs, ze, zm) #define COMPXDP \ @@ -89,6 +97,9 @@ static inline int ieee754_class_nan(int xc) #define COMPYDP \ u64 ym; int ye; int ys; int yc +#define COMPZDP \ + u64 zm; int ze; int zs; int zc + #define EXPLODEDP(v, vc, vs, ve, vm) \ { \ vm = DPMANT(v); \ @@ -97,10 +108,10 @@ static inline int ieee754_class_nan(int xc) if (ve == DP_EMAX+1+DP_EBIAS) { \ if (vm == 0) \ vc = IEEE754_CLASS_INF; \ - else if (vm & DP_MBIT(DP_FBITS-1)) \ - vc = IEEE754_CLASS_SNAN; \ - else \ + else if (ieee754_csr.nan2008 ^ !(vm & DP_MBIT(DP_FBITS - 1))) \ vc = IEEE754_CLASS_QNAN; \ + else \ + vc = IEEE754_CLASS_SNAN; \ } else if (ve == DP_EMIN-1+DP_EBIAS) { \ if (vm) { \ ve = DP_EMIN; \ @@ -115,6 +126,7 @@ static inline int ieee754_class_nan(int xc) } #define EXPLODEXDP EXPLODEDP(x, xc, xs, xe, xm) #define EXPLODEYDP EXPLODEDP(y, yc, ys, ye, ym) +#define EXPLODEZDP EXPLODEDP(z, zc, zs, ze, zm) #define FLUSHDP(v, vc, vs, ve, vm) \ if (vc==IEEE754_CLASS_DNORM) { \ @@ -140,7 +152,9 @@ static inline int ieee754_class_nan(int xc) #define FLUSHXDP FLUSHDP(x, xc, xs, xe, xm) #define FLUSHYDP FLUSHDP(y, yc, ys, ye, ym) +#define FLUSHZDP FLUSHDP(z, zc, zs, ze, zm) #define FLUSHXSP FLUSHSP(x, xc, xs, xe, xm) #define FLUSHYSP FLUSHSP(y, yc, ys, ye, ym) +#define FLUSHZSP FLUSHSP(z, zc, zs, ze, zm) #endif /* __IEEE754INT_H */ diff --git a/arch/mips/math-emu/ieee754sp.c b/arch/mips/math-emu/ieee754sp.c index ca8e35e33bf7..8423e4c5e415 100644 --- a/arch/mips/math-emu/ieee754sp.c +++ b/arch/mips/math-emu/ieee754sp.c @@ -37,8 +37,11 @@ static inline int ieee754sp_isnan(union ieee754sp x) static inline int ieee754sp_issnan(union ieee754sp x) { + int qbit; + assert(ieee754sp_isnan(x)); - return SPMANT(x) & SP_MBIT(SP_FBITS - 1); + qbit = (SPMANT(x) & SP_MBIT(SP_FBITS - 1)) == SP_MBIT(SP_FBITS - 1); + return ieee754_csr.nan2008 ^ qbit; } @@ -51,10 +54,18 @@ union ieee754sp __cold ieee754sp_nanxcpt(union ieee754sp r) 
assert(ieee754sp_issnan(r)); ieee754_setcx(IEEE754_INVALID_OPERATION); - return ieee754sp_indef(); + if (ieee754_csr.nan2008) { + SPMANT(r) |= SP_MBIT(SP_FBITS - 1); + } else { + SPMANT(r) &= ~SP_MBIT(SP_FBITS - 1); + if (!ieee754sp_isnan(r)) + SPMANT(r) |= SP_MBIT(SP_FBITS - 2); + } + + return r; } -static unsigned ieee754sp_get_rounding(int sn, unsigned xm) +static unsigned int ieee754sp_get_rounding(int sn, unsigned int xm) { /* inexact must round of 3 bits */ @@ -85,11 +96,11 @@ static unsigned ieee754sp_get_rounding(int sn, unsigned xm) * xe is an unbiased exponent * xm is 3bit extended precision value. */ -union ieee754sp ieee754sp_format(int sn, int xe, unsigned xm) +union ieee754sp ieee754sp_format(int sn, int xe, unsigned int xm) { assert(xm); /* we don't gen exact zeros (probably should) */ - assert((xm >> (SP_FBITS + 1 + 3)) == 0); /* no execess */ + assert((xm >> (SP_FBITS + 1 + 3)) == 0); /* no excess */ assert(xm & (SP_HIDDEN_BIT << 3)); if (xe < SP_EMIN) { @@ -130,7 +141,8 @@ union ieee754sp ieee754sp_format(int sn, int xe, unsigned xm) } else { /* sticky right shift es bits */ - SPXSRSXn(es); + xm = XSPSRS(xm, es); + xe += es; assert((xm & (SP_HIDDEN_BIT << 3)) == 0); assert(xe == SP_EMIN); } @@ -155,7 +167,7 @@ union ieee754sp ieee754sp_format(int sn, int xe, unsigned xm) /* strip grs bits */ xm >>= 3; - assert((xm >> (SP_FBITS + 1)) == 0); /* no execess */ + assert((xm >> (SP_FBITS + 1)) == 0); /* no excess */ assert(xe >= SP_EMIN); if (xe > SP_EMAX) { @@ -188,7 +200,7 @@ union ieee754sp ieee754sp_format(int sn, int xe, unsigned xm) ieee754_setcx(IEEE754_UNDERFLOW); return buildsp(sn, SP_EMIN - 1 + SP_EBIAS, xm); } else { - assert((xm >> (SP_FBITS + 1)) == 0); /* no execess */ + assert((xm >> (SP_FBITS + 1)) == 0); /* no excess */ assert(xm & SP_HIDDEN_BIT); return buildsp(sn, xe + SP_EBIAS, xm & ~SP_HIDDEN_BIT); diff --git a/arch/mips/math-emu/ieee754sp.h b/arch/mips/math-emu/ieee754sp.h index 374a3f00a589..8c5a63804873 100644 --- 
a/arch/mips/math-emu/ieee754sp.h +++ b/arch/mips/math-emu/ieee754sp.h @@ -45,28 +45,31 @@ static inline int ieee754sp_finite(union ieee754sp x) return SPBEXP(x) != SP_EMAX + 1 + SP_EBIAS; } +/* 64 bit right shift with rounding */ +#define XSPSRS64(v, rs) \ + (((rs) >= 64) ? ((v) != 0) : ((v) >> (rs)) | ((v) << (64-(rs)) != 0)) + /* 3bit extended single precision sticky right shift */ -#define SPXSRSXn(rs) \ - (xe += rs, \ - xm = (rs > (SP_FBITS+3))?1:((xm) >> (rs)) | ((xm) << (32-(rs)) != 0)) +#define XSPSRS(v, rs) \ + ((rs > (SP_FBITS+3))?1:((v) >> (rs)) | ((v) << (32-(rs)) != 0)) -#define SPXSRSX1() \ - (xe++, (xm = (xm >> 1) | (xm & 1))) +#define XSPSRS1(m) \ + ((m >> 1) | (m & 1)) -#define SPXSRSYn(rs) \ - (ye+=rs, \ - ym = (rs > (SP_FBITS+3))?1:((ym) >> (rs)) | ((ym) << (32-(rs)) != 0)) +#define SPXSRSX1() \ + (xe++, (xm = XSPSRS1(xm))) #define SPXSRSY1() \ - (ye++, (ym = (ym >> 1) | (ym & 1))) + (ye++, (ym = XSPSRS1(ym))) /* convert denormal to normalized with extended exponent */ #define SPDNORMx(m,e) \ while ((m >> SP_FBITS) == 0) { m <<= 1; e--; } #define SPDNORMX SPDNORMx(xm, xe) #define SPDNORMY SPDNORMx(ym, ye) +#define SPDNORMZ SPDNORMx(zm, ze) -static inline union ieee754sp buildsp(int s, int bx, unsigned m) +static inline union ieee754sp buildsp(int s, int bx, unsigned int m) { union ieee754sp r; diff --git a/arch/mips/math-emu/me-debugfs.c b/arch/mips/math-emu/me-debugfs.c index be650ed7db59..8c0ec154aecc 100644 --- a/arch/mips/math-emu/me-debugfs.c +++ b/arch/mips/math-emu/me-debugfs.c @@ -28,14 +28,190 @@ static int fpuemu_stat_get(void *data, u64 *val) } DEFINE_SIMPLE_ATTRIBUTE(fops_fpuemu_stat, fpuemu_stat_get, NULL, "%llu\n"); +/* + * Used to obtain names for a debugfs instruction counter, given field name + * in fpuemustats structure. For example, for input "cmp_sueq_d", the output + * would be "cmp.sueq.d". 
This is needed since dots are not allowed to be + * used in structure field names, and are, on the other hand, desired to be + * used in debugfs item names to be clearly associated to corresponding + * MIPS FPU instructions. + */ +static void adjust_instruction_counter_name(char *out_name, char *in_name) +{ + int i = 0; + + strcpy(out_name, in_name); + while (in_name[i] != '\0') { + if (out_name[i] == '_') + out_name[i] = '.'; + i++; + } +} + +static int fpuemustats_clear_show(struct seq_file *s, void *unused) +{ + __this_cpu_write((fpuemustats).emulated, 0); + __this_cpu_write((fpuemustats).loads, 0); + __this_cpu_write((fpuemustats).stores, 0); + __this_cpu_write((fpuemustats).branches, 0); + __this_cpu_write((fpuemustats).cp1ops, 0); + __this_cpu_write((fpuemustats).cp1xops, 0); + __this_cpu_write((fpuemustats).errors, 0); + __this_cpu_write((fpuemustats).ieee754_inexact, 0); + __this_cpu_write((fpuemustats).ieee754_underflow, 0); + __this_cpu_write((fpuemustats).ieee754_overflow, 0); + __this_cpu_write((fpuemustats).ieee754_zerodiv, 0); + __this_cpu_write((fpuemustats).ieee754_invalidop, 0); + __this_cpu_write((fpuemustats).ds_emul, 0); + + __this_cpu_write((fpuemustats).abs_s, 0); + __this_cpu_write((fpuemustats).abs_d, 0); + __this_cpu_write((fpuemustats).add_s, 0); + __this_cpu_write((fpuemustats).add_d, 0); + __this_cpu_write((fpuemustats).bc1eqz, 0); + __this_cpu_write((fpuemustats).bc1nez, 0); + __this_cpu_write((fpuemustats).ceil_w_s, 0); + __this_cpu_write((fpuemustats).ceil_w_d, 0); + __this_cpu_write((fpuemustats).ceil_l_s, 0); + __this_cpu_write((fpuemustats).ceil_l_d, 0); + __this_cpu_write((fpuemustats).class_s, 0); + __this_cpu_write((fpuemustats).class_d, 0); + __this_cpu_write((fpuemustats).cmp_af_s, 0); + __this_cpu_write((fpuemustats).cmp_af_d, 0); + __this_cpu_write((fpuemustats).cmp_eq_s, 0); + __this_cpu_write((fpuemustats).cmp_eq_d, 0); + __this_cpu_write((fpuemustats).cmp_le_s, 0); + __this_cpu_write((fpuemustats).cmp_le_d, 0); + 
__this_cpu_write((fpuemustats).cmp_lt_s, 0); + __this_cpu_write((fpuemustats).cmp_lt_d, 0); + __this_cpu_write((fpuemustats).cmp_ne_s, 0); + __this_cpu_write((fpuemustats).cmp_ne_d, 0); + __this_cpu_write((fpuemustats).cmp_or_s, 0); + __this_cpu_write((fpuemustats).cmp_or_d, 0); + __this_cpu_write((fpuemustats).cmp_ueq_s, 0); + __this_cpu_write((fpuemustats).cmp_ueq_d, 0); + __this_cpu_write((fpuemustats).cmp_ule_s, 0); + __this_cpu_write((fpuemustats).cmp_ule_d, 0); + __this_cpu_write((fpuemustats).cmp_ult_s, 0); + __this_cpu_write((fpuemustats).cmp_ult_d, 0); + __this_cpu_write((fpuemustats).cmp_un_s, 0); + __this_cpu_write((fpuemustats).cmp_un_d, 0); + __this_cpu_write((fpuemustats).cmp_une_s, 0); + __this_cpu_write((fpuemustats).cmp_une_d, 0); + __this_cpu_write((fpuemustats).cmp_saf_s, 0); + __this_cpu_write((fpuemustats).cmp_saf_d, 0); + __this_cpu_write((fpuemustats).cmp_seq_s, 0); + __this_cpu_write((fpuemustats).cmp_seq_d, 0); + __this_cpu_write((fpuemustats).cmp_sle_s, 0); + __this_cpu_write((fpuemustats).cmp_sle_d, 0); + __this_cpu_write((fpuemustats).cmp_slt_s, 0); + __this_cpu_write((fpuemustats).cmp_slt_d, 0); + __this_cpu_write((fpuemustats).cmp_sne_s, 0); + __this_cpu_write((fpuemustats).cmp_sne_d, 0); + __this_cpu_write((fpuemustats).cmp_sor_s, 0); + __this_cpu_write((fpuemustats).cmp_sor_d, 0); + __this_cpu_write((fpuemustats).cmp_sueq_s, 0); + __this_cpu_write((fpuemustats).cmp_sueq_d, 0); + __this_cpu_write((fpuemustats).cmp_sule_s, 0); + __this_cpu_write((fpuemustats).cmp_sule_d, 0); + __this_cpu_write((fpuemustats).cmp_sult_s, 0); + __this_cpu_write((fpuemustats).cmp_sult_d, 0); + __this_cpu_write((fpuemustats).cmp_sun_s, 0); + __this_cpu_write((fpuemustats).cmp_sun_d, 0); + __this_cpu_write((fpuemustats).cmp_sune_s, 0); + __this_cpu_write((fpuemustats).cmp_sune_d, 0); + __this_cpu_write((fpuemustats).cvt_d_l, 0); + __this_cpu_write((fpuemustats).cvt_d_s, 0); + __this_cpu_write((fpuemustats).cvt_d_w, 0); + 
__this_cpu_write((fpuemustats).cvt_l_s, 0); + __this_cpu_write((fpuemustats).cvt_l_d, 0); + __this_cpu_write((fpuemustats).cvt_s_d, 0); + __this_cpu_write((fpuemustats).cvt_s_l, 0); + __this_cpu_write((fpuemustats).cvt_s_w, 0); + __this_cpu_write((fpuemustats).cvt_w_s, 0); + __this_cpu_write((fpuemustats).cvt_w_d, 0); + __this_cpu_write((fpuemustats).div_s, 0); + __this_cpu_write((fpuemustats).div_d, 0); + __this_cpu_write((fpuemustats).floor_w_s, 0); + __this_cpu_write((fpuemustats).floor_w_d, 0); + __this_cpu_write((fpuemustats).floor_l_s, 0); + __this_cpu_write((fpuemustats).floor_l_d, 0); + __this_cpu_write((fpuemustats).maddf_s, 0); + __this_cpu_write((fpuemustats).maddf_d, 0); + __this_cpu_write((fpuemustats).max_s, 0); + __this_cpu_write((fpuemustats).max_d, 0); + __this_cpu_write((fpuemustats).maxa_s, 0); + __this_cpu_write((fpuemustats).maxa_d, 0); + __this_cpu_write((fpuemustats).min_s, 0); + __this_cpu_write((fpuemustats).min_d, 0); + __this_cpu_write((fpuemustats).mina_s, 0); + __this_cpu_write((fpuemustats).mina_d, 0); + __this_cpu_write((fpuemustats).mov_s, 0); + __this_cpu_write((fpuemustats).mov_d, 0); + __this_cpu_write((fpuemustats).msubf_s, 0); + __this_cpu_write((fpuemustats).msubf_d, 0); + __this_cpu_write((fpuemustats).mul_s, 0); + __this_cpu_write((fpuemustats).mul_d, 0); + __this_cpu_write((fpuemustats).neg_s, 0); + __this_cpu_write((fpuemustats).neg_d, 0); + __this_cpu_write((fpuemustats).recip_s, 0); + __this_cpu_write((fpuemustats).recip_d, 0); + __this_cpu_write((fpuemustats).rint_s, 0); + __this_cpu_write((fpuemustats).rint_d, 0); + __this_cpu_write((fpuemustats).round_w_s, 0); + __this_cpu_write((fpuemustats).round_w_d, 0); + __this_cpu_write((fpuemustats).round_l_s, 0); + __this_cpu_write((fpuemustats).round_l_d, 0); + __this_cpu_write((fpuemustats).rsqrt_s, 0); + __this_cpu_write((fpuemustats).rsqrt_d, 0); + __this_cpu_write((fpuemustats).sel_s, 0); + __this_cpu_write((fpuemustats).sel_d, 0); + 
__this_cpu_write((fpuemustats).seleqz_s, 0); + __this_cpu_write((fpuemustats).seleqz_d, 0); + __this_cpu_write((fpuemustats).selnez_s, 0); + __this_cpu_write((fpuemustats).selnez_d, 0); + __this_cpu_write((fpuemustats).sqrt_s, 0); + __this_cpu_write((fpuemustats).sqrt_d, 0); + __this_cpu_write((fpuemustats).sub_s, 0); + __this_cpu_write((fpuemustats).sub_d, 0); + __this_cpu_write((fpuemustats).trunc_w_s, 0); + __this_cpu_write((fpuemustats).trunc_w_d, 0); + __this_cpu_write((fpuemustats).trunc_l_s, 0); + __this_cpu_write((fpuemustats).trunc_l_d, 0); + + return 0; +} + +static int fpuemustats_clear_open(struct inode *inode, struct file *file) +{ + return single_open(file, fpuemustats_clear_show, inode->i_private); +} + +static const struct file_operations fpuemustats_clear_fops = { + .open = fpuemustats_clear_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static int __init debugfs_fpuemu(void) { - struct dentry *d, *dir; + struct dentry *fpuemu_debugfs_base_dir; + struct dentry *fpuemu_debugfs_inst_dir; + struct dentry *d, *reset_file; if (!mips_debugfs_dir) return -ENODEV; - dir = debugfs_create_dir("fpuemustats", mips_debugfs_dir); - if (!dir) + + fpuemu_debugfs_base_dir = debugfs_create_dir("fpuemustats", + mips_debugfs_dir); + if (!fpuemu_debugfs_base_dir) + return -ENOMEM; + + reset_file = debugfs_create_file("fpuemustats_clear", 0444, + mips_debugfs_dir, NULL, + &fpuemustats_clear_fops); + if (!reset_file) return -ENOMEM; #define FPU_EMU_STAT_OFFSET(m) \ @@ -43,7 +219,7 @@ static int __init debugfs_fpuemu(void) #define FPU_STAT_CREATE(m) \ do { \ - d = debugfs_create_file(#m , S_IRUGO, dir, \ + d = debugfs_create_file(#m, 0444, fpuemu_debugfs_base_dir, \ (void *)FPU_EMU_STAT_OFFSET(m), \ &fops_fpuemu_stat); \ if (!d) \ @@ -53,6 +229,7 @@ do { \ FPU_STAT_CREATE(emulated); FPU_STAT_CREATE(loads); FPU_STAT_CREATE(stores); + FPU_STAT_CREATE(branches); FPU_STAT_CREATE(cp1ops); FPU_STAT_CREATE(cp1xops); FPU_STAT_CREATE(errors); 
@@ -63,6 +240,139 @@ do { \ FPU_STAT_CREATE(ieee754_invalidop); FPU_STAT_CREATE(ds_emul); + fpuemu_debugfs_inst_dir = debugfs_create_dir("instructions", + fpuemu_debugfs_base_dir); + if (!fpuemu_debugfs_inst_dir) + return -ENOMEM; + +#define FPU_STAT_CREATE_EX(m) \ +do { \ + char name[32]; \ + \ + adjust_instruction_counter_name(name, #m); \ + \ + d = debugfs_create_file(name, 0444, fpuemu_debugfs_inst_dir, \ + (void *)FPU_EMU_STAT_OFFSET(m), \ + &fops_fpuemu_stat); \ + if (!d) \ + return -ENOMEM; \ +} while (0) + + FPU_STAT_CREATE_EX(abs_s); + FPU_STAT_CREATE_EX(abs_d); + FPU_STAT_CREATE_EX(add_s); + FPU_STAT_CREATE_EX(add_d); + FPU_STAT_CREATE_EX(bc1eqz); + FPU_STAT_CREATE_EX(bc1nez); + FPU_STAT_CREATE_EX(ceil_w_s); + FPU_STAT_CREATE_EX(ceil_w_d); + FPU_STAT_CREATE_EX(ceil_l_s); + FPU_STAT_CREATE_EX(ceil_l_d); + FPU_STAT_CREATE_EX(class_s); + FPU_STAT_CREATE_EX(class_d); + FPU_STAT_CREATE_EX(cmp_af_s); + FPU_STAT_CREATE_EX(cmp_af_d); + FPU_STAT_CREATE_EX(cmp_eq_s); + FPU_STAT_CREATE_EX(cmp_eq_d); + FPU_STAT_CREATE_EX(cmp_le_s); + FPU_STAT_CREATE_EX(cmp_le_d); + FPU_STAT_CREATE_EX(cmp_lt_s); + FPU_STAT_CREATE_EX(cmp_lt_d); + FPU_STAT_CREATE_EX(cmp_ne_s); + FPU_STAT_CREATE_EX(cmp_ne_d); + FPU_STAT_CREATE_EX(cmp_or_s); + FPU_STAT_CREATE_EX(cmp_or_d); + FPU_STAT_CREATE_EX(cmp_ueq_s); + FPU_STAT_CREATE_EX(cmp_ueq_d); + FPU_STAT_CREATE_EX(cmp_ule_s); + FPU_STAT_CREATE_EX(cmp_ule_d); + FPU_STAT_CREATE_EX(cmp_ult_s); + FPU_STAT_CREATE_EX(cmp_ult_d); + FPU_STAT_CREATE_EX(cmp_un_s); + FPU_STAT_CREATE_EX(cmp_un_d); + FPU_STAT_CREATE_EX(cmp_une_s); + FPU_STAT_CREATE_EX(cmp_une_d); + FPU_STAT_CREATE_EX(cmp_saf_s); + FPU_STAT_CREATE_EX(cmp_saf_d); + FPU_STAT_CREATE_EX(cmp_seq_s); + FPU_STAT_CREATE_EX(cmp_seq_d); + FPU_STAT_CREATE_EX(cmp_sle_s); + FPU_STAT_CREATE_EX(cmp_sle_d); + FPU_STAT_CREATE_EX(cmp_slt_s); + FPU_STAT_CREATE_EX(cmp_slt_d); + FPU_STAT_CREATE_EX(cmp_sne_s); + FPU_STAT_CREATE_EX(cmp_sne_d); + FPU_STAT_CREATE_EX(cmp_sor_s); + FPU_STAT_CREATE_EX(cmp_sor_d); + 
FPU_STAT_CREATE_EX(cmp_sueq_s); + FPU_STAT_CREATE_EX(cmp_sueq_d); + FPU_STAT_CREATE_EX(cmp_sule_s); + FPU_STAT_CREATE_EX(cmp_sule_d); + FPU_STAT_CREATE_EX(cmp_sult_s); + FPU_STAT_CREATE_EX(cmp_sult_d); + FPU_STAT_CREATE_EX(cmp_sun_s); + FPU_STAT_CREATE_EX(cmp_sun_d); + FPU_STAT_CREATE_EX(cmp_sune_s); + FPU_STAT_CREATE_EX(cmp_sune_d); + FPU_STAT_CREATE_EX(cvt_d_l); + FPU_STAT_CREATE_EX(cvt_d_s); + FPU_STAT_CREATE_EX(cvt_d_w); + FPU_STAT_CREATE_EX(cvt_l_s); + FPU_STAT_CREATE_EX(cvt_l_d); + FPU_STAT_CREATE_EX(cvt_s_d); + FPU_STAT_CREATE_EX(cvt_s_l); + FPU_STAT_CREATE_EX(cvt_s_w); + FPU_STAT_CREATE_EX(cvt_w_s); + FPU_STAT_CREATE_EX(cvt_w_d); + FPU_STAT_CREATE_EX(div_s); + FPU_STAT_CREATE_EX(div_d); + FPU_STAT_CREATE_EX(floor_w_s); + FPU_STAT_CREATE_EX(floor_w_d); + FPU_STAT_CREATE_EX(floor_l_s); + FPU_STAT_CREATE_EX(floor_l_d); + FPU_STAT_CREATE_EX(maddf_s); + FPU_STAT_CREATE_EX(maddf_d); + FPU_STAT_CREATE_EX(max_s); + FPU_STAT_CREATE_EX(max_d); + FPU_STAT_CREATE_EX(maxa_s); + FPU_STAT_CREATE_EX(maxa_d); + FPU_STAT_CREATE_EX(min_s); + FPU_STAT_CREATE_EX(min_d); + FPU_STAT_CREATE_EX(mina_s); + FPU_STAT_CREATE_EX(mina_d); + FPU_STAT_CREATE_EX(mov_s); + FPU_STAT_CREATE_EX(mov_d); + FPU_STAT_CREATE_EX(msubf_s); + FPU_STAT_CREATE_EX(msubf_d); + FPU_STAT_CREATE_EX(mul_s); + FPU_STAT_CREATE_EX(mul_d); + FPU_STAT_CREATE_EX(neg_s); + FPU_STAT_CREATE_EX(neg_d); + FPU_STAT_CREATE_EX(recip_s); + FPU_STAT_CREATE_EX(recip_d); + FPU_STAT_CREATE_EX(rint_s); + FPU_STAT_CREATE_EX(rint_d); + FPU_STAT_CREATE_EX(round_w_s); + FPU_STAT_CREATE_EX(round_w_d); + FPU_STAT_CREATE_EX(round_l_s); + FPU_STAT_CREATE_EX(round_l_d); + FPU_STAT_CREATE_EX(rsqrt_s); + FPU_STAT_CREATE_EX(rsqrt_d); + FPU_STAT_CREATE_EX(sel_s); + FPU_STAT_CREATE_EX(sel_d); + FPU_STAT_CREATE_EX(seleqz_s); + FPU_STAT_CREATE_EX(seleqz_d); + FPU_STAT_CREATE_EX(selnez_s); + FPU_STAT_CREATE_EX(selnez_d); + FPU_STAT_CREATE_EX(sqrt_s); + FPU_STAT_CREATE_EX(sqrt_d); + FPU_STAT_CREATE_EX(sub_s); + FPU_STAT_CREATE_EX(sub_d); + 
FPU_STAT_CREATE_EX(trunc_w_s); + FPU_STAT_CREATE_EX(trunc_w_d); + FPU_STAT_CREATE_EX(trunc_l_s); + FPU_STAT_CREATE_EX(trunc_l_d); + return 0; } arch_initcall(debugfs_fpuemu); diff --git a/arch/mips/math-emu/sp_add.c b/arch/mips/math-emu/sp_add.c index f1c87b07d3b4..51dced9fbdaf 100644 --- a/arch/mips/math-emu/sp_add.c +++ b/arch/mips/math-emu/sp_add.c @@ -104,8 +104,7 @@ union ieee754sp ieee754sp_add(union ieee754sp x, union ieee754sp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; - - /* FALL THROUGH */ + /* fall through */ case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): SPDNORMY; @@ -132,13 +131,15 @@ union ieee754sp ieee754sp_add(union ieee754sp x, union ieee754sp y) * Have to shift y fraction right to align. */ s = xe - ye; - SPXSRSYn(s); + ym = XSPSRS(ym, s); + ye += s; } else if (ye > xe) { /* * Have to shift x fraction right to align. */ s = ye - xe; - SPXSRSXn(s); + xm = XSPSRS(xm, s); + xe += s; } assert(xe == ye); assert(xe <= SP_EMAX); diff --git a/arch/mips/math-emu/sp_div.c b/arch/mips/math-emu/sp_div.c index 27f6db3a0a4c..5d2904960eb8 100644 --- a/arch/mips/math-emu/sp_div.c +++ b/arch/mips/math-emu/sp_div.c @@ -23,9 +23,9 @@ union ieee754sp ieee754sp_div(union ieee754sp x, union ieee754sp y) { - unsigned rm; + unsigned int rm; int re; - unsigned bm; + unsigned int bm; COMPXSP; COMPYSP; @@ -103,6 +103,7 @@ union ieee754sp ieee754sp_div(union ieee754sp x, union ieee754sp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; + /* fall through */ case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): SPDNORMY; diff --git a/arch/mips/math-emu/sp_fdp.c b/arch/mips/math-emu/sp_fdp.c index 3797148893ad..36a50f9082d1 100644 --- a/arch/mips/math-emu/sp_fdp.c +++ b/arch/mips/math-emu/sp_fdp.c @@ -44,13 +44,17 @@ union ieee754sp ieee754sp_fdp(union ieee754dp x) switch (xc) { case IEEE754_CLASS_SNAN: - return ieee754sp_nanxcpt(ieee754sp_nan_fdp(xs, xm)); + x = ieee754dp_nanxcpt(x); + EXPLODEXDP; + /* fall through */ case 
IEEE754_CLASS_QNAN: y = ieee754sp_nan_fdp(xs, xm); - EXPLODEYSP; - if (!ieee754_class_nan(yc)) - y = ieee754sp_indef(); + if (!ieee754_csr.nan2008) { + EXPLODEYSP; + if (!ieee754_class_nan(yc)) + y = ieee754sp_indef(); + } return y; case IEEE754_CLASS_INF: diff --git a/arch/mips/math-emu/sp_fint.c b/arch/mips/math-emu/sp_fint.c index d5d8495b2cc4..1a35d12b6fc8 100644 --- a/arch/mips/math-emu/sp_fint.c +++ b/arch/mips/math-emu/sp_fint.c @@ -23,7 +23,7 @@ union ieee754sp ieee754sp_fint(int x) { - unsigned xm; + unsigned int xm; int xe; int xs; diff --git a/arch/mips/math-emu/sp_fmax.c b/arch/mips/math-emu/sp_fmax.c index 74a5a00d2f22..22019ed691df 100644 --- a/arch/mips/math-emu/sp_fmax.c +++ b/arch/mips/math-emu/sp_fmax.c @@ -96,6 +96,7 @@ union ieee754sp ieee754sp_fmax(union ieee754sp x, union ieee754sp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; + /* fall through */ case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): SPDNORMY; @@ -224,6 +225,7 @@ union ieee754sp ieee754sp_fmaxa(union ieee754sp x, union ieee754sp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; + /* fall through */ case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): SPDNORMY; diff --git a/arch/mips/math-emu/sp_fmin.c b/arch/mips/math-emu/sp_fmin.c index c51385f46b09..feaec3985cca 100644 --- a/arch/mips/math-emu/sp_fmin.c +++ b/arch/mips/math-emu/sp_fmin.c @@ -96,6 +96,7 @@ union ieee754sp ieee754sp_fmin(union ieee754sp x, union ieee754sp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; + /* fall through */ case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): SPDNORMY; @@ -224,6 +225,7 @@ union ieee754sp ieee754sp_fmina(union ieee754sp x, union ieee754sp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; + /* fall through */ case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): SPDNORMY; diff --git a/arch/mips/math-emu/sp_maddf.c b/arch/mips/math-emu/sp_maddf.c index dd1dd83e34eb..07ba675401e2 100644 --- 
a/arch/mips/math-emu/sp_maddf.c +++ b/arch/mips/math-emu/sp_maddf.c @@ -14,81 +14,60 @@ #include "ieee754sp.h" -union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x, - union ieee754sp y) + +static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x, + union ieee754sp y, enum maddf_flags flags) { int re; int rs; - unsigned rm; - unsigned short lxm; - unsigned short hxm; - unsigned short lym; - unsigned short hym; - unsigned lrm; - unsigned hrm; - unsigned t; - unsigned at; + unsigned int rm; + u64 rm64; + u64 zm64; int s; COMPXSP; COMPYSP; - u32 zm; int ze; int zs __maybe_unused; int zc; + COMPZSP; EXPLODEXSP; EXPLODEYSP; - EXPLODESP(z, zc, zs, ze, zm) + EXPLODEZSP; FLUSHXSP; FLUSHYSP; - FLUSHSP(z, zc, zs, ze, zm); + FLUSHZSP; ieee754_clearcx(); - switch (zc) { - case IEEE754_CLASS_SNAN: - ieee754_setcx(IEEE754_INVALID_OPERATION); + /* + * Handle the cases when at least one of x, y or z is a NaN. + * Order of precedence is sNaN, qNaN and z, x, y. + */ + if (zc == IEEE754_CLASS_SNAN) return ieee754sp_nanxcpt(z); - case IEEE754_CLASS_DNORM: - SPDNORMx(zm, ze); - /* QNAN is handled separately below */ - } - - switch (CLPAIR(xc, yc)) { - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN): + if (xc == IEEE754_CLASS_SNAN) + return ieee754sp_nanxcpt(x); + if (yc == IEEE754_CLASS_SNAN) return ieee754sp_nanxcpt(y); + if (zc == IEEE754_CLASS_QNAN) + return z; + if (xc == IEEE754_CLASS_QNAN) + return x; + if (yc == IEEE754_CLASS_QNAN) + return y; - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO): - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM): - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM): - case 
CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): - return ieee754sp_nanxcpt(x); + if (zc == IEEE754_CLASS_DNORM) + SPDNORMZ; + /* ZERO z cases are handled separately below */ - case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): - return y; + switch (CLPAIR(xc, yc)) { - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF): - return x; /* * Infinity handling */ case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): - if (zc == IEEE754_CLASS_QNAN) - return z; ieee754_setcx(IEEE754_INVALID_OPERATION); return ieee754sp_indef(); @@ -97,9 +76,27 @@ union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x, case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): - if (zc == IEEE754_CLASS_QNAN) - return z; - return ieee754sp_inf(xs ^ ys); + if ((zc == IEEE754_CLASS_INF) && + ((!(flags & MADDF_NEGATE_PRODUCT) && (zs != (xs ^ ys))) || + ((flags & MADDF_NEGATE_PRODUCT) && (zs == (xs ^ ys))))) { + /* + * Cases of addition of infinities with opposite signs + * or subtraction of infinities with same signs. + */ + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754sp_indef(); + } + /* + * z is here either not an infinity, or an infinity having the + * same sign as product (x*y) (in case of MADDF.D instruction) + * or product -(x*y) (in MSUBF.D case). The result must be an + * infinity, and its sign is determined only by the value of + * (flags & MADDF_NEGATE_PRODUCT) and the signs of x and y. 
+ */ + if (flags & MADDF_NEGATE_PRODUCT) + return ieee754sp_inf(1 ^ (xs ^ ys)); + else + return ieee754sp_inf(xs ^ ys); case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): @@ -108,34 +105,45 @@ union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x, case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): if (zc == IEEE754_CLASS_INF) return ieee754sp_inf(zs); - /* Multiplication is 0 so just return z */ + if (zc == IEEE754_CLASS_ZERO) { + /* Handle cases +0 + (-0) and similar ones. */ + if ((!(flags & MADDF_NEGATE_PRODUCT) + && (zs == (xs ^ ys))) || + ((flags & MADDF_NEGATE_PRODUCT) + && (zs != (xs ^ ys)))) + /* + * Cases of addition of zeros of equal signs + * or subtraction of zeroes of opposite signs. + * The sign of the resulting zero is in any + * such case determined only by the sign of z. + */ + return z; + + return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD); + } + /* x*y is here 0, and z is not 0, so just return z */ return z; case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; + /* fall through */ case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): - if (zc == IEEE754_CLASS_QNAN) - return z; - else if (zc == IEEE754_CLASS_INF) + if (zc == IEEE754_CLASS_INF) return ieee754sp_inf(zs); SPDNORMY; break; case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): - if (zc == IEEE754_CLASS_QNAN) - return z; - else if (zc == IEEE754_CLASS_INF) + if (zc == IEEE754_CLASS_INF) return ieee754sp_inf(zs); SPDNORMX; break; case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): - if (zc == IEEE754_CLASS_QNAN) - return z; - else if (zc == IEEE754_CLASS_INF) + if (zc == IEEE754_CLASS_INF) return ieee754sp_inf(zs); - /* fall through to real computations */ + /* continue to real computations */ } /* Finally get to do some computation */ @@ -154,102 +162,104 @@ union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x, re = xe + ye; rs = xs ^ ys; + if (flags & MADDF_NEGATE_PRODUCT) + rs ^= 1; 
- /* shunt to top of word */ - xm <<= 32 - (SP_FBITS + 1); - ym <<= 32 - (SP_FBITS + 1); + /* Multiple 24 bit xm and ym to give 48 bit results */ + rm64 = (uint64_t)xm * ym; - /* - * Multiply 32 bits xm, ym to give high 32 bits rm with stickness. - */ - lxm = xm & 0xffff; - hxm = xm >> 16; - lym = ym & 0xffff; - hym = ym >> 16; + /* Shunt to top of word */ + rm64 = rm64 << 16; - lrm = lxm * lym; /* 16 * 16 => 32 */ - hrm = hxm * hym; /* 16 * 16 => 32 */ - - t = lxm * hym; /* 16 * 16 => 32 */ - at = lrm + (t << 16); - hrm += at < lrm; - lrm = at; - hrm = hrm + (t >> 16); - - t = hxm * lym; /* 16 * 16 => 32 */ - at = lrm + (t << 16); - hrm += at < lrm; - lrm = at; - hrm = hrm + (t >> 16); - - rm = hrm | (lrm != 0); - - /* - * Sticky shift down to normal rounding precision. - */ - if ((int) rm < 0) { - rm = (rm >> (32 - (SP_FBITS + 1 + 3))) | - ((rm << (SP_FBITS + 1 + 3)) != 0); + /* Put explicit bit at bit 62 if necessary */ + if ((int64_t) rm64 < 0) { + rm64 = rm64 >> 1; re++; - } else { - rm = (rm >> (32 - (SP_FBITS + 1 + 3 + 1))) | - ((rm << (SP_FBITS + 1 + 3 + 1)) != 0); } - assert(rm & (SP_HIDDEN_BIT << 3)); - /* And now the addition */ + assert(rm64 & (1 << 62)); - assert(zm & SP_HIDDEN_BIT); + if (zc == IEEE754_CLASS_ZERO) { + /* + * Move explicit bit from bit 62 to bit 26 since the + * ieee754sp_format code expects the mantissa to be + * 27 bits wide (24 + 3 rounding bits). + */ + rm = XSPSRS64(rm64, (62 - 26)); + return ieee754sp_format(rs, re, rm); + } - /* - * Provide guard,round and stick bit space. - */ - zm <<= 3; + /* Move explicit bit from bit 23 to bit 62 */ + zm64 = (uint64_t)zm << (62 - 23); + assert(zm64 & (1 << 62)); + /* Make the exponents the same */ if (ze > re) { /* - * Have to shift y fraction right to align. + * Have to shift r fraction right to align. */ s = ze - re; - SPXSRSYn(s); + rm64 = XSPSRS64(rm64, s); + re += s; } else if (re > ze) { /* - * Have to shift x fraction right to align. + * Have to shift z fraction right to align. 
*/ s = re - ze; - SPXSRSYn(s); + zm64 = XSPSRS64(zm64, s); + ze += s; } assert(ze == re); assert(ze <= SP_EMAX); + /* Do the addition */ if (zs == rs) { /* - * Generate 28 bit result of adding two 27 bit numbers - * leaving result in zm, zs and ze. + * Generate 64 bit result by adding two 63 bit numbers + * leaving result in zm64, zs and ze. */ - zm = zm + rm; - - if (zm >> (SP_FBITS + 1 + 3)) { /* carry out */ - SPXSRSX1(); + zm64 = zm64 + rm64; + if ((int64_t)zm64 < 0) { /* carry out */ + zm64 = XSPSRS1(zm64); + ze++; } } else { - if (zm >= rm) { - zm = zm - rm; + if (zm64 >= rm64) { + zm64 = zm64 - rm64; } else { - zm = rm - zm; + zm64 = rm64 - zm64; zs = rs; } - if (zm == 0) + if (zm64 == 0) return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD); /* - * Normalize in extended single precision + * Put explicit bit at bit 62 if necessary. */ - while ((zm >> (SP_MBITS + 3)) == 0) { - zm <<= 1; + while ((zm64 >> 62) == 0) { + zm64 <<= 1; ze--; } - } + + /* + * Move explicit bit from bit 62 to bit 26 since the + * ieee754sp_format code expects the mantissa to be + * 27 bits wide (24 + 3 rounding bits). + */ + zm = XSPSRS64(zm64, (62 - 26)); + return ieee754sp_format(zs, ze, zm); } + +union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x, + union ieee754sp y) +{ + return _sp_maddf(z, x, y, 0); +} + +union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x, + union ieee754sp y) +{ + return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT); +} diff --git a/arch/mips/math-emu/sp_msubf.c b/arch/mips/math-emu/sp_msubf.c deleted file mode 100644 index 81c38b980d69..000000000000 --- a/arch/mips/math-emu/sp_msubf.c +++ /dev/null @@ -1,258 +0,0 @@ -/* - * IEEE754 floating point arithmetic - * single precision: MSUB.f (Fused Multiply Subtract) - * MSUBF.fmt: FPR[fd] = FPR[fd] - (FPR[fs] x FPR[ft]) - * - * MIPS floating point support - * Copyright (C) 2015 Imagination Technologies, Ltd. 
- * Author: Markos Chandras <markos.chandras@imgtec.com> - * - * This program is free software; you can distribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; version 2 of the License. - */ - -#include "ieee754sp.h" - -union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x, - union ieee754sp y) -{ - int re; - int rs; - unsigned rm; - unsigned short lxm; - unsigned short hxm; - unsigned short lym; - unsigned short hym; - unsigned lrm; - unsigned hrm; - unsigned t; - unsigned at; - int s; - - COMPXSP; - COMPYSP; - u32 zm; int ze; int zs __maybe_unused; int zc; - - EXPLODEXSP; - EXPLODEYSP; - EXPLODESP(z, zc, zs, ze, zm) - - FLUSHXSP; - FLUSHYSP; - FLUSHSP(z, zc, zs, ze, zm); - - ieee754_clearcx(); - - switch (zc) { - case IEEE754_CLASS_SNAN: - ieee754_setcx(IEEE754_INVALID_OPERATION); - return ieee754sp_nanxcpt(z); - case IEEE754_CLASS_DNORM: - SPDNORMx(zm, ze); - /* QNAN is handled separately below */ - } - - switch (CLPAIR(xc, yc)) { - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN): - return ieee754sp_nanxcpt(y); - - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO): - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM): - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM): - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): - return ieee754sp_nanxcpt(x); - - case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): - return y; - - case CLPAIR(IEEE754_CLASS_QNAN, 
IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF): - return x; - - /* - * Infinity handling - */ - case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): - case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): - if (zc == IEEE754_CLASS_QNAN) - return z; - ieee754_setcx(IEEE754_INVALID_OPERATION); - return ieee754sp_indef(); - - case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): - case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): - case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): - case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): - case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): - if (zc == IEEE754_CLASS_QNAN) - return z; - return ieee754sp_inf(xs ^ ys); - - case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): - case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): - case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): - case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): - case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): - if (zc == IEEE754_CLASS_INF) - return ieee754sp_inf(zs); - /* Multiplication is 0 so just return z */ - return z; - - case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): - SPDNORMX; - - case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): - if (zc == IEEE754_CLASS_QNAN) - return z; - else if (zc == IEEE754_CLASS_INF) - return ieee754sp_inf(zs); - SPDNORMY; - break; - - case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): - if (zc == IEEE754_CLASS_QNAN) - return z; - else if (zc == IEEE754_CLASS_INF) - return ieee754sp_inf(zs); - SPDNORMX; - break; - - case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): - if (zc == IEEE754_CLASS_QNAN) - return z; - else if (zc == IEEE754_CLASS_INF) - return ieee754sp_inf(zs); - /* fall through to real compuation */ - } - - /* Finally get to do some computation */ - - /* - * Do the multiplication bit first - 
* - * rm = xm * ym, re = xe + ye basically - * - * At this point xm and ym should have been normalized. - */ - - /* rm = xm * ym, re = xe+ye basically */ - assert(xm & SP_HIDDEN_BIT); - assert(ym & SP_HIDDEN_BIT); - - re = xe + ye; - rs = xs ^ ys; - - /* shunt to top of word */ - xm <<= 32 - (SP_FBITS + 1); - ym <<= 32 - (SP_FBITS + 1); - - /* - * Multiply 32 bits xm, ym to give high 32 bits rm with stickness. - */ - lxm = xm & 0xffff; - hxm = xm >> 16; - lym = ym & 0xffff; - hym = ym >> 16; - - lrm = lxm * lym; /* 16 * 16 => 32 */ - hrm = hxm * hym; /* 16 * 16 => 32 */ - - t = lxm * hym; /* 16 * 16 => 32 */ - at = lrm + (t << 16); - hrm += at < lrm; - lrm = at; - hrm = hrm + (t >> 16); - - t = hxm * lym; /* 16 * 16 => 32 */ - at = lrm + (t << 16); - hrm += at < lrm; - lrm = at; - hrm = hrm + (t >> 16); - - rm = hrm | (lrm != 0); - - /* - * Sticky shift down to normal rounding precision. - */ - if ((int) rm < 0) { - rm = (rm >> (32 - (SP_FBITS + 1 + 3))) | - ((rm << (SP_FBITS + 1 + 3)) != 0); - re++; - } else { - rm = (rm >> (32 - (SP_FBITS + 1 + 3 + 1))) | - ((rm << (SP_FBITS + 1 + 3 + 1)) != 0); - } - assert(rm & (SP_HIDDEN_BIT << 3)); - - /* And now the subtraction */ - - /* Flip sign of r and handle as add */ - rs ^= 1; - - assert(zm & SP_HIDDEN_BIT); - - /* - * Provide guard,round and stick bit space. - */ - zm <<= 3; - - if (ze > re) { - /* - * Have to shift y fraction right to align. - */ - s = ze - re; - SPXSRSYn(s); - } else if (re > ze) { - /* - * Have to shift x fraction right to align. - */ - s = re - ze; - SPXSRSYn(s); - } - assert(ze == re); - assert(ze <= SP_EMAX); - - if (zs == rs) { - /* - * Generate 28 bit result of adding two 27 bit numbers - * leaving result in zm, zs and ze. 
- */ - zm = zm + rm; - - if (zm >> (SP_FBITS + 1 + 3)) { /* carry out */ - SPXSRSX1(); /* shift preserving sticky */ - } - } else { - if (zm >= rm) { - zm = zm - rm; - } else { - zm = rm - zm; - zs = rs; - } - if (zm == 0) - return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD); - - /* - * Normalize in extended single precision - */ - while ((zm >> (SP_MBITS + 3)) == 0) { - zm <<= 1; - ze--; - } - - } - return ieee754sp_format(zs, ze, zm); -} diff --git a/arch/mips/math-emu/sp_mul.c b/arch/mips/math-emu/sp_mul.c index d910c43a6f30..fde71e293ec4 100644 --- a/arch/mips/math-emu/sp_mul.c +++ b/arch/mips/math-emu/sp_mul.c @@ -25,15 +25,15 @@ union ieee754sp ieee754sp_mul(union ieee754sp x, union ieee754sp y) { int re; int rs; - unsigned rm; + unsigned int rm; unsigned short lxm; unsigned short hxm; unsigned short lym; unsigned short hym; - unsigned lrm; - unsigned hrm; - unsigned t; - unsigned at; + unsigned int lrm; + unsigned int hrm; + unsigned int t; + unsigned int at; COMPXSP; COMPYSP; @@ -101,6 +101,7 @@ union ieee754sp ieee754sp_mul(union ieee754sp x, union ieee754sp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; + /* fall through */ case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): SPDNORMY; diff --git a/arch/mips/math-emu/sp_rint.c b/arch/mips/math-emu/sp_rint.c new file mode 100644 index 000000000000..70765b17e196 --- /dev/null +++ b/arch/mips/math-emu/sp_rint.c @@ -0,0 +1,90 @@ +/* IEEE754 floating point arithmetic + * single precision + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + * Copyright (C) 2017 Imagination Technologies, Ltd. + * Author: Aleksandar Markovic <aleksandar.markovic@imgtec.com> + * + * This program is free software; you can distribute it and/or modify it + * under the terms of the GNU General Public License (Version 2) as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program. + */ + +#include "ieee754sp.h" + +union ieee754sp ieee754sp_rint(union ieee754sp x) +{ + union ieee754sp ret; + u32 residue; + int sticky; + int round; + int odd; + + COMPXDP; /* <-- DP needed for 64-bit mantissa tmp */ + + ieee754_clearcx(); + + EXPLODEXSP; + FLUSHXSP; + + if (xc == IEEE754_CLASS_SNAN) + return ieee754sp_nanxcpt(x); + + if ((xc == IEEE754_CLASS_QNAN) || + (xc == IEEE754_CLASS_INF) || + (xc == IEEE754_CLASS_ZERO)) + return x; + + if (xe >= SP_FBITS) + return x; + + if (xe < -1) { + residue = xm; + round = 0; + sticky = residue != 0; + xm = 0; + } else { + residue = xm << (xe + 1); + residue <<= 31 - SP_FBITS; + round = (residue >> 31) != 0; + sticky = (residue << 1) != 0; + xm >>= SP_FBITS - xe; + } + + odd = (xm & 0x1) != 0x0; + + switch (ieee754_csr.rm) { + case FPU_CSR_RN: /* toward nearest */ + if (round && (sticky || odd)) + xm++; + break; + case FPU_CSR_RZ: /* toward zero */ + break; + case FPU_CSR_RU: /* toward +infinity */ + if ((round || sticky) && !xs) + xm++; + break; + case FPU_CSR_RD: /* toward -infinity */ + if ((round || sticky) && xs) + xm++; + break; + } + + if (round || sticky) + ieee754_setcx(IEEE754_INEXACT); + + ret = ieee754sp_flong(xm); + SPSIGN(ret) = xs; + + return ret; +} diff --git a/arch/mips/math-emu/sp_simple.c b/arch/mips/math-emu/sp_simple.c index c50e9451f2d2..756c9cf2dfd2 100644 --- a/arch/mips/math-emu/sp_simple.c +++ b/arch/mips/math-emu/sp_simple.c @@ -23,27 +23,39 @@ union ieee754sp ieee754sp_neg(union ieee754sp x) { - unsigned int oldrm; union ieee754sp y; - oldrm = ieee754_csr.rm; - ieee754_csr.rm = FPU_CSR_RD; - y = ieee754sp_sub(ieee754sp_zero(0), x); - 
ieee754_csr.rm = oldrm; + if (ieee754_csr.abs2008) { + y = x; + SPSIGN(y) = !SPSIGN(x); + } else { + unsigned int oldrm; + + oldrm = ieee754_csr.rm; + ieee754_csr.rm = FPU_CSR_RD; + y = ieee754sp_sub(ieee754sp_zero(0), x); + ieee754_csr.rm = oldrm; + } return y; } union ieee754sp ieee754sp_abs(union ieee754sp x) { - unsigned int oldrm; union ieee754sp y; - oldrm = ieee754_csr.rm; - ieee754_csr.rm = FPU_CSR_RD; - if (SPSIGN(x)) - y = ieee754sp_sub(ieee754sp_zero(0), x); - else - y = ieee754sp_add(ieee754sp_zero(0), x); - ieee754_csr.rm = oldrm; + if (ieee754_csr.abs2008) { + y = x; + SPSIGN(y) = 0; + } else { + unsigned int oldrm; + + oldrm = ieee754_csr.rm; + ieee754_csr.rm = FPU_CSR_RD; + if (SPSIGN(x)) + y = ieee754sp_sub(ieee754sp_zero(0), x); + else + y = ieee754sp_add(ieee754sp_zero(0), x); + ieee754_csr.rm = oldrm; + } return y; } diff --git a/arch/mips/math-emu/sp_sqrt.c b/arch/mips/math-emu/sp_sqrt.c index 67059c33a250..9cc83f012342 100644 --- a/arch/mips/math-emu/sp_sqrt.c +++ b/arch/mips/math-emu/sp_sqrt.c @@ -82,7 +82,8 @@ union ieee754sp ieee754sp_sqrt(union ieee754sp x) /* generate sqrt(x) bit by bit */ ix += ix; - q = s = 0; /* q = sqrt(x) */ + s = 0; + q = 0; /* q = sqrt(x) */ r = 0x01000000; /* r = moving bit from right to left */ while (r != 0) { diff --git a/arch/mips/math-emu/sp_sub.c b/arch/mips/math-emu/sp_sub.c index ec5f937a8b3e..9f2ff72c3d6b 100644 --- a/arch/mips/math-emu/sp_sub.c +++ b/arch/mips/math-emu/sp_sub.c @@ -106,6 +106,7 @@ union ieee754sp ieee754sp_sub(union ieee754sp x, union ieee754sp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; + /* fall through */ case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): SPDNORMY; @@ -134,13 +135,15 @@ union ieee754sp ieee754sp_sub(union ieee754sp x, union ieee754sp y) * have to shift y fraction right to align */ s = xe - ye; - SPXSRSYn(s); + ym = XSPSRS(ym, s); + ye += s; } else if (ye > xe) { /* * have to shift x fraction right to align */ s = ye - xe; - SPXSRSXn(s); + xm 
= XSPSRS(xm, s); + xe += s; } assert(xe == ye); assert(xe <= SP_EMAX); diff --git a/arch/mips/math-emu/sp_tint.c b/arch/mips/math-emu/sp_tint.c index 091299a31798..f4b4cabfe2e1 100644 --- a/arch/mips/math-emu/sp_tint.c +++ b/arch/mips/math-emu/sp_tint.c @@ -38,10 +38,13 @@ int ieee754sp_tint(union ieee754sp x) switch (xc) { case IEEE754_CLASS_SNAN: case IEEE754_CLASS_QNAN: - case IEEE754_CLASS_INF: ieee754_setcx(IEEE754_INVALID_OPERATION); return ieee754si_indef(); + case IEEE754_CLASS_INF: + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754si_overflow(xs); + case IEEE754_CLASS_ZERO: return 0; @@ -56,7 +59,7 @@ int ieee754sp_tint(union ieee754sp x) /* Set invalid. We will only use overflow for floating point overflow */ ieee754_setcx(IEEE754_INVALID_OPERATION); - return ieee754si_indef(); + return ieee754si_overflow(xs); } /* oh gawd */ if (xe > SP_FBITS) { @@ -97,7 +100,7 @@ int ieee754sp_tint(union ieee754sp x) if ((xm >> 31) != 0) { /* This can happen after rounding */ ieee754_setcx(IEEE754_INVALID_OPERATION); - return ieee754si_indef(); + return ieee754si_overflow(xs); } if (round || sticky) ieee754_setcx(IEEE754_INEXACT); diff --git a/arch/mips/math-emu/sp_tlong.c b/arch/mips/math-emu/sp_tlong.c index 9f3c742c1cea..bca5ac995801 100644 --- a/arch/mips/math-emu/sp_tlong.c +++ b/arch/mips/math-emu/sp_tlong.c @@ -20,7 +20,6 @@ */ #include "ieee754sp.h" -#include "ieee754dp.h" s64 ieee754sp_tlong(union ieee754sp x) { @@ -39,10 +38,13 @@ s64 ieee754sp_tlong(union ieee754sp x) switch (xc) { case IEEE754_CLASS_SNAN: case IEEE754_CLASS_QNAN: - case IEEE754_CLASS_INF: ieee754_setcx(IEEE754_INVALID_OPERATION); return ieee754di_indef(); + case IEEE754_CLASS_INF: + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754di_overflow(xs); + case IEEE754_CLASS_ZERO: return 0; @@ -57,7 +59,7 @@ s64 ieee754sp_tlong(union ieee754sp x) /* Set invalid. 
We will only use overflow for floating point overflow */ ieee754_setcx(IEEE754_INVALID_OPERATION); - return ieee754di_indef(); + return ieee754di_overflow(xs); } /* oh gawd */ if (xe > SP_FBITS) { @@ -94,7 +96,7 @@ s64 ieee754sp_tlong(union ieee754sp x) if ((xm >> 63) != 0) { /* This can happen after rounding */ ieee754_setcx(IEEE754_INVALID_OPERATION); - return ieee754di_indef(); + return ieee754di_overflow(xs); } if (round || sticky) ieee754_setcx(IEEE754_INEXACT); |
