diff --git a/include/tcg/tcg-op-gvec.h b/include/tcg/tcg-op-gvec.h index da55fed8704a1..28cafbcc5ce02 100644 --- a/include/tcg/tcg-op-gvec.h +++ b/include/tcg/tcg-op-gvec.h @@ -218,6 +218,25 @@ typedef struct { bool write_aofs; } GVecGen4; +typedef struct { + /* + * Expand inline as a 64-bit or 32-bit integer. Only one of these will be + * non-NULL. + */ + void (*fni8)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64, int64_t); + void (*fni4)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32, int32_t); + /* Expand inline with a host vector type. */ + void (*fniv)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec, TCGv_vec, int64_t); + /* Expand out-of-line helper w/descriptor, data in descriptor. */ + gen_helper_gvec_4 *fno; + /* The optional opcodes, if any, utilized by .fniv. */ + const TCGOpcode *opt_opc; + /* The vector element size, if applicable. */ + uint8_t vece; + /* Prefer i64 to v64. */ + bool prefer_i64; +} GVecGen4i; + void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t maxsz, const GVecGen2 *); void tcg_gen_gvec_2i(uint32_t dofs, uint32_t aofs, uint32_t oprsz, @@ -231,6 +250,9 @@ void tcg_gen_gvec_3i(uint32_t dofs, uint32_t aofs, uint32_t bofs, const GVecGen3i *); void tcg_gen_gvec_4(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs, uint32_t oprsz, uint32_t maxsz, const GVecGen4 *); +void tcg_gen_gvec_4i(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs, + uint32_t oprsz, uint32_t maxsz, int64_t c, + const GVecGen4i *); /* Expand a specific vector operation. */ diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h index 9f18fa090f1f1..4c4a5d1e25030 100644 --- a/linux-headers/asm-powerpc/kvm.h +++ b/linux-headers/asm-powerpc/kvm.h @@ -646,6 +646,8 @@ struct kvm_ppc_cpu_char { #define KVM_REG_PPC_SIER3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc3) #define KVM_REG_PPC_DAWR1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc4) #define KVM_REG_PPC_DAWRX1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc5) +#define KVM_REG_PPC_HASHKEYR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x00) /* TODO */ +#define KVM_REG_PPC_HASHPKEYR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x00) /* TODO */ /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs diff --git a/linux-user/ppc/cpu_loop.c b/linux-user/ppc/cpu_loop.c index 46e6ffd6d300f..6c99feb19b0e1 100644 --- a/linux-user/ppc/cpu_loop.c +++ b/linux-user/ppc/cpu_loop.c @@ -188,7 +188,8 @@ void cpu_loop(CPUPPCState *env) } break; case POWERPC_EXCP_TRAP: - cpu_abort(cs, "Tried to call a TRAP\n"); + si_signo = TARGET_SIGTRAP; + si_code = TARGET_TRAP_BRKPT; break; default: /* Should not happen ! */ diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index 2560b70c5f10f..7367c42e1acfc 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -1684,6 +1684,8 @@ typedef PowerPCCPU ArchCPU; #define SPR_BOOKE_GIVOR14 (0x1BD) #define SPR_TIR (0x1BE) #define SPR_PTCR (0x1D0) +#define SPR_POWER_HASHKEYR (0x1D4) +#define SPR_POWER_HASHPKEYR (0x1D5) #define SPR_BOOKE_SPEFSCR (0x200) #define SPR_Exxx_BBEAR (0x201) #define SPR_Exxx_BBTAR (0x202) diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c index e30e86fe9d04f..0b2bec4c7d91e 100644 --- a/target/ppc/cpu_init.c +++ b/target/ppc/cpu_init.c @@ -7592,6 +7592,14 @@ static void init_proc_POWER10(CPUPPCState *env) spr_read_generic, spr_write_generic, KVM_REG_PPC_PSSCR, 0); + /* FIXME: should this be here? 
 */
+    spr_register_kvm(env, SPR_POWER_HASHKEYR, "HASHKEYR",
+        SPR_NOACCESS, SPR_NOACCESS, &spr_read_generic, &spr_write_generic,
+        KVM_REG_PPC_HASHKEYR, 0x0);
+    spr_register_kvm(env, SPR_POWER_HASHPKEYR, "HASHPKEYR",
+        SPR_NOACCESS, SPR_NOACCESS, &spr_read_generic, &spr_write_generic,
+        KVM_REG_PPC_HASHPKEYR, 0x0);
+
     /* env variables */
     env->dcache_line_size = 128;
     env->icache_line_size = 128;
diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index bc646c67a0f54..ebc935182d20c 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c
@@ -1305,6 +1305,92 @@ void helper_td(CPUPPCState *env, target_ulong arg1, target_ulong arg2,
 #endif
 #endif
 
+static uint32_t helper_SIMON_LIKE_32_64(uint32_t x, uint64_t key, uint32_t lane)
+{
+    uint32_t c = 0xfffc;
+    uint64_t z0 = 0xfa2561cdf44ac398;
+    uint16_t z = 0, temp;
+    uint16_t k[32], eff_k[32], xleft[33], xright[33], fxleft[32];
+
+    for (int i = 3; i >= 0; i--) {
+        k[i] = key & 0xffff;
+        key >>= 16;
+    }
+    xleft[0] = x & 0xffff;
+    xright[0] = (x >> 16) & 0xffff;
+
+    for (int i = 0; i < 28; i++) {
+        z = (z0 >> (63 - i)) & 1;
+        temp = ror16(k[i + 3], 3) ^ k[i + 1];
+        k[i + 4] = c ^ z ^ k[i] ^ temp ^ ror16(temp, 1);
+    }
+
+    for (int i = 0; i < 8; i++)
+    {
+        eff_k[4 * i + 0] = k[4 * i + ((0 + lane) % 4)];
+        eff_k[4 * i + 1] = k[4 * i + ((1 + lane) % 4)];
+        eff_k[4 * i + 2] = k[4 * i + ((2 + lane) % 4)];
+        eff_k[4 * i + 3] = k[4 * i + ((3 + lane) % 4)];
+    }
+
+    for (int i = 0; i < 32; i++)
+    {
+        fxleft[i] = (rol16(xleft[i], 1) &
+                     rol16(xleft[i], 8)) ^ rol16(xleft[i], 2);
+        xleft[i + 1] = xright[i] ^ fxleft[i] ^ eff_k[i];
+        xright[i + 1] = xleft[i];
+    }
+
+    return (((uint32_t)xright[32]) << 16) | xleft[32];
+}
+
+/* TODO: check this implementation correctness; make it better */
+static uint64_t hash_digest(uint64_t ra, uint64_t rb, uint64_t key)
+{
+    uint64_t stage0_h = 0ULL, stage0_l = 0ULL;
+    uint64_t stage1_h, stage1_l;
+
+    for (int i = 0; i < 4; i++) {
+        stage0_h |= ror64(rb & 0xff, 8 * (2 * i + 1));
+        stage0_h |= ((ra >> 32) & 0xff) << (8 * 2 * i);
+        stage0_l |= ror64((rb >> 32) & 0xff, 8 * (2 * i + 1));
+        stage0_l |= (ra & 0xff) << (8 * 2 * i);
+        rb >>= 8;
+        ra >>= 8;
+    }
+
+    stage1_h = (uint64_t)helper_SIMON_LIKE_32_64(stage0_h >> 32, key, 0) << 32;
+    stage1_h |= helper_SIMON_LIKE_32_64(stage0_h, key, 1);
+    stage1_l = (uint64_t)helper_SIMON_LIKE_32_64(stage0_l >> 32, key, 2) << 32;
+    stage1_l |= helper_SIMON_LIKE_32_64(stage0_l, key, 3);
+
+    return (stage1_h ^ stage1_l);
+}
+
+#define HELPER_HASH(op, key, store)                                           \
+void helper_##op(CPUPPCState *env, target_ulong ea, target_ulong ra,          \
+                 target_ulong rb)                                             \
+{                                                                             \
+    uint64_t chash = hash_digest(ra, rb, key), lhash;                         \
+                                                                              \
+    if (store) {                                                              \
+        cpu_stq_data_ra(env, ea, chash, GETPC());                             \
+    }                                                                         \
+    else {                                                                    \
+        lhash = cpu_ldq_data_ra(env, ea, GETPC());                            \
+        if (lhash != chash) {                                                 \
+            /* hashes don't match, trap */                                    \
+            raise_exception_err_ra(env, POWERPC_EXCP_PROGRAM,                 \
+                                   POWERPC_EXCP_TRAP, GETPC());               \
+        }                                                                     \
+    }                                                                         \
+}
+
+HELPER_HASH(HASHST, env->spr[SPR_POWER_HASHKEYR], true)
+HELPER_HASH(HASHCHK, env->spr[SPR_POWER_HASHKEYR], false)
+HELPER_HASH(HASHSTP, env->spr[SPR_POWER_HASHPKEYR], true)
+HELPER_HASH(HASHCHKP, env->spr[SPR_POWER_HASHPKEYR], false)
+
 #if !defined(CONFIG_USER_ONLY)
 /*****************************************************************************/
 /* PowerPC 601 specific instructions (POWER bridge) */
diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
index e5c29b53b8b55..f83a80e6852e8 100644
--- a/target/ppc/fpu_helper.c
+++ b/target/ppc/fpu_helper.c
@@
-2156,10 +2156,11 @@ VSX_TSQRT(xvtsqrtsp, 4, float32, VsrW(i), -126, 23) * maddflgs - flags for the float*muladd routine that control the * various forms (madd, msub, nmadd, nmsub) * sfprf - set FPRF + * r2sp - round intermediate double precision result to single precision */ #define VSX_MADD(op, nels, tp, fld, maddflgs, sfprf, r2sp) \ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, \ - ppc_vsr_t *xa, ppc_vsr_t *b, ppc_vsr_t *c) \ + ppc_vsr_t *s1, ppc_vsr_t *s2, ppc_vsr_t *s3) \ { \ ppc_vsr_t t = *xt; \ int i; \ @@ -2175,12 +2176,12 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, \ * result to odd. \ */ \ set_float_rounding_mode(float_round_to_zero, &tstat); \ - t.fld = tp##_muladd(xa->fld, b->fld, c->fld, \ + t.fld = tp##_muladd(s1->fld, s3->fld, s2->fld, \ maddflgs, &tstat); \ t.fld |= (get_float_exception_flags(&tstat) & \ float_flag_inexact) != 0; \ } else { \ - t.fld = tp##_muladd(xa->fld, b->fld, c->fld, \ + t.fld = tp##_muladd(s1->fld, s3->fld, s2->fld, \ maddflgs, &tstat); \ } \ env->fp_status.float_exception_flags |= tstat.float_exception_flags; \ @@ -2202,14 +2203,14 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, \ do_float_check_status(env, GETPC()); \ } -VSX_MADD(xsmadddp, 1, float64, VsrD(0), MADD_FLGS, 1, 0) -VSX_MADD(xsmsubdp, 1, float64, VsrD(0), MSUB_FLGS, 1, 0) -VSX_MADD(xsnmadddp, 1, float64, VsrD(0), NMADD_FLGS, 1, 0) -VSX_MADD(xsnmsubdp, 1, float64, VsrD(0), NMSUB_FLGS, 1, 0) -VSX_MADD(xsmaddsp, 1, float64, VsrD(0), MADD_FLGS, 1, 1) -VSX_MADD(xsmsubsp, 1, float64, VsrD(0), MSUB_FLGS, 1, 1) -VSX_MADD(xsnmaddsp, 1, float64, VsrD(0), NMADD_FLGS, 1, 1) -VSX_MADD(xsnmsubsp, 1, float64, VsrD(0), NMSUB_FLGS, 1, 1) +VSX_MADD(XSMADDDP, 1, float64, VsrD(0), MADD_FLGS, 1, 0) +VSX_MADD(XSMSUBDP, 1, float64, VsrD(0), MSUB_FLGS, 1, 0) +VSX_MADD(XSNMADDDP, 1, float64, VsrD(0), NMADD_FLGS, 1, 0) +VSX_MADD(XSNMSUBDP, 1, float64, VsrD(0), NMSUB_FLGS, 1, 0) +VSX_MADD(XSMADDSP, 1, float64, VsrD(0), MADD_FLGS, 1, 1) +VSX_MADD(XSMSUBSP, 1, float64, VsrD(0), MSUB_FLGS, 1, 1) +VSX_MADD(XSNMADDSP, 1, float64, VsrD(0), NMADD_FLGS, 1, 1) +VSX_MADD(XSNMSUBSP, 1, float64, VsrD(0), NMSUB_FLGS, 1, 1) VSX_MADD(xvmadddp, 2, float64, VsrD(i), MADD_FLGS, 0, 0) VSX_MADD(xvmsubdp, 2, float64, VsrD(i), MSUB_FLGS, 0, 0) @@ -2222,28 +2223,72 @@ VSX_MADD(xvnmaddsp, 4, float32, VsrW(i), NMADD_FLGS, 0, 0) VSX_MADD(xvnmsubsp, 4, float32, VsrW(i), NMSUB_FLGS, 0, 0) /* - * VSX_SCALAR_CMP_DP - VSX scalar floating point compare double precision + * VSX_MADDQ - VSX floating point quad-precision muliply/add * op - instruction mnemonic + * maddflgs - flags for the float*muladd routine that control the + * various forms (madd, msub, nmadd, nmsub) + * ro - round to odd + */ +#define VSX_MADDQ(op, maddflgs, ro) \ +void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *s1, ppc_vsr_t *s2,\ + ppc_vsr_t *s3) \ +{ \ + ppc_vsr_t t = *xt; \ + \ + helper_reset_fpstatus(env); \ + \ + float_status tstat = env->fp_status; \ + set_float_exception_flags(0, &tstat); \ + if (ro) { \ + tstat.float_rounding_mode = float_round_to_odd; \ + } \ + t.f128 = float128_muladd(s1->f128, s3->f128, s2->f128, maddflgs, &tstat); \ + env->fp_status.float_exception_flags |= tstat.float_exception_flags; \ + \ + if (unlikely(tstat.float_exception_flags & float_flag_invalid)) { \ + float_invalid_op_madd(env, tstat.float_exception_flags, \ + false, GETPC()); \ + } \ + \ + helper_compute_fprf_float128(env, t.f128); \ + *xt = t; \ + do_float_check_status(env, GETPC()); \ +} + +VSX_MADDQ(XSMADDQP, MADD_FLGS, 0) +VSX_MADDQ(XSMADDQPO, 
MADD_FLGS, 1)
+VSX_MADDQ(XSMSUBQP, MSUB_FLGS, 0)
+VSX_MADDQ(XSMSUBQPO, MSUB_FLGS, 1)
+VSX_MADDQ(XSNMADDQP, NMADD_FLGS, 0)
+VSX_MADDQ(XSNMADDQPO, NMADD_FLGS, 1)
+VSX_MADDQ(XSNMSUBQP, NMSUB_FLGS, 0)
+VSX_MADDQ(XSNMSUBQPO, NMSUB_FLGS, 1)
+
+/*
+ * VSX_SCALAR_CMP - VSX scalar floating point compare
+ * op - instruction mnemonic
+ * tp - type
  * cmp - comparison operation
  * exp - expected result of comparison
+ * fld - vsr_t field
  * svxvc - set VXVC bit
  */
-#define VSX_SCALAR_CMP_DP(op, cmp, exp, svxvc)                                \
+#define VSX_SCALAR_CMP(op, tp, cmp, fld, exp, svxvc)                          \
 void helper_##op(CPUPPCState *env, ppc_vsr_t *xt,                             \
                  ppc_vsr_t *xa, ppc_vsr_t *xb)                                \
 {                                                                             \
-    ppc_vsr_t t = *xt;                                                        \
+    ppc_vsr_t t = { };                                                        \
     bool vxsnan_flag = false, vxvc_flag = false, vex_flag = false;            \
                                                                               \
-    if (float64_is_signaling_nan(xa->VsrD(0), &env->fp_status) ||             \
-        float64_is_signaling_nan(xb->VsrD(0), &env->fp_status)) {             \
+    if (tp##_is_signaling_nan(xa->fld, &env->fp_status) ||                    \
+        tp##_is_signaling_nan(xb->fld, &env->fp_status)) {                    \
         vxsnan_flag = true;                                                   \
         if (fpscr_ve == 0 && svxvc) {                                         \
             vxvc_flag = true;                                                 \
         }                                                                     \
     } else if (svxvc) {                                                       \
-        vxvc_flag = float64_is_quiet_nan(xa->VsrD(0), &env->fp_status) ||     \
-                    float64_is_quiet_nan(xb->VsrD(0), &env->fp_status);       \
+        vxvc_flag = tp##_is_quiet_nan(xa->fld, &env->fp_status) ||            \
+                    tp##_is_quiet_nan(xb->fld, &env->fp_status);              \
     }                                                                         \
     if (vxsnan_flag) {                                                        \
         float_invalid_op_vxsnan(env, GETPC());                                \
@@ -2254,23 +2299,20 @@ void helper_##op(CPUPPCState *env, ppc_vsr_t *xt,     \
     vex_flag = fpscr_ve && (vxvc_flag || vxsnan_flag);                        \
                                                                               \
     if (!vex_flag) {                                                          \
-        if (float64_##cmp(xb->VsrD(0), xa->VsrD(0),                           \
-                          &env->fp_status) == exp) {                          \
-            t.VsrD(0) = -1;                                                   \
-            t.VsrD(1) = 0;                                                    \
-        } else {                                                              \
-            t.VsrD(0) = 0;                                                    \
-            t.VsrD(1) = 0;                                                    \
+        if (tp##_##cmp(xb->fld, xa->fld, &env->fp_status) == exp) {           \
+            memset(&t.fld, 0xFF, sizeof(t.fld));                              \
         }                                                                     \
     }                                                                         \
     *xt = t;                                                                  \
     do_float_check_status(env, GETPC());                                      \
 }
 
-VSX_SCALAR_CMP_DP(xscmpeqdp, eq, 1, 0)
-VSX_SCALAR_CMP_DP(xscmpgedp, le, 1, 1)
-VSX_SCALAR_CMP_DP(xscmpgtdp, lt, 1, 1)
-VSX_SCALAR_CMP_DP(xscmpnedp, eq, 0, 0)
+VSX_SCALAR_CMP(XSCMPEQDP, float64, eq, VsrD(0), 1, 0)
+VSX_SCALAR_CMP(XSCMPGEDP, float64, le, VsrD(0), 1, 1)
+VSX_SCALAR_CMP(XSCMPGTDP, float64, lt, VsrD(0), 1, 1)
+VSX_SCALAR_CMP(XSCMPEQQP, float128, eq, f128, 1, 0)
+VSX_SCALAR_CMP(XSCMPGEQP, float128, le, f128, 1, 1)
+VSX_SCALAR_CMP(XSCMPGTQP, float128, lt, f128, 1, 1)
 
 void helper_xscmpexpdp(CPUPPCState *env, uint32_t opcode,
                        ppc_vsr_t *xa, ppc_vsr_t *xb)
@@ -2494,27 +2536,22 @@ VSX_MAX_MIN(xsmindp, minnum, 1, float64, VsrD(0))
 VSX_MAX_MIN(xvmindp, minnum, 2, float64, VsrD(i))
 VSX_MAX_MIN(xvminsp, minnum, 4, float32, VsrW(i))
 
-#define VSX_MAX_MINC(name, max)                                               \
+#define VSX_MAX_MINC(name, op, tp, fld)                                       \
 void helper_##name(CPUPPCState *env,                                          \
                    ppc_vsr_t *xt, ppc_vsr_t *xa, ppc_vsr_t *xb)               \
 {                                                                             \
     ppc_vsr_t t = *xt;                                                        \
     bool vxsnan_flag = false, vex_flag = false;                               \
                                                                               \
-    if (unlikely(float64_is_any_nan(xa->VsrD(0)) ||                           \
-                 float64_is_any_nan(xb->VsrD(0)))) {                          \
-        if (float64_is_signaling_nan(xa->VsrD(0), &env->fp_status) ||         \
-            float64_is_signaling_nan(xb->VsrD(0), &env->fp_status)) {         \
+    if (unlikely(tp##_is_any_nan(xa->fld) ||                                  \
+                 tp##_is_any_nan(xb->fld))) {                                 \
+        if (tp##_is_signaling_nan(xa->fld, &env->fp_status) ||                \
+            tp##_is_signaling_nan(xb->fld, &env->fp_status)) {                \
             vxsnan_flag = true;                                               \
         }                                                                     \
-        t.VsrD(0) = xb->VsrD(0);                                              \
-    } else if ((max &&                                                        \
-               !float64_lt(xa->VsrD(0), xb->VsrD(0), &env->fp_status)) ||     \
-               (!max &&                                                       \
-               float64_lt(xa->VsrD(0), xb->VsrD(0), &env->fp_status))) {      \
-        t.VsrD(0) = xa->VsrD(0);                                              \
+        t.fld = xb->fld;                                                      \
+    }
else { \ - t.VsrD(0) = xb->VsrD(0); \ + t.fld = tp##_##op(xa->fld, xb->fld, &env->fp_status); \ } \ \ vex_flag = fpscr_ve & vxsnan_flag; \ @@ -2526,8 +2563,10 @@ void helper_##name(CPUPPCState *env, \ } \ } \ -VSX_MAX_MINC(xsmaxcdp, 1); -VSX_MAX_MINC(xsmincdp, 0); +VSX_MAX_MINC(XSMAXCDP, maxnum, float64, VsrD(0)); +VSX_MAX_MINC(XSMINCDP, minnum, float64, VsrD(0)); +VSX_MAX_MINC(XSMAXCQP, maxnum, float128, f128); +VSX_MAX_MINC(XSMINCQP, minnum, float128, f128); #define VSX_MAX_MINJ(name, max) \ void helper_##name(CPUPPCState *env, \ @@ -2581,8 +2620,8 @@ void helper_##name(CPUPPCState *env, \ } \ } \ -VSX_MAX_MINJ(xsmaxjdp, 1); -VSX_MAX_MINJ(xsminjdp, 0); +VSX_MAX_MINJ(XSMAXJDP, 1); +VSX_MAX_MINJ(XSMINJDP, 0); /* * VSX_CMP - VSX floating point compare @@ -2751,6 +2790,27 @@ VSX_CVT_FP_TO_FP_HP(xscvhpdp, 1, float16, float64, VsrH(3), VsrD(0), 1) VSX_CVT_FP_TO_FP_HP(xvcvsphp, 4, float32, float16, VsrW(i), VsrH(2 * i + 1), 0) VSX_CVT_FP_TO_FP_HP(xvcvhpsp, 4, float16, float32, VsrH(2 * i + 1), VsrW(i), 0) +void helper_XVCVSPBF16(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb) +{ + ppc_vsr_t t = { }; + int i; + + helper_reset_fpstatus(env); + for (i = 0; i < 4; i++) { + if (unlikely(float32_is_signaling_nan(xb->VsrW(i), &env->fp_status))) { + float_invalid_op_vxsnan(env, GETPC()); + t.VsrH(2 * i + 1) = float32_to_bfloat16( + float32_snan_to_qnan(xb->VsrW(i)), &env->fp_status); + } else { + t.VsrH(2 * i + 1) = + float32_to_bfloat16(xb->VsrW(i), &env->fp_status); + } + } + + *xt = t; + do_float_check_status(env, GETPC()); +} + void helper_XSCVQPDP(CPUPPCState *env, uint32_t ro, ppc_vsr_t *xt, ppc_vsr_t *xb) { @@ -3055,27 +3115,6 @@ uint64_t helper_xsrsp(CPUPPCState *env, uint64_t xb) return xt; } -#define VSX_XXPERM(op, indexed) \ -void helper_##op(CPUPPCState *env, ppc_vsr_t *xt, \ - ppc_vsr_t *xa, ppc_vsr_t *pcv) \ -{ \ - ppc_vsr_t t = *xt; \ - int i, idx; \ - \ - for (i = 0; i < 16; i++) { \ - idx = pcv->VsrB(i) & 0x1F; \ - if (indexed) { \ - idx = 31 - idx; \ - } \ - t.VsrB(i) = (idx <= 15) ? 
xa->VsrB(idx) \ - : xt->VsrB(idx - 16); \ - } \ - *xt = t; \ -} - -VSX_XXPERM(xxperm, 0) -VSX_XXPERM(xxpermr, 1) - void helper_xvxsigsp(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb) { ppc_vsr_t t = { }; diff --git a/target/ppc/helper.h b/target/ppc/helper.h index d318837ea5cc9..5b4d5454f95bc 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -4,6 +4,10 @@ DEF_HELPER_FLAGS_4(tw, TCG_CALL_NO_WG, void, env, tl, tl, i32) #if defined(TARGET_PPC64) DEF_HELPER_FLAGS_4(td, TCG_CALL_NO_WG, void, env, tl, tl, i32) #endif +DEF_HELPER_4(HASHST, void, env, tl, tl, tl) +DEF_HELPER_4(HASHCHK, void, env, tl, tl, tl) +DEF_HELPER_4(HASHSTP, void, env, tl, tl, tl) +DEF_HELPER_4(HASHCHKP, void, env, tl, tl, tl) #if !defined(CONFIG_USER_ONLY) DEF_HELPER_2(store_msr, void, env, tl) DEF_HELPER_1(rfi, void, env) @@ -141,46 +145,13 @@ DEF_HELPER_3(vabsduw, void, avr, avr, avr) DEF_HELPER_3(vavgsb, void, avr, avr, avr) DEF_HELPER_3(vavgsh, void, avr, avr, avr) DEF_HELPER_3(vavgsw, void, avr, avr, avr) -DEF_HELPER_4(vcmpequb, void, env, avr, avr, avr) -DEF_HELPER_4(vcmpequh, void, env, avr, avr, avr) -DEF_HELPER_4(vcmpequw, void, env, avr, avr, avr) -DEF_HELPER_4(vcmpequd, void, env, avr, avr, avr) -DEF_HELPER_4(vcmpneb, void, env, avr, avr, avr) -DEF_HELPER_4(vcmpneh, void, env, avr, avr, avr) -DEF_HELPER_4(vcmpnew, void, env, avr, avr, avr) -DEF_HELPER_4(vcmpnezb, void, env, avr, avr, avr) -DEF_HELPER_4(vcmpnezh, void, env, avr, avr, avr) -DEF_HELPER_4(vcmpnezw, void, env, avr, avr, avr) -DEF_HELPER_4(vcmpgtub, void, env, avr, avr, avr) -DEF_HELPER_4(vcmpgtuh, void, env, avr, avr, avr) -DEF_HELPER_4(vcmpgtuw, void, env, avr, avr, avr) -DEF_HELPER_4(vcmpgtud, void, env, avr, avr, avr) -DEF_HELPER_4(vcmpgtsb, void, env, avr, avr, avr) -DEF_HELPER_4(vcmpgtsh, void, env, avr, avr, avr) -DEF_HELPER_4(vcmpgtsw, void, env, avr, avr, avr) -DEF_HELPER_4(vcmpgtsd, void, env, avr, avr, avr) DEF_HELPER_4(vcmpeqfp, void, env, avr, avr, avr) DEF_HELPER_4(vcmpgefp, void, env, avr, avr, avr) DEF_HELPER_4(vcmpgtfp, void, env, avr, avr, avr) DEF_HELPER_4(vcmpbfp, void, env, avr, avr, avr) -DEF_HELPER_4(vcmpequb_dot, void, env, avr, avr, avr) -DEF_HELPER_4(vcmpequh_dot, void, env, avr, avr, avr) -DEF_HELPER_4(vcmpequw_dot, void, env, avr, avr, avr) -DEF_HELPER_4(vcmpequd_dot, void, env, avr, avr, avr) -DEF_HELPER_4(vcmpneb_dot, void, env, avr, avr, avr) -DEF_HELPER_4(vcmpneh_dot, void, env, avr, avr, avr) -DEF_HELPER_4(vcmpnew_dot, void, env, avr, avr, avr) -DEF_HELPER_4(vcmpnezb_dot, void, env, avr, avr, avr) -DEF_HELPER_4(vcmpnezh_dot, void, env, avr, avr, avr) -DEF_HELPER_4(vcmpnezw_dot, void, env, avr, avr, avr) -DEF_HELPER_4(vcmpgtub_dot, void, env, avr, avr, avr) -DEF_HELPER_4(vcmpgtuh_dot, void, env, avr, avr, avr) -DEF_HELPER_4(vcmpgtuw_dot, void, env, avr, avr, avr) -DEF_HELPER_4(vcmpgtud_dot, void, env, avr, avr, avr) -DEF_HELPER_4(vcmpgtsb_dot, void, env, avr, avr, avr) -DEF_HELPER_4(vcmpgtsh_dot, void, env, avr, avr, avr) -DEF_HELPER_4(vcmpgtsw_dot, void, env, avr, avr, avr) -DEF_HELPER_4(vcmpgtsd_dot, void, env, avr, avr, avr) +DEF_HELPER_4(VCMPNEZB, void, avr, avr, avr, i32) +DEF_HELPER_4(VCMPNEZH, void, avr, avr, avr, i32) +DEF_HELPER_4(VCMPNEZW, void, avr, avr, avr, i32) DEF_HELPER_4(vcmpeqfp_dot, void, env, avr, avr, avr) DEF_HELPER_4(vcmpgefp_dot, void, env, avr, avr, avr) DEF_HELPER_4(vcmpgtfp_dot, void, env, avr, avr, avr) @@ -191,22 +162,26 @@ DEF_HELPER_3(vmrglw, void, avr, avr, avr) DEF_HELPER_3(vmrghb, void, avr, avr, avr) DEF_HELPER_3(vmrghh, void, avr, avr, avr) DEF_HELPER_3(vmrghw, void, avr, 
avr, avr) -DEF_HELPER_3(vmulesb, void, avr, avr, avr) -DEF_HELPER_3(vmulesh, void, avr, avr, avr) -DEF_HELPER_3(vmulesw, void, avr, avr, avr) -DEF_HELPER_3(vmuleub, void, avr, avr, avr) -DEF_HELPER_3(vmuleuh, void, avr, avr, avr) -DEF_HELPER_3(vmuleuw, void, avr, avr, avr) -DEF_HELPER_3(vmulosb, void, avr, avr, avr) -DEF_HELPER_3(vmulosh, void, avr, avr, avr) -DEF_HELPER_3(vmulosw, void, avr, avr, avr) -DEF_HELPER_3(vmuloub, void, avr, avr, avr) -DEF_HELPER_3(vmulouh, void, avr, avr, avr) -DEF_HELPER_3(vmulouw, void, avr, avr, avr) -DEF_HELPER_3(vmulhsw, void, avr, avr, avr) -DEF_HELPER_3(vmulhuw, void, avr, avr, avr) -DEF_HELPER_3(vmulhsd, void, avr, avr, avr) -DEF_HELPER_3(vmulhud, void, avr, avr, avr) +DEF_HELPER_3(VMULESB, void, avr, avr, avr) +DEF_HELPER_3(VMULESH, void, avr, avr, avr) +DEF_HELPER_3(VMULESW, void, avr, avr, avr) +DEF_HELPER_3(VMULESD, void, avr, avr, avr) +DEF_HELPER_3(VMULEUB, void, avr, avr, avr) +DEF_HELPER_3(VMULEUH, void, avr, avr, avr) +DEF_HELPER_3(VMULEUW, void, avr, avr, avr) +DEF_HELPER_3(VMULEUD, void, avr, avr, avr) +DEF_HELPER_3(VMULOSB, void, avr, avr, avr) +DEF_HELPER_3(VMULOSH, void, avr, avr, avr) +DEF_HELPER_3(VMULOSW, void, avr, avr, avr) +DEF_HELPER_3(VMULOSD, void, avr, avr, avr) +DEF_HELPER_3(VMULOUB, void, avr, avr, avr) +DEF_HELPER_3(VMULOUH, void, avr, avr, avr) +DEF_HELPER_3(VMULOUW, void, avr, avr, avr) +DEF_HELPER_3(VMULOUD, void, avr, avr, avr) +DEF_HELPER_4(VMULHSW, void, avr, avr, avr, i32) +DEF_HELPER_4(VMULHUW, void, avr, avr, avr, i32) +DEF_HELPER_4(VMULHSD, void, avr, avr, avr, i32) +DEF_HELPER_4(VMULHUD, void, avr, avr, avr, i32) DEF_HELPER_3(vslo, void, avr, avr, avr) DEF_HELPER_3(vsro, void, avr, avr, avr) DEF_HELPER_3(vsrv, void, avr, avr, avr) @@ -245,11 +220,10 @@ DEF_HELPER_4(VINSBLX, void, env, avr, i64, tl) DEF_HELPER_4(VINSHLX, void, env, avr, i64, tl) DEF_HELPER_4(VINSWLX, void, env, avr, i64, tl) DEF_HELPER_4(VINSDLX, void, env, avr, i64, tl) -DEF_HELPER_2(vextsb2w, void, avr, avr) -DEF_HELPER_2(vextsh2w, void, avr, avr) -DEF_HELPER_2(vextsb2d, void, avr, avr) -DEF_HELPER_2(vextsh2d, void, avr, avr) -DEF_HELPER_2(vextsw2d, void, avr, avr) +DEF_HELPER_4(VSTRIBL, void, env, avr, avr, tl) +DEF_HELPER_4(VSTRIBR, void, env, avr, avr, tl) +DEF_HELPER_4(VSTRIHL, void, env, avr, avr, tl) +DEF_HELPER_4(VSTRIHR, void, env, avr, avr, tl) DEF_HELPER_2(vnegw, void, avr, avr) DEF_HELPER_2(vnegd, void, avr, avr) DEF_HELPER_2(vupkhpx, void, avr, avr) @@ -262,9 +236,8 @@ DEF_HELPER_2(vupklsh, void, avr, avr) DEF_HELPER_2(vupklsw, void, avr, avr) DEF_HELPER_5(vmsumubm, void, env, avr, avr, avr, avr) DEF_HELPER_5(vmsummbm, void, env, avr, avr, avr, avr) -DEF_HELPER_5(vsel, void, env, avr, avr, avr, avr) -DEF_HELPER_5(vperm, void, env, avr, avr, avr, avr) -DEF_HELPER_5(vpermr, void, env, avr, avr, avr, avr) +DEF_HELPER_4(VPERM, void, avr, avr, avr, avr) +DEF_HELPER_4(VPERMR, void, avr, avr, avr, avr) DEF_HELPER_4(vpkshss, void, env, avr, avr, avr) DEF_HELPER_4(vpkshus, void, env, avr, avr, avr) DEF_HELPER_4(vpkswss, void, env, avr, avr, avr) @@ -393,14 +366,16 @@ DEF_HELPER_3(xssqrtdp, void, env, vsr, vsr) DEF_HELPER_3(xsrsqrtedp, void, env, vsr, vsr) DEF_HELPER_4(xstdivdp, void, env, i32, vsr, vsr) DEF_HELPER_3(xstsqrtdp, void, env, i32, vsr) -DEF_HELPER_5(xsmadddp, void, env, vsr, vsr, vsr, vsr) -DEF_HELPER_5(xsmsubdp, void, env, vsr, vsr, vsr, vsr) -DEF_HELPER_5(xsnmadddp, void, env, vsr, vsr, vsr, vsr) -DEF_HELPER_5(xsnmsubdp, void, env, vsr, vsr, vsr, vsr) -DEF_HELPER_4(xscmpeqdp, void, env, vsr, vsr, vsr) -DEF_HELPER_4(xscmpgtdp, void, 
env, vsr, vsr, vsr) -DEF_HELPER_4(xscmpgedp, void, env, vsr, vsr, vsr) -DEF_HELPER_4(xscmpnedp, void, env, vsr, vsr, vsr) +DEF_HELPER_5(XSMADDDP, void, env, vsr, vsr, vsr, vsr) +DEF_HELPER_5(XSMSUBDP, void, env, vsr, vsr, vsr, vsr) +DEF_HELPER_5(XSNMADDDP, void, env, vsr, vsr, vsr, vsr) +DEF_HELPER_5(XSNMSUBDP, void, env, vsr, vsr, vsr, vsr) +DEF_HELPER_4(XSCMPEQDP, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XSCMPGTDP, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XSCMPGEDP, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XSCMPEQQP, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XSCMPGTQP, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XSCMPGEQP, void, env, vsr, vsr, vsr) DEF_HELPER_4(xscmpexpdp, void, env, i32, vsr, vsr) DEF_HELPER_4(xscmpexpqp, void, env, i32, vsr, vsr) DEF_HELPER_4(xscmpodp, void, env, i32, vsr, vsr) @@ -409,10 +384,12 @@ DEF_HELPER_4(xscmpoqp, void, env, i32, vsr, vsr) DEF_HELPER_4(xscmpuqp, void, env, i32, vsr, vsr) DEF_HELPER_4(xsmaxdp, void, env, vsr, vsr, vsr) DEF_HELPER_4(xsmindp, void, env, vsr, vsr, vsr) -DEF_HELPER_4(xsmaxcdp, void, env, vsr, vsr, vsr) -DEF_HELPER_4(xsmincdp, void, env, vsr, vsr, vsr) -DEF_HELPER_4(xsmaxjdp, void, env, vsr, vsr, vsr) -DEF_HELPER_4(xsminjdp, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XSMAXCDP, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XSMINCDP, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XSMAXJDP, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XSMINJDP, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XSMAXCQP, void, env, vsr, vsr, vsr) +DEF_HELPER_4(XSMINCQP, void, env, vsr, vsr, vsr) DEF_HELPER_3(xscvdphp, void, env, vsr, vsr) DEF_HELPER_4(xscvdpqp, void, env, i32, vsr, vsr) DEF_HELPER_3(xscvdpsp, void, env, vsr, vsr) @@ -456,10 +433,19 @@ DEF_HELPER_3(xsresp, void, env, vsr, vsr) DEF_HELPER_2(xsrsp, i64, env, i64) DEF_HELPER_3(xssqrtsp, void, env, vsr, vsr) DEF_HELPER_3(xsrsqrtesp, void, env, vsr, vsr) -DEF_HELPER_5(xsmaddsp, void, env, vsr, vsr, vsr, vsr) -DEF_HELPER_5(xsmsubsp, void, env, vsr, vsr, vsr, vsr) -DEF_HELPER_5(xsnmaddsp, void, env, vsr, vsr, vsr, vsr) -DEF_HELPER_5(xsnmsubsp, void, env, vsr, vsr, vsr, vsr) +DEF_HELPER_5(XSMADDSP, void, env, vsr, vsr, vsr, vsr) +DEF_HELPER_5(XSMSUBSP, void, env, vsr, vsr, vsr, vsr) +DEF_HELPER_5(XSNMADDSP, void, env, vsr, vsr, vsr, vsr) +DEF_HELPER_5(XSNMSUBSP, void, env, vsr, vsr, vsr, vsr) + +DEF_HELPER_5(XSMADDQP, void, env, vsr, vsr, vsr, vsr) +DEF_HELPER_5(XSMADDQPO, void, env, vsr, vsr, vsr, vsr) +DEF_HELPER_5(XSMSUBQP, void, env, vsr, vsr, vsr, vsr) +DEF_HELPER_5(XSMSUBQPO, void, env, vsr, vsr, vsr, vsr) +DEF_HELPER_5(XSNMADDQP, void, env, vsr, vsr, vsr, vsr) +DEF_HELPER_5(XSNMADDQPO, void, env, vsr, vsr, vsr, vsr) +DEF_HELPER_5(XSNMSUBQP, void, env, vsr, vsr, vsr, vsr) +DEF_HELPER_5(XSNMSUBQPO, void, env, vsr, vsr, vsr, vsr) DEF_HELPER_4(xvadddp, void, env, vsr, vsr, vsr) DEF_HELPER_4(xvsubdp, void, env, vsr, vsr, vsr) @@ -517,6 +503,7 @@ DEF_HELPER_FLAGS_4(xvcmpnesp, TCG_CALL_NO_RWG, i32, env, vsr, vsr, vsr) DEF_HELPER_3(xvcvspdp, void, env, vsr, vsr) DEF_HELPER_3(xvcvsphp, void, env, vsr, vsr) DEF_HELPER_3(xvcvhpsp, void, env, vsr, vsr) +DEF_HELPER_3(XVCVSPBF16, void, env, vsr, vsr) DEF_HELPER_3(xvcvspsxds, void, env, vsr, vsr) DEF_HELPER_3(xvcvspsxws, void, env, vsr, vsr) DEF_HELPER_3(xvcvspuxds, void, env, vsr, vsr) @@ -532,11 +519,15 @@ DEF_HELPER_3(xvrspic, void, env, vsr, vsr) DEF_HELPER_3(xvrspim, void, env, vsr, vsr) DEF_HELPER_3(xvrspip, void, env, vsr, vsr) DEF_HELPER_3(xvrspiz, void, env, vsr, vsr) -DEF_HELPER_4(xxperm, void, env, vsr, vsr, vsr) -DEF_HELPER_4(xxpermr, void, env, vsr, vsr, vsr) 
+DEF_HELPER_3(XXGENPCVBM, void, vsr, avr, tl) +DEF_HELPER_3(XXGENPCVHM, void, vsr, avr, tl) +DEF_HELPER_3(XXGENPCVWM, void, vsr, avr, tl) +DEF_HELPER_3(XXGENPCVDM, void, vsr, avr, tl) DEF_HELPER_4(xxextractuw, void, env, vsr, vsr, i32) +DEF_HELPER_5(XXPERMX, void, vsr, vsr, vsr, vsr, tl) DEF_HELPER_4(xxinsertw, void, env, vsr, vsr, i32) DEF_HELPER_3(xvxsigsp, void, env, vsr, vsr) +DEF_HELPER_5(XXEVAL, void, vsr, vsr, vsr, vsr, i32) DEF_HELPER_5(XXBLENDVB, void, vsr, vsr, vsr, vsr, i32) DEF_HELPER_5(XXBLENDVH, void, vsr, vsr, vsr, vsr, i32) DEF_HELPER_5(XXBLENDVW, void, vsr, vsr, vsr, vsr, i32) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 2a9c91a4235e6..f369b45c44731 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -18,11 +18,11 @@ # &D rt ra si:int64_t -@D ...... rt:5 ra:5 si:s16 &D +@D ...... rt:5 ra:5 si:s16 &D &D_bf bf l:bool ra imm -@D_bfs ...... bf:3 - l:1 ra:5 imm:s16 &D_bf -@D_bfu ...... bf:3 - l:1 ra:5 imm:16 &D_bf +@D_bfs ...... bf:3 . l:1 ra:5 imm:s16 &D_bf +@D_bfu ...... bf:3 . l:1 ra:5 imm:16 &D_bf %dq_si 4:s12 !function=times_16 %dq_rtp 22:4 !function=times_2 @@ -35,7 +35,7 @@ @DQ_TSXP ...... ..... ra:5 ............ .... &D si=%dq_si rt=%rt_tsxp %ds_si 2:s14 !function=times_4 -@DS ...... rt:5 ra:5 .............. .. &D si=%ds_si +@DS ...... rt:5 ra:5 .............. .. &D si=%ds_si %ds_rtp 22:4 !function=times_2 @DS_rtp ...... ....0 ra:5 .............. .. &D rt=%ds_rtp si=%ds_si @@ -46,25 +46,40 @@ &DX rt d %dx_d 6:s10 16:5 0:1 -@DX ...... rt:5 ..... .......... ..... . &DX d=%dx_d +@DX ...... rt:5 ..... .......... ..... . &DX d=%dx_d &VA vrt vra vrb rc -@VA ...... vrt:5 vra:5 vrb:5 rc:5 ...... &VA +@VA ...... vrt:5 vra:5 vrb:5 rc:5 ...... &VA + +&VC vrt vra vrb rc:bool +@VC ...... vrt:5 vra:5 vrb:5 rc:1 .......... &VC &VN vrt vra vrb sh @VN ...... vrt:5 vra:5 vrb:5 .. sh:3 ...... &VN &VX vrt vra vrb -@VX ...... vrt:5 vra:5 vrb:5 .......... . &VX +@VX ...... vrt:5 vra:5 vrb:5 .......... . &VX + +&VX_bf bf vra vrb +@VX_bf ...... bf:3 .. vra:5 vrb:5 ........... &VX_bf + +&VX_mp rt mp:bool vrb +@VX_mp ...... rt:5 .... mp:1 vrb:5 ........... &VX_mp + +&VX_n rt vrb n +@VX_n ...... rt:5 .. n:3 vrb:5 ........... &VX_n + +&VX_tb_rc vrt vrb rc:bool +@VX_tb_rc ...... vrt:5 ..... vrb:5 rc:1 .......... &VX_tb_rc &VX_uim4 vrt uim vrb -@VX_uim4 ...... vrt:5 . uim:4 vrb:5 ........... &VX_uim4 +@VX_uim4 ...... vrt:5 . uim:4 vrb:5 ........... &VX_uim4 &VX_tb vrt vrb -@VX_tb ...... vrt:5 ..... vrb:5 ........... &VX_tb +@VX_tb ...... vrt:5 ..... vrb:5 ........... &VX_tb &X rt ra rb -@X ...... rt:5 ra:5 rb:5 .......... . &X +@X ...... rt:5 ra:5 rb:5 .......... . &X &X_rc rt ra rb rc:bool @X_rc ...... rt:5 ra:5 rb:5 .......... rc:1 &X_rc @@ -76,6 +91,21 @@ @X_tp_a_bp_rc ...... ....0 ra:5 ....0 .......... rc:1 &X_rc rt=%x_frtp rb=%x_frbp +&X_t rt +@X_t ...... rt:5 ..... ..... .......... . &X_t + +&X_t_rc rt rc:bool +@X_t_rc ...... rt:5 ..... ..... .......... rc:1 &X_t_rc + +&X_tb rt rb +@X_tb ...... rt:5 ..... rb:5 .......... . &X_tb + +&X_imm2 rt imm +@X_imm2 ...... rt:5 ..... ... imm:2 .......... . &X_imm2 + +&X_imm3 rt imm +@X_imm3 ...... rt:5 ..... .. imm:3 .......... . &X_imm3 + &X_tb_rc rt rb rc:bool @X_tb_rc ...... rt:5 ..... rb:5 .......... rc:1 &X_tb_rc @@ -86,7 +116,7 @@ @X_t_bp_rc ...... rt:5 ..... ....0 .......... rc:1 &X_tb_rc rb=%x_frbp &X_bi rt bi -@X_bi ...... rt:5 bi:5 ----- .......... - &X_bi +@X_bi ...... rt:5 bi:5 ..... .......... . &X_bi &X_bf bf ra rb @X_bf ...... bf:3 .. ra:5 rb:5 .......... . 
&X_bf @@ -101,9 +131,12 @@ @X_bf_uim_bp ...... bf:3 . uim:6 ....0 .......... . &X_bf_uim rb=%x_frbp &X_bfl bf l:bool ra rb -@X_bfl ...... bf:3 - l:1 ra:5 rb:5 ..........- &X_bfl +@X_bfl ...... bf:3 . l:1 ra:5 rb:5 .......... . &X_bfl %x_xt 0:1 21:5 +&X_imm5 xt imm:uint8_t vrb +@X_imm5 ...... ..... imm:5 vrb:5 .......... . &X_imm5 xt=%x_xt + &X_imm8 xt imm:uint8_t @X_imm8 ...... ..... .. imm:8 .......... . &X_imm8 xt=%x_xt @@ -133,12 +166,25 @@ %xx_xt 0:1 21:5 %xx_xb 1:1 11:5 %xx_xa 2:1 16:5 -&XX2 xt xb uim:uint8_t -@XX2 ...... ..... ... uim:2 ..... ......... .. &XX2 xt=%xx_xt xb=%xx_xb +%xx_xc 3:1 6:5 +&XX2 xt xb +@XX2 ...... ..... ..... ..... ......... .. &XX2 xt=%xx_xt xb=%xx_xb + +&XX2_uim2 xt xb uim:uint8_t +@XX2_uim2 ...... ..... ... uim:2 ..... ......... .. &XX2_uim2 xt=%xx_xt xb=%xx_xb + +&XX2_bf_xb bf xb +@XX2_bf_xb ...... bf:3 .. ..... ..... ......... . . &XX2_bf_xb xb=%xx_xb &XX3 xt xa xb @XX3 ...... ..... ..... ..... ........ ... &XX3 xt=%xx_xt xa=%xx_xa xb=%xx_xb +&XX3_dm xt xa xb dm +@XX3_dm ...... ..... ..... ..... . dm:2 ..... ... &XX3_dm xt=%xx_xt xa=%xx_xa xb=%xx_xb + +&XX4 xt xa xb xc +@XX4 ...... ..... ..... ..... ..... .. .... &XX4 xt=%xx_xt xa=%xx_xa xb=%xx_xb xc=%xx_xc + &Z22_bf_fra bf fra dm @Z22_bf_fra ...... bf:3 .. fra:5 dm:6 ......... . &Z22_bf_fra @@ -250,6 +296,13 @@ CNTTZDM 011111 ..... ..... ..... 1000111011 - @X PDEPD 011111 ..... ..... ..... 0010011100 - @X PEXTD 011111 ..... ..... ..... 0010111100 - @X +# Fixed-Point Hash Instructions + +HASHST 011111 ..... ..... ..... 1011010010 . @X_TSX +HASHCHK 011111 ..... ..... ..... 1011110010 . @X_TSX +HASHSTP 011111 ..... ..... ..... 1010010010 . @X_TSX +HASHCHKP 011111 ..... ..... ..... 1010110010 . @X_TSX + ### Float-Point Load Instructions LFS 110000 ..... ..... ................ @D @@ -281,6 +334,16 @@ SETBCR 011111 ..... ..... ----- 0110100000 - @X_bi SETNBC 011111 ..... ..... ----- 0111000000 - @X_bi SETNBCR 011111 ..... ..... ----- 0111100000 - @X_bi +### Move To/From FPSCR + +MFFS 111111 ..... 00000 ----- 1001000111 . @X_t_rc +MFFSCE 111111 ..... 00001 ----- 1001000111 - @X_t +MFFSCDRN 111111 ..... 10100 ..... 1001000111 - @X_tb +MFFSCDRNI 111111 ..... 10101 --... 1001000111 - @X_imm3 +MFFSCRN 111111 ..... 10110 ..... 1001000111 - @X_tb +MFFSCRNI 111111 ..... 10111 ---.. 1001000111 - @X_imm2 +MFFSL 111111 ..... 11000 ----- 1001000111 - @X_t + ### Decimal Floating-Point Arithmetic Instructions DADD 111011 ..... ..... ..... 0000000010 . @X_rc @@ -373,8 +436,41 @@ DSCLIQ 111111 ..... ..... ...... 001000010 . @Z22_tap_sh_rc DSCRI 111011 ..... ..... ...... 001100010 . @Z22_ta_sh_rc DSCRIQ 111111 ..... ..... ...... 001100010 . @Z22_tap_sh_rc +## Vector Integer Instructions + +VCMPEQUB 000100 ..... ..... ..... . 0000000110 @VC +VCMPEQUH 000100 ..... ..... ..... . 0001000110 @VC +VCMPEQUW 000100 ..... ..... ..... . 0010000110 @VC +VCMPEQUD 000100 ..... ..... ..... . 0011000111 @VC +VCMPEQUQ 000100 ..... ..... ..... . 0111000111 @VC + +VCMPGTSB 000100 ..... ..... ..... . 1100000110 @VC +VCMPGTSH 000100 ..... ..... ..... . 1101000110 @VC +VCMPGTSW 000100 ..... ..... ..... . 1110000110 @VC +VCMPGTSD 000100 ..... ..... ..... . 1111000111 @VC +VCMPGTSQ 000100 ..... ..... ..... . 1110000111 @VC + +VCMPGTUB 000100 ..... ..... ..... . 1000000110 @VC +VCMPGTUH 000100 ..... ..... ..... . 1001000110 @VC +VCMPGTUW 000100 ..... ..... ..... . 1010000110 @VC +VCMPGTUD 000100 ..... ..... ..... . 1011000111 @VC +VCMPGTUQ 000100 ..... ..... ..... . 1010000111 @VC + +VCMPNEB 000100 ..... ..... ..... . 
0000000111 @VC +VCMPNEH 000100 ..... ..... ..... . 0001000111 @VC +VCMPNEW 000100 ..... ..... ..... . 0010000111 @VC + +VCMPNEZB 000100 ..... ..... ..... . 0100000111 @VC +VCMPNEZH 000100 ..... ..... ..... . 0101000111 @VC +VCMPNEZW 000100 ..... ..... ..... . 0110000111 @VC + +VCMPSQ 000100 ... -- ..... ..... 00101000001 @VX_bf +VCMPUQ 000100 ... -- ..... ..... 00100000001 @VX_bf + ## Vector Bit Manipulation Instruction +VGNB 000100 ..... -- ... ..... 10011001100 @VX_n + VCFUGED 000100 ..... ..... ..... 10101001101 @VX VCLZDM 000100 ..... ..... ..... 11110000100 @VX VCTZDM 000100 ..... ..... ..... 11111000100 @VX @@ -419,6 +515,20 @@ VINSWVRX 000100 ..... ..... ..... 00110001111 @VX VSLDBI 000100 ..... ..... ..... 00 ... 010110 @VN VSRDBI 000100 ..... ..... ..... 01 ... 010110 @VN +VPERM 000100 ..... ..... ..... ..... 101011 @VA +VPERMR 000100 ..... ..... ..... ..... 111011 @VA + +VSEL 000100 ..... ..... ..... ..... 101010 @VA + +## Vector Integer Arithmetic Instructions + +VEXTSB2W 000100 ..... 10000 ..... 11000000010 @VX_tb +VEXTSH2W 000100 ..... 10001 ..... 11000000010 @VX_tb +VEXTSB2D 000100 ..... 11000 ..... 11000000010 @VX_tb +VEXTSH2D 000100 ..... 11001 ..... 11000000010 @VX_tb +VEXTSW2D 000100 ..... 11010 ..... 11000000010 @VX_tb +VEXTSD2Q 000100 ..... 11011 ..... 11000000010 @VX_tb + ## Vector Mask Manipulation Instructions MTVSRBM 000100 ..... 10000 ..... 11001000010 @VX_tb @@ -440,6 +550,26 @@ VEXTRACTWM 000100 ..... 01010 ..... 11001000010 @VX_tb VEXTRACTDM 000100 ..... 01011 ..... 11001000010 @VX_tb VEXTRACTQM 000100 ..... 01100 ..... 11001000010 @VX_tb +VCNTMBB 000100 ..... 1100 . ..... 11001000010 @VX_mp +VCNTMBH 000100 ..... 1101 . ..... 11001000010 @VX_mp +VCNTMBW 000100 ..... 1110 . ..... 11001000010 @VX_mp +VCNTMBD 000100 ..... 1111 . ..... 11001000010 @VX_mp + +## Vector Multiply-Sum Instructions + +VMSUMCUD 000100 ..... ..... ..... ..... 010111 @VA +VMSUMUDM 000100 ..... ..... ..... ..... 100011 @VA + +## Vector String Instructions + +VSTRIBL 000100 ..... 00000 ..... . 0000001101 @VX_tb_rc +VSTRIBR 000100 ..... 00001 ..... . 0000001101 @VX_tb_rc +VSTRIHL 000100 ..... 00010 ..... . 0000001101 @VX_tb_rc +VSTRIHR 000100 ..... 00011 ..... . 0000001101 @VX_tb_rc + +VCLRLB 000100 ..... ..... ..... 00110001101 @VX +VCLRRB 000100 ..... ..... ..... 00111001101 @VX + # VSX Load/Store Instructions LXV 111101 ..... ..... ............ . 001 @DQ_TSX @@ -451,10 +581,51 @@ STXVX 011111 ..... ..... ..... 0110001100 . @X_TSX LXVPX 011111 ..... ..... ..... 0101001101 - @X_TSXP STXVPX 011111 ..... ..... ..... 0111001101 - @X_TSXP +## VSX Scalar Multiply-Add Instructions + +XSMADDADP 111100 ..... ..... ..... 00100001 . . . @XX3 +XSMADDMDP 111100 ..... ..... ..... 00101001 . . . @XX3 +XSMADDASP 111100 ..... ..... ..... 00000001 . . . @XX3 +XSMADDMSP 111100 ..... ..... ..... 00001001 . . . @XX3 +XSMADDQP 111111 ..... ..... ..... 0110000100 . @X_rc + +XSMSUBADP 111100 ..... ..... ..... 00110001 . . . @XX3 +XSMSUBMDP 111100 ..... ..... ..... 00111001 . . . @XX3 +XSMSUBASP 111100 ..... ..... ..... 00010001 . . . @XX3 +XSMSUBMSP 111100 ..... ..... ..... 00011001 . . . @XX3 +XSMSUBQP 111111 ..... ..... ..... 0110100100 . @X_rc + +XSNMADDASP 111100 ..... ..... ..... 10000001 . . . @XX3 +XSNMADDMSP 111100 ..... ..... ..... 10001001 . . . @XX3 +XSNMADDADP 111100 ..... ..... ..... 10100001 . . . @XX3 +XSNMADDMDP 111100 ..... ..... ..... 10101001 . . . @XX3 +XSNMADDQP 111111 ..... ..... ..... 0111000100 . @X_rc + +XSNMSUBASP 111100 ..... ..... ..... 10010001 . . . @XX3 +XSNMSUBMSP 111100 ..... ..... 
..... 10011001 . . . @XX3 +XSNMSUBADP 111100 ..... ..... ..... 10110001 . . . @XX3 +XSNMSUBMDP 111100 ..... ..... ..... 10111001 . . . @XX3 +XSNMSUBQP 111111 ..... ..... ..... 0111100100 . @X_rc + ## VSX splat instruction XXSPLTIB 111100 ..... 00 ........ 0101101000 . @X_imm8 -XXSPLTW 111100 ..... ---.. ..... 010100100 . . @XX2 +XXSPLTW 111100 ..... ---.. ..... 010100100 . . @XX2_uim2 + +## VSX Permute Instructions + +XXPERM 111100 ..... ..... ..... 00011010 ... @XX3 +XXPERMR 111100 ..... ..... ..... 00111010 ... @XX3 +XXPERMDI 111100 ..... ..... ..... 0 .. 01010 ... @XX3_dm + +XXSEL 111100 ..... ..... ..... ..... 11 .... @XX4 + +## VSX Vector Generate PCV + +XXGENPCVBM 111100 ..... ..... ..... 1110010100 . @X_imm5 +XXGENPCVHM 111100 ..... ..... ..... 1110010101 . @X_imm5 +XXGENPCVWM 111100 ..... ..... ..... 1110110100 . @X_imm5 +XXGENPCVDM 111100 ..... ..... ..... 1110110101 . @X_imm5 ## VSX Vector Load Special Value Instruction @@ -466,12 +637,55 @@ XSMAXCDP 111100 ..... ..... ..... 10000000 ... @XX3 XSMINCDP 111100 ..... ..... ..... 10001000 ... @XX3 XSMAXJDP 111100 ..... ..... ..... 10010000 ... @XX3 XSMINJDP 111100 ..... ..... ..... 10011000 ... @XX3 +XSMAXCQP 111111 ..... ..... ..... 1010100100 - @X +XSMINCQP 111111 ..... ..... ..... 1011100100 - @X + +XSCMPEQDP 111100 ..... ..... ..... 00000011 ... @XX3 +XSCMPGEDP 111100 ..... ..... ..... 00010011 ... @XX3 +XSCMPGTDP 111100 ..... ..... ..... 00001011 ... @XX3 +XSCMPEQQP 111111 ..... ..... ..... 0001000100 - @X +XSCMPGEQP 111111 ..... ..... ..... 0011000100 - @X +XSCMPGTQP 111111 ..... ..... ..... 0011100100 - @X ## VSX Binary Floating-Point Convert Instructions XSCVQPDP 111111 ..... 10100 ..... 1101000100 . @X_tb_rc +XVCVBF16SPN 111100 ..... 10000 ..... 111011011 .. @XX2 +XVCVSPBF16 111100 ..... 10001 ..... 111011011 .. @XX2 + +## VSX Vector Test Least-Significant Bit by Byte Instruction + +XVTLSBB 111100 ... -- 00010 ..... 111011011 . - @XX2_bf_xb ### rfebb &XL_s s:uint8_t @XL_s ......-------------- s:1 .......... - &XL_s RFEBB 010011-------------- . 0010010010 - @XL_s + +## Vector Multiply Instruction + +VMULESB 000100 ..... ..... ..... 01100001000 @VX +VMULOSB 000100 ..... ..... ..... 00100001000 @VX +VMULEUB 000100 ..... ..... ..... 01000001000 @VX +VMULOUB 000100 ..... ..... ..... 00000001000 @VX + +VMULESH 000100 ..... ..... ..... 01101001000 @VX +VMULOSH 000100 ..... ..... ..... 00101001000 @VX +VMULEUH 000100 ..... ..... ..... 01001001000 @VX +VMULOUH 000100 ..... ..... ..... 00001001000 @VX + +VMULESW 000100 ..... ..... ..... 01110001000 @VX +VMULOSW 000100 ..... ..... ..... 00110001000 @VX +VMULEUW 000100 ..... ..... ..... 01010001000 @VX +VMULOUW 000100 ..... ..... ..... 00010001000 @VX + +VMULESD 000100 ..... ..... ..... 01111001000 @VX +VMULOSD 000100 ..... ..... ..... 00111001000 @VX +VMULEUD 000100 ..... ..... ..... 01011001000 @VX +VMULOUD 000100 ..... ..... ..... 00011001000 @VX + +VMULHSW 000100 ..... ..... ..... 01110001001 @VX +VMULHUW 000100 ..... ..... ..... 01010001001 @VX +VMULHSD 000100 ..... ..... ..... 01111001001 @VX +VMULHUD 000100 ..... ..... ..... 01011001001 @VX +VMULLD 000100 ..... ..... ..... 00111001001 @VX diff --git a/target/ppc/insn64.decode b/target/ppc/insn64.decode index 39e610913dfdb..fdb859f62dd89 100644 --- a/target/ppc/insn64.decode +++ b/target/ppc/insn64.decode @@ -44,15 +44,25 @@ ...... ..... .... . ................ \ &8RR_D si=%8rr_si xt=%8rr_xt -# Format XX4 -&XX4 xt xa xb xc -%xx4_xt 0:1 21:5 -%xx4_xa 2:1 16:5 -%xx4_xb 1:1 11:5 -%xx4_xc 3:1 6:5 -@XX4 ........ ........ ........ 
........ \ +# Format 8RR:XX4 +%8rr_xx_xt 0:1 21:5 +%8rr_xx_xa 2:1 16:5 +%8rr_xx_xb 1:1 11:5 +%8rr_xx_xc 3:1 6:5 +&8RR_XX4 xt xa xb xc +@8RR_XX4 ........ ........ ........ ........ \ ...... ..... ..... ..... ..... .. .... \ - &XX4 xt=%xx4_xt xa=%xx4_xa xb=%xx4_xb xc=%xx4_xc + &8RR_XX4 xt=%8rr_xx_xt xa=%8rr_xx_xa xb=%8rr_xx_xb xc=%8rr_xx_xc + +&8RR_XX4_imm xt xa xb xc imm +@8RR_XX4_imm ........ ........ ........ imm:8 \ + ...... ..... ..... ..... ..... .. .... \ + &8RR_XX4_imm xt=%8rr_xx_xt xa=%8rr_xx_xa xb=%8rr_xx_xb xc=%8rr_xx_xc + +&8RR_XX4_uim3 xt xa xb xc uim3 +@8RR_XX4_uim3 ...... .. .... .. ............... uim3:3 \ + ...... ..... ..... ..... ..... .. .... \ + &8RR_XX4_uim3 xt=%8rr_xx_xt xa=%8rr_xx_xa xb=%8rr_xx_xb xc=%8rr_xx_xc ### Fixed-Point Load Instructions @@ -179,6 +189,9 @@ PLXVP 000001 00 0--.-- .................. \ PSTXVP 000001 00 0--.-- .................. \ 111110 ..... ..... ................ @8LS_D_TSXP +XXEVAL 000001 01 0000 -- ---------- ........ \ + 100010 ..... ..... ..... ..... 01 .... @8RR_XX4_imm + XXSPLTIDP 000001 01 0000 -- -- ................ \ 100000 ..... 0010 . ................ @8RR_D XXSPLTIW 000001 01 0000 -- -- ................ \ @@ -187,10 +200,13 @@ XXSPLTI32DX 000001 01 0000 -- -- ................ \ 100000 ..... 000 .. ................ @8RR_D_IX XXBLENDVD 000001 01 0000 -- ------------------ \ - 100001 ..... ..... ..... ..... 11 .... @XX4 + 100001 ..... ..... ..... ..... 11 .... @8RR_XX4 XXBLENDVW 000001 01 0000 -- ------------------ \ - 100001 ..... ..... ..... ..... 10 .... @XX4 + 100001 ..... ..... ..... ..... 10 .... @8RR_XX4 XXBLENDVH 000001 01 0000 -- ------------------ \ - 100001 ..... ..... ..... ..... 01 .... @XX4 + 100001 ..... ..... ..... ..... 01 .... @8RR_XX4 XXBLENDVB 000001 01 0000 -- ------------------ \ - 100001 ..... ..... ..... ..... 00 .... @XX4 + 100001 ..... ..... ..... ..... 00 .... @8RR_XX4 + +XXPERMX 000001 01 0000 -- --------------- ... \ + 100010 ..... ..... ..... ..... 00 .... @8RR_XX4_uim3 diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index 9bc327bcba5ac..2fa37a91c9b6b 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -28,6 +28,7 @@ #include "fpu/softfloat.h" #include "qapi/error.h" #include "qemu/guest-random.h" +#include "tcg/tcg-gvec-desc.h" #include "helper_regs.h" /*****************************************************************************/ @@ -749,100 +750,18 @@ VCF(ux, uint32_to_float32, u32) VCF(sx, int32_to_float32, s32) #undef VCF -#define VCMP_DO(suffix, compare, element, record) \ - void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \ - ppc_avr_t *a, ppc_avr_t *b) \ - { \ - uint64_t ones = (uint64_t)-1; \ - uint64_t all = ones; \ - uint64_t none = 0; \ - int i; \ - \ - for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ - uint64_t result = (a->element[i] compare b->element[i] ? 
\ - ones : 0x0); \ - switch (sizeof(a->element[0])) { \ - case 8: \ - r->u64[i] = result; \ - break; \ - case 4: \ - r->u32[i] = result; \ - break; \ - case 2: \ - r->u16[i] = result; \ - break; \ - case 1: \ - r->u8[i] = result; \ - break; \ - } \ - all &= result; \ - none |= result; \ - } \ - if (record) { \ - env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ - } \ - } -#define VCMP(suffix, compare, element) \ - VCMP_DO(suffix, compare, element, 0) \ - VCMP_DO(suffix##_dot, compare, element, 1) -VCMP(equb, ==, u8) -VCMP(equh, ==, u16) -VCMP(equw, ==, u32) -VCMP(equd, ==, u64) -VCMP(gtub, >, u8) -VCMP(gtuh, >, u16) -VCMP(gtuw, >, u32) -VCMP(gtud, >, u64) -VCMP(gtsb, >, s8) -VCMP(gtsh, >, s16) -VCMP(gtsw, >, s32) -VCMP(gtsd, >, s64) -#undef VCMP_DO -#undef VCMP - -#define VCMPNE_DO(suffix, element, etype, cmpzero, record) \ -void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r, \ - ppc_avr_t *a, ppc_avr_t *b) \ -{ \ - etype ones = (etype)-1; \ - etype all = ones; \ - etype result, none = 0; \ - int i; \ - \ - for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ - if (cmpzero) { \ - result = ((a->element[i] == 0) \ - || (b->element[i] == 0) \ - || (a->element[i] != b->element[i]) ? \ - ones : 0x0); \ - } else { \ - result = (a->element[i] != b->element[i]) ? ones : 0x0; \ - } \ - r->element[i] = result; \ - all &= result; \ - none |= result; \ - } \ - if (record) { \ - env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ - } \ +#define VCMPNEZ(NAME, ELEM) \ +void helper_##NAME(ppc_vsr_t *t, ppc_vsr_t *a, ppc_vsr_t *b, uint32_t desc) \ +{ \ + for (int i = 0; i < ARRAY_SIZE(t->ELEM); i++) { \ + t->ELEM[i] = ((a->ELEM[i] == 0) || (b->ELEM[i] == 0) || \ + (a->ELEM[i] != b->ELEM[i])) ? -1 : 0; \ + } \ } - -/* - * VCMPNEZ - Vector compare not equal to zero - * suffix - instruction mnemonic suffix (b: byte, h: halfword, w: word) - * element - element type to access from vector - */ -#define VCMPNE(suffix, element, etype, cmpzero) \ - VCMPNE_DO(suffix, element, etype, cmpzero, 0) \ - VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1) -VCMPNE(zb, u8, uint8_t, 1) -VCMPNE(zh, u16, uint16_t, 1) -VCMPNE(zw, u32, uint32_t, 1) -VCMPNE(b, u8, uint8_t, 0) -VCMPNE(h, u16, uint16_t, 0) -VCMPNE(w, u32, uint32_t, 0) -#undef VCMPNE_DO -#undef VCMPNE +VCMPNEZ(VCMPNEZB, u8) +VCMPNEZ(VCMPNEZH, u16) +VCMPNEZ(VCMPNEZW, u32) +#undef VCMPNEZ #define VCMPFP_DO(suffix, compare, order, record) \ void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \ @@ -1150,7 +1069,7 @@ void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, } #define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \ - void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ + void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ { \ int i; \ \ @@ -1161,7 +1080,7 @@ void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, } #define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \ - void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ + void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ { \ int i; \ \ @@ -1172,19 +1091,35 @@ void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, } #define VMUL(suffix, mul_element, mul_access, prod_access, cast) \ - VMUL_DO_EVN(mule##suffix, mul_element, mul_access, prod_access, cast) \ - VMUL_DO_ODD(mulo##suffix, mul_element, mul_access, prod_access, cast) -VMUL(sb, s8, VsrSB, VsrSH, int16_t) -VMUL(sh, s16, VsrSH, VsrSW, int32_t) -VMUL(sw, s32, VsrSW, VsrSD, int64_t) -VMUL(ub, u8, VsrB, VsrH, uint16_t) 
-VMUL(uh, u16, VsrH, VsrW, uint32_t) -VMUL(uw, u32, VsrW, VsrD, uint64_t) + VMUL_DO_EVN(MULE##suffix, mul_element, mul_access, prod_access, cast) \ + VMUL_DO_ODD(MULO##suffix, mul_element, mul_access, prod_access, cast) +VMUL(SB, s8, VsrSB, VsrSH, int16_t) +VMUL(SH, s16, VsrSH, VsrSW, int32_t) +VMUL(SW, s32, VsrSW, VsrSD, int64_t) +VMUL(UB, u8, VsrB, VsrH, uint16_t) +VMUL(UH, u16, VsrH, VsrW, uint32_t) +VMUL(UW, u32, VsrW, VsrD, uint64_t) #undef VMUL_DO_EVN #undef VMUL_DO_ODD #undef VMUL +void helper_VMULESD(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ + muls64(&r->VsrD(1), &r->VsrD(0), a->VsrSD(0), b->VsrSD(0)); +} +void helper_VMULOSD(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ + muls64(&r->VsrD(1), &r->VsrD(0), a->VsrSD(1), b->VsrSD(1)); +} +void helper_VMULEUD(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ + mulu64(&r->VsrD(1), &r->VsrD(0), a->VsrD(0), b->VsrD(0)); +} +void helper_VMULOUD(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ + mulu64(&r->VsrD(1), &r->VsrD(0), a->VsrD(1), b->VsrD(1)); +} -void helper_vmulhsw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +void helper_VMULHSW(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) { int i; @@ -1193,7 +1128,7 @@ void helper_vmulhsw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) } } -void helper_vmulhuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +void helper_VMULHUW(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) { int i; @@ -1203,7 +1138,7 @@ void helper_vmulhuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) } } -void helper_vmulhsd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +void helper_VMULHSD(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) { uint64_t discard; @@ -1211,7 +1146,7 @@ void helper_vmulhsd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) muls64(&discard, &r->u64[1], a->s64[1], b->s64[1]); } -void helper_vmulhud(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +void helper_VMULHUD(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) { uint64_t discard; @@ -1219,8 +1154,27 @@ void helper_vmulhud(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) mulu64(&discard, &r->u64[1], a->u64[1], b->u64[1]); } -void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, - ppc_avr_t *c) +void helper_XXPERMX(ppc_vsr_t *t, ppc_vsr_t *s0, ppc_vsr_t *s1, ppc_vsr_t *pcv, + target_ulong uim) +{ + int i, idx; + ppc_vsr_t tmp = { .u64 = {0, 0} }; + + for (i = 0; i < ARRAY_SIZE(t->u8); i++) { + if ((pcv->VsrB(i) >> 5) == uim) { + idx = pcv->VsrB(i) & 0x1f; + if (idx < ARRAY_SIZE(t->u8)) { + tmp.VsrB(i) = s0->VsrB(idx); + } else { + tmp.VsrB(i) = s1->VsrB(idx - ARRAY_SIZE(t->u8)); + } + } + } + + *t = tmp; +} + +void helper_VPERM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) { ppc_avr_t result; int i; @@ -1238,8 +1192,7 @@ void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, *r = result; } -void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, - ppc_avr_t *c) +void helper_VPERMR(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) { ppc_avr_t result; int i; @@ -1257,6 +1210,90 @@ void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, *r = result; } +#define XXGENPCV(NAME, SZ) \ +void helper_##NAME(ppc_vsr_t *t, ppc_vsr_t *b, target_ulong imm) \ +{ \ + ppc_vsr_t tmp = { .u64 = { 0, 0 } }; \ + \ + switch (imm) { \ + case 0b00000: /* Big-Endian expansion */ \ + /* Initialize tmp with the result of an all-zeros mask */ \ + tmp.VsrD(0) = 0x1011121314151617; \ + tmp.VsrD(1) = 0x18191A1B1C1D1E1F; \ + \ + /* Iterate over the most significant byte of each element 
*/ \ + for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \ + if (b->VsrB(i) & 0x80) { \ + /* Update each byte of the element */ \ + for (int k = 0; k < SZ; k++) { \ + tmp.VsrB(i + k) = j + k; \ + } \ + j += SZ; \ + } \ + } \ + \ + break; \ + case 0b00001: /* Big-Endian compression */ \ + /* Iterate over the most significant byte of each element */ \ + for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \ + if (b->VsrB(i) & 0x80) { \ + /* Update each byte of the element */ \ + for (int k = 0; k < SZ; k++) { \ + tmp.VsrB(j + k) = i + k; \ + } \ + j += SZ; \ + } \ + } \ + \ + break; \ + case 0b00010: /* Little-Endian expansion */ \ + /* Initialize tmp with the result of an all-zeros mask */ \ + tmp.VsrD(0) = 0x1F1E1D1C1B1A1918; \ + tmp.VsrD(1) = 0x1716151413121110; \ + \ + /* Iterate over the most significant byte of each element */ \ + for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \ + /* Reverse indexing of "i" */ \ + const int idx = ARRAY_SIZE(b->u8) - i - SZ; \ + if (b->VsrB(idx) & 0x80) { \ + /* Update each byte of the element */ \ + for (int k = 0, rk = SZ - 1; k < SZ; k++, rk--) { \ + tmp.VsrB(idx + rk) = j + k; \ + } \ + j += SZ; \ + } \ + } \ + \ + break; \ + case 0b00011: /* Little-Endian compression */ \ + /* Iterate over the most significant byte of each element */ \ + for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) { \ + if (b->VsrB(ARRAY_SIZE(b->u8) - i - SZ) & 0x80) { \ + /* Update each byte of the element */ \ + for (int k = 0, rk = SZ - 1; k < SZ; k++, rk--) { \ + /* Reverse indexing of "j" */ \ + const int idx = ARRAY_SIZE(b->u8) - j - SZ; \ + tmp.VsrB(idx + rk) = i + k; \ + } \ + j += SZ; \ + } \ + } \ + \ + break; \ + default: \ + /* Translation code validates IMM before calling this helper */ \ + g_assert_not_reached(); \ + break; \ + } \ + \ + *t = tmp; \ +} +XXGENPCV(XXGENPCVBM, 1) +XXGENPCV(XXGENPCVHM, 2) +XXGENPCV(XXGENPCVWM, 4) +XXGENPCV(XXGENPCVDM, 8) +#undef XXGENPCV + #if defined(HOST_WORDS_BIGENDIAN) #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)]) #define VBPERMD_INDEX(i) (i) @@ -1507,13 +1544,6 @@ VRLMI(vrlwmi, 32, u32, 1); VRLMI(vrldnm, 64, u64, 0); VRLMI(vrlwnm, 32, u32, 0); -void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, - ppc_avr_t *c) -{ - r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]); - r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]); -} - void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) { int i; @@ -1706,6 +1736,38 @@ VEXTRACT(uw, u32) VEXTRACT(d, u64) #undef VEXTRACT +#define VSTRI(NAME, ELEM, NUM_ELEMS, LEFT) \ +void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *b, \ + target_ulong rc) \ +{ \ + bool null_found = false; \ + int i, idx; \ + \ + for (i = 0; i < NUM_ELEMS; i++) { \ + idx = LEFT ? i : NUM_ELEMS - i - 1; \ + if (b->Vsr##ELEM(idx)) { \ + t->Vsr##ELEM(idx) = b->Vsr##ELEM(idx); \ + } else { \ + null_found = true; \ + break; \ + } \ + } \ + \ + for (; i < NUM_ELEMS; i++) { \ + idx = LEFT ? i : NUM_ELEMS - i - 1; \ + t->Vsr##ELEM(idx) = 0; \ + } \ + \ + if (rc) { \ + env->crf[6] = null_found ? 
0b0010 : 0; \ + } \ +} +VSTRI(VSTRIBL, B, 16, true) +VSTRI(VSTRIBR, B, 16, false) +VSTRI(VSTRIHL, H, 8, true) +VSTRI(VSTRIHR, H, 8, false) +#undef VSTRI + void helper_xxextractuw(CPUPPCState *env, ppc_vsr_t *xt, ppc_vsr_t *xb, uint32_t index) { @@ -1737,6 +1799,47 @@ void helper_xxinsertw(CPUPPCState *env, ppc_vsr_t *xt, *xt = t; } +void helper_XXEVAL(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c, + uint32_t desc) +{ + /* + * Instead of processing imm bit-by-bit, we'll skip the computation of + * conjunctions whose corresponding bit is unset. + */ + int bit, imm = simd_data(desc); + Int128 conj, disj = int128_zero(); + + /* Iterate over set bits from the least to the most significant bit */ + while (imm) { + /* + * Get the next bit to be processed with ctz64. Invert the result of + * ctz64 to match the indexing used by PowerISA. + */ + bit = 7 - ctzl(imm); + if (bit & 0x4) { + conj = a->s128; + } else { + conj = int128_not(a->s128); + } + if (bit & 0x2) { + conj = int128_and(conj, b->s128); + } else { + conj = int128_and(conj, int128_not(b->s128)); + } + if (bit & 0x1) { + conj = int128_and(conj, c->s128); + } else { + conj = int128_and(conj, int128_not(c->s128)); + } + disj = int128_or(disj, conj); + + /* Unset the least significant bit that is set */ + imm &= imm - 1; + } + + t->s128 = disj; +} + #define XXBLEND(name, sz) \ void glue(helper_XXBLENDV, name)(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \ ppc_avr_t *c, uint32_t desc) \ @@ -1752,21 +1855,6 @@ XXBLEND(W, 32) XXBLEND(D, 64) #undef XXBLEND -#define VEXT_SIGNED(name, element, cast) \ -void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \ -{ \ - int i; \ - for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ - r->element[i] = (cast)b->element[i]; \ - } \ -} -VEXT_SIGNED(vextsb2w, s32, int8_t) -VEXT_SIGNED(vextsb2d, s64, int8_t) -VEXT_SIGNED(vextsh2w, s32, int16_t) -VEXT_SIGNED(vextsh2d, s64, int16_t) -VEXT_SIGNED(vextsw2d, s64, int32_t) -#undef VEXT_SIGNED - #define VNEG(name, element) \ void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \ { \ diff --git a/target/ppc/internal.h b/target/ppc/internal.h index 6aa9484f34a57..43c4cdb35906d 100644 --- a/target/ppc/internal.h +++ b/target/ppc/internal.h @@ -157,9 +157,6 @@ EXTRACT_HELPER(FPL, 25, 1); EXTRACT_HELPER(FPFLM, 17, 8); EXTRACT_HELPER(FPW, 16, 1); -/* mffscrni */ -EXTRACT_HELPER(RM, 11, 2); - /* addpcis */ EXTRACT_HELPER_SPLIT_3(DX, 10, 6, 6, 5, 16, 1, 1, 0, 0) #if defined(TARGET_PPC64) diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 9d2adc0caee04..8959736fe5030 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -4813,11 +4813,11 @@ static inline void gen_op_mfspr(DisasContext *ctx) */ if (sprn & 0x10) { if (ctx->pr) { - gen_priv_exception(ctx, POWERPC_EXCP_INVAL_SPR); + gen_priv_exception(ctx, POWERPC_EXCP_PRIV_REG); } } else { if (ctx->pr || sprn == 0 || sprn == 4 || sprn == 5 || sprn == 6) { - gen_hvpriv_exception(ctx, POWERPC_EXCP_INVAL_SPR); + gen_hvpriv_exception(ctx, POWERPC_EXCP_PRIV_REG); } } } @@ -5000,11 +5000,11 @@ static void gen_mtspr(DisasContext *ctx) */ if (sprn & 0x10) { if (ctx->pr) { - gen_priv_exception(ctx, POWERPC_EXCP_INVAL_SPR); + gen_priv_exception(ctx, POWERPC_EXCP_PRIV_REG); } } else { if (ctx->pr || sprn == 0) { - gen_hvpriv_exception(ctx, POWERPC_EXCP_INVAL_SPR); + gen_hvpriv_exception(ctx, POWERPC_EXCP_PRIV_REG); } } } @@ -7424,10 +7424,29 @@ static int times_16(DisasContext *ctx, int x) #define TRANS(NAME, FUNC, ...) 
\ static bool trans_##NAME(DisasContext *ctx, arg_##NAME *a) \ { return FUNC(ctx, a, __VA_ARGS__); } +#define TRANS_FLAGS(FLAGS, NAME, FUNC, ...) \ + static bool trans_##NAME(DisasContext *ctx, arg_##NAME *a) \ + { \ + REQUIRE_INSNS_FLAGS(ctx, FLAGS); \ + return FUNC(ctx, a, __VA_ARGS__); \ + } +#define TRANS_FLAGS2(FLAGS2, NAME, FUNC, ...) \ + static bool trans_##NAME(DisasContext *ctx, arg_##NAME *a) \ + { \ + REQUIRE_INSNS_FLAGS2(ctx, FLAGS2); \ + return FUNC(ctx, a, __VA_ARGS__); \ + } #define TRANS64(NAME, FUNC, ...) \ static bool trans_##NAME(DisasContext *ctx, arg_##NAME *a) \ { REQUIRE_64BIT(ctx); return FUNC(ctx, a, __VA_ARGS__); } +#define TRANS64_FLAGS2(FLAGS2, NAME, FUNC, ...) \ + static bool trans_##NAME(DisasContext *ctx, arg_##NAME *a) \ + { \ + REQUIRE_64BIT(ctx); \ + REQUIRE_INSNS_FLAGS2(ctx, FLAGS2); \ + return FUNC(ctx, a, __VA_ARGS__); \ + } /* TODO: More TRANS* helpers for extra insn_flags checks. */ diff --git a/target/ppc/translate/fixedpoint-impl.c.inc b/target/ppc/translate/fixedpoint-impl.c.inc index 1aab32be03641..1be70e69eac9d 100644 --- a/target/ppc/translate/fixedpoint-impl.c.inc +++ b/target/ppc/translate/fixedpoint-impl.c.inc @@ -492,3 +492,39 @@ static bool trans_PEXTD(DisasContext *ctx, arg_X *a) #endif return true; } + +static bool do_hash(DisasContext *ctx, arg_X *a, bool priv, + void (*helper)(TCGv_ptr, TCGv, TCGv, TCGv)) +{ + TCGv ea; + + if (!(ctx->insns_flags2 & PPC2_ISA310)) { + /* if version is before v3.1, this operation is a nop */ + return true; + } + + if (unlikely(priv && ctx->pr)) { + /* if instruction is privileged but the context is in user space */ + gen_priv_exception(ctx, POWERPC_EXCP_PRIV_OPC); + return 0; + } + + ea = tcg_const_tl(0b1111111000000000); + + /* TODO: do this on insn32.decode with !function */ + tcg_gen_addi_tl(ea, ea, a->rt << 3); + tcg_gen_ext16s_tl(ea, ea); + tcg_gen_add_tl(ea, ea, cpu_gpr[a->ra]); + + helper(cpu_env, ea, cpu_gpr[a->ra], cpu_gpr[a->rb]); + + tcg_temp_free(ea); + + return true; + +} + +TRANS(HASHST, do_hash, false, gen_helper_HASHST) +TRANS(HASHCHK, do_hash, false, gen_helper_HASHCHK) +TRANS(HASHSTP, do_hash, true, gen_helper_HASHSTP) +TRANS(HASHCHKP, do_hash, true, gen_helper_HASHCHKP) diff --git a/target/ppc/translate/fp-impl.c.inc b/target/ppc/translate/fp-impl.c.inc index c96769742e143..da3ef6a2105d9 100644 --- a/target/ppc/translate/fp-impl.c.inc +++ b/target/ppc/translate/fp-impl.c.inc @@ -589,141 +589,128 @@ static void gen_mcrfs(DisasContext *ctx) tcg_temp_free_i64(tnew_fpscr); } -/* mffs */ -static void gen_mffs(DisasContext *ctx) +static void do_mffsc(int rt, TCGv_i64 t1, uint64_t set_mask, + uint64_t clear_mask, uint32_t fpscr_mask) { - TCGv_i64 t0; - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; - } - t0 = tcg_temp_new_i64(); + TCGv_i64 fpscr; + + fpscr = tcg_temp_new_i64(); + gen_reset_fpstatus(); - tcg_gen_extu_tl_i64(t0, cpu_fpscr); - set_fpr(rD(ctx->opcode), t0); - if (unlikely(Rc(ctx->opcode))) { - gen_set_cr1_from_fpscr(ctx); - } - tcg_temp_free_i64(t0); + tcg_gen_extu_tl_i64(fpscr, cpu_fpscr); + tcg_gen_andi_i64(fpscr, fpscr, set_mask); + set_fpr(rt, fpscr); + + tcg_gen_andi_i64(fpscr, fpscr, clear_mask); + tcg_gen_or_i64(fpscr, fpscr, t1); + + gen_helper_store_fpscr(cpu_env, fpscr, tcg_constant_i32(fpscr_mask)); + + tcg_temp_free_i64(fpscr); } -/* mffsl */ -static void gen_mffsl(DisasContext *ctx) +static bool trans_MFFS(DisasContext *ctx, arg_X_t_rc *a) { - TCGv_i64 t0; + REQUIRE_FPU(ctx); - if (unlikely(!(ctx->insns_flags2 & PPC2_ISA300))) { - 
return gen_mffs(ctx); + do_mffsc(a->rt, tcg_constant_i64(0), 0xFFFFFFFFFFFFFFFFULL, 0, 0); + if (a->rc) { + gen_set_cr1_from_fpscr(ctx); } - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; - } - t0 = tcg_temp_new_i64(); - gen_reset_fpstatus(); - tcg_gen_extu_tl_i64(t0, cpu_fpscr); - /* Mask everything except mode, status, and enables. */ - tcg_gen_andi_i64(t0, t0, FP_DRN | FP_STATUS | FP_ENABLES | FP_RN); - set_fpr(rD(ctx->opcode), t0); - tcg_temp_free_i64(t0); + return true; } -/* mffsce */ -static void gen_mffsce(DisasContext *ctx) +static bool trans_MFFSCE(DisasContext *ctx, arg_X_t *a) { - TCGv_i64 t0; - TCGv_i32 mask; + REQUIRE_INSNS_FLAGS2(ctx, ISA300); + REQUIRE_FPU(ctx); - if (unlikely(!(ctx->insns_flags2 & PPC2_ISA300))) { - return gen_mffs(ctx); - } + do_mffsc(a->rt, tcg_constant_i64(0), 0xFFFFFFFFFFFFFFFFULL, + ~FP_ENABLES, 0x0003); - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; - } + return true; +} - t0 = tcg_temp_new_i64(); +static bool trans_MFFSCDRN(DisasContext *ctx, arg_X_tb *a) +{ + TCGv_i64 t1; - gen_reset_fpstatus(); - tcg_gen_extu_tl_i64(t0, cpu_fpscr); - set_fpr(rD(ctx->opcode), t0); + REQUIRE_INSNS_FLAGS2(ctx, ISA300); + REQUIRE_FPU(ctx); - /* Clear exception enable bits in the FPSCR. */ - tcg_gen_andi_i64(t0, t0, ~FP_ENABLES); - mask = tcg_const_i32(0x0003); - gen_helper_store_fpscr(cpu_env, t0, mask); + t1 = tcg_temp_new_i64(); + get_fpr(t1, a->rb); + tcg_gen_andi_i64(t1, t1, FP_DRN); - tcg_temp_free_i32(mask); - tcg_temp_free_i64(t0); + do_mffsc(a->rt, t1, FP_DRN | FP_ENABLES | FP_NI | FP_RN, ~FP_DRN, 0x0100); + + tcg_temp_free_i64(t1); + + return true; } -static void gen_helper_mffscrn(DisasContext *ctx, TCGv_i64 t1) +static bool trans_MFFSCDRNI(DisasContext *ctx, arg_X_imm3 *a) { - TCGv_i64 t0 = tcg_temp_new_i64(); - TCGv_i32 mask = tcg_const_i32(0x0001); + TCGv_i64 t1; - gen_reset_fpstatus(); - tcg_gen_extu_tl_i64(t0, cpu_fpscr); - tcg_gen_andi_i64(t0, t0, FP_DRN | FP_ENABLES | FP_RN); - set_fpr(rD(ctx->opcode), t0); + REQUIRE_INSNS_FLAGS2(ctx, ISA300); + REQUIRE_FPU(ctx); - /* Mask FPSCR value to clear RN. */ - tcg_gen_andi_i64(t0, t0, ~FP_RN); + t1 = tcg_temp_new_i64(); + tcg_gen_movi_i64(t1, (uint64_t)a->imm << FPSCR_DRN0); - /* Merge RN into FPSCR value. */ - tcg_gen_or_i64(t0, t0, t1); + do_mffsc(a->rt, t1, FP_DRN | FP_ENABLES | FP_NI | FP_RN, ~FP_DRN, 0x0100); - gen_helper_store_fpscr(cpu_env, t0, mask); + tcg_temp_free_i64(t1); - tcg_temp_free_i32(mask); - tcg_temp_free_i64(t0); + return true; } -/* mffscrn */ -static void gen_mffscrn(DisasContext *ctx) +static bool trans_MFFSCRN(DisasContext *ctx, arg_X_tb *a) { TCGv_i64 t1; - if (unlikely(!(ctx->insns_flags2 & PPC2_ISA300))) { - return gen_mffs(ctx); - } - - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; - } + REQUIRE_INSNS_FLAGS2(ctx, ISA300); + REQUIRE_FPU(ctx); t1 = tcg_temp_new_i64(); - get_fpr(t1, rB(ctx->opcode)); - /* Mask FRB to get just RN. 
*/ + get_fpr(t1, a->rb); tcg_gen_andi_i64(t1, t1, FP_RN); - gen_helper_mffscrn(ctx, t1); + do_mffsc(a->rt, t1, FP_DRN | FP_ENABLES | FP_NI | FP_RN, ~FP_RN, 0x0001); tcg_temp_free_i64(t1); + + return true; } -/* mffscrni */ -static void gen_mffscrni(DisasContext *ctx) +static bool trans_MFFSCRNI(DisasContext *ctx, arg_X_imm2 *a) { TCGv_i64 t1; - if (unlikely(!(ctx->insns_flags2 & PPC2_ISA300))) { - return gen_mffs(ctx); - } - - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; - } + REQUIRE_INSNS_FLAGS2(ctx, ISA300); + REQUIRE_FPU(ctx); - t1 = tcg_const_i64((uint64_t)RM(ctx->opcode)); + t1 = tcg_temp_new_i64(); + tcg_gen_movi_i64(t1, a->imm); - gen_helper_mffscrn(ctx, t1); + do_mffsc(a->rt, t1, FP_DRN | FP_ENABLES | FP_NI | FP_RN, ~FP_RN, 0x0001); tcg_temp_free_i64(t1); + + return true; +} + +static bool trans_MFFSL(DisasContext *ctx, arg_X_t *a) +{ + REQUIRE_INSNS_FLAGS2(ctx, ISA300); + REQUIRE_FPU(ctx); + + do_mffsc(a->rt, tcg_constant_i64(0), + FP_DRN | FP_STATUS | FP_ENABLES | FP_NI | FP_RN, 0, 0); + + return true; } /* mtfsb0 */ diff --git a/target/ppc/translate/fp-ops.c.inc b/target/ppc/translate/fp-ops.c.inc index 4260635a126dd..81640553e1a83 100644 --- a/target/ppc/translate/fp-ops.c.inc +++ b/target/ppc/translate/fp-ops.c.inc @@ -75,15 +75,6 @@ GEN_HANDLER_E(fcpsgn, 0x3F, 0x08, 0x00, 0x00000000, PPC_NONE, PPC2_ISA205), GEN_HANDLER_E(fmrgew, 0x3F, 0x06, 0x1E, 0x00000001, PPC_NONE, PPC2_VSX207), GEN_HANDLER_E(fmrgow, 0x3F, 0x06, 0x1A, 0x00000001, PPC_NONE, PPC2_VSX207), GEN_HANDLER(mcrfs, 0x3F, 0x00, 0x02, 0x0063F801, PPC_FLOAT), -GEN_HANDLER_E_2(mffs, 0x3F, 0x07, 0x12, 0x00, 0x00000000, PPC_FLOAT, PPC_NONE), -GEN_HANDLER_E_2(mffsce, 0x3F, 0x07, 0x12, 0x01, 0x00000000, PPC_FLOAT, - PPC2_ISA300), -GEN_HANDLER_E_2(mffsl, 0x3F, 0x07, 0x12, 0x18, 0x00000000, PPC_FLOAT, - PPC2_ISA300), -GEN_HANDLER_E_2(mffscrn, 0x3F, 0x07, 0x12, 0x16, 0x00000000, PPC_FLOAT, - PPC_NONE), -GEN_HANDLER_E_2(mffscrni, 0x3F, 0x07, 0x12, 0x17, 0x00000000, PPC_FLOAT, - PPC_NONE), GEN_HANDLER(mtfsb0, 0x3F, 0x06, 0x02, 0x001FF800, PPC_FLOAT), GEN_HANDLER(mtfsb1, 0x3F, 0x06, 0x01, 0x001FF800, PPC_FLOAT), GEN_HANDLER(mtfsf, 0x3F, 0x07, 0x16, 0x00000000, PPC_FLOAT), diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index d5e02fd7f22e1..fbb85f75644c8 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ b/target/ppc/translate/vmx-impl.c.inc @@ -798,30 +798,7 @@ static void trans_vclzd(DisasContext *ctx) tcg_temp_free_i64(avr); } -GEN_VXFORM(vmuloub, 4, 0); -GEN_VXFORM(vmulouh, 4, 1); -GEN_VXFORM(vmulouw, 4, 2); GEN_VXFORM_V(vmuluwm, MO_32, tcg_gen_gvec_mul, 4, 2); -GEN_VXFORM_DUAL(vmulouw, PPC_ALTIVEC, PPC_NONE, - vmuluwm, PPC_NONE, PPC2_ALTIVEC_207) -GEN_VXFORM(vmulosb, 4, 4); -GEN_VXFORM(vmulosh, 4, 5); -GEN_VXFORM(vmulosw, 4, 6); -GEN_VXFORM_V(vmulld, MO_64, tcg_gen_gvec_mul, 4, 7); -GEN_VXFORM(vmuleub, 4, 8); -GEN_VXFORM(vmuleuh, 4, 9); -GEN_VXFORM(vmuleuw, 4, 10); -GEN_VXFORM(vmulhuw, 4, 10); -GEN_VXFORM(vmulhud, 4, 11); -GEN_VXFORM_DUAL(vmuleuw, PPC_ALTIVEC, PPC_NONE, - vmulhuw, PPC_NONE, PPC2_ISA310); -GEN_VXFORM(vmulesb, 4, 12); -GEN_VXFORM(vmulesh, 4, 13); -GEN_VXFORM(vmulesw, 4, 14); -GEN_VXFORM(vmulhsw, 4, 14); -GEN_VXFORM_DUAL(vmulesw, PPC_ALTIVEC, PPC_NONE, - vmulhsw, PPC_NONE, PPC2_ISA310); -GEN_VXFORM(vmulhsd, 4, 15); GEN_VXFORM_V(vslb, MO_8, tcg_gen_gvec_shlv, 2, 4); GEN_VXFORM_V(vslh, MO_16, tcg_gen_gvec_shlv, 2, 5); GEN_VXFORM_V(vslw, MO_32, tcg_gen_gvec_shlv, 2, 6); @@ -1008,41 +985,274 @@ static void glue(gen_, 
name0##_##name1)(DisasContext *ctx) \ } \ } -GEN_VXRFORM(vcmpequb, 3, 0) -GEN_VXRFORM(vcmpequh, 3, 1) -GEN_VXRFORM(vcmpequw, 3, 2) -GEN_VXRFORM(vcmpequd, 3, 3) -GEN_VXRFORM(vcmpnezb, 3, 4) -GEN_VXRFORM(vcmpnezh, 3, 5) -GEN_VXRFORM(vcmpnezw, 3, 6) -GEN_VXRFORM(vcmpgtsb, 3, 12) -GEN_VXRFORM(vcmpgtsh, 3, 13) -GEN_VXRFORM(vcmpgtsw, 3, 14) -GEN_VXRFORM(vcmpgtsd, 3, 15) -GEN_VXRFORM(vcmpgtub, 3, 8) -GEN_VXRFORM(vcmpgtuh, 3, 9) -GEN_VXRFORM(vcmpgtuw, 3, 10) -GEN_VXRFORM(vcmpgtud, 3, 11) +static void do_vcmp_rc(int vrt) +{ + TCGv_i64 t0, t1; + + t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); + + get_avr64(t0, vrt, true); + tcg_gen_ctpop_i64(t1, t0); + get_avr64(t0, vrt, false); + tcg_gen_ctpop_i64(t0, t0); + tcg_gen_add_i64(t1, t0, t1); + + tcg_gen_setcondi_i64(TCG_COND_EQ, t0, t1, 0); + tcg_gen_shli_i64(t0, t0, 1); + + tcg_gen_setcondi_i64(TCG_COND_EQ, t1, t1, 128); + tcg_gen_shli_i64(t1, t1, 3); + + tcg_gen_or_i64(t0, t0, t1); + tcg_gen_extrl_i64_i32(cpu_crf[6], t0); + + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); +} + +static bool do_vcmp(DisasContext *ctx, arg_VC *a, TCGCond cond, int vece) +{ + REQUIRE_VECTOR(ctx); + + tcg_gen_gvec_cmp(cond, vece, avr_full_offset(a->vrt), + avr_full_offset(a->vra), avr_full_offset(a->vrb), 16, 16); + tcg_gen_gvec_shli(vece, avr_full_offset(a->vrt), avr_full_offset(a->vrt), + (8 << vece) - 1, 16, 16); + tcg_gen_gvec_sari(vece, avr_full_offset(a->vrt), avr_full_offset(a->vrt), + (8 << vece) - 1, 16, 16); + + if (a->rc) { + do_vcmp_rc(a->vrt); + } + + return true; +} + +TRANS_FLAGS(ALTIVEC, VCMPEQUB, do_vcmp, TCG_COND_EQ, MO_8) +TRANS_FLAGS(ALTIVEC, VCMPEQUH, do_vcmp, TCG_COND_EQ, MO_16) +TRANS_FLAGS(ALTIVEC, VCMPEQUW, do_vcmp, TCG_COND_EQ, MO_32) +TRANS_FLAGS2(ALTIVEC_207, VCMPEQUD, do_vcmp, TCG_COND_EQ, MO_64) + +TRANS_FLAGS(ALTIVEC, VCMPGTSB, do_vcmp, TCG_COND_GT, MO_8) +TRANS_FLAGS(ALTIVEC, VCMPGTSH, do_vcmp, TCG_COND_GT, MO_16) +TRANS_FLAGS(ALTIVEC, VCMPGTSW, do_vcmp, TCG_COND_GT, MO_32) +TRANS_FLAGS2(ALTIVEC_207, VCMPGTSD, do_vcmp, TCG_COND_GT, MO_64) +TRANS_FLAGS(ALTIVEC, VCMPGTUB, do_vcmp, TCG_COND_GTU, MO_8) +TRANS_FLAGS(ALTIVEC, VCMPGTUH, do_vcmp, TCG_COND_GTU, MO_16) +TRANS_FLAGS(ALTIVEC, VCMPGTUW, do_vcmp, TCG_COND_GTU, MO_32) +TRANS_FLAGS2(ALTIVEC_207, VCMPGTUD, do_vcmp, TCG_COND_GTU, MO_64) + +TRANS_FLAGS2(ISA300, VCMPNEB, do_vcmp, TCG_COND_NE, MO_8) +TRANS_FLAGS2(ISA300, VCMPNEH, do_vcmp, TCG_COND_NE, MO_16) +TRANS_FLAGS2(ISA300, VCMPNEW, do_vcmp, TCG_COND_NE, MO_32) + +static void gen_vcmpnez_vec(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t0, t1, zero; + + t0 = tcg_temp_new_vec_matching(t); + t1 = tcg_temp_new_vec_matching(t); + zero = tcg_constant_vec_matching(t, vece, 0); + + tcg_gen_cmp_vec(TCG_COND_EQ, vece, t0, a, zero); + tcg_gen_cmp_vec(TCG_COND_EQ, vece, t1, b, zero); + tcg_gen_cmp_vec(TCG_COND_NE, vece, t, a, b); + + tcg_gen_or_vec(vece, t, t, t0); + tcg_gen_or_vec(vece, t, t, t1); + + tcg_gen_shli_vec(vece, t, t, (8 << vece) - 1); + tcg_gen_sari_vec(vece, t, t, (8 << vece) - 1); + + tcg_temp_free_vec(t0); + tcg_temp_free_vec(t1); +} + +static bool do_vcmpnez(DisasContext *ctx, arg_VC *a, int vece) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_cmp_vec, INDEX_op_shli_vec, INDEX_op_sari_vec, 0 + }; + static const GVecGen3 ops[3] = { + { + .fniv = gen_vcmpnez_vec, + .fno = gen_helper_VCMPNEZB, + .opt_opc = vecop_list, + .vece = MO_8 + }, + { + .fniv = gen_vcmpnez_vec, + .fno = gen_helper_VCMPNEZH, + .opt_opc = vecop_list, + .vece = MO_16 + }, + { + .fniv = gen_vcmpnez_vec, + .fno = 
gen_helper_VCMPNEZW, + .opt_opc = vecop_list, + .vece = MO_32 + } + }; + + REQUIRE_INSNS_FLAGS2(ctx, ISA300); + REQUIRE_VECTOR(ctx); + + tcg_gen_gvec_3(avr_full_offset(a->vrt), avr_full_offset(a->vra), + avr_full_offset(a->vrb), 16, 16, &ops[vece]); + + if (a->rc) { + do_vcmp_rc(a->vrt); + } + + return true; +} + +TRANS(VCMPNEZB, do_vcmpnez, MO_8) +TRANS(VCMPNEZH, do_vcmpnez, MO_16) +TRANS(VCMPNEZW, do_vcmpnez, MO_32) + +static bool trans_VCMPEQUQ(DisasContext *ctx, arg_VC *a) +{ + TCGv_i64 t0, t1; + TCGLabel *l1, *l2; + + REQUIRE_INSNS_FLAGS2(ctx, ISA310); + REQUIRE_VECTOR(ctx); + + t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); + l1 = gen_new_label(); + l2 = gen_new_label(); + + get_avr64(t0, a->vra, true); + get_avr64(t1, a->vrb, true); + tcg_gen_brcond_i64(TCG_COND_NE, t0, t1, l1); + + get_avr64(t0, a->vra, false); + get_avr64(t1, a->vrb, false); + tcg_gen_brcond_i64(TCG_COND_NE, t0, t1, l1); + + set_avr64(a->vrt, tcg_constant_i64(-1), true); + set_avr64(a->vrt, tcg_constant_i64(-1), false); + if (a->rc) { + tcg_gen_movi_i32(cpu_crf[6], 1 << 3); + } + tcg_gen_br(l2); + + gen_set_label(l1); + set_avr64(a->vrt, tcg_constant_i64(0), true); + set_avr64(a->vrt, tcg_constant_i64(0), false); + if (a->rc) { + tcg_gen_movi_i32(cpu_crf[6], 1 << 1); + } + + gen_set_label(l2); + + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); + + return true; +} + +static bool do_vcmpgtq(DisasContext *ctx, arg_VC *a, bool sign) +{ + TCGv_i64 t0, t1; + TCGLabel *l1, *l2, *l3; + + REQUIRE_INSNS_FLAGS2(ctx, ISA310); + REQUIRE_VECTOR(ctx); + + t0 = tcg_temp_local_new_i64(); + t1 = tcg_temp_local_new_i64(); + l1 = gen_new_label(); + l2 = gen_new_label(); + l3 = gen_new_label(); + + get_avr64(t0, a->vra, true); + get_avr64(t1, a->vrb, true); + tcg_gen_brcond_i64(sign ? TCG_COND_GT : TCG_COND_GTU, t0, t1, l1); + tcg_gen_brcond_i64(sign ? TCG_COND_LT : TCG_COND_LTU, t0, t1, l2); + + get_avr64(t0, a->vra, false); + get_avr64(t1, a->vrb, false); + tcg_gen_brcond_i64(TCG_COND_GTU, t0, t1, l1); + tcg_gen_br(l2); + + gen_set_label(l1); + set_avr64(a->vrt, tcg_constant_i64(-1), true); + set_avr64(a->vrt, tcg_constant_i64(-1), false); + if (a->rc) { + tcg_gen_movi_i32(cpu_crf[6], 1 << 3); + } + tcg_gen_br(l3); + + gen_set_label(l2); + set_avr64(a->vrt, tcg_constant_i64(0), true); + set_avr64(a->vrt, tcg_constant_i64(0), false); + if (a->rc) { + tcg_gen_movi_i32(cpu_crf[6], 1 << 1); + } + + gen_set_label(l3); + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); + + return true; +} + +TRANS(VCMPGTSQ, do_vcmpgtq, true) +TRANS(VCMPGTUQ, do_vcmpgtq, false) + +static bool do_vcmpq(DisasContext *ctx, arg_VX_bf *a, bool sign) +{ + TCGv_i64 vra, vrb; + TCGLabel *gt, *lt, *done; + + REQUIRE_INSNS_FLAGS2(ctx, ISA310); + REQUIRE_VECTOR(ctx); + + vra = tcg_temp_local_new_i64(); + vrb = tcg_temp_local_new_i64(); + gt = gen_new_label(); + lt = gen_new_label(); + done = gen_new_label(); + + get_avr64(vra, a->vra, true); + get_avr64(vrb, a->vrb, true); + tcg_gen_brcond_i64((sign ? TCG_COND_GT : TCG_COND_GTU), vra, vrb, gt); + tcg_gen_brcond_i64((sign ? 
TCG_COND_LT : TCG_COND_LTU), vra, vrb, lt); + + get_avr64(vra, a->vra, false); + get_avr64(vrb, a->vrb, false); + tcg_gen_brcond_i64(TCG_COND_GTU, vra, vrb, gt); + tcg_gen_brcond_i64(TCG_COND_LTU, vra, vrb, lt); + + tcg_gen_movi_i32(cpu_crf[a->bf], CRF_EQ); + tcg_gen_br(done); + + gen_set_label(gt); + tcg_gen_movi_i32(cpu_crf[a->bf], CRF_GT); + tcg_gen_br(done); + + gen_set_label(lt); + tcg_gen_movi_i32(cpu_crf[a->bf], CRF_LT); + tcg_gen_br(done); + + gen_set_label(done); + tcg_temp_free_i64(vra); + tcg_temp_free_i64(vrb); + + return true; +} + +TRANS(VCMPSQ, do_vcmpq, true) +TRANS(VCMPUQ, do_vcmpq, false) + GEN_VXRFORM(vcmpeqfp, 3, 3) GEN_VXRFORM(vcmpgefp, 3, 7) GEN_VXRFORM(vcmpgtfp, 3, 11) GEN_VXRFORM(vcmpbfp, 3, 15) -GEN_VXRFORM(vcmpneb, 3, 0) -GEN_VXRFORM(vcmpneh, 3, 1) -GEN_VXRFORM(vcmpnew, 3, 2) - -GEN_VXRFORM_DUAL(vcmpequb, PPC_ALTIVEC, PPC_NONE, \ - vcmpneb, PPC_NONE, PPC2_ISA300) -GEN_VXRFORM_DUAL(vcmpequh, PPC_ALTIVEC, PPC_NONE, \ - vcmpneh, PPC_NONE, PPC2_ISA300) -GEN_VXRFORM_DUAL(vcmpequw, PPC_ALTIVEC, PPC_NONE, \ - vcmpnew, PPC_NONE, PPC2_ISA300) -GEN_VXRFORM_DUAL(vcmpeqfp, PPC_ALTIVEC, PPC_NONE, \ - vcmpequd, PPC_NONE, PPC2_ALTIVEC_207) -GEN_VXRFORM_DUAL(vcmpbfp, PPC_ALTIVEC, PPC_NONE, \ - vcmpgtsd, PPC_NONE, PPC2_ALTIVEC_207) -GEN_VXRFORM_DUAL(vcmpgtfp, PPC_ALTIVEC, PPC_NONE, \ - vcmpgtud, PPC_NONE, PPC2_ALTIVEC_207) static void gen_vsplti(DisasContext *ctx, int vece) { @@ -1228,6 +1438,50 @@ GEN_VXFORM_DUAL(vsplth, PPC_ALTIVEC, PPC_NONE, GEN_VXFORM_DUAL(vspltw, PPC_ALTIVEC, PPC_NONE, vextractuw, PPC_NONE, PPC2_ISA300); +static bool trans_VGNB(DisasContext *ctx, arg_VX_n *a) +{ + TCGv_i64 vrb, tmp, rt; + int in = 63, out = 63; + + REQUIRE_INSNS_FLAGS2(ctx, ISA310); + REQUIRE_VECTOR(ctx); + + if (a->n < 2) { + /* + * "N can be any value between 2 and 7, inclusive." Otherwise, the + * result is undefined, so we don't need to change RT. Also, N > 7 is + * impossible since the immediate field is 3 bits only. 
+ */ + return true; + } + + vrb = tcg_temp_new_i64(); + tmp = tcg_temp_new_i64(); + rt = tcg_const_i64(0); + + for (int dw = 1; dw >= 0; dw--) { + get_avr64(vrb, a->vrb, dw); + for (; in >= 0; in -= a->n, out--) { + if (in > out) { + tcg_gen_shri_i64(tmp, vrb, in - out); + } else { + tcg_gen_shli_i64(tmp, vrb, out - in); + } + tcg_gen_andi_i64(tmp, tmp, 1ULL << out); + tcg_gen_or_i64(rt, rt, tmp); + } + in += 64; + } + + tcg_gen_trunc_i64_tl(cpu_gpr[a->rt], rt); + + tcg_temp_free_i64(vrb); + tcg_temp_free_i64(tmp); + tcg_temp_free_i64(rt); + + return true; +} + static bool do_vextdx(DisasContext *ctx, arg_VA *a, int size, bool right, void (*gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv)) { @@ -1722,6 +1976,143 @@ static bool trans_MTVSRBMI(DisasContext *ctx, arg_DX_b *a) return true; } +static bool do_vcntmb(DisasContext *ctx, arg_VX_mp *a, int vece) +{ + TCGv_i64 rt, vrb, mask; + rt = tcg_const_i64(0); + vrb = tcg_temp_new_i64(); + mask = tcg_constant_i64(dup_const(vece, 1ULL << ((8 << vece) - 1))); + + for (int i = 0; i < 2; i++) { + get_avr64(vrb, a->vrb, i); + if (a->mp) { + tcg_gen_and_i64(vrb, mask, vrb); + } else { + tcg_gen_andc_i64(vrb, mask, vrb); + } + tcg_gen_ctpop_i64(vrb, vrb); + tcg_gen_add_i64(rt, rt, vrb); + } + + tcg_gen_shli_i64(rt, rt, TARGET_LONG_BITS - 8 + vece); + tcg_gen_trunc_i64_tl(cpu_gpr[a->rt], rt); + + tcg_temp_free_i64(vrb); + tcg_temp_free_i64(rt); + + return true; +} + +TRANS(VCNTMBB, do_vcntmb, MO_8) +TRANS(VCNTMBH, do_vcntmb, MO_16) +TRANS(VCNTMBW, do_vcntmb, MO_32) +TRANS(VCNTMBD, do_vcntmb, MO_64) + +static bool do_vstri(DisasContext *ctx, arg_VX_tb_rc *a, + void (*gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv)) +{ + TCGv_ptr vrt, vrb; + + REQUIRE_INSNS_FLAGS2(ctx, ISA310); + REQUIRE_VECTOR(ctx); + + vrt = gen_avr_ptr(a->vrt); + vrb = gen_avr_ptr(a->vrb); + + gen_helper(cpu_env, vrt, vrb, tcg_constant_tl(a->rc)); + + tcg_temp_free_ptr(vrt); + tcg_temp_free_ptr(vrb); + + return true; +} + +TRANS(VSTRIBL, do_vstri, gen_helper_VSTRIBL) +TRANS(VSTRIBR, do_vstri, gen_helper_VSTRIBR) +TRANS(VSTRIHL, do_vstri, gen_helper_VSTRIHL) +TRANS(VSTRIHR, do_vstri, gen_helper_VSTRIHR) + +static bool do_vclrb(DisasContext *ctx, arg_VX *a, bool right) +{ + TCGv_i64 hi, lo, rb; + TCGLabel *l, *end; + + REQUIRE_INSNS_FLAGS2(ctx, ISA310); + REQUIRE_VECTOR(ctx); + + l = gen_new_label(); + end = gen_new_label(); + + hi = tcg_const_local_i64(0); + lo = tcg_const_local_i64(0); + rb = tcg_temp_local_new_i64(); + + tcg_gen_extu_tl_i64(rb, cpu_gpr[a->vrb]); + + /* RB == 0: all zeros */ + tcg_gen_brcondi_i64(TCG_COND_EQ, rb, 0, end); + + if (right) { + get_avr64(hi, a->vra, true); + } else { + get_avr64(lo, a->vra, false); + } + + /* RB <= 8 */ + tcg_gen_brcondi_i64(TCG_COND_LEU, rb, 8, l); + + if (right) { + get_avr64(lo, a->vra, false); + } else { + get_avr64(hi, a->vra, true); + } + + /* RB >= 16: just copy VRA to VRB */ + tcg_gen_brcondi_i64(TCG_COND_GEU, rb, 16, end); + + /* 8 < RB < 16: */ + tcg_gen_subfi_i64(rb, 16, rb); + tcg_gen_shli_i64(rb, rb, 3); + if (right) { + /* copy hi and partially clear lo */ + tcg_gen_shr_i64(lo, lo, rb); + tcg_gen_shl_i64(lo, lo, rb); + } else { + /* copy lo and partially clear hi */ + tcg_gen_shl_i64(hi, hi, rb); + tcg_gen_shr_i64(hi, hi, rb); + } + tcg_gen_br(end); + + /* 0 < RB <= 8: */ + gen_set_label(l); + tcg_gen_subfi_i64(rb, 8, rb); + tcg_gen_shli_i64(rb, rb, 3); + if (right) { + /* zeroes lo and partially clears hi */ + tcg_gen_shr_i64(hi, hi, rb); + tcg_gen_shl_i64(hi, hi, rb); + } else { + /* zeroes hi and partially clears 
lo */ + tcg_gen_shl_i64(lo, lo, rb); + tcg_gen_shr_i64(lo, lo, rb); + } + + /* Update VRT */ + gen_set_label(end); + set_avr64(a->vrt, hi, true); + set_avr64(a->vrt, lo, false); + + tcg_temp_free_i64(hi); + tcg_temp_free_i64(lo); + tcg_temp_free_i64(rb); + + return true; +} + +TRANS(VCLRLB, do_vclrb, false) +TRANS(VCLRRB, do_vclrb, true) + #define GEN_VAFORM_PAIRED(name0, name1, opc2) \ static void glue(gen_, name0##_##name1)(DisasContext *ctx) \ { \ @@ -1765,28 +2156,65 @@ static void gen_vmladduhm(DisasContext *ctx) tcg_temp_free_ptr(rd); } -static void gen_vpermr(DisasContext *ctx) +static bool trans_VPERM(DisasContext *ctx, arg_VA *a) { - TCGv_ptr ra, rb, rc, rd; - if (unlikely(!ctx->altivec_enabled)) { - gen_exception(ctx, POWERPC_EXCP_VPU); - return; - } - ra = gen_avr_ptr(rA(ctx->opcode)); - rb = gen_avr_ptr(rB(ctx->opcode)); - rc = gen_avr_ptr(rC(ctx->opcode)); - rd = gen_avr_ptr(rD(ctx->opcode)); - gen_helper_vpermr(cpu_env, rd, ra, rb, rc); - tcg_temp_free_ptr(ra); - tcg_temp_free_ptr(rb); - tcg_temp_free_ptr(rc); - tcg_temp_free_ptr(rd); + TCGv_ptr vrt, vra, vrb, vrc; + + REQUIRE_INSNS_FLAGS(ctx, ALTIVEC); + REQUIRE_VECTOR(ctx); + + vrt = gen_avr_ptr(a->vrt); + vra = gen_avr_ptr(a->vra); + vrb = gen_avr_ptr(a->vrb); + vrc = gen_avr_ptr(a->rc); + + gen_helper_VPERM(vrt, vra, vrb, vrc); + + tcg_temp_free_ptr(vrt); + tcg_temp_free_ptr(vra); + tcg_temp_free_ptr(vrb); + tcg_temp_free_ptr(vrc); + + return true; +} + +static bool trans_VPERMR(DisasContext *ctx, arg_VA *a) +{ + TCGv_ptr vrt, vra, vrb, vrc; + + REQUIRE_INSNS_FLAGS2(ctx, ISA300); + REQUIRE_VECTOR(ctx); + + vrt = gen_avr_ptr(a->vrt); + vra = gen_avr_ptr(a->vra); + vrb = gen_avr_ptr(a->vrb); + vrc = gen_avr_ptr(a->rc); + + gen_helper_VPERMR(vrt, vra, vrb, vrc); + + tcg_temp_free_ptr(vrt); + tcg_temp_free_ptr(vra); + tcg_temp_free_ptr(vrb); + tcg_temp_free_ptr(vrc); + + return true; +} + +static bool trans_VSEL(DisasContext *ctx, arg_VA *a) +{ + REQUIRE_INSNS_FLAGS(ctx, ALTIVEC); + REQUIRE_VECTOR(ctx); + + tcg_gen_gvec_bitsel(MO_64, avr_full_offset(a->vrt), avr_full_offset(a->rc), + avr_full_offset(a->vrb), avr_full_offset(a->vra), + 16, 16); + + return true; } GEN_VAFORM_PAIRED(vmsumubm, vmsummbm, 18) GEN_VAFORM_PAIRED(vmsumuhm, vmsumuhs, 19) GEN_VAFORM_PAIRED(vmsumshm, vmsumshs, 20) -GEN_VAFORM_PAIRED(vsel, vperm, 21) GEN_VAFORM_PAIRED(vmaddfp, vnmsubfp, 23) GEN_VXFORM_NOA(vclzb, 1, 28) @@ -1795,11 +2223,44 @@ GEN_VXFORM_TRANS(vclzw, 1, 30) GEN_VXFORM_TRANS(vclzd, 1, 31) GEN_VXFORM_NOA_2(vnegw, 1, 24, 6) GEN_VXFORM_NOA_2(vnegd, 1, 24, 7) -GEN_VXFORM_NOA_2(vextsb2w, 1, 24, 16) -GEN_VXFORM_NOA_2(vextsh2w, 1, 24, 17) -GEN_VXFORM_NOA_2(vextsb2d, 1, 24, 24) -GEN_VXFORM_NOA_2(vextsh2d, 1, 24, 25) -GEN_VXFORM_NOA_2(vextsw2d, 1, 24, 26) + +static bool do_vexts(DisasContext *ctx, arg_VX_tb *a, int vece, int s) +{ + REQUIRE_INSNS_FLAGS2(ctx, ISA300); + REQUIRE_VECTOR(ctx); + + tcg_gen_gvec_shli(vece, avr_full_offset(a->vrt), avr_full_offset(a->vrb), + s, 16, 16); + tcg_gen_gvec_sari(vece, avr_full_offset(a->vrt), avr_full_offset(a->vrt), + s, 16, 16); + + return true; +} + +TRANS(VEXTSB2W, do_vexts, MO_32, 24); +TRANS(VEXTSH2W, do_vexts, MO_32, 16); +TRANS(VEXTSB2D, do_vexts, MO_64, 56); +TRANS(VEXTSH2D, do_vexts, MO_64, 48); +TRANS(VEXTSW2D, do_vexts, MO_64, 32); + +static bool trans_VEXTSD2Q(DisasContext *ctx, arg_VX_tb *a) +{ + TCGv_i64 tmp; + + REQUIRE_INSNS_FLAGS2(ctx, ISA310); + REQUIRE_VECTOR(ctx); + + tmp = tcg_temp_new_i64(); + + get_avr64(tmp, a->vrb, false); + set_avr64(a->vrt, tmp, false); + tcg_gen_sari_i64(tmp, tmp, 
63); + set_avr64(a->vrt, tmp, true); + + tcg_temp_free_i64(tmp); + return true; +} + GEN_VXFORM_NOA_2(vctzb, 1, 24, 28) GEN_VXFORM_NOA_2(vctzh, 1, 24, 29) GEN_VXFORM_NOA_2(vctzw, 1, 24, 30) @@ -2104,6 +2565,289 @@ static bool trans_VPEXTD(DisasContext *ctx, arg_VX *a) return true; } +static bool trans_VMSUMUDM(DisasContext *ctx, arg_VA *a) +{ + TCGv_i64 rl, rh, src1, src2; + int dw; + + REQUIRE_INSNS_FLAGS2(ctx, ISA300); + REQUIRE_VECTOR(ctx); + + rh = tcg_temp_new_i64(); + rl = tcg_temp_new_i64(); + src1 = tcg_temp_new_i64(); + src2 = tcg_temp_new_i64(); + + get_avr64(rl, a->rc, false); + get_avr64(rh, a->rc, true); + + for (dw = 0; dw < 2; dw++) { + get_avr64(src1, a->vra, dw); + get_avr64(src2, a->vrb, dw); + tcg_gen_mulu2_i64(src1, src2, src1, src2); + tcg_gen_add2_i64(rl, rh, rl, rh, src1, src2); + } + + set_avr64(a->vrt, rl, false); + set_avr64(a->vrt, rh, true); + + tcg_temp_free_i64(rl); + tcg_temp_free_i64(rh); + tcg_temp_free_i64(src1); + tcg_temp_free_i64(src2); + + return true; +} + +static bool trans_VMSUMCUD(DisasContext *ctx, arg_VA *a) +{ + TCGv_i64 tmp0, tmp1, prod1h, prod1l, prod0h, prod0l, zero; + + REQUIRE_INSNS_FLAGS2(ctx, ISA310); + REQUIRE_VECTOR(ctx); + + tmp0 = tcg_temp_new_i64(); + tmp1 = tcg_temp_new_i64(); + prod1h = tcg_temp_new_i64(); + prod1l = tcg_temp_new_i64(); + prod0h = tcg_temp_new_i64(); + prod0l = tcg_temp_new_i64(); + zero = tcg_constant_i64(0); + + /* prod1 = vsr[vra+32].dw[1] * vsr[vrb+32].dw[1] */ + get_avr64(tmp0, a->vra, false); + get_avr64(tmp1, a->vrb, false); + tcg_gen_mulu2_i64(prod1l, prod1h, tmp0, tmp1); + + /* prod0 = vsr[vra+32].dw[0] * vsr[vrb+32].dw[0] */ + get_avr64(tmp0, a->vra, true); + get_avr64(tmp1, a->vrb, true); + tcg_gen_mulu2_i64(prod0l, prod0h, tmp0, tmp1); + + /* Sum lower 64-bits elements */ + get_avr64(tmp1, a->rc, false); + tcg_gen_add2_i64(tmp1, tmp0, tmp1, zero, prod1l, zero); + tcg_gen_add2_i64(tmp1, tmp0, tmp1, tmp0, prod0l, zero); + + /* + * Discard lower 64-bits, leaving the carry into bit 64. + * Then sum the higher 64-bit elements. 
+ */ + tcg_gen_mov_i64(tmp1, tmp0); + get_avr64(tmp0, a->rc, true); + tcg_gen_add2_i64(tmp1, tmp0, tmp0, zero, prod1h, zero); + tcg_gen_add2_i64(tmp1, tmp0, tmp1, tmp0, prod0h, zero); + + /* Discard 64 more bits to complete the CHOP128(temp >> 128) */ + set_avr64(a->vrt, tmp0, false); + set_avr64(a->vrt, zero, true); + + tcg_temp_free_i64(tmp0); + tcg_temp_free_i64(tmp1); + tcg_temp_free_i64(prod1h); + tcg_temp_free_i64(prod1l); + tcg_temp_free_i64(prod0h); + tcg_temp_free_i64(prod0l); + + return true; +} + +static bool do_vx_helper(DisasContext *ctx, arg_VX *a, + void (*gen_helper) (TCGv_ptr, TCGv_ptr, TCGv_ptr)) +{ + TCGv_ptr ra, rb, rd; + REQUIRE_VECTOR(ctx); + + ra = gen_avr_ptr(a->vra); + rb = gen_avr_ptr(a->vrb); + rd = gen_avr_ptr(a->vrt); + gen_helper(rd, ra, rb); + tcg_temp_free_ptr(ra); + tcg_temp_free_ptr(rb); + tcg_temp_free_ptr(rd); + + return true; +} + +static bool trans_VMULLD(DisasContext *ctx, arg_VX *a) +{ + REQUIRE_INSNS_FLAGS2(ctx, ISA310); + REQUIRE_VECTOR(ctx); + + tcg_gen_gvec_mul(MO_64, avr_full_offset(a->vrt), avr_full_offset(a->vra), + avr_full_offset(a->vrb), 16, 16); + + return true; +} + +TRANS_FLAGS2(ALTIVEC_207, VMULESB, do_vx_helper, gen_helper_VMULESB) +TRANS_FLAGS2(ALTIVEC_207, VMULOSB, do_vx_helper, gen_helper_VMULOSB) +TRANS_FLAGS2(ALTIVEC_207, VMULEUB, do_vx_helper, gen_helper_VMULEUB) +TRANS_FLAGS2(ALTIVEC_207, VMULOUB, do_vx_helper, gen_helper_VMULOUB) +TRANS_FLAGS2(ALTIVEC_207, VMULESH, do_vx_helper, gen_helper_VMULESH) +TRANS_FLAGS2(ALTIVEC_207, VMULOSH, do_vx_helper, gen_helper_VMULOSH) +TRANS_FLAGS2(ALTIVEC_207, VMULEUH, do_vx_helper, gen_helper_VMULEUH) +TRANS_FLAGS2(ALTIVEC_207, VMULOUH, do_vx_helper, gen_helper_VMULOUH) +TRANS_FLAGS2(ALTIVEC_207, VMULESW, do_vx_helper, gen_helper_VMULESW) +TRANS_FLAGS2(ALTIVEC_207, VMULOSW, do_vx_helper, gen_helper_VMULOSW) +TRANS_FLAGS2(ALTIVEC_207, VMULEUW, do_vx_helper, gen_helper_VMULEUW) +TRANS_FLAGS2(ALTIVEC_207, VMULOUW, do_vx_helper, gen_helper_VMULOUW) +TRANS_FLAGS2(ISA310, VMULESD, do_vx_helper, gen_helper_VMULESD) +TRANS_FLAGS2(ISA310, VMULOSD, do_vx_helper, gen_helper_VMULOSD) +TRANS_FLAGS2(ISA310, VMULEUD, do_vx_helper, gen_helper_VMULEUD) +TRANS_FLAGS2(ISA310, VMULOUD, do_vx_helper, gen_helper_VMULOUD) + +static void do_vx_vmulhu_vec(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec a1, b1, mask, w, k; + unsigned bits; + bits = (vece == MO_32) ? 16 : 32; + + a1 = tcg_temp_new_vec_matching(t); + b1 = tcg_temp_new_vec_matching(t); + w = tcg_temp_new_vec_matching(t); + k = tcg_temp_new_vec_matching(t); + mask = tcg_temp_new_vec_matching(t); + + tcg_gen_dupi_vec(vece, mask, (vece == MO_32) ? 
0xFFFF : 0xFFFFFFFF); + tcg_gen_and_vec(vece, a1, a, mask); + tcg_gen_and_vec(vece, b1, b, mask); + tcg_gen_mul_vec(vece, t, a1, b1); + tcg_gen_shri_vec(vece, k, t, bits); + + tcg_gen_shri_vec(vece, a1, a, bits); + tcg_gen_mul_vec(vece, t, a1, b1); + tcg_gen_add_vec(vece, t, t, k); + tcg_gen_and_vec(vece, k, t, mask); + tcg_gen_shri_vec(vece, w, t, bits); + + tcg_gen_and_vec(vece, a1, a, mask); + tcg_gen_shri_vec(vece, b1, b, bits); + tcg_gen_mul_vec(vece, t, a1, b1); + tcg_gen_add_vec(vece, t, t, k); + tcg_gen_shri_vec(vece, k, t, bits); + + tcg_gen_shri_vec(vece, a1, a, bits); + tcg_gen_mul_vec(vece, t, a1, b1); + tcg_gen_add_vec(vece, t, t, w); + tcg_gen_add_vec(vece, t, t, k); + + tcg_temp_free_vec(a1); + tcg_temp_free_vec(b1); + tcg_temp_free_vec(w); + tcg_temp_free_vec(k); + tcg_temp_free_vec(mask); +} + +static bool do_vx_mulhu(DisasContext *ctx, arg_VX *a, unsigned vece) +{ + REQUIRE_INSNS_FLAGS2(ctx, ISA310); + REQUIRE_VECTOR(ctx); + + static const TCGOpcode vecop_list[] = { + INDEX_op_mul_vec, INDEX_op_add_vec, INDEX_op_shri_vec, 0 + }; + + static const GVecGen3 op[2] = { + { + .fniv = do_vx_vmulhu_vec, + .fno = gen_helper_VMULHUW, + .opt_opc = vecop_list, + .vece = MO_32 + }, + { + .fniv = do_vx_vmulhu_vec, + .fno = gen_helper_VMULHUD, + .opt_opc = vecop_list, + .vece = MO_64 + }, + }; + + tcg_gen_gvec_3(avr_full_offset(a->vrt), avr_full_offset(a->vra), + avr_full_offset(a->vrb), 16, 16, &op[vece - MO_32]); + + return true; + +} + +static void do_vx_vmulhs_vec(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec a1, b1, mask, w, k; + unsigned bits; + bits = (vece == MO_32) ? 16 : 32; + + a1 = tcg_temp_new_vec_matching(t); + b1 = tcg_temp_new_vec_matching(t); + w = tcg_temp_new_vec_matching(t); + k = tcg_temp_new_vec_matching(t); + mask = tcg_temp_new_vec_matching(t); + + tcg_gen_dupi_vec(vece, mask, (vece == MO_32) ? 
0xFFFF : 0xFFFFFFFF); + tcg_gen_and_vec(vece, a1, a, mask); + tcg_gen_and_vec(vece, b1, b, mask); + tcg_gen_mul_vec(vece, t, a1, b1); + tcg_gen_shri_vec(vece, k, t, bits); + + tcg_gen_sari_vec(vece, a1, a, bits); + tcg_gen_mul_vec(vece, t, a1, b1); + tcg_gen_add_vec(vece, t, t, k); + tcg_gen_and_vec(vece, k, t, mask); + tcg_gen_sari_vec(vece, w, t, bits); + + tcg_gen_and_vec(vece, a1, a, mask); + tcg_gen_sari_vec(vece, b1, b, bits); + tcg_gen_mul_vec(vece, t, a1, b1); + tcg_gen_add_vec(vece, t, t, k); + tcg_gen_sari_vec(vece, k, t, bits); + + tcg_gen_sari_vec(vece, a1, a, bits); + tcg_gen_mul_vec(vece, t, a1, b1); + tcg_gen_add_vec(vece, t, t, w); + tcg_gen_add_vec(vece, t, t, k); + + tcg_temp_free_vec(a1); + tcg_temp_free_vec(b1); + tcg_temp_free_vec(w); + tcg_temp_free_vec(k); + tcg_temp_free_vec(mask); +} + +static bool do_vx_mulhs(DisasContext *ctx, arg_VX *a, unsigned vece) +{ + REQUIRE_INSNS_FLAGS2(ctx, ISA310); + REQUIRE_VECTOR(ctx); + + static const TCGOpcode vecop_list[] = { + INDEX_op_mul_vec, INDEX_op_add_vec, INDEX_op_shri_vec, + INDEX_op_sari_vec, 0 + }; + + static const GVecGen3 op[2] = { + { + .fniv = do_vx_vmulhs_vec, + .fno = gen_helper_VMULHSW, + .opt_opc = vecop_list, + .vece = MO_32 + }, + { + .fniv = do_vx_vmulhs_vec, + .fno = gen_helper_VMULHSD, + .opt_opc = vecop_list, + .vece = MO_64 + }, + }; + + tcg_gen_gvec_3(avr_full_offset(a->vrt), avr_full_offset(a->vra), + avr_full_offset(a->vrb), 16, 16, &op[vece - MO_32]); + + return true; +} + +TRANS(VMULHSW, do_vx_mulhs, MO_32) +TRANS(VMULHSD, do_vx_mulhs, MO_64) +TRANS(VMULHUW, do_vx_mulhu, MO_32) +TRANS(VMULHUD, do_vx_mulhu, MO_64) + #undef GEN_VR_LDX #undef GEN_VR_STX #undef GEN_VR_LVE diff --git a/target/ppc/translate/vmx-ops.c.inc b/target/ppc/translate/vmx-ops.c.inc index 25ee715b4326b..2895e8a114f05 100644 --- a/target/ppc/translate/vmx-ops.c.inc +++ b/target/ppc/translate/vmx-ops.c.inc @@ -101,21 +101,7 @@ GEN_VXFORM_DUAL(vmrgow, vextuwlx, 6, 26, PPC_NONE, PPC2_ALTIVEC_207), GEN_VXFORM_300(vextubrx, 6, 28), GEN_VXFORM_300(vextuhrx, 6, 29), GEN_VXFORM_DUAL(vmrgew, vextuwrx, 6, 30, PPC_NONE, PPC2_ALTIVEC_207), -GEN_VXFORM(vmuloub, 4, 0), -GEN_VXFORM(vmulouh, 4, 1), -GEN_VXFORM_DUAL(vmulouw, vmuluwm, 4, 2, PPC_ALTIVEC, PPC_NONE), -GEN_VXFORM(vmulosb, 4, 4), -GEN_VXFORM(vmulosh, 4, 5), -GEN_VXFORM_207(vmulosw, 4, 6), -GEN_VXFORM_310(vmulld, 4, 7), -GEN_VXFORM(vmuleub, 4, 8), -GEN_VXFORM(vmuleuh, 4, 9), -GEN_VXFORM_DUAL(vmuleuw, vmulhuw, 4, 10, PPC_ALTIVEC, PPC_NONE), -GEN_VXFORM_310(vmulhud, 4, 11), -GEN_VXFORM(vmulesb, 4, 12), -GEN_VXFORM(vmulesh, 4, 13), -GEN_VXFORM_DUAL(vmulesw, vmulhsw, 4, 14, PPC_ALTIVEC, PPC_NONE), -GEN_VXFORM_310(vmulhsd, 4, 15), +GEN_VXFORM_207(vmuluwm, 4, 2), GEN_VXFORM(vslb, 2, 4), GEN_VXFORM(vslh, 2, 5), GEN_VXFORM_DUAL(vslw, vrlwnm, 2, 6, PPC_ALTIVEC, PPC_NONE), @@ -198,22 +184,10 @@ GEN_HANDLER2_E(name, str, 0x4, opc2, opc3, 0x00000000, PPC_NONE, PPC2_ISA300), GEN_VXRFORM1_300(name, name, #name, opc2, opc3) \ GEN_VXRFORM1_300(name##_dot, name##_, #name ".", opc2, (opc3 | (0x1 << 4))) -GEN_VXRFORM_300(vcmpnezb, 3, 4) -GEN_VXRFORM_300(vcmpnezh, 3, 5) -GEN_VXRFORM_300(vcmpnezw, 3, 6) -GEN_VXRFORM(vcmpgtsb, 3, 12) -GEN_VXRFORM(vcmpgtsh, 3, 13) -GEN_VXRFORM(vcmpgtsw, 3, 14) -GEN_VXRFORM(vcmpgtub, 3, 8) -GEN_VXRFORM(vcmpgtuh, 3, 9) -GEN_VXRFORM(vcmpgtuw, 3, 10) -GEN_VXRFORM_DUAL(vcmpeqfp, vcmpequd, 3, 3, PPC_ALTIVEC, PPC_NONE) +GEN_VXRFORM(vcmpeqfp, 3, 3) GEN_VXRFORM(vcmpgefp, 3, 7) -GEN_VXRFORM_DUAL(vcmpgtfp, vcmpgtud, 3, 11, PPC_ALTIVEC, PPC_NONE) -GEN_VXRFORM_DUAL(vcmpbfp, vcmpgtsd, 3, 15, 
PPC_ALTIVEC, PPC_NONE) -GEN_VXRFORM_DUAL(vcmpequb, vcmpneb, 3, 0, PPC_ALTIVEC, PPC_NONE) -GEN_VXRFORM_DUAL(vcmpequh, vcmpneh, 3, 1, PPC_ALTIVEC, PPC_NONE) -GEN_VXRFORM_DUAL(vcmpequw, vcmpnew, 3, 2, PPC_ALTIVEC, PPC_NONE) +GEN_VXRFORM(vcmpgtfp, 3, 11) +GEN_VXRFORM(vcmpbfp, 3, 15) #define GEN_VXFORM_DUAL_INV(name0, name1, opc2, opc3, inval0, inval1, type) \ GEN_OPCODE_DUAL(name0##_##name1, 0x04, opc2, opc3, inval0, inval1, type, \ @@ -230,18 +204,12 @@ GEN_VXFORM(vspltish, 6, 13), GEN_VXFORM(vspltisw, 6, 14), GEN_VXFORM_300_EO(vnegw, 0x01, 0x18, 0x06), GEN_VXFORM_300_EO(vnegd, 0x01, 0x18, 0x07), -GEN_VXFORM_300_EO(vextsb2w, 0x01, 0x18, 0x10), -GEN_VXFORM_300_EO(vextsh2w, 0x01, 0x18, 0x11), -GEN_VXFORM_300_EO(vextsb2d, 0x01, 0x18, 0x18), -GEN_VXFORM_300_EO(vextsh2d, 0x01, 0x18, 0x19), -GEN_VXFORM_300_EO(vextsw2d, 0x01, 0x18, 0x1A), GEN_VXFORM_300_EO(vctzb, 0x01, 0x18, 0x1C), GEN_VXFORM_300_EO(vctzh, 0x01, 0x18, 0x1D), GEN_VXFORM_300_EO(vctzw, 0x01, 0x18, 0x1E), GEN_VXFORM_300_EO(vctzd, 0x01, 0x18, 0x1F), GEN_VXFORM_300_EO(vclzlsbb, 0x01, 0x18, 0x0), GEN_VXFORM_300_EO(vctzlsbb, 0x01, 0x18, 0x1), -GEN_VXFORM_300(vpermr, 0x1D, 0xFF), #define GEN_VXFORM_NOA(name, opc2, opc3) \ GEN_HANDLER(name, 0x04, opc2, opc3, 0x001f0000, PPC_ALTIVEC) @@ -276,7 +244,6 @@ GEN_VAFORM_PAIRED(vmhaddshs, vmhraddshs, 16), GEN_VAFORM_PAIRED(vmsumubm, vmsummbm, 18), GEN_VAFORM_PAIRED(vmsumuhm, vmsumuhs, 19), GEN_VAFORM_PAIRED(vmsumshm, vmsumshs, 20), -GEN_VAFORM_PAIRED(vsel, vperm, 21), GEN_VAFORM_PAIRED(vmaddfp, vnmsubfp, 23), GEN_VXFORM_DUAL(vclzb, vpopcntb, 1, 28, PPC_NONE, PPC2_ALTIVEC_207), diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index c636e38164316..34ebc2c3626b0 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -665,45 +665,6 @@ static void gen_mtvsrws(DisasContext *ctx) #endif -static void gen_xxpermdi(DisasContext *ctx) -{ - TCGv_i64 xh, xl; - - if (unlikely(!ctx->vsx_enabled)) { - gen_exception(ctx, POWERPC_EXCP_VSXU); - return; - } - - xh = tcg_temp_new_i64(); - xl = tcg_temp_new_i64(); - - if (unlikely((xT(ctx->opcode) == xA(ctx->opcode)) || - (xT(ctx->opcode) == xB(ctx->opcode)))) { - get_cpu_vsr(xh, xA(ctx->opcode), (DM(ctx->opcode) & 2) == 0); - get_cpu_vsr(xl, xB(ctx->opcode), (DM(ctx->opcode) & 1) == 0); - - set_cpu_vsr(xT(ctx->opcode), xh, true); - set_cpu_vsr(xT(ctx->opcode), xl, false); - } else { - if ((DM(ctx->opcode) & 2) == 0) { - get_cpu_vsr(xh, xA(ctx->opcode), true); - set_cpu_vsr(xT(ctx->opcode), xh, true); - } else { - get_cpu_vsr(xh, xA(ctx->opcode), false); - set_cpu_vsr(xT(ctx->opcode), xh, true); - } - if ((DM(ctx->opcode) & 1) == 0) { - get_cpu_vsr(xl, xB(ctx->opcode), true); - set_cpu_vsr(xT(ctx->opcode), xl, false); - } else { - get_cpu_vsr(xl, xB(ctx->opcode), false); - set_cpu_vsr(xT(ctx->opcode), xl, false); - } - } - tcg_temp_free_i64(xh); - tcg_temp_free_i64(xl); -} - #define OP_ABS 1 #define OP_NABS 2 #define OP_NEG 3 @@ -1089,10 +1050,6 @@ GEN_VSX_HELPER_X2(xssqrtdp, 0x16, 0x04, 0, PPC2_VSX) GEN_VSX_HELPER_X2(xsrsqrtedp, 0x14, 0x04, 0, PPC2_VSX) GEN_VSX_HELPER_X2_AB(xstdivdp, 0x14, 0x07, 0, PPC2_VSX) GEN_VSX_HELPER_X1(xstsqrtdp, 0x14, 0x06, 0, PPC2_VSX) -GEN_VSX_HELPER_X3(xscmpeqdp, 0x0C, 0x00, 0, PPC2_ISA300) -GEN_VSX_HELPER_X3(xscmpgtdp, 0x0C, 0x01, 0, PPC2_ISA300) -GEN_VSX_HELPER_X3(xscmpgedp, 0x0C, 0x02, 0, PPC2_ISA300) -GEN_VSX_HELPER_X3(xscmpnedp, 0x0C, 0x03, 0, PPC2_ISA300) GEN_VSX_HELPER_X2_AB(xscmpexpdp, 0x0C, 0x07, 0, PPC2_ISA300) GEN_VSX_HELPER_R2_AB(xscmpexpqp, 0x04, 0x05, 0, 
PPC2_ISA300) GEN_VSX_HELPER_X2_AB(xscmpodp, 0x0C, 0x05, 0, PPC2_VSX) @@ -1198,8 +1155,193 @@ GEN_VSX_HELPER_X2(xvrspip, 0x12, 0x0A, 0, PPC2_VSX) GEN_VSX_HELPER_X2(xvrspiz, 0x12, 0x09, 0, PPC2_VSX) GEN_VSX_HELPER_2(xvtstdcsp, 0x14, 0x1A, 0, PPC2_VSX) GEN_VSX_HELPER_2(xvtstdcdp, 0x14, 0x1E, 0, PPC2_VSX) -GEN_VSX_HELPER_X3(xxperm, 0x08, 0x03, 0, PPC2_ISA300) -GEN_VSX_HELPER_X3(xxpermr, 0x08, 0x07, 0, PPC2_ISA300) + +static bool do_helper_XX3(DisasContext *ctx, arg_XX3 *a, + void (*helper)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr)) +{ + TCGv_ptr xt, xa, xb; + + REQUIRE_INSNS_FLAGS2(ctx, ISA300); + REQUIRE_VSX(ctx); + + xt = gen_vsr_ptr(a->xt); + xa = gen_vsr_ptr(a->xa); + xb = gen_vsr_ptr(a->xb); + + helper(cpu_env, xt, xa, xb); + + tcg_temp_free_ptr(xt); + tcg_temp_free_ptr(xa); + tcg_temp_free_ptr(xb); + + return true; +} + +TRANS(XXPERM, do_helper_XX3, gen_helper_VPERM); +TRANS(XXPERMR, do_helper_XX3, gen_helper_VPERMR); +TRANS(XSCMPEQDP, do_helper_XX3, gen_helper_XSCMPEQDP) +TRANS(XSCMPGEDP, do_helper_XX3, gen_helper_XSCMPGEDP) +TRANS(XSCMPGTDP, do_helper_XX3, gen_helper_XSCMPGTDP) +TRANS(XSMAXCDP, do_helper_XX3, gen_helper_XSMAXCDP) +TRANS(XSMINCDP, do_helper_XX3, gen_helper_XSMINCDP) +TRANS(XSMAXJDP, do_helper_XX3, gen_helper_XSMAXJDP) +TRANS(XSMINJDP, do_helper_XX3, gen_helper_XSMINJDP) + +static bool trans_XXPERMDI(DisasContext *ctx, arg_XX3_dm *a) +{ + TCGv_i64 t0, t1; + + REQUIRE_INSNS_FLAGS2(ctx, VSX); + REQUIRE_VSX(ctx); + + t0 = tcg_temp_new_i64(); + + if (unlikely(a->xt == a->xa || a->xt == a->xb)) { + t1 = tcg_temp_new_i64(); + + get_cpu_vsr(t0, a->xa, (a->dm & 2) == 0); + get_cpu_vsr(t1, a->xb, (a->dm & 1) == 0); + + set_cpu_vsr(a->xt, t0, true); + set_cpu_vsr(a->xt, t1, false); + + tcg_temp_free_i64(t1); + } else { + get_cpu_vsr(t0, a->xa, (a->dm & 2) == 0); + set_cpu_vsr(a->xt, t0, true); + + get_cpu_vsr(t0, a->xb, (a->dm & 1) == 0); + set_cpu_vsr(a->xt, t0, false); + } + + tcg_temp_free_i64(t0); + + return true; +} + +static bool trans_XXPERMX(DisasContext *ctx, arg_8RR_XX4_uim3 *a) +{ + TCGv_ptr xt, xa, xb, xc; + + REQUIRE_INSNS_FLAGS2(ctx, ISA310); + REQUIRE_VSX(ctx); + + xt = gen_vsr_ptr(a->xt); + xa = gen_vsr_ptr(a->xa); + xb = gen_vsr_ptr(a->xb); + xc = gen_vsr_ptr(a->xc); + + gen_helper_XXPERMX(xt, xa, xb, xc, tcg_constant_tl(a->uim3)); + + tcg_temp_free_ptr(xt); + tcg_temp_free_ptr(xa); + tcg_temp_free_ptr(xb); + tcg_temp_free_ptr(xc); + + return true; +} + +static bool do_xxgenpcv(DisasContext *ctx, arg_X_imm5 *a, + void (*gen_helper)(TCGv_ptr, TCGv_ptr, TCGv)) +{ + TCGv_ptr xt, vrb; + + REQUIRE_INSNS_FLAGS2(ctx, ISA310); + REQUIRE_VSX(ctx); + + if (a->imm & ~0x3) { + gen_invalid(ctx); + return true; + } + + xt = gen_vsr_ptr(a->xt); + vrb = gen_avr_ptr(a->vrb); + + gen_helper(xt, vrb, tcg_constant_tl(a->imm)); + + tcg_temp_free_ptr(xt); + tcg_temp_free_ptr(vrb); + + return true; +} + +TRANS(XXGENPCVBM, do_xxgenpcv, gen_helper_XXGENPCVBM) +TRANS(XXGENPCVHM, do_xxgenpcv, gen_helper_XXGENPCVHM) +TRANS(XXGENPCVWM, do_xxgenpcv, gen_helper_XXGENPCVWM) +TRANS(XXGENPCVDM, do_xxgenpcv, gen_helper_XXGENPCVDM) + +static bool do_xsmadd(DisasContext *ctx, int tgt, int src1, int src2, int src3, + void (gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr)) +{ + TCGv_ptr t, s1, s2, s3; + + t = gen_vsr_ptr(tgt); + s1 = gen_vsr_ptr(src1); + s2 = gen_vsr_ptr(src2); + s3 = gen_vsr_ptr(src3); + + gen_helper(cpu_env, t, s1, s2, s3); + + tcg_temp_free_ptr(t); + tcg_temp_free_ptr(s1); + tcg_temp_free_ptr(s2); + tcg_temp_free_ptr(s3); + + return true; +} + +static bool 
do_xsmadd_XX3(DisasContext *ctx, arg_XX3 *a, bool type_a, + void (gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr)) +{ + REQUIRE_VSX(ctx); + + if (type_a) { + return do_xsmadd(ctx, a->xt, a->xa, a->xt, a->xb, gen_helper); + } + return do_xsmadd(ctx, a->xt, a->xa, a->xb, a->xt, gen_helper); +} + +TRANS_FLAGS2(VSX, XSMADDADP, do_xsmadd_XX3, true, gen_helper_XSMADDDP) +TRANS_FLAGS2(VSX, XSMADDMDP, do_xsmadd_XX3, false, gen_helper_XSMADDDP) +TRANS_FLAGS2(VSX, XSMSUBADP, do_xsmadd_XX3, true, gen_helper_XSMSUBDP) +TRANS_FLAGS2(VSX, XSMSUBMDP, do_xsmadd_XX3, false, gen_helper_XSMSUBDP) +TRANS_FLAGS2(VSX, XSNMADDADP, do_xsmadd_XX3, true, gen_helper_XSNMADDDP) +TRANS_FLAGS2(VSX, XSNMADDMDP, do_xsmadd_XX3, false, gen_helper_XSNMADDDP) +TRANS_FLAGS2(VSX, XSNMSUBADP, do_xsmadd_XX3, true, gen_helper_XSNMSUBDP) +TRANS_FLAGS2(VSX, XSNMSUBMDP, do_xsmadd_XX3, false, gen_helper_XSNMSUBDP) +TRANS_FLAGS2(VSX207, XSMADDASP, do_xsmadd_XX3, true, gen_helper_XSMADDSP) +TRANS_FLAGS2(VSX207, XSMADDMSP, do_xsmadd_XX3, false, gen_helper_XSMADDSP) +TRANS_FLAGS2(VSX207, XSMSUBASP, do_xsmadd_XX3, true, gen_helper_XSMSUBSP) +TRANS_FLAGS2(VSX207, XSMSUBMSP, do_xsmadd_XX3, false, gen_helper_XSMSUBSP) +TRANS_FLAGS2(VSX207, XSNMADDASP, do_xsmadd_XX3, true, gen_helper_XSNMADDSP) +TRANS_FLAGS2(VSX207, XSNMADDMSP, do_xsmadd_XX3, false, gen_helper_XSNMADDSP) +TRANS_FLAGS2(VSX207, XSNMSUBASP, do_xsmadd_XX3, true, gen_helper_XSNMSUBSP) +TRANS_FLAGS2(VSX207, XSNMSUBMSP, do_xsmadd_XX3, false, gen_helper_XSNMSUBSP) + +static bool do_xsmadd_X(DisasContext *ctx, arg_X_rc *a, + void (gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr), + void (gen_helper_ro)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr)) +{ + int vrt, vra, vrb; + + REQUIRE_INSNS_FLAGS2(ctx, ISA300); + REQUIRE_VSX(ctx); + + vrt = a->rt + 32; + vra = a->ra + 32; + vrb = a->rb + 32; + + if (a->rc) { + return do_xsmadd(ctx, vrt, vra, vrt, vrb, gen_helper_ro); + } + + return do_xsmadd(ctx, vrt, vra, vrt, vrb, gen_helper); +} + +TRANS(XSMADDQP, do_xsmadd_X, gen_helper_XSMADDQP, gen_helper_XSMADDQPO) +TRANS(XSMSUBQP, do_xsmadd_X, gen_helper_XSMSUBQP, gen_helper_XSMSUBQPO) +TRANS(XSNMADDQP, do_xsmadd_X, gen_helper_XSNMADDQP, gen_helper_XSNMADDQPO) +TRANS(XSNMSUBQP, do_xsmadd_X, gen_helper_XSNMSUBQP, gen_helper_XSNMSUBQPO) #define GEN_VSX_HELPER_VSX_MADD(name, op1, aop, mop, inval, type) \ static void gen_##name(DisasContext *ctx) \ @@ -1231,14 +1373,6 @@ static void gen_##name(DisasContext *ctx) \ tcg_temp_free_ptr(c); \ } -GEN_VSX_HELPER_VSX_MADD(xsmadddp, 0x04, 0x04, 0x05, 0, PPC2_VSX) -GEN_VSX_HELPER_VSX_MADD(xsmsubdp, 0x04, 0x06, 0x07, 0, PPC2_VSX) -GEN_VSX_HELPER_VSX_MADD(xsnmadddp, 0x04, 0x14, 0x15, 0, PPC2_VSX) -GEN_VSX_HELPER_VSX_MADD(xsnmsubdp, 0x04, 0x16, 0x17, 0, PPC2_VSX) -GEN_VSX_HELPER_VSX_MADD(xsmaddsp, 0x04, 0x00, 0x01, 0, PPC2_VSX207) -GEN_VSX_HELPER_VSX_MADD(xsmsubsp, 0x04, 0x02, 0x03, 0, PPC2_VSX207) -GEN_VSX_HELPER_VSX_MADD(xsnmaddsp, 0x04, 0x10, 0x11, 0, PPC2_VSX207) -GEN_VSX_HELPER_VSX_MADD(xsnmsubsp, 0x04, 0x12, 0x13, 0, PPC2_VSX207) GEN_VSX_HELPER_VSX_MADD(xvmadddp, 0x04, 0x0C, 0x0D, 0, PPC2_VSX) GEN_VSX_HELPER_VSX_MADD(xvmsubdp, 0x04, 0x0E, 0x0F, 0, PPC2_VSX) GEN_VSX_HELPER_VSX_MADD(xvnmadddp, 0x04, 0x1C, 0x1D, 0, PPC2_VSX) @@ -1420,22 +1554,18 @@ static void glue(gen_, name)(DisasContext *ctx) \ VSX_XXMRG(xxmrghw, 1) VSX_XXMRG(xxmrglw, 0) -static void gen_xxsel(DisasContext *ctx) +static bool trans_XXSEL(DisasContext *ctx, arg_XX4 *a) { - int rt = xT(ctx->opcode); - int ra = xA(ctx->opcode); - int rb = xB(ctx->opcode); - int 
rc = xC(ctx->opcode); + REQUIRE_INSNS_FLAGS2(ctx, VSX); + REQUIRE_VSX(ctx); - if (unlikely(!ctx->vsx_enabled)) { - gen_exception(ctx, POWERPC_EXCP_VSXU); - return; - } - tcg_gen_gvec_bitsel(MO_64, vsr_full_offset(rt), vsr_full_offset(rc), - vsr_full_offset(rb), vsr_full_offset(ra), 16, 16); + tcg_gen_gvec_bitsel(MO_64, vsr_full_offset(a->xt), vsr_full_offset(a->xc), + vsr_full_offset(a->xb), vsr_full_offset(a->xa), 16, 16); + + return true; } -static bool trans_XXSPLTW(DisasContext *ctx, arg_XX2 *a) +static bool trans_XXSPLTW(DisasContext *ctx, arg_XX2_uim2 *a) { int tofs, bofs; @@ -1545,6 +1675,43 @@ static bool trans_LXVKQ(DisasContext *ctx, arg_X_uim5 *a) return true; } +static bool trans_XVTLSBB(DisasContext *ctx, arg_XX2_bf_xb *a) +{ + TCGv_i64 xb, tmp, all_true, all_false, mask, zero; + + REQUIRE_INSNS_FLAGS2(ctx, ISA310); + REQUIRE_VSX(ctx); + + xb = tcg_temp_new_i64(); + tmp = tcg_temp_new_i64(); + all_true = tcg_const_i64(0b1000); + all_false = tcg_const_i64(0b0010); + mask = tcg_constant_i64(dup_const(MO_8, 1)); + zero = tcg_constant_i64(0); + + for (int dw = 0; dw < 2; dw++) { + get_cpu_vsr(xb, a->xb, dw); + + tcg_gen_and_i64(tmp, mask, xb); + tcg_gen_movcond_i64(TCG_COND_EQ, all_true, tmp, + mask, all_true, zero); + + tcg_gen_andc_i64(tmp, mask, xb); + tcg_gen_movcond_i64(TCG_COND_EQ, all_false, tmp, + mask, all_false, zero); + } + + tcg_gen_or_i64(tmp, all_false, all_true); + tcg_gen_extrl_i64_i32(cpu_crf[a->bf], tmp); + + tcg_temp_free_i64(xb); + tcg_temp_free_i64(tmp); + tcg_temp_free_i64(all_true); + tcg_temp_free_i64(all_false); + + return true; +} + static void gen_xxsldwi(DisasContext *ctx) { TCGv_i64 xth, xtl; @@ -2070,12 +2237,6 @@ static bool do_lstxv(DisasContext *ctx, int ra, TCGv displ, static bool do_lstxv_D(DisasContext *ctx, arg_D *a, bool store, bool paired) { - if (paired) { - REQUIRE_INSNS_FLAGS2(ctx, ISA310); - } else { - REQUIRE_INSNS_FLAGS2(ctx, ISA300); - } - if (paired || a->rt >= 32) { REQUIRE_VSX(ctx); } else { @@ -2089,7 +2250,6 @@ static bool do_lstxv_PLS_D(DisasContext *ctx, arg_PLS_D *a, bool store, bool paired) { arg_D d; - REQUIRE_INSNS_FLAGS2(ctx, ISA310); REQUIRE_VSX(ctx); if (!resolve_PLS_D(ctx, &d, a)) { @@ -2101,12 +2261,6 @@ static bool do_lstxv_PLS_D(DisasContext *ctx, arg_PLS_D *a, static bool do_lstxv_X(DisasContext *ctx, arg_X *a, bool store, bool paired) { - if (paired) { - REQUIRE_INSNS_FLAGS2(ctx, ISA310); - } else { - REQUIRE_INSNS_FLAGS2(ctx, ISA300); - } - if (paired || a->rt >= 32) { REQUIRE_VSX(ctx); } else { @@ -2116,18 +2270,139 @@ static bool do_lstxv_X(DisasContext *ctx, arg_X *a, bool store, bool paired) return do_lstxv(ctx, a->ra, cpu_gpr[a->rb], a->rt, store, paired); } -TRANS(STXV, do_lstxv_D, true, false) -TRANS(LXV, do_lstxv_D, false, false) -TRANS(STXVP, do_lstxv_D, true, true) -TRANS(LXVP, do_lstxv_D, false, true) -TRANS(STXVX, do_lstxv_X, true, false) -TRANS(LXVX, do_lstxv_X, false, false) -TRANS(STXVPX, do_lstxv_X, true, true) -TRANS(LXVPX, do_lstxv_X, false, true) -TRANS64(PSTXV, do_lstxv_PLS_D, true, false) -TRANS64(PLXV, do_lstxv_PLS_D, false, false) -TRANS64(PSTXVP, do_lstxv_PLS_D, true, true) -TRANS64(PLXVP, do_lstxv_PLS_D, false, true) +TRANS_FLAGS2(ISA300, STXV, do_lstxv_D, true, false) +TRANS_FLAGS2(ISA300, LXV, do_lstxv_D, false, false) +TRANS_FLAGS2(ISA310, STXVP, do_lstxv_D, true, true) +TRANS_FLAGS2(ISA310, LXVP, do_lstxv_D, false, true) +TRANS_FLAGS2(ISA300, STXVX, do_lstxv_X, true, false) +TRANS_FLAGS2(ISA300, LXVX, do_lstxv_X, false, false) +TRANS_FLAGS2(ISA310, STXVPX, do_lstxv_X, true, true) 
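+/*
+ * TRANS_FLAGS2/TRANS64_FLAGS2 (defined in translate.c) fold the
+ * insns_flags2 check into the generated decodetree callback, so e.g.
+ * TRANS_FLAGS2(ISA310, LXVP, do_lstxv_D, false, true) expands to
+ * essentially:
+ *
+ *     static bool trans_LXVP(DisasContext *ctx, arg_LXVP *a)
+ *     {
+ *         REQUIRE_INSNS_FLAGS2(ctx, ISA310);
+ *         return do_lstxv_D(ctx, a, false, true);
+ *     }
+ *
+ * which is why the explicit ISA300/ISA310 checks could be dropped from
+ * do_lstxv_D, do_lstxv_X and do_lstxv_PLS_D above.
+ */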
+TRANS_FLAGS2(ISA310, LXVPX, do_lstxv_X, false, true) +TRANS64_FLAGS2(ISA310, PSTXV, do_lstxv_PLS_D, true, false) +TRANS64_FLAGS2(ISA310, PLXV, do_lstxv_PLS_D, false, false) +TRANS64_FLAGS2(ISA310, PSTXVP, do_lstxv_PLS_D, true, true) +TRANS64_FLAGS2(ISA310, PLXVP, do_lstxv_PLS_D, false, true) + +static void gen_xxeval_i64(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b, TCGv_i64 c, + int64_t imm) +{ + /* + * Instead of processing imm bit-by-bit, we'll skip the computation of + * conjunctions whose corresponding bit is unset. + */ + int bit; + TCGv_i64 conj, disj; + + conj = tcg_temp_new_i64(); + disj = tcg_temp_new_i64(); + + tcg_gen_movi_i64(disj, 0); + + /* Iterate over set bits from the least to the most significant bit */ + while (imm) { + /* + * Get the next bit to be processed with ctz64. Invert the result of + * ctz64 to match the indexing used by PowerISA. + */ + bit = 7 - ctz64(imm); + if (bit & 0x4) { + tcg_gen_mov_i64(conj, a); + } else { + tcg_gen_not_i64(conj, a); + } + if (bit & 0x2) { + tcg_gen_and_i64(conj, conj, b); + } else { + tcg_gen_andc_i64(conj, conj, b); + } + if (bit & 0x1) { + tcg_gen_and_i64(conj, conj, c); + } else { + tcg_gen_andc_i64(conj, conj, c); + } + tcg_gen_or_i64(disj, disj, conj); + + /* Unset the least significant bit that is set */ + imm &= imm - 1; + } + + tcg_gen_mov_i64(t, disj); + + tcg_temp_free_i64(conj); + tcg_temp_free_i64(disj); +} + +static void gen_xxeval_vec(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b, + TCGv_vec c, int64_t imm) +{ + /* + * Instead of processing imm bit-by-bit, we'll skip the computation of + * conjunctions whose corresponding bit is unset. + */ + int bit; + TCGv_vec disj, conj; + + disj = tcg_temp_new_vec_matching(t); + conj = tcg_temp_new_vec_matching(t); + + tcg_gen_dupi_vec(vece, disj, 0); + + /* Iterate over set bits from the least to the most significant bit */ + while (imm) { + /* + * Get the next bit to be processed with ctz64. Invert the result of + * ctz64 to match the indexing used by PowerISA. 
+ */ + bit = 7 - ctz64(imm); + if (bit & 0x4) { + tcg_gen_mov_vec(conj, a); + } else { + tcg_gen_not_vec(vece, conj, a); + } + if (bit & 0x2) { + tcg_gen_and_vec(vece, conj, conj, b); + } else { + tcg_gen_andc_vec(vece, conj, conj, b); + } + if (bit & 0x1) { + tcg_gen_and_vec(vece, conj, conj, c); + } else { + tcg_gen_andc_vec(vece, conj, conj, c); + } + tcg_gen_or_vec(vece, disj, disj, conj); + + /* Unset the least significant bit that is set */ + imm &= imm - 1; + } + + tcg_gen_mov_vec(t, disj); + + tcg_temp_free_vec(disj); + tcg_temp_free_vec(conj); +} + +static bool trans_XXEVAL(DisasContext *ctx, arg_8RR_XX4_imm *a) +{ + REQUIRE_INSNS_FLAGS2(ctx, ISA310); + REQUIRE_VSX(ctx); + + static const TCGOpcode vecop_list[] = { + INDEX_op_andc_vec, 0 + }; + static const GVecGen4i op = { + .fniv = gen_xxeval_vec, + .fno = gen_helper_XXEVAL, + .fni8 = gen_xxeval_i64, + .opt_opc = vecop_list, + .vece = MO_64 + }; + + tcg_gen_gvec_4i(vsr_full_offset(a->xt), vsr_full_offset(a->xa), + vsr_full_offset(a->xb), vsr_full_offset(a->xc), + 16, 16, a->imm, &op); + + return true; +} static void gen_xxblendv_vec(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b, TCGv_vec c) @@ -2138,7 +2413,7 @@ static void gen_xxblendv_vec(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b, tcg_temp_free_vec(tmp); } -static bool do_xxblendv(DisasContext *ctx, arg_XX4 *a, unsigned vece) +static bool do_xxblendv(DisasContext *ctx, arg_8RR_XX4 *a, unsigned vece) { static const TCGOpcode vecop_list[] = { INDEX_op_sari_vec, 0 @@ -2184,31 +2459,67 @@ TRANS(XXBLENDVH, do_xxblendv, MO_16) TRANS(XXBLENDVW, do_xxblendv, MO_32) TRANS(XXBLENDVD, do_xxblendv, MO_64) -static bool do_xsmaxmincjdp(DisasContext *ctx, arg_XX3 *a, - void (*helper)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr)) +static bool do_helper_X(arg_X *a, + void (*helper)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr)) { - TCGv_ptr xt, xa, xb; + TCGv_ptr rt, ra, rb; - REQUIRE_INSNS_FLAGS2(ctx, ISA300); + rt = gen_avr_ptr(a->rt); + ra = gen_avr_ptr(a->ra); + rb = gen_avr_ptr(a->rb); + + helper(cpu_env, rt, ra, rb); + + tcg_temp_free_ptr(rt); + tcg_temp_free_ptr(ra); + tcg_temp_free_ptr(rb); + + return true; +} + +static bool do_xscmpqp(DisasContext *ctx, arg_X *a, + void (*helper)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr)) +{ + REQUIRE_INSNS_FLAGS2(ctx, ISA310); + REQUIRE_VSX(ctx); + + return do_helper_X(a, helper); +} + +TRANS(XSCMPEQQP, do_xscmpqp, gen_helper_XSCMPEQQP) +TRANS(XSCMPGEQP, do_xscmpqp, gen_helper_XSCMPGEQP) +TRANS(XSCMPGTQP, do_xscmpqp, gen_helper_XSCMPGTQP) +TRANS(XSMAXCQP, do_xscmpqp, gen_helper_XSMAXCQP) +TRANS(XSMINCQP, do_xscmpqp, gen_helper_XSMINCQP) + +static bool trans_XVCVSPBF16(DisasContext *ctx, arg_XX2 *a) +{ + TCGv_ptr xt, xb; + + REQUIRE_INSNS_FLAGS2(ctx, ISA310); REQUIRE_VSX(ctx); xt = gen_vsr_ptr(a->xt); - xa = gen_vsr_ptr(a->xa); xb = gen_vsr_ptr(a->xb); - helper(cpu_env, xt, xa, xb); + gen_helper_XVCVSPBF16(cpu_env, xt, xb); tcg_temp_free_ptr(xt); - tcg_temp_free_ptr(xa); tcg_temp_free_ptr(xb); return true; } -TRANS(XSMAXCDP, do_xsmaxmincjdp, gen_helper_xsmaxcdp) -TRANS(XSMINCDP, do_xsmaxmincjdp, gen_helper_xsmincdp) -TRANS(XSMAXJDP, do_xsmaxmincjdp, gen_helper_xsmaxjdp) -TRANS(XSMINJDP, do_xsmaxmincjdp, gen_helper_xsminjdp) +static bool trans_XVCVBF16SPN(DisasContext *ctx, arg_XX2 *a) +{ + REQUIRE_INSNS_FLAGS2(ctx, ISA310); + REQUIRE_VSX(ctx); + + tcg_gen_gvec_shli(MO_32, vsr_full_offset(a->xt), vsr_full_offset(a->xb), + 16, 16, 16); + + return true; +} #undef GEN_XX2FORM #undef GEN_XX3FORM diff --git a/target/ppc/translate/vsx-ops.c.inc 
b/target/ppc/translate/vsx-ops.c.inc index c974324c4c825..b8fd116728bdc 100644 --- a/target/ppc/translate/vsx-ops.c.inc +++ b/target/ppc/translate/vsx-ops.c.inc @@ -186,18 +186,6 @@ GEN_XX2FORM(xssqrtdp, 0x16, 0x04, PPC2_VSX), GEN_XX2FORM(xsrsqrtedp, 0x14, 0x04, PPC2_VSX), GEN_XX3FORM(xstdivdp, 0x14, 0x07, PPC2_VSX), GEN_XX2FORM(xstsqrtdp, 0x14, 0x06, PPC2_VSX), -GEN_XX3FORM_NAME(xsmadddp, "xsmaddadp", 0x04, 0x04, PPC2_VSX), -GEN_XX3FORM_NAME(xsmadddp, "xsmaddmdp", 0x04, 0x05, PPC2_VSX), -GEN_XX3FORM_NAME(xsmsubdp, "xsmsubadp", 0x04, 0x06, PPC2_VSX), -GEN_XX3FORM_NAME(xsmsubdp, "xsmsubmdp", 0x04, 0x07, PPC2_VSX), -GEN_XX3FORM_NAME(xsnmadddp, "xsnmaddadp", 0x04, 0x14, PPC2_VSX), -GEN_XX3FORM_NAME(xsnmadddp, "xsnmaddmdp", 0x04, 0x15, PPC2_VSX), -GEN_XX3FORM_NAME(xsnmsubdp, "xsnmsubadp", 0x04, 0x16, PPC2_VSX), -GEN_XX3FORM_NAME(xsnmsubdp, "xsnmsubmdp", 0x04, 0x17, PPC2_VSX), -GEN_XX3FORM(xscmpeqdp, 0x0C, 0x00, PPC2_ISA300), -GEN_XX3FORM(xscmpgtdp, 0x0C, 0x01, PPC2_ISA300), -GEN_XX3FORM(xscmpgedp, 0x0C, 0x02, PPC2_ISA300), -GEN_XX3FORM(xscmpnedp, 0x0C, 0x03, PPC2_ISA300), GEN_XX3FORM(xscmpexpdp, 0x0C, 0x07, PPC2_ISA300), GEN_VSX_XFORM_300(xscmpexpqp, 0x04, 0x05, 0x00600001), GEN_XX2IFORM(xscmpodp, 0x0C, 0x05, PPC2_VSX), @@ -235,14 +223,6 @@ GEN_XX2FORM(xsresp, 0x14, 0x01, PPC2_VSX207), GEN_XX2FORM(xsrsp, 0x12, 0x11, PPC2_VSX207), GEN_XX2FORM(xssqrtsp, 0x16, 0x00, PPC2_VSX207), GEN_XX2FORM(xsrsqrtesp, 0x14, 0x00, PPC2_VSX207), -GEN_XX3FORM_NAME(xsmaddsp, "xsmaddasp", 0x04, 0x00, PPC2_VSX207), -GEN_XX3FORM_NAME(xsmaddsp, "xsmaddmsp", 0x04, 0x01, PPC2_VSX207), -GEN_XX3FORM_NAME(xsmsubsp, "xsmsubasp", 0x04, 0x02, PPC2_VSX207), -GEN_XX3FORM_NAME(xsmsubsp, "xsmsubmsp", 0x04, 0x03, PPC2_VSX207), -GEN_XX3FORM_NAME(xsnmaddsp, "xsnmaddasp", 0x04, 0x10, PPC2_VSX207), -GEN_XX3FORM_NAME(xsnmaddsp, "xsnmaddmsp", 0x04, 0x11, PPC2_VSX207), -GEN_XX3FORM_NAME(xsnmsubsp, "xsnmsubasp", 0x04, 0x12, PPC2_VSX207), -GEN_XX3FORM_NAME(xsnmsubsp, "xsnmsubmsp", 0x04, 0x13, PPC2_VSX207), GEN_XX2FORM(xscvsxdsp, 0x10, 0x13, PPC2_VSX207), GEN_XX2FORM(xscvuxdsp, 0x10, 0x12, PPC2_VSX207), @@ -341,53 +321,6 @@ VSX_LOGICAL(xxlnand, 0x8, 0x16, PPC2_VSX207), VSX_LOGICAL(xxlorc, 0x8, 0x15, PPC2_VSX207), GEN_XX3FORM(xxmrghw, 0x08, 0x02, PPC2_VSX), GEN_XX3FORM(xxmrglw, 0x08, 0x06, PPC2_VSX), -GEN_XX3FORM(xxperm, 0x08, 0x03, PPC2_ISA300), -GEN_XX3FORM(xxpermr, 0x08, 0x07, PPC2_ISA300), GEN_XX3FORM_DM(xxsldwi, 0x08, 0x00), GEN_XX2FORM_EXT(xxextractuw, 0x0A, 0x0A, PPC2_ISA300), GEN_XX2FORM_EXT(xxinsertw, 0x0A, 0x0B, PPC2_ISA300), - -#define GEN_XXSEL_ROW(opc3) \ -GEN_HANDLER2_E(xxsel, "xxsel", 0x3C, 0x18, opc3, 0, PPC_NONE, PPC2_VSX), \ -GEN_HANDLER2_E(xxsel, "xxsel", 0x3C, 0x19, opc3, 0, PPC_NONE, PPC2_VSX), \ -GEN_HANDLER2_E(xxsel, "xxsel", 0x3C, 0x1A, opc3, 0, PPC_NONE, PPC2_VSX), \ -GEN_HANDLER2_E(xxsel, "xxsel", 0x3C, 0x1B, opc3, 0, PPC_NONE, PPC2_VSX), \ -GEN_HANDLER2_E(xxsel, "xxsel", 0x3C, 0x1C, opc3, 0, PPC_NONE, PPC2_VSX), \ -GEN_HANDLER2_E(xxsel, "xxsel", 0x3C, 0x1D, opc3, 0, PPC_NONE, PPC2_VSX), \ -GEN_HANDLER2_E(xxsel, "xxsel", 0x3C, 0x1E, opc3, 0, PPC_NONE, PPC2_VSX), \ -GEN_HANDLER2_E(xxsel, "xxsel", 0x3C, 0x1F, opc3, 0, PPC_NONE, PPC2_VSX), \ - -GEN_XXSEL_ROW(0x00) -GEN_XXSEL_ROW(0x01) -GEN_XXSEL_ROW(0x02) -GEN_XXSEL_ROW(0x03) -GEN_XXSEL_ROW(0x04) -GEN_XXSEL_ROW(0x05) -GEN_XXSEL_ROW(0x06) -GEN_XXSEL_ROW(0x07) -GEN_XXSEL_ROW(0x08) -GEN_XXSEL_ROW(0x09) -GEN_XXSEL_ROW(0x0A) -GEN_XXSEL_ROW(0x0B) -GEN_XXSEL_ROW(0x0C) -GEN_XXSEL_ROW(0x0D) -GEN_XXSEL_ROW(0x0E) -GEN_XXSEL_ROW(0x0F) -GEN_XXSEL_ROW(0x10) -GEN_XXSEL_ROW(0x11) 
-GEN_XXSEL_ROW(0x12) -GEN_XXSEL_ROW(0x13) -GEN_XXSEL_ROW(0x14) -GEN_XXSEL_ROW(0x15) -GEN_XXSEL_ROW(0x16) -GEN_XXSEL_ROW(0x17) -GEN_XXSEL_ROW(0x18) -GEN_XXSEL_ROW(0x19) -GEN_XXSEL_ROW(0x1A) -GEN_XXSEL_ROW(0x1B) -GEN_XXSEL_ROW(0x1C) -GEN_XXSEL_ROW(0x1D) -GEN_XXSEL_ROW(0x1E) -GEN_XXSEL_ROW(0x1F) - -GEN_XX3FORM_DM(xxpermdi, 0x08, 0x01), diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 9e79a7edeed15..b15f0fef0ede2 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -3905,3 +3905,9 @@ void tcg_register_jit(const void *buf, size_t buf_size) tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); } #endif /* __ELF__ */ +#undef VMULEUB +#undef VMULEUH +#undef VMULEUW +#undef VMULOUB +#undef VMULOUH +#undef VMULOUW diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c index ffe55e908f8a9..079a761b04023 100644 --- a/tcg/tcg-op-gvec.c +++ b/tcg/tcg-op-gvec.c @@ -836,6 +836,30 @@ static void expand_4_i32(uint32_t dofs, uint32_t aofs, uint32_t bofs, tcg_temp_free_i32(t0); } +static void expand_4i_i32(uint32_t dofs, uint32_t aofs, uint32_t bofs, + uint32_t cofs, uint32_t oprsz, int32_t c, + void (*fni)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32, + int32_t)) +{ + TCGv_i32 t0 = tcg_temp_new_i32(); + TCGv_i32 t1 = tcg_temp_new_i32(); + TCGv_i32 t2 = tcg_temp_new_i32(); + TCGv_i32 t3 = tcg_temp_new_i32(); + uint32_t i; + + for (i = 0; i < oprsz; i += 4) { + tcg_gen_ld_i32(t1, cpu_env, aofs + i); + tcg_gen_ld_i32(t2, cpu_env, bofs + i); + tcg_gen_ld_i32(t3, cpu_env, cofs + i); + fni(t0, t1, t2, t3, c); + tcg_gen_st_i32(t0, cpu_env, dofs + i); + } + tcg_temp_free_i32(t3); + tcg_temp_free_i32(t2); + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t0); +} + /* Expand OPSZ bytes worth of two-operand operations using i64 elements. */ static void expand_2_i64(uint32_t dofs, uint32_t aofs, uint32_t oprsz, bool load_dest, void (*fni)(TCGv_i64, TCGv_i64)) @@ -971,6 +995,30 @@ static void expand_4_i64(uint32_t dofs, uint32_t aofs, uint32_t bofs, tcg_temp_free_i64(t0); } +static void expand_4i_i64(uint32_t dofs, uint32_t aofs, uint32_t bofs, + uint32_t cofs, uint32_t oprsz, int64_t c, + void (*fni)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64, + int64_t)) +{ + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + TCGv_i64 t2 = tcg_temp_new_i64(); + TCGv_i64 t3 = tcg_temp_new_i64(); + uint32_t i; + + for (i = 0; i < oprsz; i += 8) { + tcg_gen_ld_i64(t1, cpu_env, aofs + i); + tcg_gen_ld_i64(t2, cpu_env, bofs + i); + tcg_gen_ld_i64(t3, cpu_env, cofs + i); + fni(t0, t1, t2, t3, c); + tcg_gen_st_i64(t0, cpu_env, dofs + i); + } + tcg_temp_free_i64(t3); + tcg_temp_free_i64(t2); + tcg_temp_free_i64(t1); + tcg_temp_free_i64(t0); +} + /* Expand OPSZ bytes worth of two-operand operations using host vectors. */ static void expand_2_vec(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t tysz, TCGType type, @@ -1121,6 +1169,35 @@ static void expand_4_vec(unsigned vece, uint32_t dofs, uint32_t aofs, tcg_temp_free_vec(t0); } +/* + * Expand OPSZ bytes worth of four-vector operands and an immediate operand + * using host vectors. 
+ */ +static void expand_4i_vec(unsigned vece, uint32_t dofs, uint32_t aofs, + uint32_t bofs, uint32_t cofs, uint32_t oprsz, + uint32_t tysz, TCGType type, int64_t c, + void (*fni)(unsigned, TCGv_vec, TCGv_vec, + TCGv_vec, TCGv_vec, int64_t)) +{ + TCGv_vec t0 = tcg_temp_new_vec(type); + TCGv_vec t1 = tcg_temp_new_vec(type); + TCGv_vec t2 = tcg_temp_new_vec(type); + TCGv_vec t3 = tcg_temp_new_vec(type); + uint32_t i; + + for (i = 0; i < oprsz; i += tysz) { + tcg_gen_ld_vec(t1, cpu_env, aofs + i); + tcg_gen_ld_vec(t2, cpu_env, bofs + i); + tcg_gen_ld_vec(t3, cpu_env, cofs + i); + fni(vece, t0, t1, t2, t3, c); + tcg_gen_st_vec(t0, cpu_env, dofs + i); + } + tcg_temp_free_vec(t3); + tcg_temp_free_vec(t2); + tcg_temp_free_vec(t1); + tcg_temp_free_vec(t0); +} + /* Expand a vector two-operand operation. */ void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs, uint32_t oprsz, uint32_t maxsz, const GVecGen2 *g) @@ -1533,6 +1610,75 @@ void tcg_gen_gvec_4(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs, } } +/* Expand a vector four-operand operation. */ +void tcg_gen_gvec_4i(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs, + uint32_t oprsz, uint32_t maxsz, int64_t c, + const GVecGen4i *g) +{ + const TCGOpcode *this_list = g->opt_opc ? : vecop_list_empty; + const TCGOpcode *hold_list = tcg_swap_vecop_list(this_list); + TCGType type; + uint32_t some; + + check_size_align(oprsz, maxsz, dofs | aofs | bofs | cofs); + check_overlap_4(dofs, aofs, bofs, cofs, maxsz); + + type = 0; + if (g->fniv) { + type = choose_vector_type(g->opt_opc, g->vece, oprsz, g->prefer_i64); + } + switch (type) { + case TCG_TYPE_V256: + /* + * Recall that ARM SVE allows vector sizes that are not a + * power of 2, but always a multiple of 16. The intent is + * that e.g. size == 80 would be expanded with 2x32 + 1x16. + */ + some = QEMU_ALIGN_DOWN(oprsz, 32); + expand_4i_vec(g->vece, dofs, aofs, bofs, cofs, some, + 32, TCG_TYPE_V256, c, g->fniv); + if (some == oprsz) { + break; + } + dofs += some; + aofs += some; + bofs += some; + cofs += some; + oprsz -= some; + maxsz -= some; + /* fallthru */ + case TCG_TYPE_V128: + expand_4i_vec(g->vece, dofs, aofs, bofs, cofs, oprsz, + 16, TCG_TYPE_V128, c, g->fniv); + break; + case TCG_TYPE_V64: + expand_4i_vec(g->vece, dofs, aofs, bofs, cofs, oprsz, + 8, TCG_TYPE_V64, c, g->fniv); + break; + + case 0: + if (g->fni8 && check_size_impl(oprsz, 8)) { + expand_4i_i64(dofs, aofs, bofs, cofs, oprsz, c, g->fni8); + } else if (g->fni4 && check_size_impl(oprsz, 4)) { + expand_4i_i32(dofs, aofs, bofs, cofs, oprsz, c, g->fni4); + } else { + assert(g->fno != NULL); + tcg_gen_gvec_4_ool(dofs, aofs, bofs, cofs, + oprsz, maxsz, c, g->fno); + oprsz = maxsz; + } + break; + + default: + g_assert_not_reached(); + } + tcg_swap_vecop_list(hold_list); + + if (oprsz < maxsz) { + expand_clr(dofs + oprsz, maxsz - oprsz); + } +} + /* * Expand specific vector operations. 
 */
diff --git a/tests/tcg/ppc64/Makefile.target b/tests/tcg/ppc64/Makefile.target
index 0368007028c9a..b2d2ec7d7be06 100644
--- a/tests/tcg/ppc64/Makefile.target
+++ b/tests/tcg/ppc64/Makefile.target
@@ -10,11 +10,13 @@ PPC64_TESTS=bcdsub non_signalling_xscv
 endif
 $(PPC64_TESTS): CFLAGS += -mpower8-vector
-PPC64_TESTS += byte_reverse
-PPC64_TESTS += mtfsf
+PPC64_TESTS += byte_reverse mtfsf hash
 ifneq ($(DOCKER_IMAGE)$(CROSS_CC_HAS_POWER10),)
 run-byte_reverse: QEMU_OPTS+=-cpu POWER10
 run-plugin-byte_reverse-with-%: QEMU_OPTS+=-cpu POWER10
+
+run-hash: QEMU_OPTS+=-cpu POWER10
+run-plugin-hash-with-%: QEMU_OPTS+=-cpu POWER10
 else
 byte_reverse:
 	$(call skip-test, "BUILD of $@", "missing compiler support")
@@ -22,6 +24,13 @@ run-byte_reverse:
 	$(call skip-test, "RUN of byte_reverse", "not built")
 run-plugin-byte_reverse-with-%:
 	$(call skip-test, "RUN of byte_reverse ($*)", "not built")
+
+hash:
+	$(call skip-test, "BUILD of $@", "missing compiler support")
+run-hash:
+	$(call skip-test, "RUN of hash", "not built")
+run-plugin-hash-with-%:
+	$(call skip-test, "RUN of hash ($*)", "not built")
 endif
 PPC64_TESTS += signal_save_restore_xer
diff --git a/tests/tcg/ppc64le/Makefile.target b/tests/tcg/ppc64le/Makefile.target
index 480ff0898d7ea..5f36735ec13a0 100644
--- a/tests/tcg/ppc64le/Makefile.target
+++ b/tests/tcg/ppc64le/Makefile.target
@@ -10,12 +10,16 @@ endif
 $(PPC64LE_TESTS): CFLAGS += -mpower8-vector
 ifneq ($(DOCKER_IMAGE)$(CROSS_CC_HAS_POWER10),)
-PPC64LE_TESTS += byte_reverse
+PPC64LE_TESTS += byte_reverse hash
 endif
 byte_reverse: CFLAGS += -mcpu=power10
 run-byte_reverse: QEMU_OPTS+=-cpu POWER10
 run-plugin-byte_reverse-with-%: QEMU_OPTS+=-cpu POWER10
+hash: CFLAGS += -mcpu=power10
+run-hash: QEMU_OPTS+=-cpu POWER10
+run-plugin-hash-with-%: QEMU_OPTS+=-cpu POWER10
+
 PPC64LE_TESTS += mtfsf
 PPC64LE_TESTS += signal_save_restore_xer
diff --git a/tests/tcg/ppc64le/hash.c b/tests/tcg/ppc64le/hash.c
new file mode 100644
index 0000000000000..56d886eba3343
--- /dev/null
+++ b/tests/tcg/ppc64le/hash.c
@@ -0,0 +1,29 @@
+#include <signal.h>
+#include <stdlib.h>
+
+void sigtrap_handler(int sig, siginfo_t *si, void *ucontext)
+{
+    exit(0);
+}
+
+int main(void)
+{
+    struct sigaction action = {
+        .sa_sigaction = sigtrap_handler,
+        .sa_flags = SA_SIGINFO
+    };
+
+    asm volatile("hashst 0, -16(1)\n\t"
+                 "hashchk 0, -16(1)\n\t");
+
+    sigaction(SIGTRAP, &action, NULL);
+
+    asm volatile("hashst 0, -16(1)\n\t"
+                 "andi. 2, 2, 0x0\n\t"
+                 "std 2, -16(1)\n\t" /* tamper with stored hash */
+                 "hashchk 0, -16(1)\n\t");
+    /* SIGTRAP should be raised and sigtrap_handler() called */
+
+    /* return != 0 if sigtrap_handler was not called */
+    return 1;
+}
diff --git a/tests/tcg/ppc64le/signal_save_restore_xer.c b/tests/tcg/ppc64le/signal_save_restore_xer.c
index e4f8a07dd7e0f..9227f4f455512 100644
--- a/tests/tcg/ppc64le/signal_save_restore_xer.c
+++ b/tests/tcg/ppc64le/signal_save_restore_xer.c
@@ -11,7 +11,7 @@
 uint64_t saved;
-void sigill_handler(int sig, siginfo_t *si, void *ucontext)
+void sigtrap_handler(int sig, siginfo_t *si, void *ucontext)
 {
     ucontext_t *uc = ucontext;
     uc->uc_mcontext.regs->nip += 4;
@@ -23,14 +23,14 @@ int main(void)
 {
     uint64_t initial = XER_CA | XER_CA32, restored;
     struct sigaction sa = {
-        .sa_sigaction = sigill_handler,
+        .sa_sigaction = sigtrap_handler,
         .sa_flags = SA_SIGINFO
     };
-    sigaction(SIGILL, &sa, NULL);
+    sigaction(SIGTRAP, &sa, NULL);
     asm("mtspr 1, %1\n\t"
-        ".long 0x0\n\t"
+        "trap\n\t"
        "mfspr %0, 1\n\t"
        : "=r" (restored)
        : "r" (initial));
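
The XXEVAL expansion above treats the 8-bit immediate as a truth table:
gen_xxeval_vec ORs together one minterm of (a, b, c) per set bit of imm, so
for each element bit the result is the immediate bit selected by the 3-bit
index built from a, b and c. A minimal scalar reference model of that rule
(the name xxeval_ref is illustrative, not part of the patch):

#include <stdint.h>

/*
 * Per-bit XXEVAL rule as realized by gen_xxeval_vec: result bit =
 * imm bit (7 - idx), where idx = (a << 2) | (b << 1) | c for that bit.
 */
static uint64_t xxeval_ref(uint64_t a, uint64_t b, uint64_t c, uint8_t imm)
{
    uint64_t r = 0;

    for (int i = 0; i < 64; i++) {
        unsigned idx = (((a >> i) & 1) << 2) |
                       (((b >> i) & 1) << 1) |
                       ((c >> i) & 1);
        r |= (uint64_t)((imm >> (7 - idx)) & 1) << i;
    }
    return r;
}

Under this model imm 0x01 yields a & b & c, 0xfe yields ~(a & b & c) and
0x80 yields ~(a | b | c), which makes a convenient cross-check against the
minterm loop.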
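
trans_XXEVAL points .fni8 at gen_xxeval_i64, whose body is outside this
excerpt; tcg_gen_gvec_4i falls back to that expansion when no usable host
vector type is available. A sketch of an expansion that would satisfy the
.fni8 contract, simply mirroring gen_xxeval_vec minterm by minterm (not
necessarily what the patch's gen_xxeval_i64 actually does):

static void gen_xxeval_i64_sketch(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b,
                                  TCGv_i64 c, int64_t imm)
{
    TCGv_i64 disj = tcg_temp_new_i64();
    TCGv_i64 conj = tcg_temp_new_i64();
    int bit;

    tcg_gen_movi_i64(disj, 0);

    /* OR in one minterm per set bit of the immediate. */
    while (imm) {
        bit = 7 - ctz64(imm);
        if (bit & 0x4) {
            tcg_gen_mov_i64(conj, a);
        } else {
            tcg_gen_not_i64(conj, a);
        }
        if (bit & 0x2) {
            tcg_gen_and_i64(conj, conj, b);
        } else {
            tcg_gen_andc_i64(conj, conj, b);
        }
        if (bit & 0x1) {
            tcg_gen_and_i64(conj, conj, c);
        } else {
            tcg_gen_andc_i64(conj, conj, c);
        }
        tcg_gen_or_i64(disj, disj, conj);

        /* Clear the least significant set bit. */
        imm &= imm - 1;
    }

    tcg_gen_mov_i64(t, disj);

    tcg_temp_free_i64(disj);
    tcg_temp_free_i64(conj);
}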
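
When neither the vector nor the integer expansion applies, tcg_gen_gvec_4i
falls through to tcg_gen_gvec_4_ool with the immediate passed as the
descriptor data, so the .fno helper recovers it with simd_data(). The actual
helper_XXEVAL is declared elsewhere in the series; the sketch below only
shows the shape such a gvec_4-style helper takes, reusing the per-bit rule
from the reference model above (names are illustrative):

void helper_XXEVAL_sketch(void *vxt, void *vxa, void *vxb, void *vxc,
                          uint32_t desc)
{
    uint64_t *xt = vxt, *xa = vxa, *xb = vxb, *xc = vxc;
    unsigned imm = simd_data(desc);
    int elems = simd_oprsz(desc) / 8;

    for (int i = 0; i < elems; i++) {
        uint64_t r = 0;

        for (int j = 0; j < 64; j++) {
            unsigned idx = (((xa[i] >> j) & 1) << 2) |
                           (((xb[i] >> j) & 1) << 1) |
                           ((xc[i] >> j) & 1);
            r |= (uint64_t)((imm >> (7 - idx)) & 1) << j;
        }
        xt[i] = r;
    }
}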