extern TCGPowerISA have_isa;
extern bool have_altivec;
+extern bool have_vsx; /* true only when hwcap reports both Altivec and VSX */
#define have_isa_2_06 (have_isa >= tcg_isa_2_06)
#define have_isa_3_00 (have_isa >= tcg_isa_3_00)
* instruction and substituting two 32-bit stores makes the generated
* code quite large.
*/
-#define TCG_TARGET_HAS_v64 0
+#define TCG_TARGET_HAS_v64 have_vsx /* run-time: v64 is enabled only when VSX was detected */
#define TCG_TARGET_HAS_v128 have_altivec
#define TCG_TARGET_HAS_v256 0
#define TCG_TARGET_HAS_mul_vec 1
#define TCG_TARGET_HAS_sat_vec 1
#define TCG_TARGET_HAS_minmax_vec 1
-#define TCG_TARGET_HAS_bitsel_vec 0
+#define TCG_TARGET_HAS_bitsel_vec have_vsx /* bitsel_vec is emitted as XXSEL, gated on have_vsx */
#define TCG_TARGET_HAS_cmpsel_vec 0
void flush_icache_range(uintptr_t start, uintptr_t stop);
TCGPowerISA have_isa;
static bool have_isel;
bool have_altivec;
+bool have_vsx; /* set from PPC_FEATURE_HAS_VSX, and only if Altivec is also present */
#ifndef CONFIG_SOFTMMU
#define TCG_GUEST_BASE_REG 30
#define LVEBX XO31(7)
#define LVEHX XO31(39)
#define LVEWX XO31(71)
+#define LXSDX (XO31(588) | 1) /* v2.06, force tx=1 */
+#define LXVDSX (XO31(332) | 1) /* v2.06, force tx=1 */
#define STVX XO31(231)
#define STVEWX XO31(199)
+#define STXSDX (XO31(716) | 1) /* v2.06, force sx=1 */
#define VADDSBS VX4(768)
#define VADDUBS VX4(512)
#define VSLDOI VX4(44)
+#define XXPERMDI (OPCD(60) | (10 << 3) | 7) /* v2.06, force ax=bx=tx=1 */
+#define XXSEL (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */
+
#define RT(r) ((r)<<21)
#define RS(r) ((r)<<21)
#define RA(r) ((r)<<16)
add = 0;
}
- load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
- if (TCG_TARGET_REG_BITS == 64) {
- new_pool_l2(s, rel, s->code_ptr, add, val, val);
+ if (have_vsx) {
+ load_insn = type == TCG_TYPE_V64 ? LXSDX : LXVDSX;
+ load_insn |= VRT(ret) | RB(TCG_REG_TMP1);
+ if (TCG_TARGET_REG_BITS == 64) {
+ new_pool_label(s, val, rel, s->code_ptr, add);
+ } else {
+ new_pool_l2(s, rel, s->code_ptr, add, val, val);
+ }
} else {
- new_pool_l4(s, rel, s->code_ptr, add, val, val, val, val);
+ load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
+ if (TCG_TARGET_REG_BITS == 64) {
+ new_pool_l2(s, rel, s->code_ptr, add, val, val);
+ } else {
+ new_pool_l4(s, rel, s->code_ptr, add, val, val, val, val);
+ }
}
if (USE_REG_TB) {
/* fallthru */
case TCG_TYPE_V64:
tcg_debug_assert(ret >= TCG_REG_V0);
+ if (have_vsx) {
+ tcg_out_mem_long(s, 0, LXSDX, ret, base, offset);
+ break;
+ }
tcg_debug_assert((offset & 7) == 0);
tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16);
if (offset & 8) {
/* fallthru */
case TCG_TYPE_V64:
tcg_debug_assert(arg >= TCG_REG_V0);
+ if (have_vsx) {
+ tcg_out_mem_long(s, 0, STXSDX, arg, base, offset);
+ break;
+ }
tcg_debug_assert((offset & 7) == 0);
if (offset & 8) {
tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8);
case INDEX_op_shri_vec:
case INDEX_op_sari_vec:
return vece <= MO_32 ? -1 : 0;
+ case INDEX_op_bitsel_vec:
+ return have_vsx;
default:
return 0;
}
tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16));
break;
case MO_64:
+ if (have_vsx) {
+ tcg_out32(s, XXPERMDI | VRT(dst) | VRA(src) | VRB(src));
+ break;
+ }
tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8);
tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8);
break;
tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16));
break;
case MO_64:
+ if (have_vsx) {
+ tcg_out_mem_long(s, 0, LXVDSX, out, base, offset);
+ break;
+ }
tcg_debug_assert((offset & 7) == 0);
tcg_out_mem_long(s, 0, LVX, out, base, offset & -16);
tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8);
}
break;
+ case INDEX_op_bitsel_vec:
+ tcg_out32(s, XXSEL | VRT(a0) | VRC(a1) | VRB(a2) | VRA(args[3]));
+ return;
+
case INDEX_op_dup2_vec:
assert(TCG_TARGET_REG_BITS == 32);
/* With inputs a1 = xLxx, a2 = xHxx */
case INDEX_op_st_vec:
case INDEX_op_dupm_vec:
return &v_r;
+ case INDEX_op_bitsel_vec:
case INDEX_op_ppc_msum_vec:
return &v_v_v_v;
if (hwcap & PPC_FEATURE_HAS_ALTIVEC) {
have_altivec = true;
+ /* We only care about the portion of VSX that overlaps Altivec. */
+ if (hwcap & PPC_FEATURE_HAS_VSX) {
+ have_vsx = true;
+ }
}
tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;