tcg/ppc: Update vector support for VSX

author Richard Henderson <richard.henderson@linaro.org>

Sun, 23 Jun 2019 17:04:47 +0000 (19:04 +0200)

committer Richard Henderson <richard.henderson@linaro.org>

Mon, 14 Oct 2019 14:10:20 +0000 (07:10 -0700)
author Richard Henderson <richard.henderson@linaro.org>
Sun, 23 Jun 2019 17:04:47 +0000 (19:04 +0200)
committer Richard Henderson <richard.henderson@linaro.org>
Mon, 14 Oct 2019 14:10:20 +0000 (07:10 -0700)
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h

index f50b7f4bac02deaf6ecfeba0741d26c1f40daa29..c974ca274a997ef4d1db588038deb54da8df2a2f 100644 (file)
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -66,6 +66,7 @@ typedef enum {
  
  extern TCGPowerISA have_isa;
  extern bool have_altivec;
+extern bool have_vsx;
  
  #define have_isa_2_06  (have_isa >= tcg_isa_2_06)
  #define have_isa_3_00  (have_isa >= tcg_isa_3_00)
@@ -149,7 +150,7 @@ extern bool have_altivec;
   * instruction and substituting two 32-bit stores makes the generated
   * code quite large.
   */
-#define TCG_TARGET_HAS_v64              0
+#define TCG_TARGET_HAS_v64              have_vsx
  #define TCG_TARGET_HAS_v128             have_altivec
  #define TCG_TARGET_HAS_v256             0
  
@@ -165,7 +166,7 @@ extern bool have_altivec;
  #define TCG_TARGET_HAS_mul_vec          1
  #define TCG_TARGET_HAS_sat_vec          1
  #define TCG_TARGET_HAS_minmax_vec       1
-#define TCG_TARGET_HAS_bitsel_vec       0
+#define TCG_TARGET_HAS_bitsel_vec       have_vsx
  #define TCG_TARGET_HAS_cmpsel_vec       0
  
  void flush_icache_range(uintptr_t start, uintptr_t stop);
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c

index d739f4b6053b3afa91083bfe317353a213b0b68e..238895840557253dcca91f8aff59cb50fa35c0b1 100644 (file)
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -67,6 +67,7 @@ static tcg_insn_unit *tb_ret_addr;
  TCGPowerISA have_isa;
  static bool have_isel;
  bool have_altivec;
+bool have_vsx;
  
  #ifndef CONFIG_SOFTMMU
  #define TCG_GUEST_BASE_REG 30
@@ -467,9 +468,12 @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
  #define LVEBX      XO31(7)
  #define LVEHX      XO31(39)
  #define LVEWX      XO31(71)
+#define LXSDX      (XO31(588) | 1)  /* v2.06, force tx=1 */
+#define LXVDSX     (XO31(332) | 1)  /* v2.06, force tx=1 */
  
  #define STVX       XO31(231)
  #define STVEWX     XO31(199)
+#define STXSDX     (XO31(716) | 1)  /* v2.06, force sx=1 */
  
  #define VADDSBS    VX4(768)
  #define VADDUBS    VX4(512)
@@ -558,6 +562,9 @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
  
  #define VSLDOI     VX4(44)
  
+#define XXPERMDI   (OPCD(60) | (10 << 3) | 7)  /* v2.06, force ax=bx=tx=1 */
+#define XXSEL      (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */
+
  #define RT(r) ((r)<<21)
  #define RS(r) ((r)<<21)
  #define RA(r) ((r)<<16)
@@ -884,11 +891,21 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret,
          add = 0;
      }
  
-    load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
-    if (TCG_TARGET_REG_BITS == 64) {
-        new_pool_l2(s, rel, s->code_ptr, add, val, val);
+    if (have_vsx) {
+        load_insn = type == TCG_TYPE_V64 ? LXSDX : LXVDSX;
+        load_insn |= VRT(ret) | RB(TCG_REG_TMP1);
+        if (TCG_TARGET_REG_BITS == 64) {
+            new_pool_label(s, val, rel, s->code_ptr, add);
+        } else {
+            new_pool_l2(s, rel, s->code_ptr, add, val, val);
+        }
      } else {
-        new_pool_l4(s, rel, s->code_ptr, add, val, val, val, val);
+        load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
+        if (TCG_TARGET_REG_BITS == 64) {
+            new_pool_l2(s, rel, s->code_ptr, add, val, val);
+        } else {
+            new_pool_l4(s, rel, s->code_ptr, add, val, val, val, val);
+        }
      }
  
      if (USE_REG_TB) {
@@ -1136,6 +1153,10 @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
          /* fallthru */
      case TCG_TYPE_V64:
          tcg_debug_assert(ret >= TCG_REG_V0);
+        if (have_vsx) {
+            tcg_out_mem_long(s, 0, LXSDX, ret, base, offset);
+            break;
+        }
          tcg_debug_assert((offset & 7) == 0);
          tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16);
          if (offset & 8) {
@@ -1180,6 +1201,10 @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
          /* fallthru */
      case TCG_TYPE_V64:
          tcg_debug_assert(arg >= TCG_REG_V0);
+        if (have_vsx) {
+            tcg_out_mem_long(s, 0, STXSDX, arg, base, offset);
+            break;
+        }
          tcg_debug_assert((offset & 7) == 0);
          if (offset & 8) {
              tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8);
@@ -2899,6 +2924,8 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
      case INDEX_op_shri_vec:
      case INDEX_op_sari_vec:
          return vece <= MO_32 ? -1 : 0;
+    case INDEX_op_bitsel_vec:
+        return have_vsx;
      default:
          return 0;
      }
@@ -2925,6 +2952,10 @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
          tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16));
          break;
      case MO_64:
+        if (have_vsx) {
+            tcg_out32(s, XXPERMDI | VRT(dst) | VRA(src) | VRB(src));
+            break;
+        }
          tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8);
          tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8);
          break;
@@ -2968,6 +2999,10 @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
          tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16));
          break;
      case MO_64:
+        if (have_vsx) {
+            tcg_out_mem_long(s, 0, LXVDSX, out, base, offset);
+            break;
+        }
          tcg_debug_assert((offset & 7) == 0);
          tcg_out_mem_long(s, 0, LVX, out, base, offset & -16);
          tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8);
@@ -3102,6 +3137,10 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
          }
          break;
  
+    case INDEX_op_bitsel_vec:
+        tcg_out32(s, XXSEL | VRT(a0) | VRC(a1) | VRB(a2) | VRA(args[3]));
+        return;
+
      case INDEX_op_dup2_vec:
          assert(TCG_TARGET_REG_BITS == 32);
          /* With inputs a1 = xLxx, a2 = xHxx  */
@@ -3497,6 +3536,7 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
      case INDEX_op_st_vec:
      case INDEX_op_dupm_vec:
          return &v_r;
+    case INDEX_op_bitsel_vec:
      case INDEX_op_ppc_msum_vec:
          return &v_v_v_v;
  
@@ -3530,6 +3570,10 @@ static void tcg_target_init(TCGContext *s)
  
      if (hwcap & PPC_FEATURE_HAS_ALTIVEC) {
          have_altivec = true;
+        /* We only care about the portion of VSX that overlaps Altivec. */
+        if (hwcap & PPC_FEATURE_HAS_VSX) {
+            have_vsx = true;
+        }
      }
  
      tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
author	Richard Henderson <richard.henderson@linaro.org>
	Sun, 23 Jun 2019 17:04:47 +0000 (19:04 +0200)
committer	Richard Henderson <richard.henderson@linaro.org>
	Mon, 14 Oct 2019 14:10:20 +0000 (07:10 -0700)
tcg/ppc/tcg-target.h		patch | blob | history
tcg/ppc/tcg-target.inc.c		patch | blob | history