tcg/s390x: Use register pair allocation for div and mulu2

author Richard Henderson <richard.henderson@linaro.org>
Mon, 10 Oct 2022 15:41:15 +0000 (08:41 -0700)

committer Richard Henderson <richard.henderson@linaro.org>
Fri, 6 Jan 2023 23:07:03 +0000 (23:07 +0000)
diff --git a/tcg/s390x/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h

index 426dd92e51a1956e3459636d9dec7518a1564c07..00ba727b70f623f1c95fb6b8948c9b466c480c47 100644 (file)
--- a/tcg/s390x/tcg-target-con-set.h
+++ b/tcg/s390x/tcg-target-con-set.h
@@ -29,8 +29,8 @@ C_O1_I2(v, v, v)
  C_O1_I3(v, v, v, v)
  C_O1_I4(r, r, ri, r, 0)
  C_O1_I4(r, r, ri, rI, 0)
-C_O2_I2(b, a, 0, r)
-C_O2_I3(b, a, 0, 1, r)
+C_O2_I2(o, m, 0, r)
+C_O2_I3(o, m, 0, 1, r)
  C_O2_I4(r, r, 0, 1, rA, r)
  C_O2_I4(r, r, 0, 1, ri, r)
  C_O2_I4(r, r, 0, 1, r, r)
diff --git a/tcg/s390x/tcg-target-con-str.h b/tcg/s390x/tcg-target-con-str.h

index 8bb0358ae51c8db8ba1dd343a65b6307b190b3a7..76446aecae85fdd9f49ab410d93ffe283fbeb33a 100644 (file)
--- a/tcg/s390x/tcg-target-con-str.h
+++ b/tcg/s390x/tcg-target-con-str.h
@@ -11,13 +11,7 @@
  REGS('r', ALL_GENERAL_REGS)
  REGS('L', ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)
  REGS('v', ALL_VECTOR_REGS)
-/*
- * A (single) even/odd pair for division.
- * TODO: Add something to the register allocator to allow
- * this kind of regno+1 pairing to be done more generally.
- */
-REGS('a', 1u << TCG_REG_R2)
-REGS('b', 1u << TCG_REG_R3)
+REGS('o', 0xaaaa) /* odd numbered general regs */
  
  /*
   * Define constraint letters for constants:
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc

index b9ba7b605e623fcd1e3317e148447c74206d13df..cb00bb69996e30825b9cb9dc53b77036264b5959 100644 (file)
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -2264,10 +2264,18 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
          break;
  
      case INDEX_op_div2_i32:
-        tcg_out_insn(s, RR, DR, TCG_REG_R2, args[4]);
+        tcg_debug_assert(args[0] == args[2]);
+        tcg_debug_assert(args[1] == args[3]);
+        tcg_debug_assert((args[1] & 1) == 0);
+        tcg_debug_assert(args[0] == args[1] + 1);
+        tcg_out_insn(s, RR, DR, args[1], args[4]);
          break;
      case INDEX_op_divu2_i32:
-        tcg_out_insn(s, RRE, DLR, TCG_REG_R2, args[4]);
+        tcg_debug_assert(args[0] == args[2]);
+        tcg_debug_assert(args[1] == args[3]);
+        tcg_debug_assert((args[1] & 1) == 0);
+        tcg_debug_assert(args[0] == args[1] + 1);
+        tcg_out_insn(s, RRE, DLR, args[1], args[4]);
          break;
  
      case INDEX_op_shl_i32:
@@ -2521,17 +2529,30 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
          break;
  
      case INDEX_op_div2_i64:
-        /* ??? We get an unnecessary sign-extension of the dividend
-           into R3 with this definition, but as we do in fact always
-           produce both quotient and remainder using INDEX_op_div_i64
-           instead requires jumping through even more hoops.  */
-        tcg_out_insn(s, RRE, DSGR, TCG_REG_R2, args[4]);
+        /*
+         * ??? We get an unnecessary sign-extension of the dividend
+         * into op0 with this definition, but as we do in fact always
+         * produce both quotient and remainder using INDEX_op_div_i64
+         * instead requires jumping through even more hoops.
+         */
+        tcg_debug_assert(args[0] == args[2]);
+        tcg_debug_assert(args[1] == args[3]);
+        tcg_debug_assert((args[1] & 1) == 0);
+        tcg_debug_assert(args[0] == args[1] + 1);
+        tcg_out_insn(s, RRE, DSGR, args[1], args[4]);
          break;
      case INDEX_op_divu2_i64:
-        tcg_out_insn(s, RRE, DLGR, TCG_REG_R2, args[4]);
+        tcg_debug_assert(args[0] == args[2]);
+        tcg_debug_assert(args[1] == args[3]);
+        tcg_debug_assert((args[1] & 1) == 0);
+        tcg_debug_assert(args[0] == args[1] + 1);
+        tcg_out_insn(s, RRE, DLGR, args[1], args[4]);
          break;
      case INDEX_op_mulu2_i64:
-        tcg_out_insn(s, RRE, MLGR, TCG_REG_R2, args[3]);
+        tcg_debug_assert(args[0] == args[2]);
+        tcg_debug_assert((args[1] & 1) == 0);
+        tcg_debug_assert(args[0] == args[1] + 1);
+        tcg_out_insn(s, RRE, MLGR, args[1], args[3]);
          break;
  
      case INDEX_op_shl_i64:
@@ -3226,10 +3247,10 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
      case INDEX_op_div2_i64:
      case INDEX_op_divu2_i32:
      case INDEX_op_divu2_i64:
-        return C_O2_I3(b, a, 0, 1, r);
+        return C_O2_I3(o, m, 0, 1, r);
  
      case INDEX_op_mulu2_i64:
-        return C_O2_I2(b, a, 0, r);
+        return C_O2_I2(o, m, 0, r);
  
      case INDEX_op_add2_i32:
      case INDEX_op_sub2_i32:
author	Richard Henderson <richard.henderson@linaro.org>
	Mon, 10 Oct 2022 15:41:15 +0000 (08:41 -0700)
committer	Richard Henderson <richard.henderson@linaro.org>
	Fri, 6 Jan 2023 23:07:03 +0000 (23:07 +0000)
tcg/s390x/tcg-target-con-set.h		patch | blob | history
tcg/s390x/tcg-target-con-str.h		patch | blob | history
tcg/s390x/tcg-target.c.inc		patch | blob | history