return v;
}
+/* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
+ * If SVE is not enabled, then there are only 128 bits in the vector.
+ */
+static void clear_vec_high(DisasContext *s, bool is_q, int rd)
+{
+ unsigned ofs = fp_reg_offset(s, rd, MO_64);
+ unsigned vsz = vec_full_reg_size(s);
+
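+ /* For a 64-bit write, zero bits [64:127] of the 128-bit view. */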
+ if (!is_q) {
+ TCGv_i64 tcg_zero = tcg_const_i64(0);
+ tcg_gen_st_i64(tcg_zero, cpu_env, ofs + 8);
+ tcg_temp_free_i64(tcg_zero);
+ }
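+ /* With SVE, the register extends beyond 128 bits; zero bytes [16, vsz) too. */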
+ if (vsz > 16) {
+ tcg_gen_gvec_dup8i(ofs + 16, vsz - 16, vsz - 16, 0);
+ }
+}
+
static void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
{
- TCGv_i64 tcg_zero = tcg_const_i64(0);
+ unsigned ofs = fp_reg_offset(s, reg, MO_64);

- tcg_gen_st_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
- tcg_gen_st_i64(tcg_zero, cpu_env, fp_reg_hi_offset(s, reg));
- tcg_temp_free_i64(tcg_zero);
+ tcg_gen_st_i64(v, cpu_env, ofs);
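+ /* Zero everything above the low 64 bits, including any SVE state. */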
+ clear_vec_high(s, false, reg);
}
static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
tcg_temp_free_i64(tmplo);
tcg_temp_free_i64(tmphi);
+
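+ /* All 128 bits were just written; only SVE state above them needs clearing. */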
+ clear_vec_high(s, true, destidx);
}
/*
}
}
-/* Clear the high 64 bits of a 128 bit vector (in general non-quad
- * vector ops all need to do this).
- */
-static void clear_vec_high(DisasContext *s, int rd)
-{
- TCGv_i64 tcg_zero = tcg_const_i64(0);
-
- write_vec_element(s, tcg_zero, rd, 1, MO_64);
- tcg_temp_free_i64(tcg_zero);
-}
-
/* Store from vector register to memory */
static void do_vec_st(DisasContext *s, int srcidx, int element,
TCGv_i64 tcg_addr, int size)
/* For non-quad operations, setting a slice of the low
* 64 bits of the register clears the high 64 bits (in
* the ARM ARM pseudocode this is implicit in the fact
- * that 'rval' is a 64 bit wide variable). We optimize
- * by noticing that we only need to do this the first
- * time we touch a register.
+ * that 'rval' is a 64 bit wide variable).
+ * For quad operations, we might still need to zero the high
+ * bits of the SVE register. We optimize by noticing that we
+ * only need to do this the first time we touch a register.
*/
- if (!is_q && e == 0 && (r == 0 || xs == selem - 1)) {
- clear_vec_high(s, tt);
+ if (e == 0 && (r == 0 || xs == selem - 1)) {
+ clear_vec_high(s, is_q, tt);
}
}
tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
write_vec_element(s, tcg_tmp, rt, 0, MO_64);
if (is_q) {
write_vec_element(s, tcg_tmp, rt, 1, MO_64);
- } else {
- clear_vec_high(s, rt);
}
tcg_temp_free_i64(tcg_tmp);
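+ /* Zeroes bits [64:127] when !is_q, and the SVE tail in either case. */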
+ clear_vec_high(s, is_q, rt);
} else {
/* Load/store one element per register */
if (is_load) {
}
if (!is_q) {
- clear_vec_high(s, rd);
write_vec_element(s, tcg_final, rd, 0, MO_64);
} else {
write_vec_element(s, tcg_final, rd, 1, MO_64);
tcg_temp_free_i64(tcg_rd);
tcg_temp_free_i32(tcg_rd_narrowed);
tcg_temp_free_i64(tcg_final);
- return;
+
+ clear_vec_high(s, is_q, rd);
}
/* SQSHLU, UQSHL, SQSHL: saturating left shifts */
tcg_temp_free_i64(tcg_op);
}
tcg_temp_free_i64(tcg_shift);
-
- if (!is_q) {
- clear_vec_high(s, rd);
- }
+ clear_vec_high(s, is_q, rd);
} else {
TCGv_i32 tcg_shift = tcg_const_i32(shift);
static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
}
tcg_temp_free_i32(tcg_shift);
- if (!is_q && !scalar) {
- clear_vec_high(s, rd);
+ if (!scalar) {
+ clear_vec_high(s, is_q, rd);
}
}
}
}
}
- if (!is_double && elements == 2) {
- clear_vec_high(s, rd);
- }
-
tcg_temp_free_i64(tcg_int);
tcg_temp_free_ptr(tcg_fpst);
tcg_temp_free_i32(tcg_shift);
+
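+ /* elements << size is the output size in bytes; 16 means a quad op. */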
+ clear_vec_high(s, elements << size == 16, rd);
}
/* UCVTF/SCVTF - Integer to FP conversion */
write_vec_element(s, tcg_op, rd, pass, MO_64);
tcg_temp_free_i64(tcg_op);
}
- if (!is_q) {
- clear_vec_high(s, rd);
- }
+ clear_vec_high(s, is_q, rd);
} else {
int maxpass = is_scalar ? 1 : is_q ? 4 : 2;
for (pass = 0; pass < maxpass; pass++) {
}
tcg_temp_free_i32(tcg_op);
}
- if (!is_q && !is_scalar) {
- clear_vec_high(s, rd);
+ if (!is_scalar) {
+ clear_vec_high(s, is_q, rd);
}
}
tcg_temp_free_ptr(fpst);
- if ((elements << size) < 4) {
- /* scalar, or non-quad vector op */
- clear_vec_high(s, rd);
- }
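+ /* More than 8 output bytes implies a full quad-width operation. */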
+ clear_vec_high(s, elements * (size ? 8 : 4) > 8, rd);
}
/* AdvSIMD scalar three same
}
write_vec_element(s, tcg_res, rd, pass, MO_64);
}
- if (is_scalar) {
- clear_vec_high(s, rd);
- }
-
tcg_temp_free_i64(tcg_res);
tcg_temp_free_i64(tcg_zero);
tcg_temp_free_i64(tcg_op);
+
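+ /* 64-bit vector ops are quad-only, so !is_scalar implies is_q here. */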
+ clear_vec_high(s, !is_scalar, rd);
} else {
TCGv_i32 tcg_op = tcg_temp_new_i32();
TCGv_i32 tcg_zero = tcg_const_i32(0);
tcg_temp_free_i32(tcg_res);
tcg_temp_free_i32(tcg_zero);
tcg_temp_free_i32(tcg_op);
- if (!is_q && !is_scalar) {
- clear_vec_high(s, rd);
+ if (!is_scalar) {
+ clear_vec_high(s, is_q, rd);
}
}
}
write_vec_element(s, tcg_res, rd, pass, MO_64);
}
- if (is_scalar) {
- clear_vec_high(s, rd);
- }
-
tcg_temp_free_i64(tcg_res);
tcg_temp_free_i64(tcg_op);
+ clear_vec_high(s, !is_scalar, rd);
} else {
TCGv_i32 tcg_op = tcg_temp_new_i32();
TCGv_i32 tcg_res = tcg_temp_new_i32();
}
tcg_temp_free_i32(tcg_res);
tcg_temp_free_i32(tcg_op);
- if (!is_q && !is_scalar) {
- clear_vec_high(s, rd);
+ if (!is_scalar) {
+ clear_vec_high(s, is_q, rd);
}
}
tcg_temp_free_ptr(fpst);
write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
tcg_temp_free_i32(tcg_res[pass]);
}
- if (!is_q) {
- clear_vec_high(s, rd);
- }
+ clear_vec_high(s, is_q, rd);
}
/* Remaining saturating accumulating ops */
}
write_vec_element(s, tcg_rd, rd, pass, MO_64);
}
- if (is_scalar) {
- clear_vec_high(s, rd);
- }
-
tcg_temp_free_i64(tcg_rd);
tcg_temp_free_i64(tcg_rn);
+ clear_vec_high(s, !is_scalar, rd);
} else {
TCGv_i32 tcg_rn = tcg_temp_new_i32();
TCGv_i32 tcg_rd = tcg_temp_new_i32();
}
write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
}
-
- if (!is_q) {
- clear_vec_high(s, rd);
- }
-
tcg_temp_free_i32(tcg_rd);
tcg_temp_free_i32(tcg_rn);
+ clear_vec_high(s, is_q, rd);
}
}
tcg_temp_free_i64(tcg_round);
done:
- if (!is_q) {
- clear_vec_high(s, rd);
- }
+ clear_vec_high(s, is_q, rd);
}
static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
}
if (!is_q) {
- clear_vec_high(s, rd);
write_vec_element(s, tcg_final, rd, 0, MO_64);
} else {
write_vec_element(s, tcg_final, rd, 1, MO_64);
}
-
if (round) {
tcg_temp_free_i64(tcg_round);
}
tcg_temp_free_i64(tcg_rn);
tcg_temp_free_i64(tcg_rd);
tcg_temp_free_i64(tcg_final);
- return;
+
+ clear_vec_high(s, is_q, rd);
}
write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
tcg_temp_free_i32(tcg_res[pass]);
}
- if (!is_q) {
- clear_vec_high(s, rd);
- }
+ clear_vec_high(s, is_q, rd);
}
static void handle_pmull_64(DisasContext *s, int is_q, int rd, int rn, int rm)
write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
tcg_temp_free_i32(tcg_res[pass]);
}
- if (!is_q) {
- clear_vec_high(s, rd);
- }
+ clear_vec_high(s, is_q, rd);
}
if (fpst) {
tcg_temp_free_i32(tcg_op2);
}
}
-
- if (!is_q) {
- clear_vec_high(s, rd);
- }
+ clear_vec_high(s, is_q, rd);
}
/* AdvSIMD three same
write_vec_element(s, tcg_tmp, rd, i, grp_size);
tcg_temp_free_i64(tcg_tmp);
}
- if (!is_q) {
- clear_vec_high(s, rd);
- }
+ clear_vec_high(s, is_q, rd);
} else {
int revmask = (1 << grp_size) - 1;
int esize = 8 << size;
tcg_temp_free_i32(tcg_op);
}
}
- if (!is_q) {
- clear_vec_high(s, rd);
- }
+ clear_vec_high(s, is_q, rd);
if (need_rmode) {
gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
tcg_temp_free_i64(tcg_res);
}
- if (is_scalar) {
- clear_vec_high(s, rd);
- }
-
tcg_temp_free_i64(tcg_idx);
+ clear_vec_high(s, !is_scalar, rd);
} else if (!is_long) {
/* 32 bit floating point, or 16 or 32 bit integer.
* For the 16 bit scalar case we use the usual Neon helpers and
}
tcg_temp_free_i32(tcg_idx);
-
- if (!is_q) {
- clear_vec_high(s, rd);
- }
+ clear_vec_high(s, is_q, rd);
} else {
/* long ops: 16x16->32 or 32x32->64 */
TCGv_i64 tcg_res[2];
}
tcg_temp_free_i64(tcg_idx);
- if (is_scalar) {
- clear_vec_high(s, rd);
- }
+ clear_vec_high(s, !is_scalar, rd);
} else {
TCGv_i32 tcg_idx = tcg_temp_new_i32();