From: Song Gao
Date: Tue, 2 Jan 2024 02:02:00 +0000 (+0800)
Subject: target/loongarch: move translate modules to tcg/
X-Git-Url: http://git.maquefel.me/?a=commitdiff_plain;h=5c23704e4725f935b3171787f00e9922a7fc58cb;p=qemu.git

target/loongarch: move translate modules to tcg/

Introduce the target/loongarch/tcg directory.
Its purpose is to hold the TCG code that is selected by CONFIG_TCG

Reviewed-by: Philippe Mathieu-Daudé
Signed-off-by: Song Gao
Message-Id: <20240102020200.3462097-2-gaosong@loongson.cn>
---

diff --git a/target/loongarch/constant_timer.c b/target/loongarch/constant_timer.c
deleted file mode 100644
index 1851f53fd6..0000000000
--- a/target/loongarch/constant_timer.c
+++ /dev/null
@@ -1,64 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * QEMU LoongArch constant timer support
- *
- * Copyright (c) 2021 Loongson Technology Corporation Limited
- */
-
-#include "qemu/osdep.h"
-#include "qemu/timer.h"
-#include "cpu.h"
-#include "internals.h"
-#include "cpu-csr.h"
-
-#define TIMER_PERIOD 10 /* 10 ns period for 100 MHz frequency */
-#define CONSTANT_TIMER_TICK_MASK 0xfffffffffffcUL
-#define CONSTANT_TIMER_ENABLE 0x1UL
-
-uint64_t cpu_loongarch_get_constant_timer_counter(LoongArchCPU *cpu)
-{
-    return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) / TIMER_PERIOD;
-}
-
-uint64_t cpu_loongarch_get_constant_timer_ticks(LoongArchCPU *cpu)
-{
-    uint64_t now, expire;
-
-    now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
-    expire = timer_expire_time_ns(&cpu->timer);
-
-    return (expire - now) / TIMER_PERIOD;
-}
-
-void cpu_loongarch_store_constant_timer_config(LoongArchCPU *cpu,
-                                               uint64_t value)
-{
-    CPULoongArchState *env = &cpu->env;
-    uint64_t now, next;
-
-    env->CSR_TCFG = value;
-    if (value & CONSTANT_TIMER_ENABLE) {
-        now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
-        next = now + (value & CONSTANT_TIMER_TICK_MASK) * TIMER_PERIOD;
-        timer_mod(&cpu->timer, next);
-    } else {
-        timer_del(&cpu->timer);
-    }
-}
-
-void loongarch_constant_timer_cb(void *opaque)
-{
-    LoongArchCPU *cpu = opaque;
-    CPULoongArchState *env = &cpu->env;
-    uint64_t now, next;
-
-    if (FIELD_EX64(env->CSR_TCFG, CSR_TCFG, PERIODIC)) {
-        now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
-        next = now + (env->CSR_TCFG & CONSTANT_TIMER_TICK_MASK) * TIMER_PERIOD;
-        timer_mod(&cpu->timer, next);
-    } else {
-        env->CSR_TCFG = FIELD_DP64(env->CSR_TCFG, CSR_TCFG, EN, 0);
-    }
-
-    loongarch_cpu_set_irq(opaque, IRQ_TIMER, 1);
-}
diff --git a/target/loongarch/csr_helper.c b/target/loongarch/csr_helper.c
deleted file mode 100644
index 55341551a5..0000000000
--- a/target/loongarch/csr_helper.c
+++ /dev/null
@@ -1,97 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * LoongArch emulation helpers for CSRs
- *
- * Copyright (c) 2021 Loongson Technology Corporation Limited
- */
-
-#include "qemu/osdep.h"
-#include "qemu/main-loop.h"
-#include "cpu.h"
-#include "internals.h"
-#include "qemu/host-utils.h"
-#include "exec/helper-proto.h"
-#include "exec/exec-all.h"
-#include "exec/cpu_ldst.h"
-#include "hw/irq.h"
-#include "cpu-csr.h"
-
-target_ulong helper_csrrd_pgd(CPULoongArchState *env)
-{
-    int64_t v;
-
-    if (env->CSR_TLBRERA & 0x1) {
-        v = env->CSR_TLBRBADV;
-    } else {
-        v = env->CSR_BADV;
-    }
-
-    if ((v >> 63) & 0x1) {
-        v = env->CSR_PGDH;
-    } else {
-        v = env->CSR_PGDL;
-    }
-
-    return v;
-}
-
-target_ulong helper_csrrd_cpuid(CPULoongArchState *env)
-{
-    LoongArchCPU *lac = env_archcpu(env);
-
-    env->CSR_CPUID = CPU(lac)->cpu_index;
-
-    return env->CSR_CPUID;
-}
-
-target_ulong helper_csrrd_tval(CPULoongArchState *env)
-{
-    LoongArchCPU *cpu = env_archcpu(env);
-
-    return cpu_loongarch_get_constant_timer_ticks(cpu);
-}
-
-target_ulong helper_csrwr_estat(CPULoongArchState *env, target_ulong val)
-{
-    int64_t old_v = env->CSR_ESTAT;
-
-    /* Only IS[1:0] can be written */
-    env->CSR_ESTAT = deposit64(env->CSR_ESTAT, 0, 2, val);
-
-    return old_v;
-}
-
-target_ulong helper_csrwr_asid(CPULoongArchState *env, target_ulong val)
-{
-    int64_t old_v = env->CSR_ASID;
-
-    /* Only ASID field of CSR_ASID can be written */
-    env->CSR_ASID = deposit64(env->CSR_ASID, 0, 10, val);
-    if (old_v != env->CSR_ASID) {
-        tlb_flush(env_cpu(env));
-    }
-    return old_v;
-}
-
-target_ulong helper_csrwr_tcfg(CPULoongArchState *env, target_ulong val)
-{
-    LoongArchCPU *cpu = env_archcpu(env);
-    int64_t old_v = env->CSR_TCFG;
-
-    cpu_loongarch_store_constant_timer_config(cpu, val);
-
-    return old_v;
-}
-
-target_ulong helper_csrwr_ticlr(CPULoongArchState *env, target_ulong val)
-{
-    LoongArchCPU *cpu = env_archcpu(env);
-    int64_t old_v = 0;
-
-    if (val & 0x1) {
-        qemu_mutex_lock_iothread();
-        loongarch_cpu_set_irq(cpu, IRQ_TIMER, 0);
-        qemu_mutex_unlock_iothread();
-    }
-    return old_v;
-}
diff --git a/target/loongarch/fpu_helper.c b/target/loongarch/fpu_helper.c
deleted file mode 100644
index f6753c5875..0000000000
--- a/target/loongarch/fpu_helper.c
+++ /dev/null
@@ -1,879 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * LoongArch floating point emulation helpers for QEMU
- *
- * Copyright (c) 2021 Loongson Technology Corporation Limited
- */
-
-#include "qemu/osdep.h"
-#include "cpu.h"
-#include "exec/helper-proto.h"
-#include "exec/exec-all.h"
-#include "exec/cpu_ldst.h"
-#include "fpu/softfloat.h"
-#include "internals.h"
-
-static inline uint64_t nanbox_s(float32 fp)
-{
-    return fp | MAKE_64BIT_MASK(32, 32);
-}
-
-/* Convert loongarch rounding mode in fcsr0 to IEEE library */
-static const FloatRoundMode ieee_rm[4] = {
-    float_round_nearest_even,
-    float_round_to_zero,
-    float_round_up,
-    float_round_down
-};
-
-void restore_fp_status(CPULoongArchState *env)
-{
-    set_float_rounding_mode(ieee_rm[(env->fcsr0 >> FCSR0_RM) & 0x3],
-                            &env->fp_status);
-    set_flush_to_zero(0, &env->fp_status);
-}
-
-int ieee_ex_to_loongarch(int xcpt)
-{
-    int ret = 0;
-    if (xcpt & float_flag_invalid) {
-        ret |= FP_INVALID;
-    }
-    if (xcpt & float_flag_overflow) {
-        ret |= FP_OVERFLOW;
-    }
-    if (xcpt & float_flag_underflow) {
-        ret |= FP_UNDERFLOW;
-    }
-    if (xcpt & float_flag_divbyzero) {
-        ret |= FP_DIV0;
-    }
-    if (xcpt & float_flag_inexact) {
-        ret |= FP_INEXACT;
-    }
-    return ret;
-}
-
-static void update_fcsr0_mask(CPULoongArchState *env, uintptr_t pc, int mask)
-{
-    int flags = get_float_exception_flags(&env->fp_status);
-
-    set_float_exception_flags(0, &env->fp_status);
-
-    flags &= ~mask;
-
-    if (!flags) {
-        SET_FP_CAUSE(env->fcsr0, flags);
-        return;
-    } else {
-        flags = ieee_ex_to_loongarch(flags);
-        SET_FP_CAUSE(env->fcsr0, flags);
-    }
-
-    if (GET_FP_ENABLES(env->fcsr0) & flags) {
-        do_raise_exception(env, EXCCODE_FPE, pc);
-    } else {
-        UPDATE_FP_FLAGS(env->fcsr0, flags);
-    }
-}
-
-static void update_fcsr0(CPULoongArchState *env, uintptr_t pc)
-{
-    update_fcsr0_mask(env, pc, 0);
-}
-
-uint64_t helper_fadd_s(CPULoongArchState *env, uint64_t fj, uint64_t fk)
-{
-    uint64_t fd;
-
-    fd = nanbox_s(float32_add((uint32_t)fj, (uint32_t)fk, &env->fp_status));
-    update_fcsr0(env, GETPC());
-    return fd;
-}
-
-uint64_t helper_fadd_d(CPULoongArchState *env, uint64_t fj, uint64_t fk)
-{
-    uint64_t fd;
-
-    fd = float64_add(fj, fk, &env->fp_status);
-
update_fcsr0(env, GETPC()); - return fd; -} - -uint64_t helper_fsub_s(CPULoongArchState *env, uint64_t fj, uint64_t fk) -{ - uint64_t fd; - - fd = nanbox_s(float32_sub((uint32_t)fj, (uint32_t)fk, &env->fp_status)); - update_fcsr0(env, GETPC()); - return fd; -} - -uint64_t helper_fsub_d(CPULoongArchState *env, uint64_t fj, uint64_t fk) -{ - uint64_t fd; - - fd = float64_sub(fj, fk, &env->fp_status); - update_fcsr0(env, GETPC()); - return fd; -} - -uint64_t helper_fmul_s(CPULoongArchState *env, uint64_t fj, uint64_t fk) -{ - uint64_t fd; - - fd = nanbox_s(float32_mul((uint32_t)fj, (uint32_t)fk, &env->fp_status)); - update_fcsr0(env, GETPC()); - return fd; -} - -uint64_t helper_fmul_d(CPULoongArchState *env, uint64_t fj, uint64_t fk) -{ - uint64_t fd; - - fd = float64_mul(fj, fk, &env->fp_status); - update_fcsr0(env, GETPC()); - return fd; -} - -uint64_t helper_fdiv_s(CPULoongArchState *env, uint64_t fj, uint64_t fk) -{ - uint64_t fd; - - fd = nanbox_s(float32_div((uint32_t)fj, (uint32_t)fk, &env->fp_status)); - update_fcsr0(env, GETPC()); - return fd; -} - -uint64_t helper_fdiv_d(CPULoongArchState *env, uint64_t fj, uint64_t fk) -{ - uint64_t fd; - - fd = float64_div(fj, fk, &env->fp_status); - update_fcsr0(env, GETPC()); - return fd; -} - -uint64_t helper_fmax_s(CPULoongArchState *env, uint64_t fj, uint64_t fk) -{ - uint64_t fd; - - fd = nanbox_s(float32_maxnum((uint32_t)fj, (uint32_t)fk, &env->fp_status)); - update_fcsr0(env, GETPC()); - return fd; -} - -uint64_t helper_fmax_d(CPULoongArchState *env, uint64_t fj, uint64_t fk) -{ - uint64_t fd; - - fd = float64_maxnum(fj, fk, &env->fp_status); - update_fcsr0(env, GETPC()); - return fd; -} - -uint64_t helper_fmin_s(CPULoongArchState *env, uint64_t fj, uint64_t fk) -{ - uint64_t fd; - - fd = nanbox_s(float32_minnum((uint32_t)fj, (uint32_t)fk, &env->fp_status)); - update_fcsr0(env, GETPC()); - return fd; -} - -uint64_t helper_fmin_d(CPULoongArchState *env, uint64_t fj, uint64_t fk) -{ - uint64_t fd; - - fd = float64_minnum(fj, fk, &env->fp_status); - update_fcsr0(env, GETPC()); - return fd; -} - -uint64_t helper_fmaxa_s(CPULoongArchState *env, uint64_t fj, uint64_t fk) -{ - uint64_t fd; - - fd = nanbox_s(float32_maxnummag((uint32_t)fj, - (uint32_t)fk, &env->fp_status)); - update_fcsr0(env, GETPC()); - return fd; -} - -uint64_t helper_fmaxa_d(CPULoongArchState *env, uint64_t fj, uint64_t fk) -{ - uint64_t fd; - - fd = float64_maxnummag(fj, fk, &env->fp_status); - update_fcsr0(env, GETPC()); - return fd; -} - -uint64_t helper_fmina_s(CPULoongArchState *env, uint64_t fj, uint64_t fk) -{ - uint64_t fd; - - fd = nanbox_s(float32_minnummag((uint32_t)fj, - (uint32_t)fk, &env->fp_status)); - update_fcsr0(env, GETPC()); - return fd; -} - -uint64_t helper_fmina_d(CPULoongArchState *env, uint64_t fj, uint64_t fk) -{ - uint64_t fd; - - fd = float64_minnummag(fj, fk, &env->fp_status); - update_fcsr0(env, GETPC()); - return fd; -} - -uint64_t helper_fscaleb_s(CPULoongArchState *env, uint64_t fj, uint64_t fk) -{ - uint64_t fd; - int32_t n = (int32_t)fk; - - fd = nanbox_s(float32_scalbn((uint32_t)fj, - n > 0x200 ? 0x200 : - n < -0x200 ? -0x200 : n, - &env->fp_status)); - update_fcsr0(env, GETPC()); - return fd; -} - -uint64_t helper_fscaleb_d(CPULoongArchState *env, uint64_t fj, uint64_t fk) -{ - uint64_t fd; - int64_t n = (int64_t)fk; - - fd = float64_scalbn(fj, - n > 0x1000 ? 0x1000 : - n < -0x1000 ? 
-0x1000 : n, - &env->fp_status); - update_fcsr0(env, GETPC()); - return fd; -} - -uint64_t helper_fsqrt_s(CPULoongArchState *env, uint64_t fj) -{ - uint64_t fd; - - fd = nanbox_s(float32_sqrt((uint32_t)fj, &env->fp_status)); - update_fcsr0(env, GETPC()); - return fd; -} - -uint64_t helper_fsqrt_d(CPULoongArchState *env, uint64_t fj) -{ - uint64_t fd; - - fd = float64_sqrt(fj, &env->fp_status); - update_fcsr0(env, GETPC()); - return fd; -} - -uint64_t helper_frecip_s(CPULoongArchState *env, uint64_t fj) -{ - uint64_t fd; - - fd = nanbox_s(float32_div(float32_one, (uint32_t)fj, &env->fp_status)); - update_fcsr0(env, GETPC()); - return fd; -} - -uint64_t helper_frecip_d(CPULoongArchState *env, uint64_t fj) -{ - uint64_t fd; - - fd = float64_div(float64_one, fj, &env->fp_status); - update_fcsr0(env, GETPC()); - return fd; -} - -uint64_t helper_frsqrt_s(CPULoongArchState *env, uint64_t fj) -{ - uint64_t fd; - uint32_t fp; - - fp = float32_sqrt((uint32_t)fj, &env->fp_status); - fd = nanbox_s(float32_div(float32_one, fp, &env->fp_status)); - update_fcsr0(env, GETPC()); - return fd; -} - -uint64_t helper_frsqrt_d(CPULoongArchState *env, uint64_t fj) -{ - uint64_t fp, fd; - - fp = float64_sqrt(fj, &env->fp_status); - fd = float64_div(float64_one, fp, &env->fp_status); - update_fcsr0(env, GETPC()); - return fd; -} - -uint64_t helper_flogb_s(CPULoongArchState *env, uint64_t fj) -{ - uint64_t fd; - uint32_t fp; - float_status *status = &env->fp_status; - FloatRoundMode old_mode = get_float_rounding_mode(status); - - set_float_rounding_mode(float_round_down, status); - fp = float32_log2((uint32_t)fj, status); - fd = nanbox_s(float32_round_to_int(fp, status)); - set_float_rounding_mode(old_mode, status); - update_fcsr0_mask(env, GETPC(), float_flag_inexact); - return fd; -} - -uint64_t helper_flogb_d(CPULoongArchState *env, uint64_t fj) -{ - uint64_t fd; - float_status *status = &env->fp_status; - FloatRoundMode old_mode = get_float_rounding_mode(status); - - set_float_rounding_mode(float_round_down, status); - fd = float64_log2(fj, status); - fd = float64_round_to_int(fd, status); - set_float_rounding_mode(old_mode, status); - update_fcsr0_mask(env, GETPC(), float_flag_inexact); - return fd; -} - -uint64_t helper_fclass_s(CPULoongArchState *env, uint64_t fj) -{ - float32 f = fj; - bool sign = float32_is_neg(f); - - if (float32_is_infinity(f)) { - return sign ? 1 << 2 : 1 << 6; - } else if (float32_is_zero(f)) { - return sign ? 1 << 5 : 1 << 9; - } else if (float32_is_zero_or_denormal(f)) { - return sign ? 1 << 4 : 1 << 8; - } else if (float32_is_any_nan(f)) { - float_status s = { }; /* for snan_bit_is_one */ - return float32_is_quiet_nan(f, &s) ? 1 << 1 : 1 << 0; - } else { - return sign ? 1 << 3 : 1 << 7; - } -} - -uint64_t helper_fclass_d(CPULoongArchState *env, uint64_t fj) -{ - float64 f = fj; - bool sign = float64_is_neg(f); - - if (float64_is_infinity(f)) { - return sign ? 1 << 2 : 1 << 6; - } else if (float64_is_zero(f)) { - return sign ? 1 << 5 : 1 << 9; - } else if (float64_is_zero_or_denormal(f)) { - return sign ? 1 << 4 : 1 << 8; - } else if (float64_is_any_nan(f)) { - float_status s = { }; /* for snan_bit_is_one */ - return float64_is_quiet_nan(f, &s) ? 1 << 1 : 1 << 0; - } else { - return sign ? 
1 << 3 : 1 << 7; - } -} - -uint64_t helper_fmuladd_s(CPULoongArchState *env, uint64_t fj, - uint64_t fk, uint64_t fa, uint32_t flag) -{ - uint64_t fd; - - fd = nanbox_s(float32_muladd((uint32_t)fj, (uint32_t)fk, - (uint32_t)fa, flag, &env->fp_status)); - update_fcsr0(env, GETPC()); - return fd; -} - -uint64_t helper_fmuladd_d(CPULoongArchState *env, uint64_t fj, - uint64_t fk, uint64_t fa, uint32_t flag) -{ - uint64_t fd; - - fd = float64_muladd(fj, fk, fa, flag, &env->fp_status); - update_fcsr0(env, GETPC()); - return fd; -} - -static uint64_t fcmp_common(CPULoongArchState *env, FloatRelation cmp, - uint32_t flags) -{ - bool ret; - - switch (cmp) { - case float_relation_less: - ret = (flags & FCMP_LT); - break; - case float_relation_equal: - ret = (flags & FCMP_EQ); - break; - case float_relation_greater: - ret = (flags & FCMP_GT); - break; - case float_relation_unordered: - ret = (flags & FCMP_UN); - break; - default: - g_assert_not_reached(); - } - update_fcsr0(env, GETPC()); - - return ret; -} - -/* fcmp_cXXX_s */ -uint64_t helper_fcmp_c_s(CPULoongArchState *env, uint64_t fj, - uint64_t fk, uint32_t flags) -{ - FloatRelation cmp = float32_compare_quiet((uint32_t)fj, - (uint32_t)fk, &env->fp_status); - return fcmp_common(env, cmp, flags); -} - -/* fcmp_sXXX_s */ -uint64_t helper_fcmp_s_s(CPULoongArchState *env, uint64_t fj, - uint64_t fk, uint32_t flags) -{ - FloatRelation cmp = float32_compare((uint32_t)fj, - (uint32_t)fk, &env->fp_status); - return fcmp_common(env, cmp, flags); -} - -/* fcmp_cXXX_d */ -uint64_t helper_fcmp_c_d(CPULoongArchState *env, uint64_t fj, - uint64_t fk, uint32_t flags) -{ - FloatRelation cmp = float64_compare_quiet(fj, fk, &env->fp_status); - return fcmp_common(env, cmp, flags); -} - -/* fcmp_sXXX_d */ -uint64_t helper_fcmp_s_d(CPULoongArchState *env, uint64_t fj, - uint64_t fk, uint32_t flags) -{ - FloatRelation cmp = float64_compare(fj, fk, &env->fp_status); - return fcmp_common(env, cmp, flags); -} - -/* floating point conversion */ -uint64_t helper_fcvt_s_d(CPULoongArchState *env, uint64_t fj) -{ - uint64_t fd; - - fd = nanbox_s(float64_to_float32(fj, &env->fp_status)); - update_fcsr0(env, GETPC()); - return fd; -} - -uint64_t helper_fcvt_d_s(CPULoongArchState *env, uint64_t fj) -{ - uint64_t fd; - - fd = float32_to_float64((uint32_t)fj, &env->fp_status); - update_fcsr0(env, GETPC()); - return fd; -} - -uint64_t helper_ffint_s_w(CPULoongArchState *env, uint64_t fj) -{ - uint64_t fd; - - fd = nanbox_s(int32_to_float32((int32_t)fj, &env->fp_status)); - update_fcsr0(env, GETPC()); - return fd; -} - -uint64_t helper_ffint_s_l(CPULoongArchState *env, uint64_t fj) -{ - uint64_t fd; - - fd = nanbox_s(int64_to_float32(fj, &env->fp_status)); - update_fcsr0(env, GETPC()); - return fd; -} - -uint64_t helper_ffint_d_w(CPULoongArchState *env, uint64_t fj) -{ - uint64_t fd; - - fd = int32_to_float64((int32_t)fj, &env->fp_status); - update_fcsr0(env, GETPC()); - return fd; -} - -uint64_t helper_ffint_d_l(CPULoongArchState *env, uint64_t fj) -{ - uint64_t fd; - - fd = int64_to_float64(fj, &env->fp_status); - update_fcsr0(env, GETPC()); - return fd; -} - -uint64_t helper_frint_s(CPULoongArchState *env, uint64_t fj) -{ - uint64_t fd; - - fd = (uint64_t)(float32_round_to_int((uint32_t)fj, &env->fp_status)); - update_fcsr0(env, GETPC()); - return fd; -} - -uint64_t helper_frint_d(CPULoongArchState *env, uint64_t fj) -{ - uint64_t fd; - - fd = float64_round_to_int(fj, &env->fp_status); - update_fcsr0(env, GETPC()); - return fd; -} - -uint64_t 
helper_ftintrm_l_d(CPULoongArchState *env, uint64_t fj) -{ - uint64_t fd; - FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); - - set_float_rounding_mode(float_round_down, &env->fp_status); - fd = float64_to_int64(fj, &env->fp_status); - set_float_rounding_mode(old_mode, &env->fp_status); - - if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { - if (float64_is_any_nan(fj)) { - fd = 0; - } - } - update_fcsr0(env, GETPC()); - return fd; -} - -uint64_t helper_ftintrm_l_s(CPULoongArchState *env, uint64_t fj) -{ - uint64_t fd; - FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); - - set_float_rounding_mode(float_round_down, &env->fp_status); - fd = float32_to_int64((uint32_t)fj, &env->fp_status); - set_float_rounding_mode(old_mode, &env->fp_status); - - if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { - if (float32_is_any_nan((uint32_t)fj)) { - fd = 0; - } - } - update_fcsr0(env, GETPC()); - return fd; -} - -uint64_t helper_ftintrm_w_d(CPULoongArchState *env, uint64_t fj) -{ - uint64_t fd; - FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); - - set_float_rounding_mode(float_round_down, &env->fp_status); - fd = (uint64_t)float64_to_int32(fj, &env->fp_status); - set_float_rounding_mode(old_mode, &env->fp_status); - - if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { - if (float64_is_any_nan(fj)) { - fd = 0; - } - } - update_fcsr0(env, GETPC()); - return fd; -} - -uint64_t helper_ftintrm_w_s(CPULoongArchState *env, uint64_t fj) -{ - uint64_t fd; - FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); - - set_float_rounding_mode(float_round_down, &env->fp_status); - fd = (uint64_t)float32_to_int32((uint32_t)fj, &env->fp_status); - set_float_rounding_mode(old_mode, &env->fp_status); - - if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { - if (float32_is_any_nan((uint32_t)fj)) { - fd = 0; - } - } - update_fcsr0(env, GETPC()); - return fd; -} - -uint64_t helper_ftintrp_l_d(CPULoongArchState *env, uint64_t fj) -{ - uint64_t fd; - FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); - - set_float_rounding_mode(float_round_up, &env->fp_status); - fd = float64_to_int64(fj, &env->fp_status); - set_float_rounding_mode(old_mode, &env->fp_status); - - if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { - if (float64_is_any_nan(fj)) { - fd = 0; - } - } - update_fcsr0(env, GETPC()); - return fd; -} - -uint64_t helper_ftintrp_l_s(CPULoongArchState *env, uint64_t fj) -{ - uint64_t fd; - FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); - - set_float_rounding_mode(float_round_up, &env->fp_status); - fd = float32_to_int64((uint32_t)fj, &env->fp_status); - set_float_rounding_mode(old_mode, &env->fp_status); - - if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { - if (float32_is_any_nan((uint32_t)fj)) { - fd = 0; - } - } - update_fcsr0(env, GETPC()); - return fd; -} - -uint64_t helper_ftintrp_w_d(CPULoongArchState *env, uint64_t fj) -{ - uint64_t fd; - FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); - - set_float_rounding_mode(float_round_up, &env->fp_status); - fd = (uint64_t)float64_to_int32(fj, &env->fp_status); - set_float_rounding_mode(old_mode, &env->fp_status); - - if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { - if (float64_is_any_nan(fj)) { - fd = 0; - } - } - update_fcsr0(env, GETPC()); - return fd; -} - -uint64_t 
helper_ftintrp_w_s(CPULoongArchState *env, uint64_t fj) -{ - uint64_t fd; - FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); - - set_float_rounding_mode(float_round_up, &env->fp_status); - fd = (uint64_t)float32_to_int32((uint32_t)fj, &env->fp_status); - set_float_rounding_mode(old_mode, &env->fp_status); - - if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { - if (float32_is_any_nan((uint32_t)fj)) { - fd = 0; - } - } - update_fcsr0(env, GETPC()); - return fd; -} - -uint64_t helper_ftintrz_l_d(CPULoongArchState *env, uint64_t fj) -{ - uint64_t fd; - FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); - - fd = float64_to_int64_round_to_zero(fj, &env->fp_status); - set_float_rounding_mode(old_mode, &env->fp_status); - - if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { - if (float64_is_any_nan(fj)) { - fd = 0; - } - } - update_fcsr0(env, GETPC()); - return fd; -} - -uint64_t helper_ftintrz_l_s(CPULoongArchState *env, uint64_t fj) -{ - uint64_t fd; - FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); - - fd = float32_to_int64_round_to_zero((uint32_t)fj, &env->fp_status); - set_float_rounding_mode(old_mode, &env->fp_status); - - if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { - if (float32_is_any_nan((uint32_t)fj)) { - fd = 0; - } - } - update_fcsr0(env, GETPC()); - return fd; -} - -uint64_t helper_ftintrz_w_d(CPULoongArchState *env, uint64_t fj) -{ - uint64_t fd; - FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); - - fd = (uint64_t)float64_to_int32_round_to_zero(fj, &env->fp_status); - set_float_rounding_mode(old_mode, &env->fp_status); - - if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { - if (float64_is_any_nan(fj)) { - fd = 0; - } - } - update_fcsr0(env, GETPC()); - return fd; -} - -uint64_t helper_ftintrz_w_s(CPULoongArchState *env, uint64_t fj) -{ - uint32_t fd; - FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); - - fd = float32_to_int32_round_to_zero((uint32_t)fj, &env->fp_status); - set_float_rounding_mode(old_mode, &env->fp_status); - - if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { - if (float32_is_any_nan((uint32_t)fj)) { - fd = 0; - } - } - update_fcsr0(env, GETPC()); - return (uint64_t)fd; -} - -uint64_t helper_ftintrne_l_d(CPULoongArchState *env, uint64_t fj) -{ - uint64_t fd; - FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); - - set_float_rounding_mode(float_round_nearest_even, &env->fp_status); - fd = float64_to_int64(fj, &env->fp_status); - set_float_rounding_mode(old_mode, &env->fp_status); - - if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { - if (float64_is_any_nan(fj)) { - fd = 0; - } - } - update_fcsr0(env, GETPC()); - return fd; -} - -uint64_t helper_ftintrne_l_s(CPULoongArchState *env, uint64_t fj) -{ - uint64_t fd; - FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); - - set_float_rounding_mode(float_round_nearest_even, &env->fp_status); - fd = float32_to_int64((uint32_t)fj, &env->fp_status); - set_float_rounding_mode(old_mode, &env->fp_status); - - if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { - if (float32_is_any_nan((uint32_t)fj)) { - fd = 0; - } - } - update_fcsr0(env, GETPC()); - return fd; -} - -uint64_t helper_ftintrne_w_d(CPULoongArchState *env, uint64_t fj) -{ - uint64_t fd; - FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); - - 
set_float_rounding_mode(float_round_nearest_even, &env->fp_status); - fd = (uint64_t)float64_to_int32(fj, &env->fp_status); - set_float_rounding_mode(old_mode, &env->fp_status); - - if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { - if (float64_is_any_nan(fj)) { - fd = 0; - } - } - update_fcsr0(env, GETPC()); - return fd; -} - -uint64_t helper_ftintrne_w_s(CPULoongArchState *env, uint64_t fj) -{ - uint32_t fd; - FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); - - set_float_rounding_mode(float_round_nearest_even, &env->fp_status); - fd = float32_to_int32((uint32_t)fj, &env->fp_status); - set_float_rounding_mode(old_mode, &env->fp_status); - - if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { - if (float32_is_any_nan((uint32_t)fj)) { - fd = 0; - } - } - update_fcsr0(env, GETPC()); - return (uint64_t)fd; -} - -uint64_t helper_ftint_l_d(CPULoongArchState *env, uint64_t fj) -{ - uint64_t fd; - - fd = float64_to_int64(fj, &env->fp_status); - if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { - if (float64_is_any_nan(fj)) { - fd = 0; - } - } - update_fcsr0(env, GETPC()); - return fd; -} - -uint64_t helper_ftint_l_s(CPULoongArchState *env, uint64_t fj) -{ - uint64_t fd; - - fd = float32_to_int64((uint32_t)fj, &env->fp_status); - if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { - if (float32_is_any_nan((uint32_t)fj)) { - fd = 0; - } - } - update_fcsr0(env, GETPC()); - return fd; -} - -uint64_t helper_ftint_w_s(CPULoongArchState *env, uint64_t fj) -{ - uint64_t fd; - - fd = (uint64_t)float32_to_int32((uint32_t)fj, &env->fp_status); - if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { - if (float32_is_any_nan((uint32_t)fj)) { - fd = 0; - } - } - update_fcsr0(env, GETPC()); - return fd; -} - -uint64_t helper_ftint_w_d(CPULoongArchState *env, uint64_t fj) -{ - uint64_t fd; - - fd = (uint64_t)float64_to_int32(fj, &env->fp_status); - if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { - if (float64_is_any_nan(fj)) { - fd = 0; - } - } - update_fcsr0(env, GETPC()); - return fd; -} - -void helper_set_rounding_mode(CPULoongArchState *env) -{ - set_float_rounding_mode(ieee_rm[(env->fcsr0 >> FCSR0_RM) & 0x3], - &env->fp_status); -} diff --git a/target/loongarch/insn_trans/trans_arith.c.inc b/target/loongarch/insn_trans/trans_arith.c.inc deleted file mode 100644 index 2be057e932..0000000000 --- a/target/loongarch/insn_trans/trans_arith.c.inc +++ /dev/null @@ -1,304 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (c) 2021 Loongson Technology Corporation Limited - */ - -static bool gen_rrr(DisasContext *ctx, arg_rrr *a, - DisasExtend src1_ext, DisasExtend src2_ext, - DisasExtend dst_ext, void (*func)(TCGv, TCGv, TCGv)) -{ - TCGv dest = gpr_dst(ctx, a->rd, dst_ext); - TCGv src1 = gpr_src(ctx, a->rj, src1_ext); - TCGv src2 = gpr_src(ctx, a->rk, src2_ext); - - func(dest, src1, src2); - gen_set_gpr(a->rd, dest, dst_ext); - - return true; -} - -static bool gen_rri_v(DisasContext *ctx, arg_rr_i *a, - DisasExtend src_ext, DisasExtend dst_ext, - void (*func)(TCGv, TCGv, TCGv)) -{ - TCGv dest = gpr_dst(ctx, a->rd, dst_ext); - TCGv src1 = gpr_src(ctx, a->rj, src_ext); - TCGv src2 = tcg_constant_tl(a->imm); - - func(dest, src1, src2); - gen_set_gpr(a->rd, dest, dst_ext); - - return true; -} - -static bool gen_rri_c(DisasContext *ctx, arg_rr_i *a, - DisasExtend src_ext, DisasExtend dst_ext, - void (*func)(TCGv, TCGv, target_long)) -{ - TCGv dest 
= gpr_dst(ctx, a->rd, dst_ext); - TCGv src1 = gpr_src(ctx, a->rj, src_ext); - - func(dest, src1, a->imm); - gen_set_gpr(a->rd, dest, dst_ext); - - return true; -} - -static bool gen_rrr_sa(DisasContext *ctx, arg_rrr_sa *a, - DisasExtend src_ext, DisasExtend dst_ext, - void (*func)(TCGv, TCGv, TCGv, target_long)) -{ - TCGv dest = gpr_dst(ctx, a->rd, dst_ext); - TCGv src1 = gpr_src(ctx, a->rj, src_ext); - TCGv src2 = gpr_src(ctx, a->rk, src_ext); - - func(dest, src1, src2, a->sa); - gen_set_gpr(a->rd, dest, dst_ext); - - return true; -} - -static bool trans_lu12i_w(DisasContext *ctx, arg_lu12i_w *a) -{ - TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); - - tcg_gen_movi_tl(dest, a->imm << 12); - gen_set_gpr(a->rd, dest, EXT_NONE); - - return true; -} - -static bool gen_pc(DisasContext *ctx, arg_r_i *a, - target_ulong (*func)(target_ulong, int)) -{ - TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); - target_ulong addr = make_address_pc(ctx, func(ctx->base.pc_next, a->imm)); - - tcg_gen_movi_tl(dest, addr); - gen_set_gpr(a->rd, dest, EXT_NONE); - - return true; -} - -static void gen_slt(TCGv dest, TCGv src1, TCGv src2) -{ - tcg_gen_setcond_tl(TCG_COND_LT, dest, src1, src2); -} - -static void gen_sltu(TCGv dest, TCGv src1, TCGv src2) -{ - tcg_gen_setcond_tl(TCG_COND_LTU, dest, src1, src2); -} - -static void gen_mulh_w(TCGv dest, TCGv src1, TCGv src2) -{ - tcg_gen_mul_i64(dest, src1, src2); - tcg_gen_sari_i64(dest, dest, 32); -} - -static void gen_mulh_d(TCGv dest, TCGv src1, TCGv src2) -{ - TCGv discard = tcg_temp_new(); - tcg_gen_muls2_tl(discard, dest, src1, src2); -} - -static void gen_mulh_du(TCGv dest, TCGv src1, TCGv src2) -{ - TCGv discard = tcg_temp_new(); - tcg_gen_mulu2_tl(discard, dest, src1, src2); -} - -static void prep_divisor_d(TCGv ret, TCGv src1, TCGv src2) -{ - TCGv t0 = tcg_temp_new(); - TCGv t1 = tcg_temp_new(); - TCGv zero = tcg_constant_tl(0); - - /* - * If min / -1, set the divisor to 1. - * This avoids potential host overflow trap and produces min. - * If x / 0, set the divisor to 1. - * This avoids potential host overflow trap; - * the required result is undefined. - */ - tcg_gen_setcondi_tl(TCG_COND_EQ, ret, src1, INT64_MIN); - tcg_gen_setcondi_tl(TCG_COND_EQ, t0, src2, -1); - tcg_gen_setcondi_tl(TCG_COND_EQ, t1, src2, 0); - tcg_gen_and_tl(ret, ret, t0); - tcg_gen_or_tl(ret, ret, t1); - tcg_gen_movcond_tl(TCG_COND_NE, ret, ret, zero, ret, src2); -} - -static void prep_divisor_du(TCGv ret, TCGv src2) -{ - TCGv zero = tcg_constant_tl(0); - TCGv one = tcg_constant_tl(1); - - /* - * If x / 0, set the divisor to 1. - * This avoids potential host overflow trap; - * the required result is undefined. - */ - tcg_gen_movcond_tl(TCG_COND_EQ, ret, src2, zero, one, src2); -} - -static void gen_div_d(TCGv dest, TCGv src1, TCGv src2) -{ - TCGv t0 = tcg_temp_new(); - prep_divisor_d(t0, src1, src2); - tcg_gen_div_tl(dest, src1, t0); -} - -static void gen_rem_d(TCGv dest, TCGv src1, TCGv src2) -{ - TCGv t0 = tcg_temp_new(); - prep_divisor_d(t0, src1, src2); - tcg_gen_rem_tl(dest, src1, t0); -} - -static void gen_div_du(TCGv dest, TCGv src1, TCGv src2) -{ - TCGv t0 = tcg_temp_new(); - prep_divisor_du(t0, src2); - tcg_gen_divu_tl(dest, src1, t0); -} - -static void gen_rem_du(TCGv dest, TCGv src1, TCGv src2) -{ - TCGv t0 = tcg_temp_new(); - prep_divisor_du(t0, src2); - tcg_gen_remu_tl(dest, src1, t0); -} - -static void gen_div_w(TCGv dest, TCGv src1, TCGv src2) -{ - TCGv t0 = tcg_temp_new(); - /* We need not check for integer overflow for div_w. 
*/ - prep_divisor_du(t0, src2); - tcg_gen_div_tl(dest, src1, t0); -} - -static void gen_rem_w(TCGv dest, TCGv src1, TCGv src2) -{ - TCGv t0 = tcg_temp_new(); - /* We need not check for integer overflow for rem_w. */ - prep_divisor_du(t0, src2); - tcg_gen_rem_tl(dest, src1, t0); -} - -static void gen_alsl(TCGv dest, TCGv src1, TCGv src2, target_long sa) -{ - TCGv t0 = tcg_temp_new(); - tcg_gen_shli_tl(t0, src1, sa); - tcg_gen_add_tl(dest, t0, src2); -} - -static bool trans_lu32i_d(DisasContext *ctx, arg_lu32i_d *a) -{ - TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); - TCGv src1 = gpr_src(ctx, a->rd, EXT_NONE); - TCGv src2 = tcg_constant_tl(a->imm); - - if (!avail_64(ctx)) { - return false; - } - - tcg_gen_deposit_tl(dest, src1, src2, 32, 32); - gen_set_gpr(a->rd, dest, EXT_NONE); - - return true; -} - -static bool trans_lu52i_d(DisasContext *ctx, arg_lu52i_d *a) -{ - TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); - TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); - TCGv src2 = tcg_constant_tl(a->imm); - - if (!avail_64(ctx)) { - return false; - } - - tcg_gen_deposit_tl(dest, src1, src2, 52, 12); - gen_set_gpr(a->rd, dest, EXT_NONE); - - return true; -} - -static target_ulong gen_pcaddi(target_ulong pc, int imm) -{ - return pc + (imm << 2); -} - -static target_ulong gen_pcalau12i(target_ulong pc, int imm) -{ - return (pc + (imm << 12)) & ~0xfff; -} - -static target_ulong gen_pcaddu12i(target_ulong pc, int imm) -{ - return pc + (imm << 12); -} - -static target_ulong gen_pcaddu18i(target_ulong pc, int imm) -{ - return pc + ((target_ulong)(imm) << 18); -} - -static bool trans_addu16i_d(DisasContext *ctx, arg_addu16i_d *a) -{ - TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); - TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); - - if (!avail_64(ctx)) { - return false; - } - - tcg_gen_addi_tl(dest, src1, a->imm << 16); - gen_set_gpr(a->rd, dest, EXT_NONE); - - return true; -} - -TRANS(add_w, ALL, gen_rrr, EXT_NONE, EXT_NONE, EXT_SIGN, tcg_gen_add_tl) -TRANS(add_d, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, tcg_gen_add_tl) -TRANS(sub_w, ALL, gen_rrr, EXT_NONE, EXT_NONE, EXT_SIGN, tcg_gen_sub_tl) -TRANS(sub_d, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, tcg_gen_sub_tl) -TRANS(and, ALL, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, tcg_gen_and_tl) -TRANS(or, ALL, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, tcg_gen_or_tl) -TRANS(xor, ALL, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, tcg_gen_xor_tl) -TRANS(nor, ALL, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, tcg_gen_nor_tl) -TRANS(andn, ALL, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, tcg_gen_andc_tl) -TRANS(orn, ALL, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, tcg_gen_orc_tl) -TRANS(slt, ALL, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_slt) -TRANS(sltu, ALL, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_sltu) -TRANS(mul_w, ALL, gen_rrr, EXT_SIGN, EXT_SIGN, EXT_SIGN, tcg_gen_mul_tl) -TRANS(mul_d, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, tcg_gen_mul_tl) -TRANS(mulh_w, ALL, gen_rrr, EXT_SIGN, EXT_SIGN, EXT_NONE, gen_mulh_w) -TRANS(mulh_wu, ALL, gen_rrr, EXT_ZERO, EXT_ZERO, EXT_NONE, gen_mulh_w) -TRANS(mulh_d, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_mulh_d) -TRANS(mulh_du, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_mulh_du) -TRANS(mulw_d_w, 64, gen_rrr, EXT_SIGN, EXT_SIGN, EXT_NONE, tcg_gen_mul_tl) -TRANS(mulw_d_wu, 64, gen_rrr, EXT_ZERO, EXT_ZERO, EXT_NONE, tcg_gen_mul_tl) -TRANS(div_w, ALL, gen_rrr, EXT_SIGN, EXT_SIGN, EXT_SIGN, gen_div_w) -TRANS(mod_w, ALL, gen_rrr, EXT_SIGN, EXT_SIGN, EXT_SIGN, gen_rem_w) -TRANS(div_wu, ALL, gen_rrr, EXT_ZERO, EXT_ZERO, EXT_SIGN, gen_div_du) -TRANS(mod_wu, ALL, 
gen_rrr, EXT_ZERO, EXT_ZERO, EXT_SIGN, gen_rem_du) -TRANS(div_d, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_div_d) -TRANS(mod_d, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_rem_d) -TRANS(div_du, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_div_du) -TRANS(mod_du, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_rem_du) -TRANS(slti, ALL, gen_rri_v, EXT_NONE, EXT_NONE, gen_slt) -TRANS(sltui, ALL, gen_rri_v, EXT_NONE, EXT_NONE, gen_sltu) -TRANS(addi_w, ALL, gen_rri_c, EXT_NONE, EXT_SIGN, tcg_gen_addi_tl) -TRANS(addi_d, 64, gen_rri_c, EXT_NONE, EXT_NONE, tcg_gen_addi_tl) -TRANS(alsl_w, ALL, gen_rrr_sa, EXT_NONE, EXT_SIGN, gen_alsl) -TRANS(alsl_wu, 64, gen_rrr_sa, EXT_NONE, EXT_ZERO, gen_alsl) -TRANS(alsl_d, 64, gen_rrr_sa, EXT_NONE, EXT_NONE, gen_alsl) -TRANS(pcaddi, ALL, gen_pc, gen_pcaddi) -TRANS(pcalau12i, ALL, gen_pc, gen_pcalau12i) -TRANS(pcaddu12i, ALL, gen_pc, gen_pcaddu12i) -TRANS(pcaddu18i, 64, gen_pc, gen_pcaddu18i) -TRANS(andi, ALL, gen_rri_c, EXT_NONE, EXT_NONE, tcg_gen_andi_tl) -TRANS(ori, ALL, gen_rri_c, EXT_NONE, EXT_NONE, tcg_gen_ori_tl) -TRANS(xori, ALL, gen_rri_c, EXT_NONE, EXT_NONE, tcg_gen_xori_tl) diff --git a/target/loongarch/insn_trans/trans_atomic.c.inc b/target/loongarch/insn_trans/trans_atomic.c.inc deleted file mode 100644 index 80c2e286fd..0000000000 --- a/target/loongarch/insn_trans/trans_atomic.c.inc +++ /dev/null @@ -1,111 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (c) 2021 Loongson Technology Corporation Limited - */ - -static bool gen_ll(DisasContext *ctx, arg_rr_i *a, MemOp mop) -{ - TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); - TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); - TCGv t0 = make_address_i(ctx, src1, a->imm); - - tcg_gen_qemu_ld_i64(dest, t0, ctx->mem_idx, mop); - tcg_gen_st_tl(t0, tcg_env, offsetof(CPULoongArchState, lladdr)); - tcg_gen_st_tl(dest, tcg_env, offsetof(CPULoongArchState, llval)); - gen_set_gpr(a->rd, dest, EXT_NONE); - - return true; -} - -static bool gen_sc(DisasContext *ctx, arg_rr_i *a, MemOp mop) -{ - TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); - TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); - TCGv src2 = gpr_src(ctx, a->rd, EXT_NONE); - TCGv t0 = tcg_temp_new(); - TCGv val = tcg_temp_new(); - - TCGLabel *l1 = gen_new_label(); - TCGLabel *done = gen_new_label(); - - tcg_gen_addi_tl(t0, src1, a->imm); - tcg_gen_brcond_tl(TCG_COND_EQ, t0, cpu_lladdr, l1); - tcg_gen_movi_tl(dest, 0); - tcg_gen_br(done); - - gen_set_label(l1); - tcg_gen_mov_tl(val, src2); - /* generate cmpxchg */ - tcg_gen_atomic_cmpxchg_tl(t0, cpu_lladdr, cpu_llval, - val, ctx->mem_idx, mop); - tcg_gen_setcond_tl(TCG_COND_EQ, dest, t0, cpu_llval); - gen_set_label(done); - gen_set_gpr(a->rd, dest, EXT_NONE); - - return true; -} - -static bool gen_am(DisasContext *ctx, arg_rrr *a, - void (*func)(TCGv, TCGv, TCGv, TCGArg, MemOp), - MemOp mop) -{ - TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); - TCGv addr = gpr_src(ctx, a->rj, EXT_NONE); - TCGv val = gpr_src(ctx, a->rk, EXT_NONE); - - if (a->rd != 0 && (a->rj == a->rd || a->rk == a->rd)) { - qemu_log_mask(LOG_GUEST_ERROR, - "Warning: source register overlaps destination register" - "in atomic insn at pc=0x" TARGET_FMT_lx "\n", - ctx->base.pc_next - 4); - return false; - } - - addr = make_address_i(ctx, addr, 0); - - func(dest, addr, val, ctx->mem_idx, mop); - gen_set_gpr(a->rd, dest, EXT_NONE); - - return true; -} - -TRANS(ll_w, ALL, gen_ll, MO_TESL) -TRANS(sc_w, ALL, gen_sc, MO_TESL) -TRANS(ll_d, 64, gen_ll, MO_TEUQ) -TRANS(sc_d, 64, gen_sc, MO_TEUQ) -TRANS(amswap_w, LAM, gen_am, 
tcg_gen_atomic_xchg_tl, MO_TESL) -TRANS(amswap_d, LAM, gen_am, tcg_gen_atomic_xchg_tl, MO_TEUQ) -TRANS(amadd_w, LAM, gen_am, tcg_gen_atomic_fetch_add_tl, MO_TESL) -TRANS(amadd_d, LAM, gen_am, tcg_gen_atomic_fetch_add_tl, MO_TEUQ) -TRANS(amand_w, LAM, gen_am, tcg_gen_atomic_fetch_and_tl, MO_TESL) -TRANS(amand_d, LAM, gen_am, tcg_gen_atomic_fetch_and_tl, MO_TEUQ) -TRANS(amor_w, LAM, gen_am, tcg_gen_atomic_fetch_or_tl, MO_TESL) -TRANS(amor_d, LAM, gen_am, tcg_gen_atomic_fetch_or_tl, MO_TEUQ) -TRANS(amxor_w, LAM, gen_am, tcg_gen_atomic_fetch_xor_tl, MO_TESL) -TRANS(amxor_d, LAM, gen_am, tcg_gen_atomic_fetch_xor_tl, MO_TEUQ) -TRANS(ammax_w, LAM, gen_am, tcg_gen_atomic_fetch_smax_tl, MO_TESL) -TRANS(ammax_d, LAM, gen_am, tcg_gen_atomic_fetch_smax_tl, MO_TEUQ) -TRANS(ammin_w, LAM, gen_am, tcg_gen_atomic_fetch_smin_tl, MO_TESL) -TRANS(ammin_d, LAM, gen_am, tcg_gen_atomic_fetch_smin_tl, MO_TEUQ) -TRANS(ammax_wu, LAM, gen_am, tcg_gen_atomic_fetch_umax_tl, MO_TESL) -TRANS(ammax_du, LAM, gen_am, tcg_gen_atomic_fetch_umax_tl, MO_TEUQ) -TRANS(ammin_wu, LAM, gen_am, tcg_gen_atomic_fetch_umin_tl, MO_TESL) -TRANS(ammin_du, LAM, gen_am, tcg_gen_atomic_fetch_umin_tl, MO_TEUQ) -TRANS(amswap_db_w, LAM, gen_am, tcg_gen_atomic_xchg_tl, MO_TESL) -TRANS(amswap_db_d, LAM, gen_am, tcg_gen_atomic_xchg_tl, MO_TEUQ) -TRANS(amadd_db_w, LAM, gen_am, tcg_gen_atomic_fetch_add_tl, MO_TESL) -TRANS(amadd_db_d, LAM, gen_am, tcg_gen_atomic_fetch_add_tl, MO_TEUQ) -TRANS(amand_db_w, LAM, gen_am, tcg_gen_atomic_fetch_and_tl, MO_TESL) -TRANS(amand_db_d, LAM, gen_am, tcg_gen_atomic_fetch_and_tl, MO_TEUQ) -TRANS(amor_db_w, LAM, gen_am, tcg_gen_atomic_fetch_or_tl, MO_TESL) -TRANS(amor_db_d, LAM, gen_am, tcg_gen_atomic_fetch_or_tl, MO_TEUQ) -TRANS(amxor_db_w, LAM, gen_am, tcg_gen_atomic_fetch_xor_tl, MO_TESL) -TRANS(amxor_db_d, LAM, gen_am, tcg_gen_atomic_fetch_xor_tl, MO_TEUQ) -TRANS(ammax_db_w, LAM, gen_am, tcg_gen_atomic_fetch_smax_tl, MO_TESL) -TRANS(ammax_db_d, LAM, gen_am, tcg_gen_atomic_fetch_smax_tl, MO_TEUQ) -TRANS(ammin_db_w, LAM, gen_am, tcg_gen_atomic_fetch_smin_tl, MO_TESL) -TRANS(ammin_db_d, LAM, gen_am, tcg_gen_atomic_fetch_smin_tl, MO_TEUQ) -TRANS(ammax_db_wu, LAM, gen_am, tcg_gen_atomic_fetch_umax_tl, MO_TESL) -TRANS(ammax_db_du, LAM, gen_am, tcg_gen_atomic_fetch_umax_tl, MO_TEUQ) -TRANS(ammin_db_wu, LAM, gen_am, tcg_gen_atomic_fetch_umin_tl, MO_TESL) -TRANS(ammin_db_du, LAM, gen_am, tcg_gen_atomic_fetch_umin_tl, MO_TEUQ) diff --git a/target/loongarch/insn_trans/trans_bit.c.inc b/target/loongarch/insn_trans/trans_bit.c.inc deleted file mode 100644 index ee5fa003ce..0000000000 --- a/target/loongarch/insn_trans/trans_bit.c.inc +++ /dev/null @@ -1,208 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (c) 2021 Loongson Technology Corporation Limited - */ - -static bool gen_rr(DisasContext *ctx, arg_rr *a, - DisasExtend src_ext, DisasExtend dst_ext, - void (*func)(TCGv, TCGv)) -{ - TCGv dest = gpr_dst(ctx, a->rd, dst_ext); - TCGv src1 = gpr_src(ctx, a->rj, src_ext); - - func(dest, src1); - gen_set_gpr(a->rd, dest, dst_ext); - - return true; -} - -static void gen_bytepick_w(TCGv dest, TCGv src1, TCGv src2, target_long sa) -{ - tcg_gen_concat_tl_i64(dest, src1, src2); - tcg_gen_sextract_i64(dest, dest, (32 - sa * 8), 32); -} - -static void gen_bytepick_d(TCGv dest, TCGv src1, TCGv src2, target_long sa) -{ - tcg_gen_extract2_i64(dest, src1, src2, (64 - sa * 8)); -} - -static bool gen_bstrins(DisasContext *ctx, arg_rr_ms_ls *a, - DisasExtend dst_ext) -{ - TCGv src1 = gpr_src(ctx, a->rd, EXT_NONE); - 
TCGv src2 = gpr_src(ctx, a->rj, EXT_NONE); - TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); - - if (a->ls > a->ms) { - return false; - } - - tcg_gen_deposit_tl(dest, src1, src2, a->ls, a->ms - a->ls + 1); - gen_set_gpr(a->rd, dest, dst_ext); - return true; -} - -static bool gen_bstrpick(DisasContext *ctx, arg_rr_ms_ls *a, - DisasExtend dst_ext) -{ - TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); - TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); - - if (a->ls > a->ms) { - return false; - } - - tcg_gen_extract_tl(dest, src1, a->ls, a->ms - a->ls + 1); - gen_set_gpr(a->rd, dest, dst_ext); - return true; -} - -static void gen_clz_w(TCGv dest, TCGv src1) -{ - tcg_gen_clzi_tl(dest, src1, TARGET_LONG_BITS); - tcg_gen_subi_tl(dest, dest, TARGET_LONG_BITS - 32); -} - -static void gen_clo_w(TCGv dest, TCGv src1) -{ - tcg_gen_not_tl(dest, src1); - tcg_gen_ext32u_tl(dest, dest); - gen_clz_w(dest, dest); -} - -static void gen_ctz_w(TCGv dest, TCGv src1) -{ - tcg_gen_ori_tl(dest, src1, (target_ulong)MAKE_64BIT_MASK(32, 32)); - tcg_gen_ctzi_tl(dest, dest, TARGET_LONG_BITS); -} - -static void gen_cto_w(TCGv dest, TCGv src1) -{ - tcg_gen_not_tl(dest, src1); - gen_ctz_w(dest, dest); -} - -static void gen_clz_d(TCGv dest, TCGv src1) -{ - tcg_gen_clzi_i64(dest, src1, TARGET_LONG_BITS); -} - -static void gen_clo_d(TCGv dest, TCGv src1) -{ - tcg_gen_not_tl(dest, src1); - gen_clz_d(dest, dest); -} - -static void gen_ctz_d(TCGv dest, TCGv src1) -{ - tcg_gen_ctzi_tl(dest, src1, TARGET_LONG_BITS); -} - -static void gen_cto_d(TCGv dest, TCGv src1) -{ - tcg_gen_not_tl(dest, src1); - gen_ctz_d(dest, dest); -} - -static void gen_revb_2w(TCGv dest, TCGv src1) -{ - tcg_gen_bswap64_i64(dest, src1); - tcg_gen_rotri_i64(dest, dest, 32); -} - -static void gen_revb_2h(TCGv dest, TCGv src1) -{ - TCGv mask = tcg_constant_tl(0x00FF00FF); - TCGv t0 = tcg_temp_new(); - TCGv t1 = tcg_temp_new(); - - tcg_gen_shri_tl(t0, src1, 8); - tcg_gen_and_tl(t0, t0, mask); - tcg_gen_and_tl(t1, src1, mask); - tcg_gen_shli_tl(t1, t1, 8); - tcg_gen_or_tl(dest, t0, t1); -} - -static void gen_revb_4h(TCGv dest, TCGv src1) -{ - TCGv mask = tcg_constant_tl(0x00FF00FF00FF00FFULL); - TCGv t0 = tcg_temp_new(); - TCGv t1 = tcg_temp_new(); - - tcg_gen_shri_tl(t0, src1, 8); - tcg_gen_and_tl(t0, t0, mask); - tcg_gen_and_tl(t1, src1, mask); - tcg_gen_shli_tl(t1, t1, 8); - tcg_gen_or_tl(dest, t0, t1); -} - -static void gen_revh_2w(TCGv dest, TCGv src1) -{ - TCGv_i64 t0 = tcg_temp_new_i64(); - TCGv_i64 t1 = tcg_temp_new_i64(); - TCGv_i64 mask = tcg_constant_i64(0x0000ffff0000ffffull); - - tcg_gen_shri_i64(t0, src1, 16); - tcg_gen_and_i64(t1, src1, mask); - tcg_gen_and_i64(t0, t0, mask); - tcg_gen_shli_i64(t1, t1, 16); - tcg_gen_or_i64(dest, t1, t0); -} - -static void gen_revh_d(TCGv dest, TCGv src1) -{ - TCGv t0 = tcg_temp_new(); - TCGv t1 = tcg_temp_new(); - TCGv mask = tcg_constant_tl(0x0000FFFF0000FFFFULL); - - tcg_gen_shri_tl(t1, src1, 16); - tcg_gen_and_tl(t1, t1, mask); - tcg_gen_and_tl(t0, src1, mask); - tcg_gen_shli_tl(t0, t0, 16); - tcg_gen_or_tl(t0, t0, t1); - tcg_gen_rotri_tl(dest, t0, 32); -} - -static void gen_maskeqz(TCGv dest, TCGv src1, TCGv src2) -{ - TCGv zero = tcg_constant_tl(0); - - tcg_gen_movcond_tl(TCG_COND_EQ, dest, src2, zero, zero, src1); -} - -static void gen_masknez(TCGv dest, TCGv src1, TCGv src2) -{ - TCGv zero = tcg_constant_tl(0); - - tcg_gen_movcond_tl(TCG_COND_NE, dest, src2, zero, zero, src1); -} - -TRANS(ext_w_h, ALL, gen_rr, EXT_NONE, EXT_NONE, tcg_gen_ext16s_tl) -TRANS(ext_w_b, ALL, gen_rr, EXT_NONE, EXT_NONE, tcg_gen_ext8s_tl) 
-TRANS(clo_w, ALL, gen_rr, EXT_NONE, EXT_NONE, gen_clo_w) -TRANS(clz_w, ALL, gen_rr, EXT_ZERO, EXT_NONE, gen_clz_w) -TRANS(cto_w, ALL, gen_rr, EXT_NONE, EXT_NONE, gen_cto_w) -TRANS(ctz_w, ALL, gen_rr, EXT_NONE, EXT_NONE, gen_ctz_w) -TRANS(clo_d, 64, gen_rr, EXT_NONE, EXT_NONE, gen_clo_d) -TRANS(clz_d, 64, gen_rr, EXT_NONE, EXT_NONE, gen_clz_d) -TRANS(cto_d, 64, gen_rr, EXT_NONE, EXT_NONE, gen_cto_d) -TRANS(ctz_d, 64, gen_rr, EXT_NONE, EXT_NONE, gen_ctz_d) -TRANS(revb_2h, ALL, gen_rr, EXT_NONE, EXT_SIGN, gen_revb_2h) -TRANS(revb_4h, 64, gen_rr, EXT_NONE, EXT_NONE, gen_revb_4h) -TRANS(revb_2w, 64, gen_rr, EXT_NONE, EXT_NONE, gen_revb_2w) -TRANS(revb_d, 64, gen_rr, EXT_NONE, EXT_NONE, tcg_gen_bswap64_i64) -TRANS(revh_2w, 64, gen_rr, EXT_NONE, EXT_NONE, gen_revh_2w) -TRANS(revh_d, 64, gen_rr, EXT_NONE, EXT_NONE, gen_revh_d) -TRANS(bitrev_4b, ALL, gen_rr, EXT_ZERO, EXT_SIGN, gen_helper_bitswap) -TRANS(bitrev_8b, 64, gen_rr, EXT_NONE, EXT_NONE, gen_helper_bitswap) -TRANS(bitrev_w, ALL, gen_rr, EXT_NONE, EXT_SIGN, gen_helper_bitrev_w) -TRANS(bitrev_d, 64, gen_rr, EXT_NONE, EXT_NONE, gen_helper_bitrev_d) -TRANS(maskeqz, ALL, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_maskeqz) -TRANS(masknez, ALL, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_masknez) -TRANS(bytepick_w, ALL, gen_rrr_sa, EXT_NONE, EXT_NONE, gen_bytepick_w) -TRANS(bytepick_d, 64, gen_rrr_sa, EXT_NONE, EXT_NONE, gen_bytepick_d) -TRANS(bstrins_w, ALL, gen_bstrins, EXT_SIGN) -TRANS(bstrins_d, 64, gen_bstrins, EXT_NONE) -TRANS(bstrpick_w, ALL, gen_bstrpick, EXT_SIGN) -TRANS(bstrpick_d, 64, gen_bstrpick, EXT_NONE) diff --git a/target/loongarch/insn_trans/trans_branch.c.inc b/target/loongarch/insn_trans/trans_branch.c.inc deleted file mode 100644 index 221e5159db..0000000000 --- a/target/loongarch/insn_trans/trans_branch.c.inc +++ /dev/null @@ -1,84 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (c) 2021 Loongson Technology Corporation Limited - */ - -static bool trans_b(DisasContext *ctx, arg_b *a) -{ - gen_goto_tb(ctx, 0, ctx->base.pc_next + a->offs); - ctx->base.is_jmp = DISAS_NORETURN; - return true; -} - -static bool trans_bl(DisasContext *ctx, arg_bl *a) -{ - tcg_gen_movi_tl(cpu_gpr[1], make_address_pc(ctx, ctx->base.pc_next + 4)); - gen_goto_tb(ctx, 0, ctx->base.pc_next + a->offs); - ctx->base.is_jmp = DISAS_NORETURN; - return true; -} - -static bool trans_jirl(DisasContext *ctx, arg_jirl *a) -{ - TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); - TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); - - TCGv addr = make_address_i(ctx, src1, a->imm); - tcg_gen_mov_tl(cpu_pc, addr); - tcg_gen_movi_tl(dest, make_address_pc(ctx, ctx->base.pc_next + 4)); - gen_set_gpr(a->rd, dest, EXT_NONE); - tcg_gen_lookup_and_goto_ptr(); - ctx->base.is_jmp = DISAS_NORETURN; - return true; -} - -static void gen_bc(DisasContext *ctx, TCGv src1, TCGv src2, - target_long offs, TCGCond cond) -{ - TCGLabel *l = gen_new_label(); - tcg_gen_brcond_tl(cond, src1, src2, l); - gen_goto_tb(ctx, 1, ctx->base.pc_next + 4); - gen_set_label(l); - gen_goto_tb(ctx, 0, ctx->base.pc_next + offs); - ctx->base.is_jmp = DISAS_NORETURN; -} - -static bool gen_rr_bc(DisasContext *ctx, arg_rr_offs *a, TCGCond cond) -{ - TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); - TCGv src2 = gpr_src(ctx, a->rd, EXT_NONE); - - gen_bc(ctx, src1, src2, a->offs, cond); - return true; -} - -static bool gen_rz_bc(DisasContext *ctx, arg_r_offs *a, TCGCond cond) -{ - TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); - TCGv src2 = tcg_constant_tl(0); - - gen_bc(ctx, src1, src2, a->offs, cond); - 
return true; -} - -static bool gen_cz_bc(DisasContext *ctx, arg_c_offs *a, TCGCond cond) -{ - TCGv src1 = tcg_temp_new(); - TCGv src2 = tcg_constant_tl(0); - - tcg_gen_ld8u_tl(src1, tcg_env, - offsetof(CPULoongArchState, cf[a->cj])); - gen_bc(ctx, src1, src2, a->offs, cond); - return true; -} - -TRANS(beq, ALL, gen_rr_bc, TCG_COND_EQ) -TRANS(bne, ALL, gen_rr_bc, TCG_COND_NE) -TRANS(blt, ALL, gen_rr_bc, TCG_COND_LT) -TRANS(bge, ALL, gen_rr_bc, TCG_COND_GE) -TRANS(bltu, ALL, gen_rr_bc, TCG_COND_LTU) -TRANS(bgeu, ALL, gen_rr_bc, TCG_COND_GEU) -TRANS(beqz, ALL, gen_rz_bc, TCG_COND_EQ) -TRANS(bnez, ALL, gen_rz_bc, TCG_COND_NE) -TRANS(bceqz, 64, gen_cz_bc, TCG_COND_EQ) -TRANS(bcnez, 64, gen_cz_bc, TCG_COND_NE) diff --git a/target/loongarch/insn_trans/trans_extra.c.inc b/target/loongarch/insn_trans/trans_extra.c.inc deleted file mode 100644 index cfa361fecf..0000000000 --- a/target/loongarch/insn_trans/trans_extra.c.inc +++ /dev/null @@ -1,107 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (c) 2021 Loongson Technology Corporation Limited - */ - -static bool trans_break(DisasContext *ctx, arg_break *a) -{ - generate_exception(ctx, EXCCODE_BRK); - return true; -} - -static bool trans_syscall(DisasContext *ctx, arg_syscall *a) -{ - generate_exception(ctx, EXCCODE_SYS); - return true; -} - -static bool trans_asrtle_d(DisasContext *ctx, arg_asrtle_d * a) -{ - TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); - TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE); - - if (!avail_64(ctx)) { - return false; - } - - gen_helper_asrtle_d(tcg_env, src1, src2); - return true; -} - -static bool trans_asrtgt_d(DisasContext *ctx, arg_asrtgt_d * a) -{ - TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); - TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE); - - if (!avail_64(ctx)) { - return false; - } - - gen_helper_asrtgt_d(tcg_env, src1, src2); - return true; -} - -static bool gen_rdtime(DisasContext *ctx, arg_rr *a, - bool word, bool high) -{ - TCGv dst1 = gpr_dst(ctx, a->rd, EXT_NONE); - TCGv dst2 = gpr_dst(ctx, a->rj, EXT_NONE); - - translator_io_start(&ctx->base); - gen_helper_rdtime_d(dst1, tcg_env); - if (word) { - tcg_gen_sextract_tl(dst1, dst1, high ? 
32 : 0, 32); - } - tcg_gen_ld_i64(dst2, tcg_env, offsetof(CPULoongArchState, CSR_TID)); - - return true; -} - -static bool trans_rdtimel_w(DisasContext *ctx, arg_rdtimel_w *a) -{ - return gen_rdtime(ctx, a, 1, 0); -} - -static bool trans_rdtimeh_w(DisasContext *ctx, arg_rdtimeh_w *a) -{ - return gen_rdtime(ctx, a, 1, 1); -} - -static bool trans_rdtime_d(DisasContext *ctx, arg_rdtime_d *a) -{ - return gen_rdtime(ctx, a, 0, 0); -} - -static bool trans_cpucfg(DisasContext *ctx, arg_cpucfg *a) -{ - TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); - TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); - - gen_helper_cpucfg(dest, tcg_env, src1); - gen_set_gpr(a->rd, dest, EXT_NONE); - - return true; -} - -static bool gen_crc(DisasContext *ctx, arg_rrr *a, - void (*func)(TCGv, TCGv, TCGv, TCGv), - TCGv tsz) -{ - TCGv dest = gpr_dst(ctx, a->rd, EXT_SIGN); - TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); - TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE); - - func(dest, src2, src1, tsz); - gen_set_gpr(a->rd, dest, EXT_SIGN); - - return true; -} - -TRANS(crc_w_b_w, 64, gen_crc, gen_helper_crc32, tcg_constant_tl(1)) -TRANS(crc_w_h_w, 64, gen_crc, gen_helper_crc32, tcg_constant_tl(2)) -TRANS(crc_w_w_w, 64, gen_crc, gen_helper_crc32, tcg_constant_tl(4)) -TRANS(crc_w_d_w, 64, gen_crc, gen_helper_crc32, tcg_constant_tl(8)) -TRANS(crcc_w_b_w, 64, gen_crc, gen_helper_crc32c, tcg_constant_tl(1)) -TRANS(crcc_w_h_w, 64, gen_crc, gen_helper_crc32c, tcg_constant_tl(2)) -TRANS(crcc_w_w_w, 64, gen_crc, gen_helper_crc32c, tcg_constant_tl(4)) -TRANS(crcc_w_d_w, 64, gen_crc, gen_helper_crc32c, tcg_constant_tl(8)) diff --git a/target/loongarch/insn_trans/trans_farith.c.inc b/target/loongarch/insn_trans/trans_farith.c.inc deleted file mode 100644 index f4a0dea727..0000000000 --- a/target/loongarch/insn_trans/trans_farith.c.inc +++ /dev/null @@ -1,207 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (c) 2021 Loongson Technology Corporation Limited - */ - -#ifndef CONFIG_USER_ONLY -#define CHECK_FPE do { \ - if ((ctx->base.tb->flags & HW_FLAGS_EUEN_FPE) == 0) { \ - generate_exception(ctx, EXCCODE_FPD); \ - return true; \ - } \ -} while (0) -#else -#define CHECK_FPE -#endif - -static bool gen_fff(DisasContext *ctx, arg_fff *a, - void (*func)(TCGv, TCGv_env, TCGv, TCGv)) -{ - TCGv dest = get_fpr(ctx, a->fd); - TCGv src1 = get_fpr(ctx, a->fj); - TCGv src2 = get_fpr(ctx, a->fk); - - CHECK_FPE; - - func(dest, tcg_env, src1, src2); - set_fpr(a->fd, dest); - - return true; -} - -static bool gen_ff(DisasContext *ctx, arg_ff *a, - void (*func)(TCGv, TCGv_env, TCGv)) -{ - TCGv dest = get_fpr(ctx, a->fd); - TCGv src = get_fpr(ctx, a->fj); - - CHECK_FPE; - - func(dest, tcg_env, src); - set_fpr(a->fd, dest); - - return true; -} - -static bool gen_muladd(DisasContext *ctx, arg_ffff *a, - void (*func)(TCGv, TCGv_env, TCGv, TCGv, TCGv, TCGv_i32), - int flag) -{ - TCGv_i32 tflag = tcg_constant_i32(flag); - TCGv dest = get_fpr(ctx, a->fd); - TCGv src1 = get_fpr(ctx, a->fj); - TCGv src2 = get_fpr(ctx, a->fk); - TCGv src3 = get_fpr(ctx, a->fa); - - CHECK_FPE; - - func(dest, tcg_env, src1, src2, src3, tflag); - set_fpr(a->fd, dest); - - return true; -} - -static bool trans_fcopysign_s(DisasContext *ctx, arg_fcopysign_s *a) -{ - TCGv dest = get_fpr(ctx, a->fd); - TCGv src1 = get_fpr(ctx, a->fk); - TCGv src2 = get_fpr(ctx, a->fj); - - if (!avail_FP_SP(ctx)) { - return false; - } - - CHECK_FPE; - - tcg_gen_deposit_i64(dest, src1, src2, 0, 31); - set_fpr(a->fd, dest); - - return true; -} - -static bool trans_fcopysign_d(DisasContext *ctx, 
arg_fcopysign_d *a) -{ - TCGv dest = get_fpr(ctx, a->fd); - TCGv src1 = get_fpr(ctx, a->fk); - TCGv src2 = get_fpr(ctx, a->fj); - - if (!avail_FP_DP(ctx)) { - return false; - } - - CHECK_FPE; - - tcg_gen_deposit_i64(dest, src1, src2, 0, 63); - set_fpr(a->fd, dest); - - return true; -} - -static bool trans_fabs_s(DisasContext *ctx, arg_fabs_s *a) -{ - TCGv dest = get_fpr(ctx, a->fd); - TCGv src = get_fpr(ctx, a->fj); - - if (!avail_FP_SP(ctx)) { - return false; - } - - CHECK_FPE; - - tcg_gen_andi_i64(dest, src, MAKE_64BIT_MASK(0, 31)); - gen_nanbox_s(dest, dest); - set_fpr(a->fd, dest); - - return true; -} - -static bool trans_fabs_d(DisasContext *ctx, arg_fabs_d *a) -{ - TCGv dest = get_fpr(ctx, a->fd); - TCGv src = get_fpr(ctx, a->fj); - - if (!avail_FP_DP(ctx)) { - return false; - } - - CHECK_FPE; - - tcg_gen_andi_i64(dest, src, MAKE_64BIT_MASK(0, 63)); - set_fpr(a->fd, dest); - - return true; -} - -static bool trans_fneg_s(DisasContext *ctx, arg_fneg_s *a) -{ - TCGv dest = get_fpr(ctx, a->fd); - TCGv src = get_fpr(ctx, a->fj); - - if (!avail_FP_SP(ctx)) { - return false; - } - - CHECK_FPE; - - tcg_gen_xori_i64(dest, src, 0x80000000); - gen_nanbox_s(dest, dest); - set_fpr(a->fd, dest); - - return true; -} - -static bool trans_fneg_d(DisasContext *ctx, arg_fneg_d *a) -{ - TCGv dest = get_fpr(ctx, a->fd); - TCGv src = get_fpr(ctx, a->fj); - - if (!avail_FP_DP(ctx)) { - return false; - } - - CHECK_FPE; - - tcg_gen_xori_i64(dest, src, 0x8000000000000000LL); - set_fpr(a->fd, dest); - - return true; -} - -TRANS(fadd_s, FP_SP, gen_fff, gen_helper_fadd_s) -TRANS(fadd_d, FP_DP, gen_fff, gen_helper_fadd_d) -TRANS(fsub_s, FP_SP, gen_fff, gen_helper_fsub_s) -TRANS(fsub_d, FP_DP, gen_fff, gen_helper_fsub_d) -TRANS(fmul_s, FP_SP, gen_fff, gen_helper_fmul_s) -TRANS(fmul_d, FP_DP, gen_fff, gen_helper_fmul_d) -TRANS(fdiv_s, FP_SP, gen_fff, gen_helper_fdiv_s) -TRANS(fdiv_d, FP_DP, gen_fff, gen_helper_fdiv_d) -TRANS(fmax_s, FP_SP, gen_fff, gen_helper_fmax_s) -TRANS(fmax_d, FP_DP, gen_fff, gen_helper_fmax_d) -TRANS(fmin_s, FP_SP, gen_fff, gen_helper_fmin_s) -TRANS(fmin_d, FP_DP, gen_fff, gen_helper_fmin_d) -TRANS(fmaxa_s, FP_SP, gen_fff, gen_helper_fmaxa_s) -TRANS(fmaxa_d, FP_DP, gen_fff, gen_helper_fmaxa_d) -TRANS(fmina_s, FP_SP, gen_fff, gen_helper_fmina_s) -TRANS(fmina_d, FP_DP, gen_fff, gen_helper_fmina_d) -TRANS(fscaleb_s, FP_SP, gen_fff, gen_helper_fscaleb_s) -TRANS(fscaleb_d, FP_DP, gen_fff, gen_helper_fscaleb_d) -TRANS(fsqrt_s, FP_SP, gen_ff, gen_helper_fsqrt_s) -TRANS(fsqrt_d, FP_DP, gen_ff, gen_helper_fsqrt_d) -TRANS(frecip_s, FP_SP, gen_ff, gen_helper_frecip_s) -TRANS(frecip_d, FP_DP, gen_ff, gen_helper_frecip_d) -TRANS(frsqrt_s, FP_SP, gen_ff, gen_helper_frsqrt_s) -TRANS(frsqrt_d, FP_DP, gen_ff, gen_helper_frsqrt_d) -TRANS(flogb_s, FP_SP, gen_ff, gen_helper_flogb_s) -TRANS(flogb_d, FP_DP, gen_ff, gen_helper_flogb_d) -TRANS(fclass_s, FP_SP, gen_ff, gen_helper_fclass_s) -TRANS(fclass_d, FP_DP, gen_ff, gen_helper_fclass_d) -TRANS(fmadd_s, FP_SP, gen_muladd, gen_helper_fmuladd_s, 0) -TRANS(fmadd_d, FP_DP, gen_muladd, gen_helper_fmuladd_d, 0) -TRANS(fmsub_s, FP_SP, gen_muladd, gen_helper_fmuladd_s, float_muladd_negate_c) -TRANS(fmsub_d, FP_DP, gen_muladd, gen_helper_fmuladd_d, float_muladd_negate_c) -TRANS(fnmadd_s, FP_SP, gen_muladd, gen_helper_fmuladd_s, float_muladd_negate_result) -TRANS(fnmadd_d, FP_DP, gen_muladd, gen_helper_fmuladd_d, float_muladd_negate_result) -TRANS(fnmsub_s, FP_SP, gen_muladd, gen_helper_fmuladd_s, - float_muladd_negate_c | float_muladd_negate_result) -TRANS(fnmsub_d, 
FP_DP, gen_muladd, gen_helper_fmuladd_d, - float_muladd_negate_c | float_muladd_negate_result) diff --git a/target/loongarch/insn_trans/trans_fcmp.c.inc b/target/loongarch/insn_trans/trans_fcmp.c.inc deleted file mode 100644 index 3babf69e4a..0000000000 --- a/target/loongarch/insn_trans/trans_fcmp.c.inc +++ /dev/null @@ -1,72 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (c) 2021 Loongson Technology Corporation Limited - */ - -/* bit0(signaling/quiet) bit1(lt) bit2(eq) bit3(un) bit4(neq) */ -static uint32_t get_fcmp_flags(int cond) -{ - uint32_t flags = 0; - - if (cond & 0x1) { - flags |= FCMP_LT; - } - if (cond & 0x2) { - flags |= FCMP_EQ; - } - if (cond & 0x4) { - flags |= FCMP_UN; - } - if (cond & 0x8) { - flags |= FCMP_GT | FCMP_LT; - } - return flags; -} - -static bool trans_fcmp_cond_s(DisasContext *ctx, arg_fcmp_cond_s *a) -{ - TCGv var, src1, src2; - uint32_t flags; - void (*fn)(TCGv, TCGv_env, TCGv, TCGv, TCGv_i32); - - if (!avail_FP_SP(ctx)) { - return false; - } - - CHECK_FPE; - - var = tcg_temp_new(); - src1 = get_fpr(ctx, a->fj); - src2 = get_fpr(ctx, a->fk); - fn = (a->fcond & 1 ? gen_helper_fcmp_s_s : gen_helper_fcmp_c_s); - flags = get_fcmp_flags(a->fcond >> 1); - - fn(var, tcg_env, src1, src2, tcg_constant_i32(flags)); - - tcg_gen_st8_tl(var, tcg_env, offsetof(CPULoongArchState, cf[a->cd])); - return true; -} - -static bool trans_fcmp_cond_d(DisasContext *ctx, arg_fcmp_cond_d *a) -{ - TCGv var, src1, src2; - uint32_t flags; - void (*fn)(TCGv, TCGv_env, TCGv, TCGv, TCGv_i32); - - if (!avail_FP_DP(ctx)) { - return false; - } - - CHECK_FPE; - - var = tcg_temp_new(); - src1 = get_fpr(ctx, a->fj); - src2 = get_fpr(ctx, a->fk); - fn = (a->fcond & 1 ? gen_helper_fcmp_s_d : gen_helper_fcmp_c_d); - flags = get_fcmp_flags(a->fcond >> 1); - - fn(var, tcg_env, src1, src2, tcg_constant_i32(flags)); - - tcg_gen_st8_tl(var, tcg_env, offsetof(CPULoongArchState, cf[a->cd])); - return true; -} diff --git a/target/loongarch/insn_trans/trans_fcnv.c.inc b/target/loongarch/insn_trans/trans_fcnv.c.inc deleted file mode 100644 index 833c059d6d..0000000000 --- a/target/loongarch/insn_trans/trans_fcnv.c.inc +++ /dev/null @@ -1,33 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (c) 2021 Loongson Technology Corporation Limited - */ - -TRANS(fcvt_s_d, FP_DP, gen_ff, gen_helper_fcvt_s_d) -TRANS(fcvt_d_s, FP_DP, gen_ff, gen_helper_fcvt_d_s) -TRANS(ftintrm_w_s, FP_SP, gen_ff, gen_helper_ftintrm_w_s) -TRANS(ftintrm_w_d, FP_DP, gen_ff, gen_helper_ftintrm_w_d) -TRANS(ftintrm_l_s, FP_SP, gen_ff, gen_helper_ftintrm_l_s) -TRANS(ftintrm_l_d, FP_DP, gen_ff, gen_helper_ftintrm_l_d) -TRANS(ftintrp_w_s, FP_SP, gen_ff, gen_helper_ftintrp_w_s) -TRANS(ftintrp_w_d, FP_DP, gen_ff, gen_helper_ftintrp_w_d) -TRANS(ftintrp_l_s, FP_SP, gen_ff, gen_helper_ftintrp_l_s) -TRANS(ftintrp_l_d, FP_DP, gen_ff, gen_helper_ftintrp_l_d) -TRANS(ftintrz_w_s, FP_SP, gen_ff, gen_helper_ftintrz_w_s) -TRANS(ftintrz_w_d, FP_DP, gen_ff, gen_helper_ftintrz_w_d) -TRANS(ftintrz_l_s, FP_SP, gen_ff, gen_helper_ftintrz_l_s) -TRANS(ftintrz_l_d, FP_DP, gen_ff, gen_helper_ftintrz_l_d) -TRANS(ftintrne_w_s, FP_SP, gen_ff, gen_helper_ftintrne_w_s) -TRANS(ftintrne_w_d, FP_DP, gen_ff, gen_helper_ftintrne_w_d) -TRANS(ftintrne_l_s, FP_SP, gen_ff, gen_helper_ftintrne_l_s) -TRANS(ftintrne_l_d, FP_DP, gen_ff, gen_helper_ftintrne_l_d) -TRANS(ftint_w_s, FP_SP, gen_ff, gen_helper_ftint_w_s) -TRANS(ftint_w_d, FP_DP, gen_ff, gen_helper_ftint_w_d) -TRANS(ftint_l_s, FP_SP, gen_ff, gen_helper_ftint_l_s) 
-TRANS(ftint_l_d, FP_DP, gen_ff, gen_helper_ftint_l_d) -TRANS(ffint_s_w, FP_SP, gen_ff, gen_helper_ffint_s_w) -TRANS(ffint_s_l, FP_SP, gen_ff, gen_helper_ffint_s_l) -TRANS(ffint_d_w, FP_DP, gen_ff, gen_helper_ffint_d_w) -TRANS(ffint_d_l, FP_DP, gen_ff, gen_helper_ffint_d_l) -TRANS(frint_s, FP_SP, gen_ff, gen_helper_frint_s) -TRANS(frint_d, FP_DP, gen_ff, gen_helper_frint_d) diff --git a/target/loongarch/insn_trans/trans_fmemory.c.inc b/target/loongarch/insn_trans/trans_fmemory.c.inc deleted file mode 100644 index 13452bc7e5..0000000000 --- a/target/loongarch/insn_trans/trans_fmemory.c.inc +++ /dev/null @@ -1,158 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (c) 2021 Loongson Technology Corporation Limited - */ - -static void maybe_nanbox_load(TCGv freg, MemOp mop) -{ - if ((mop & MO_SIZE) == MO_32) { - gen_nanbox_s(freg, freg); - } -} - -static bool gen_fload_i(DisasContext *ctx, arg_fr_i *a, MemOp mop) -{ - TCGv addr = gpr_src(ctx, a->rj, EXT_NONE); - TCGv dest = get_fpr(ctx, a->fd); - - CHECK_FPE; - - addr = make_address_i(ctx, addr, a->imm); - - tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, mop); - maybe_nanbox_load(dest, mop); - set_fpr(a->fd, dest); - - return true; -} - -static bool gen_fstore_i(DisasContext *ctx, arg_fr_i *a, MemOp mop) -{ - TCGv addr = gpr_src(ctx, a->rj, EXT_NONE); - TCGv src = get_fpr(ctx, a->fd); - - CHECK_FPE; - - addr = make_address_i(ctx, addr, a->imm); - - tcg_gen_qemu_st_tl(src, addr, ctx->mem_idx, mop); - - return true; -} - -static bool gen_floadx(DisasContext *ctx, arg_frr *a, MemOp mop) -{ - TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); - TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE); - TCGv dest = get_fpr(ctx, a->fd); - TCGv addr; - - CHECK_FPE; - - addr = make_address_x(ctx, src1, src2); - tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, mop); - maybe_nanbox_load(dest, mop); - set_fpr(a->fd, dest); - - return true; -} - -static bool gen_fstorex(DisasContext *ctx, arg_frr *a, MemOp mop) -{ - TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); - TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE); - TCGv src3 = get_fpr(ctx, a->fd); - TCGv addr; - - CHECK_FPE; - - addr = make_address_x(ctx, src1, src2); - tcg_gen_qemu_st_tl(src3, addr, ctx->mem_idx, mop); - - return true; -} - -static bool gen_fload_gt(DisasContext *ctx, arg_frr *a, MemOp mop) -{ - TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); - TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE); - TCGv dest = get_fpr(ctx, a->fd); - TCGv addr; - - CHECK_FPE; - - gen_helper_asrtgt_d(tcg_env, src1, src2); - addr = make_address_x(ctx, src1, src2); - tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, mop); - maybe_nanbox_load(dest, mop); - set_fpr(a->fd, dest); - - return true; -} - -static bool gen_fstore_gt(DisasContext *ctx, arg_frr *a, MemOp mop) -{ - TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); - TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE); - TCGv src3 = get_fpr(ctx, a->fd); - TCGv addr; - - CHECK_FPE; - - gen_helper_asrtgt_d(tcg_env, src1, src2); - addr = make_address_x(ctx, src1, src2); - tcg_gen_qemu_st_tl(src3, addr, ctx->mem_idx, mop); - - return true; -} - -static bool gen_fload_le(DisasContext *ctx, arg_frr *a, MemOp mop) -{ - TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); - TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE); - TCGv dest = get_fpr(ctx, a->fd); - TCGv addr; - - CHECK_FPE; - - gen_helper_asrtle_d(tcg_env, src1, src2); - addr = make_address_x(ctx, src1, src2); - tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, mop); - maybe_nanbox_load(dest, mop); - set_fpr(a->fd, dest); - - return true; -} - -static bool 
gen_fstore_le(DisasContext *ctx, arg_frr *a, MemOp mop) -{ - TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); - TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE); - TCGv src3 = get_fpr(ctx, a->fd); - TCGv addr; - - CHECK_FPE; - - gen_helper_asrtle_d(tcg_env, src1, src2); - addr = make_address_x(ctx, src1, src2); - tcg_gen_qemu_st_tl(src3, addr, ctx->mem_idx, mop); - - return true; -} - -TRANS(fld_s, FP_SP, gen_fload_i, MO_TEUL) -TRANS(fst_s, FP_SP, gen_fstore_i, MO_TEUL) -TRANS(fld_d, FP_DP, gen_fload_i, MO_TEUQ) -TRANS(fst_d, FP_DP, gen_fstore_i, MO_TEUQ) -TRANS(fldx_s, FP_SP, gen_floadx, MO_TEUL) -TRANS(fldx_d, FP_DP, gen_floadx, MO_TEUQ) -TRANS(fstx_s, FP_SP, gen_fstorex, MO_TEUL) -TRANS(fstx_d, FP_DP, gen_fstorex, MO_TEUQ) -TRANS(fldgt_s, FP_SP, gen_fload_gt, MO_TEUL) -TRANS(fldgt_d, FP_DP, gen_fload_gt, MO_TEUQ) -TRANS(fldle_s, FP_SP, gen_fload_le, MO_TEUL) -TRANS(fldle_d, FP_DP, gen_fload_le, MO_TEUQ) -TRANS(fstgt_s, FP_SP, gen_fstore_gt, MO_TEUL) -TRANS(fstgt_d, FP_DP, gen_fstore_gt, MO_TEUQ) -TRANS(fstle_s, FP_SP, gen_fstore_le, MO_TEUL) -TRANS(fstle_d, FP_DP, gen_fstore_le, MO_TEUQ) diff --git a/target/loongarch/insn_trans/trans_fmov.c.inc b/target/loongarch/insn_trans/trans_fmov.c.inc deleted file mode 100644 index 5cbd9d3f34..0000000000 --- a/target/loongarch/insn_trans/trans_fmov.c.inc +++ /dev/null @@ -1,224 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (c) 2021 Loongson Technology Corporation Limited - */ - -static const uint32_t fcsr_mask[4] = { - UINT32_MAX, FCSR0_M1, FCSR0_M2, FCSR0_M3 -}; - -static bool trans_fsel(DisasContext *ctx, arg_fsel *a) -{ - TCGv zero = tcg_constant_tl(0); - TCGv dest = get_fpr(ctx, a->fd); - TCGv src1 = get_fpr(ctx, a->fj); - TCGv src2 = get_fpr(ctx, a->fk); - TCGv cond; - - if (!avail_FP(ctx)) { - return false; - } - - CHECK_FPE; - - cond = tcg_temp_new(); - tcg_gen_ld8u_tl(cond, tcg_env, offsetof(CPULoongArchState, cf[a->ca])); - tcg_gen_movcond_tl(TCG_COND_EQ, dest, cond, zero, src1, src2); - set_fpr(a->fd, dest); - - return true; -} - -static bool gen_f2f(DisasContext *ctx, arg_ff *a, - void (*func)(TCGv, TCGv), bool nanbox) -{ - TCGv dest = get_fpr(ctx, a->fd); - TCGv src = get_fpr(ctx, a->fj); - - CHECK_FPE; - - func(dest, src); - if (nanbox) { - gen_nanbox_s(dest, dest); - } - set_fpr(a->fd, dest); - - return true; -} - -static bool gen_r2f(DisasContext *ctx, arg_fr *a, - void (*func)(TCGv, TCGv)) -{ - TCGv src = gpr_src(ctx, a->rj, EXT_NONE); - TCGv dest = get_fpr(ctx, a->fd); - - if (!avail_FP(ctx)) { - return false; - } - - CHECK_FPE; - - func(dest, src); - set_fpr(a->fd, dest); - - return true; -} - -static bool gen_f2r(DisasContext *ctx, arg_rf *a, - void (*func)(TCGv, TCGv)) -{ - TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); - TCGv src = get_fpr(ctx, a->fj); - - if (!avail_FP(ctx)) { - return false; - } - - CHECK_FPE; - - func(dest, src); - gen_set_gpr(a->rd, dest, EXT_NONE); - - return true; -} - -static bool trans_movgr2fcsr(DisasContext *ctx, arg_movgr2fcsr *a) -{ - uint32_t mask = fcsr_mask[a->fcsrd]; - TCGv Rj = gpr_src(ctx, a->rj, EXT_NONE); - - if (!avail_FP(ctx)) { - return false; - } - - CHECK_FPE; - - if (mask == UINT32_MAX) { - tcg_gen_st32_i64(Rj, tcg_env, offsetof(CPULoongArchState, fcsr0)); - } else { - TCGv_i32 fcsr0 = tcg_temp_new_i32(); - TCGv_i32 temp = tcg_temp_new_i32(); - - tcg_gen_ld_i32(fcsr0, tcg_env, offsetof(CPULoongArchState, fcsr0)); - tcg_gen_extrl_i64_i32(temp, Rj); - tcg_gen_andi_i32(temp, temp, mask); - tcg_gen_andi_i32(fcsr0, fcsr0, ~mask); - tcg_gen_or_i32(fcsr0, fcsr0, temp); - 
tcg_gen_st_i32(fcsr0, tcg_env, offsetof(CPULoongArchState, fcsr0)); - } - - /* - * Install the new rounding mode to fpu_status, if changed. - * Note that FCSR3 is exactly the rounding mode field. - */ - if (mask & FCSR0_M3) { - gen_helper_set_rounding_mode(tcg_env); - } - return true; -} - -static bool trans_movfcsr2gr(DisasContext *ctx, arg_movfcsr2gr *a) -{ - TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); - - if (!avail_FP(ctx)) { - return false; - } - - CHECK_FPE; - - tcg_gen_ld32u_i64(dest, tcg_env, offsetof(CPULoongArchState, fcsr0)); - tcg_gen_andi_i64(dest, dest, fcsr_mask[a->fcsrs]); - gen_set_gpr(a->rd, dest, EXT_NONE); - - return true; -} - -static void gen_movgr2fr_w(TCGv dest, TCGv src) -{ - tcg_gen_deposit_i64(dest, dest, src, 0, 32); -} - -static void gen_movgr2frh_w(TCGv dest, TCGv src) -{ - tcg_gen_deposit_i64(dest, dest, src, 32, 32); -} - -static void gen_movfrh2gr_s(TCGv dest, TCGv src) -{ - tcg_gen_sextract_tl(dest, src, 32, 32); -} - -static bool trans_movfr2cf(DisasContext *ctx, arg_movfr2cf *a) -{ - TCGv t0; - TCGv src = get_fpr(ctx, a->fj); - - if (!avail_FP(ctx)) { - return false; - } - - CHECK_FPE; - - t0 = tcg_temp_new(); - tcg_gen_andi_tl(t0, src, 0x1); - tcg_gen_st8_tl(t0, tcg_env, offsetof(CPULoongArchState, cf[a->cd & 0x7])); - - return true; -} - -static bool trans_movcf2fr(DisasContext *ctx, arg_movcf2fr *a) -{ - TCGv dest = get_fpr(ctx, a->fd); - - if (!avail_FP(ctx)) { - return false; - } - - CHECK_FPE; - - tcg_gen_ld8u_tl(dest, tcg_env, - offsetof(CPULoongArchState, cf[a->cj & 0x7])); - set_fpr(a->fd, dest); - - return true; -} - -static bool trans_movgr2cf(DisasContext *ctx, arg_movgr2cf *a) -{ - TCGv t0; - - if (!avail_FP(ctx)) { - return false; - } - - CHECK_FPE; - - t0 = tcg_temp_new(); - tcg_gen_andi_tl(t0, gpr_src(ctx, a->rj, EXT_NONE), 0x1); - tcg_gen_st8_tl(t0, tcg_env, offsetof(CPULoongArchState, cf[a->cd & 0x7])); - - return true; -} - -static bool trans_movcf2gr(DisasContext *ctx, arg_movcf2gr *a) -{ - if (!avail_FP(ctx)) { - return false; - } - - CHECK_FPE; - - tcg_gen_ld8u_tl(gpr_dst(ctx, a->rd, EXT_NONE), tcg_env, - offsetof(CPULoongArchState, cf[a->cj & 0x7])); - return true; -} - -TRANS(fmov_s, FP_SP, gen_f2f, tcg_gen_mov_tl, true) -TRANS(fmov_d, FP_DP, gen_f2f, tcg_gen_mov_tl, false) -TRANS(movgr2fr_w, FP_SP, gen_r2f, gen_movgr2fr_w) -TRANS(movgr2fr_d, 64, gen_r2f, tcg_gen_mov_tl) -TRANS(movgr2frh_w, FP_DP, gen_r2f, gen_movgr2frh_w) -TRANS(movfr2gr_s, FP_SP, gen_f2r, tcg_gen_ext32s_tl) -TRANS(movfr2gr_d, 64, gen_f2r, tcg_gen_mov_tl) -TRANS(movfrh2gr_s, FP_DP, gen_f2r, gen_movfrh2gr_s) diff --git a/target/loongarch/insn_trans/trans_memory.c.inc b/target/loongarch/insn_trans/trans_memory.c.inc deleted file mode 100644 index 42f4e74012..0000000000 --- a/target/loongarch/insn_trans/trans_memory.c.inc +++ /dev/null @@ -1,194 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (c) 2021 Loongson Technology Corporation Limited - */ - -static bool gen_load(DisasContext *ctx, arg_rr_i *a, MemOp mop) -{ - TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); - TCGv addr = gpr_src(ctx, a->rj, EXT_NONE); - - addr = make_address_i(ctx, addr, a->imm); - - tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, mop); - gen_set_gpr(a->rd, dest, EXT_NONE); - return true; -} - -static bool gen_store(DisasContext *ctx, arg_rr_i *a, MemOp mop) -{ - TCGv data = gpr_src(ctx, a->rd, EXT_NONE); - TCGv addr = gpr_src(ctx, a->rj, EXT_NONE); - - addr = make_address_i(ctx, addr, a->imm); - - tcg_gen_qemu_st_tl(data, addr, ctx->mem_idx, mop); - return true; -} - -static 
bool gen_loadx(DisasContext *ctx, arg_rrr *a, MemOp mop) -{ - TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); - TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); - TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE); - TCGv addr = make_address_x(ctx, src1, src2); - - tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, mop); - gen_set_gpr(a->rd, dest, EXT_NONE); - - return true; -} - -static bool gen_storex(DisasContext *ctx, arg_rrr *a, MemOp mop) -{ - TCGv data = gpr_src(ctx, a->rd, EXT_NONE); - TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); - TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE); - TCGv addr = make_address_x(ctx, src1, src2); - - tcg_gen_qemu_st_tl(data, addr, ctx->mem_idx, mop); - - return true; -} - -static bool gen_load_gt(DisasContext *ctx, arg_rrr *a, MemOp mop) -{ - TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); - TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); - TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE); - - gen_helper_asrtgt_d(tcg_env, src1, src2); - src1 = make_address_i(ctx, src1, 0); - tcg_gen_qemu_ld_tl(dest, src1, ctx->mem_idx, mop); - gen_set_gpr(a->rd, dest, EXT_NONE); - - return true; -} - -static bool gen_load_le(DisasContext *ctx, arg_rrr *a, MemOp mop) -{ - TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); - TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); - TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE); - - gen_helper_asrtle_d(tcg_env, src1, src2); - src1 = make_address_i(ctx, src1, 0); - tcg_gen_qemu_ld_tl(dest, src1, ctx->mem_idx, mop); - gen_set_gpr(a->rd, dest, EXT_NONE); - - return true; -} - -static bool gen_store_gt(DisasContext *ctx, arg_rrr *a, MemOp mop) -{ - TCGv data = gpr_src(ctx, a->rd, EXT_NONE); - TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); - TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE); - - gen_helper_asrtgt_d(tcg_env, src1, src2); - src1 = make_address_i(ctx, src1, 0); - tcg_gen_qemu_st_tl(data, src1, ctx->mem_idx, mop); - - return true; -} - -static bool gen_store_le(DisasContext *ctx, arg_rrr *a, MemOp mop) -{ - TCGv data = gpr_src(ctx, a->rd, EXT_NONE); - TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); - TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE); - - gen_helper_asrtle_d(tcg_env, src1, src2); - src1 = make_address_i(ctx, src1, 0); - tcg_gen_qemu_st_tl(data, src1, ctx->mem_idx, mop); - - return true; -} - -static bool trans_preld(DisasContext *ctx, arg_preld *a) -{ - return true; -} - -static bool trans_preldx(DisasContext *ctx, arg_preldx * a) -{ - return true; -} - -static bool trans_dbar(DisasContext *ctx, arg_dbar * a) -{ - tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL); - return true; -} - -static bool trans_ibar(DisasContext *ctx, arg_ibar *a) -{ - ctx->base.is_jmp = DISAS_STOP; - return true; -} - -static bool gen_ldptr(DisasContext *ctx, arg_rr_i *a, MemOp mop) -{ - TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); - TCGv addr = gpr_src(ctx, a->rj, EXT_NONE); - - addr = make_address_i(ctx, addr, a->imm); - - tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, mop); - gen_set_gpr(a->rd, dest, EXT_NONE); - return true; -} - -static bool gen_stptr(DisasContext *ctx, arg_rr_i *a, MemOp mop) -{ - TCGv data = gpr_src(ctx, a->rd, EXT_NONE); - TCGv addr = gpr_src(ctx, a->rj, EXT_NONE); - - addr = make_address_i(ctx, addr, a->imm); - - tcg_gen_qemu_st_tl(data, addr, ctx->mem_idx, mop); - return true; -} - -TRANS(ld_b, ALL, gen_load, MO_SB) -TRANS(ld_h, ALL, gen_load, MO_TESW) -TRANS(ld_w, ALL, gen_load, MO_TESL) -TRANS(ld_d, 64, gen_load, MO_TEUQ) -TRANS(st_b, ALL, gen_store, MO_UB) -TRANS(st_h, ALL, gen_store, MO_TEUW) -TRANS(st_w, ALL, gen_store, MO_TEUL) -TRANS(st_d, 64, gen_store, MO_TEUQ) -TRANS(ld_bu, ALL, gen_load, MO_UB) 
-TRANS(ld_hu, ALL, gen_load, MO_TEUW) -TRANS(ld_wu, 64, gen_load, MO_TEUL) -TRANS(ldx_b, 64, gen_loadx, MO_SB) -TRANS(ldx_h, 64, gen_loadx, MO_TESW) -TRANS(ldx_w, 64, gen_loadx, MO_TESL) -TRANS(ldx_d, 64, gen_loadx, MO_TEUQ) -TRANS(stx_b, 64, gen_storex, MO_UB) -TRANS(stx_h, 64, gen_storex, MO_TEUW) -TRANS(stx_w, 64, gen_storex, MO_TEUL) -TRANS(stx_d, 64, gen_storex, MO_TEUQ) -TRANS(ldx_bu, 64, gen_loadx, MO_UB) -TRANS(ldx_hu, 64, gen_loadx, MO_TEUW) -TRANS(ldx_wu, 64, gen_loadx, MO_TEUL) -TRANS(ldptr_w, 64, gen_ldptr, MO_TESL) -TRANS(stptr_w, 64, gen_stptr, MO_TEUL) -TRANS(ldptr_d, 64, gen_ldptr, MO_TEUQ) -TRANS(stptr_d, 64, gen_stptr, MO_TEUQ) -TRANS(ldgt_b, 64, gen_load_gt, MO_SB) -TRANS(ldgt_h, 64, gen_load_gt, MO_TESW) -TRANS(ldgt_w, 64, gen_load_gt, MO_TESL) -TRANS(ldgt_d, 64, gen_load_gt, MO_TEUQ) -TRANS(ldle_b, 64, gen_load_le, MO_SB) -TRANS(ldle_h, 64, gen_load_le, MO_TESW) -TRANS(ldle_w, 64, gen_load_le, MO_TESL) -TRANS(ldle_d, 64, gen_load_le, MO_TEUQ) -TRANS(stgt_b, 64, gen_store_gt, MO_UB) -TRANS(stgt_h, 64, gen_store_gt, MO_TEUW) -TRANS(stgt_w, 64, gen_store_gt, MO_TEUL) -TRANS(stgt_d, 64, gen_store_gt, MO_TEUQ) -TRANS(stle_b, 64, gen_store_le, MO_UB) -TRANS(stle_h, 64, gen_store_le, MO_TEUW) -TRANS(stle_w, 64, gen_store_le, MO_TEUL) -TRANS(stle_d, 64, gen_store_le, MO_TEUQ) diff --git a/target/loongarch/insn_trans/trans_privileged.c.inc b/target/loongarch/insn_trans/trans_privileged.c.inc deleted file mode 100644 index 01d457212b..0000000000 --- a/target/loongarch/insn_trans/trans_privileged.c.inc +++ /dev/null @@ -1,498 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (c) 2021 Loongson Technology Corporation Limited - * - * LoongArch translation routines for the privileged instructions. - */ - -#include "cpu-csr.h" - -#ifdef CONFIG_USER_ONLY - -#define GEN_FALSE_TRANS(name) \ -static bool trans_##name(DisasContext *ctx, arg_##name * a) \ -{ \ - return false; \ -} - -GEN_FALSE_TRANS(csrrd) -GEN_FALSE_TRANS(csrwr) -GEN_FALSE_TRANS(csrxchg) -GEN_FALSE_TRANS(iocsrrd_b) -GEN_FALSE_TRANS(iocsrrd_h) -GEN_FALSE_TRANS(iocsrrd_w) -GEN_FALSE_TRANS(iocsrrd_d) -GEN_FALSE_TRANS(iocsrwr_b) -GEN_FALSE_TRANS(iocsrwr_h) -GEN_FALSE_TRANS(iocsrwr_w) -GEN_FALSE_TRANS(iocsrwr_d) -GEN_FALSE_TRANS(tlbsrch) -GEN_FALSE_TRANS(tlbrd) -GEN_FALSE_TRANS(tlbwr) -GEN_FALSE_TRANS(tlbfill) -GEN_FALSE_TRANS(tlbclr) -GEN_FALSE_TRANS(tlbflush) -GEN_FALSE_TRANS(invtlb) -GEN_FALSE_TRANS(cacop) -GEN_FALSE_TRANS(ldpte) -GEN_FALSE_TRANS(lddir) -GEN_FALSE_TRANS(ertn) -GEN_FALSE_TRANS(dbcl) -GEN_FALSE_TRANS(idle) - -#else - -typedef void (*GenCSRRead)(TCGv dest, TCGv_ptr env); -typedef void (*GenCSRWrite)(TCGv dest, TCGv_ptr env, TCGv src); - -typedef struct { - int offset; - int flags; - GenCSRRead readfn; - GenCSRWrite writefn; -} CSRInfo; - -enum { - CSRFL_READONLY = (1 << 0), - CSRFL_EXITTB = (1 << 1), - CSRFL_IO = (1 << 2), -}; - -#define CSR_OFF_FUNCS(NAME, FL, RD, WR) \ - [LOONGARCH_CSR_##NAME] = { \ - .offset = offsetof(CPULoongArchState, CSR_##NAME), \ - .flags = FL, .readfn = RD, .writefn = WR \ - } - -#define CSR_OFF_ARRAY(NAME, N) \ - [LOONGARCH_CSR_##NAME(N)] = { \ - .offset = offsetof(CPULoongArchState, CSR_##NAME[N]), \ - .flags = 0, .readfn = NULL, .writefn = NULL \ - } - -#define CSR_OFF_FLAGS(NAME, FL) \ - CSR_OFF_FUNCS(NAME, FL, NULL, NULL) - -#define CSR_OFF(NAME) \ - CSR_OFF_FLAGS(NAME, 0) - -static const CSRInfo csr_info[] = { - CSR_OFF_FLAGS(CRMD, CSRFL_EXITTB), - CSR_OFF(PRMD), - CSR_OFF_FLAGS(EUEN, CSRFL_EXITTB), - CSR_OFF_FLAGS(MISC, CSRFL_READONLY), - CSR_OFF(ECFG), 
- CSR_OFF_FUNCS(ESTAT, CSRFL_EXITTB, NULL, gen_helper_csrwr_estat), - CSR_OFF(ERA), - CSR_OFF(BADV), - CSR_OFF_FLAGS(BADI, CSRFL_READONLY), - CSR_OFF(EENTRY), - CSR_OFF(TLBIDX), - CSR_OFF(TLBEHI), - CSR_OFF(TLBELO0), - CSR_OFF(TLBELO1), - CSR_OFF_FUNCS(ASID, CSRFL_EXITTB, NULL, gen_helper_csrwr_asid), - CSR_OFF(PGDL), - CSR_OFF(PGDH), - CSR_OFF_FUNCS(PGD, CSRFL_READONLY, gen_helper_csrrd_pgd, NULL), - CSR_OFF(PWCL), - CSR_OFF(PWCH), - CSR_OFF(STLBPS), - CSR_OFF(RVACFG), - CSR_OFF_FUNCS(CPUID, CSRFL_READONLY, gen_helper_csrrd_cpuid, NULL), - CSR_OFF_FLAGS(PRCFG1, CSRFL_READONLY), - CSR_OFF_FLAGS(PRCFG2, CSRFL_READONLY), - CSR_OFF_FLAGS(PRCFG3, CSRFL_READONLY), - CSR_OFF_ARRAY(SAVE, 0), - CSR_OFF_ARRAY(SAVE, 1), - CSR_OFF_ARRAY(SAVE, 2), - CSR_OFF_ARRAY(SAVE, 3), - CSR_OFF_ARRAY(SAVE, 4), - CSR_OFF_ARRAY(SAVE, 5), - CSR_OFF_ARRAY(SAVE, 6), - CSR_OFF_ARRAY(SAVE, 7), - CSR_OFF_ARRAY(SAVE, 8), - CSR_OFF_ARRAY(SAVE, 9), - CSR_OFF_ARRAY(SAVE, 10), - CSR_OFF_ARRAY(SAVE, 11), - CSR_OFF_ARRAY(SAVE, 12), - CSR_OFF_ARRAY(SAVE, 13), - CSR_OFF_ARRAY(SAVE, 14), - CSR_OFF_ARRAY(SAVE, 15), - CSR_OFF(TID), - CSR_OFF_FUNCS(TCFG, CSRFL_IO, NULL, gen_helper_csrwr_tcfg), - CSR_OFF_FUNCS(TVAL, CSRFL_READONLY | CSRFL_IO, gen_helper_csrrd_tval, NULL), - CSR_OFF(CNTC), - CSR_OFF_FUNCS(TICLR, CSRFL_IO, NULL, gen_helper_csrwr_ticlr), - CSR_OFF(LLBCTL), - CSR_OFF(IMPCTL1), - CSR_OFF(IMPCTL2), - CSR_OFF(TLBRENTRY), - CSR_OFF(TLBRBADV), - CSR_OFF(TLBRERA), - CSR_OFF(TLBRSAVE), - CSR_OFF(TLBRELO0), - CSR_OFF(TLBRELO1), - CSR_OFF(TLBREHI), - CSR_OFF(TLBRPRMD), - CSR_OFF(MERRCTL), - CSR_OFF(MERRINFO1), - CSR_OFF(MERRINFO2), - CSR_OFF(MERRENTRY), - CSR_OFF(MERRERA), - CSR_OFF(MERRSAVE), - CSR_OFF(CTAG), - CSR_OFF_ARRAY(DMW, 0), - CSR_OFF_ARRAY(DMW, 1), - CSR_OFF_ARRAY(DMW, 2), - CSR_OFF_ARRAY(DMW, 3), - CSR_OFF(DBG), - CSR_OFF(DERA), - CSR_OFF(DSAVE), -}; - -static bool check_plv(DisasContext *ctx) -{ - if (ctx->plv == MMU_PLV_USER) { - generate_exception(ctx, EXCCODE_IPE); - return true; - } - return false; -} - -static const CSRInfo *get_csr(unsigned csr_num) -{ - const CSRInfo *csr; - - if (csr_num >= ARRAY_SIZE(csr_info)) { - return NULL; - } - csr = &csr_info[csr_num]; - if (csr->offset == 0) { - return NULL; - } - return csr; -} - -static bool check_csr_flags(DisasContext *ctx, const CSRInfo *csr, bool write) -{ - if ((csr->flags & CSRFL_READONLY) && write) { - return false; - } - if ((csr->flags & CSRFL_IO) && translator_io_start(&ctx->base)) { - ctx->base.is_jmp = DISAS_EXIT_UPDATE; - } else if ((csr->flags & CSRFL_EXITTB) && write) { - ctx->base.is_jmp = DISAS_EXIT_UPDATE; - } - return true; -} - -static bool trans_csrrd(DisasContext *ctx, arg_csrrd *a) -{ - TCGv dest; - const CSRInfo *csr; - - if (check_plv(ctx)) { - return false; - } - csr = get_csr(a->csr); - if (csr == NULL) { - /* CSR is undefined: read as 0. */ - dest = tcg_constant_tl(0); - } else { - check_csr_flags(ctx, csr, false); - dest = gpr_dst(ctx, a->rd, EXT_NONE); - if (csr->readfn) { - csr->readfn(dest, tcg_env); - } else { - tcg_gen_ld_tl(dest, tcg_env, csr->offset); - } - } - gen_set_gpr(a->rd, dest, EXT_NONE); - return true; -} - -static bool trans_csrwr(DisasContext *ctx, arg_csrwr *a) -{ - TCGv dest, src1; - const CSRInfo *csr; - - if (check_plv(ctx)) { - return false; - } - csr = get_csr(a->csr); - if (csr == NULL) { - /* CSR is undefined: write ignored, read old_value as 0. */ - gen_set_gpr(a->rd, tcg_constant_tl(0), EXT_NONE); - return true; - } - if (!check_csr_flags(ctx, csr, true)) { - /* CSR is readonly: trap. 
*/ - return false; - } - src1 = gpr_src(ctx, a->rd, EXT_NONE); - if (csr->writefn) { - dest = gpr_dst(ctx, a->rd, EXT_NONE); - csr->writefn(dest, tcg_env, src1); - } else { - dest = tcg_temp_new(); - tcg_gen_ld_tl(dest, tcg_env, csr->offset); - tcg_gen_st_tl(src1, tcg_env, csr->offset); - } - gen_set_gpr(a->rd, dest, EXT_NONE); - return true; -} - -static bool trans_csrxchg(DisasContext *ctx, arg_csrxchg *a) -{ - TCGv src1, mask, oldv, newv, temp; - const CSRInfo *csr; - - if (check_plv(ctx)) { - return false; - } - csr = get_csr(a->csr); - if (csr == NULL) { - /* CSR is undefined: write ignored, read old_value as 0. */ - gen_set_gpr(a->rd, tcg_constant_tl(0), EXT_NONE); - return true; - } - - if (!check_csr_flags(ctx, csr, true)) { - /* CSR is readonly: trap. */ - return false; - } - - /* So far only readonly csrs have readfn. */ - assert(csr->readfn == NULL); - - src1 = gpr_src(ctx, a->rd, EXT_NONE); - mask = gpr_src(ctx, a->rj, EXT_NONE); - oldv = tcg_temp_new(); - newv = tcg_temp_new(); - temp = tcg_temp_new(); - - tcg_gen_ld_tl(oldv, tcg_env, csr->offset); - tcg_gen_and_tl(newv, src1, mask); - tcg_gen_andc_tl(temp, oldv, mask); - tcg_gen_or_tl(newv, newv, temp); - - if (csr->writefn) { - csr->writefn(oldv, tcg_env, newv); - } else { - tcg_gen_st_tl(newv, tcg_env, csr->offset); - } - gen_set_gpr(a->rd, oldv, EXT_NONE); - return true; -} - -static bool gen_iocsrrd(DisasContext *ctx, arg_rr *a, - void (*func)(TCGv, TCGv_ptr, TCGv)) -{ - TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); - TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); - - if (check_plv(ctx)) { - return false; - } - func(dest, tcg_env, src1); - return true; -} - -static bool gen_iocsrwr(DisasContext *ctx, arg_rr *a, - void (*func)(TCGv_ptr, TCGv, TCGv)) -{ - TCGv val = gpr_src(ctx, a->rd, EXT_NONE); - TCGv addr = gpr_src(ctx, a->rj, EXT_NONE); - - if (check_plv(ctx)) { - return false; - } - func(tcg_env, addr, val); - return true; -} - -TRANS(iocsrrd_b, IOCSR, gen_iocsrrd, gen_helper_iocsrrd_b) -TRANS(iocsrrd_h, IOCSR, gen_iocsrrd, gen_helper_iocsrrd_h) -TRANS(iocsrrd_w, IOCSR, gen_iocsrrd, gen_helper_iocsrrd_w) -TRANS(iocsrrd_d, IOCSR, gen_iocsrrd, gen_helper_iocsrrd_d) -TRANS(iocsrwr_b, IOCSR, gen_iocsrwr, gen_helper_iocsrwr_b) -TRANS(iocsrwr_h, IOCSR, gen_iocsrwr, gen_helper_iocsrwr_h) -TRANS(iocsrwr_w, IOCSR, gen_iocsrwr, gen_helper_iocsrwr_w) -TRANS(iocsrwr_d, IOCSR, gen_iocsrwr, gen_helper_iocsrwr_d) - -static void check_mmu_idx(DisasContext *ctx) -{ - if (ctx->mem_idx != MMU_IDX_DA) { - tcg_gen_movi_tl(cpu_pc, ctx->base.pc_next + 4); - ctx->base.is_jmp = DISAS_EXIT; - } -} - -static bool trans_tlbsrch(DisasContext *ctx, arg_tlbsrch *a) -{ - if (check_plv(ctx)) { - return false; - } - gen_helper_tlbsrch(tcg_env); - return true; -} - -static bool trans_tlbrd(DisasContext *ctx, arg_tlbrd *a) -{ - if (check_plv(ctx)) { - return false; - } - gen_helper_tlbrd(tcg_env); - return true; -} - -static bool trans_tlbwr(DisasContext *ctx, arg_tlbwr *a) -{ - if (check_plv(ctx)) { - return false; - } - gen_helper_tlbwr(tcg_env); - check_mmu_idx(ctx); - return true; -} - -static bool trans_tlbfill(DisasContext *ctx, arg_tlbfill *a) -{ - if (check_plv(ctx)) { - return false; - } - gen_helper_tlbfill(tcg_env); - check_mmu_idx(ctx); - return true; -} - -static bool trans_tlbclr(DisasContext *ctx, arg_tlbclr *a) -{ - if (check_plv(ctx)) { - return false; - } - gen_helper_tlbclr(tcg_env); - check_mmu_idx(ctx); - return true; -} - -static bool trans_tlbflush(DisasContext *ctx, arg_tlbflush *a) -{ - if (check_plv(ctx)) { - return false; - } - 
gen_helper_tlbflush(tcg_env); - check_mmu_idx(ctx); - return true; -} - -static bool trans_invtlb(DisasContext *ctx, arg_invtlb *a) -{ - TCGv rj = gpr_src(ctx, a->rj, EXT_NONE); - TCGv rk = gpr_src(ctx, a->rk, EXT_NONE); - - if (check_plv(ctx)) { - return false; - } - - switch (a->imm) { - case 0: - case 1: - gen_helper_invtlb_all(tcg_env); - break; - case 2: - gen_helper_invtlb_all_g(tcg_env, tcg_constant_i32(1)); - break; - case 3: - gen_helper_invtlb_all_g(tcg_env, tcg_constant_i32(0)); - break; - case 4: - gen_helper_invtlb_all_asid(tcg_env, rj); - break; - case 5: - gen_helper_invtlb_page_asid(tcg_env, rj, rk); - break; - case 6: - gen_helper_invtlb_page_asid_or_g(tcg_env, rj, rk); - break; - default: - return false; - } - ctx->base.is_jmp = DISAS_STOP; - return true; -} - -static bool trans_cacop(DisasContext *ctx, arg_cacop *a) -{ - /* Treat the cacop as a nop */ - if (check_plv(ctx)) { - return false; - } - return true; -} - -static bool trans_ldpte(DisasContext *ctx, arg_ldpte *a) -{ - TCGv_i32 mem_idx = tcg_constant_i32(ctx->mem_idx); - TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); - - if (!avail_LSPW(ctx)) { - return true; - } - - if (check_plv(ctx)) { - return false; - } - gen_helper_ldpte(tcg_env, src1, tcg_constant_tl(a->imm), mem_idx); - return true; -} - -static bool trans_lddir(DisasContext *ctx, arg_lddir *a) -{ - TCGv_i32 mem_idx = tcg_constant_i32(ctx->mem_idx); - TCGv src = gpr_src(ctx, a->rj, EXT_NONE); - TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); - - if (!avail_LSPW(ctx)) { - return true; - } - - if (check_plv(ctx)) { - return false; - } - gen_helper_lddir(dest, tcg_env, src, tcg_constant_tl(a->imm), mem_idx); - return true; -} - -static bool trans_ertn(DisasContext *ctx, arg_ertn *a) -{ - if (check_plv(ctx)) { - return false; - } - gen_helper_ertn(tcg_env); - ctx->base.is_jmp = DISAS_EXIT; - return true; -} - -static bool trans_dbcl(DisasContext *ctx, arg_dbcl *a) -{ - if (check_plv(ctx)) { - return false; - } - generate_exception(ctx, EXCCODE_DBP); - return true; -} - -static bool trans_idle(DisasContext *ctx, arg_idle *a) -{ - if (check_plv(ctx)) { - return false; - } - - tcg_gen_movi_tl(cpu_pc, ctx->base.pc_next + 4); - gen_helper_idle(tcg_env); - ctx->base.is_jmp = DISAS_NORETURN; - return true; -} -#endif diff --git a/target/loongarch/insn_trans/trans_shift.c.inc b/target/loongarch/insn_trans/trans_shift.c.inc deleted file mode 100644 index 2f4bd6ff28..0000000000 --- a/target/loongarch/insn_trans/trans_shift.c.inc +++ /dev/null @@ -1,99 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (c) 2021 Loongson Technology Corporation Limited - */ - -static void gen_sll_w(TCGv dest, TCGv src1, TCGv src2) -{ - TCGv t0 = tcg_temp_new(); - tcg_gen_andi_tl(t0, src2, 0x1f); - tcg_gen_shl_tl(dest, src1, t0); -} - -static void gen_srl_w(TCGv dest, TCGv src1, TCGv src2) -{ - TCGv t0 = tcg_temp_new(); - tcg_gen_andi_tl(t0, src2, 0x1f); - tcg_gen_shr_tl(dest, src1, t0); -} - -static void gen_sra_w(TCGv dest, TCGv src1, TCGv src2) -{ - TCGv t0 = tcg_temp_new(); - tcg_gen_andi_tl(t0, src2, 0x1f); - tcg_gen_sar_tl(dest, src1, t0); -} - -static void gen_sll_d(TCGv dest, TCGv src1, TCGv src2) -{ - TCGv t0 = tcg_temp_new(); - tcg_gen_andi_tl(t0, src2, 0x3f); - tcg_gen_shl_tl(dest, src1, t0); -} - -static void gen_srl_d(TCGv dest, TCGv src1, TCGv src2) -{ - TCGv t0 = tcg_temp_new(); - tcg_gen_andi_tl(t0, src2, 0x3f); - tcg_gen_shr_tl(dest, src1, t0); -} - -static void gen_sra_d(TCGv dest, TCGv src1, TCGv src2) -{ - TCGv t0 = tcg_temp_new(); - tcg_gen_andi_tl(t0, 
src2, 0x3f); - tcg_gen_sar_tl(dest, src1, t0); -} - -static void gen_rotr_w(TCGv dest, TCGv src1, TCGv src2) -{ - TCGv_i32 t1 = tcg_temp_new_i32(); - TCGv_i32 t2 = tcg_temp_new_i32(); - TCGv t0 = tcg_temp_new(); - - tcg_gen_andi_tl(t0, src2, 0x1f); - - tcg_gen_trunc_tl_i32(t1, src1); - tcg_gen_trunc_tl_i32(t2, t0); - - tcg_gen_rotr_i32(t1, t1, t2); - tcg_gen_ext_i32_tl(dest, t1); -} - -static void gen_rotr_d(TCGv dest, TCGv src1, TCGv src2) -{ - TCGv t0 = tcg_temp_new(); - tcg_gen_andi_tl(t0, src2, 0x3f); - tcg_gen_rotr_tl(dest, src1, t0); -} - -static bool trans_srai_w(DisasContext *ctx, arg_srai_w *a) -{ - TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); - TCGv src1 = gpr_src(ctx, a->rj, EXT_ZERO); - - if (!avail_64(ctx)) { - return false; - } - - tcg_gen_sextract_tl(dest, src1, a->imm, 32 - a->imm); - gen_set_gpr(a->rd, dest, EXT_NONE); - - return true; -} - -TRANS(sll_w, ALL, gen_rrr, EXT_ZERO, EXT_NONE, EXT_SIGN, gen_sll_w) -TRANS(srl_w, ALL, gen_rrr, EXT_ZERO, EXT_NONE, EXT_SIGN, gen_srl_w) -TRANS(sra_w, ALL, gen_rrr, EXT_SIGN, EXT_NONE, EXT_SIGN, gen_sra_w) -TRANS(sll_d, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_sll_d) -TRANS(srl_d, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_srl_d) -TRANS(sra_d, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_sra_d) -TRANS(rotr_w, 64, gen_rrr, EXT_ZERO, EXT_NONE, EXT_SIGN, gen_rotr_w) -TRANS(rotr_d, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_rotr_d) -TRANS(slli_w, ALL, gen_rri_c, EXT_NONE, EXT_SIGN, tcg_gen_shli_tl) -TRANS(slli_d, 64, gen_rri_c, EXT_NONE, EXT_NONE, tcg_gen_shli_tl) -TRANS(srli_w, ALL, gen_rri_c, EXT_ZERO, EXT_SIGN, tcg_gen_shri_tl) -TRANS(srli_d, 64, gen_rri_c, EXT_NONE, EXT_NONE, tcg_gen_shri_tl) -TRANS(srai_d, 64, gen_rri_c, EXT_NONE, EXT_NONE, tcg_gen_sari_tl) -TRANS(rotri_w, 64, gen_rri_v, EXT_NONE, EXT_NONE, gen_rotr_w) -TRANS(rotri_d, 64, gen_rri_c, EXT_NONE, EXT_NONE, tcg_gen_rotri_tl) diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/insn_trans/trans_vec.c.inc deleted file mode 100644 index 92b1d22e28..0000000000 --- a/target/loongarch/insn_trans/trans_vec.c.inc +++ /dev/null @@ -1,5511 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * LoongArch vector translate functions - * Copyright (c) 2022-2023 Loongson Technology Corporation Limited - */ - -static bool check_vec(DisasContext *ctx, uint32_t oprsz) -{ - if ((oprsz == 16) && ((ctx->base.tb->flags & HW_FLAGS_EUEN_SXE) == 0)) { - generate_exception(ctx, EXCCODE_SXD); - return false; - } - - if ((oprsz == 32) && ((ctx->base.tb->flags & HW_FLAGS_EUEN_ASXE) == 0)) { - generate_exception(ctx, EXCCODE_ASXD); - return false; - } - - return true; -} - -static bool gen_vvvv_ptr_vl(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz, - gen_helper_gvec_4_ptr *fn) -{ - if (!check_vec(ctx, oprsz)) { - return true; - } - - tcg_gen_gvec_4_ptr(vec_full_offset(a->vd), - vec_full_offset(a->vj), - vec_full_offset(a->vk), - vec_full_offset(a->va), - tcg_env, - oprsz, ctx->vl / 8, 0, fn); - return true; -} - -static bool gen_vvvv_ptr(DisasContext *ctx, arg_vvvv *a, - gen_helper_gvec_4_ptr *fn) -{ - return gen_vvvv_ptr_vl(ctx, a, 16, fn); -} - -static bool gen_xxxx_ptr(DisasContext *ctx, arg_vvvv *a, - gen_helper_gvec_4_ptr *fn) -{ - return gen_vvvv_ptr_vl(ctx, a, 32, fn); -} - -static bool gen_vvvv_vl(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz, - gen_helper_gvec_4 *fn) -{ - if (!check_vec(ctx, oprsz)) { - return true; - } - - tcg_gen_gvec_4_ool(vec_full_offset(a->vd), - vec_full_offset(a->vj), - vec_full_offset(a->vk), - vec_full_offset(a->va), - 
oprsz, ctx->vl / 8, 0, fn); - return true; -} - -static bool gen_vvvv(DisasContext *ctx, arg_vvvv *a, - gen_helper_gvec_4 *fn) -{ - return gen_vvvv_vl(ctx, a, 16, fn); -} - -static bool gen_xxxx(DisasContext *ctx, arg_vvvv *a, - gen_helper_gvec_4 *fn) -{ - return gen_vvvv_vl(ctx, a, 32, fn); -} - -static bool gen_vvv_ptr_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz, - gen_helper_gvec_3_ptr *fn) -{ - if (!check_vec(ctx, oprsz)) { - return true; - } - tcg_gen_gvec_3_ptr(vec_full_offset(a->vd), - vec_full_offset(a->vj), - vec_full_offset(a->vk), - tcg_env, - oprsz, ctx->vl / 8, 0, fn); - return true; -} - -static bool gen_vvv_ptr(DisasContext *ctx, arg_vvv *a, - gen_helper_gvec_3_ptr *fn) -{ - return gen_vvv_ptr_vl(ctx, a, 16, fn); -} - -static bool gen_xxx_ptr(DisasContext *ctx, arg_vvv *a, - gen_helper_gvec_3_ptr *fn) -{ - return gen_vvv_ptr_vl(ctx, a, 32, fn); -} - -static bool gen_vvv_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz, - gen_helper_gvec_3 *fn) -{ - if (!check_vec(ctx, oprsz)) { - return true; - } - - tcg_gen_gvec_3_ool(vec_full_offset(a->vd), - vec_full_offset(a->vj), - vec_full_offset(a->vk), - oprsz, ctx->vl / 8, 0, fn); - return true; -} - -static bool gen_vvv(DisasContext *ctx, arg_vvv *a, gen_helper_gvec_3 *fn) -{ - return gen_vvv_vl(ctx, a, 16, fn); -} - -static bool gen_xxx(DisasContext *ctx, arg_vvv *a, gen_helper_gvec_3 *fn) -{ - return gen_vvv_vl(ctx, a, 32, fn); -} - -static bool gen_vv_ptr_vl(DisasContext *ctx, arg_vv *a, uint32_t oprsz, - gen_helper_gvec_2_ptr *fn) -{ - if (!check_vec(ctx, oprsz)) { - return true; - } - - tcg_gen_gvec_2_ptr(vec_full_offset(a->vd), - vec_full_offset(a->vj), - tcg_env, - oprsz, ctx->vl / 8, 0, fn); - return true; -} - -static bool gen_vv_ptr(DisasContext *ctx, arg_vv *a, - gen_helper_gvec_2_ptr *fn) -{ - return gen_vv_ptr_vl(ctx, a, 16, fn); -} - -static bool gen_xx_ptr(DisasContext *ctx, arg_vv *a, - gen_helper_gvec_2_ptr *fn) -{ - return gen_vv_ptr_vl(ctx, a, 32, fn); -} - -static bool gen_vv_vl(DisasContext *ctx, arg_vv *a, uint32_t oprsz, - gen_helper_gvec_2 *fn) -{ - if (!check_vec(ctx, oprsz)) { - return true; - } - - tcg_gen_gvec_2_ool(vec_full_offset(a->vd), - vec_full_offset(a->vj), - oprsz, ctx->vl / 8, 0, fn); - return true; -} - -static bool gen_vv(DisasContext *ctx, arg_vv *a, gen_helper_gvec_2 *fn) -{ - return gen_vv_vl(ctx, a, 16, fn); -} - -static bool gen_xx(DisasContext *ctx, arg_vv *a, gen_helper_gvec_2 *fn) -{ - return gen_vv_vl(ctx, a, 32, fn); -} - -static bool gen_vv_i_vl(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz, - gen_helper_gvec_2i *fn) -{ - if (!check_vec(ctx, oprsz)) { - return true; - } - - tcg_gen_gvec_2i_ool(vec_full_offset(a->vd), - vec_full_offset(a->vj), - tcg_constant_i64(a->imm), - oprsz, ctx->vl / 8, 0, fn); - return true; -} - -static bool gen_vv_i(DisasContext *ctx, arg_vv_i *a, gen_helper_gvec_2i *fn) -{ - return gen_vv_i_vl(ctx, a, 16, fn); -} - -static bool gen_xx_i(DisasContext *ctx, arg_vv_i *a, gen_helper_gvec_2i *fn) -{ - return gen_vv_i_vl(ctx, a, 32, fn); -} - -static bool gen_cv_vl(DisasContext *ctx, arg_cv *a, uint32_t sz, - void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32)) -{ - if (!check_vec(ctx, sz)) { - return true; - } - - TCGv_i32 vj = tcg_constant_i32(a->vj); - TCGv_i32 cd = tcg_constant_i32(a->cd); - TCGv_i32 oprsz = tcg_constant_i32(sz); - - func(tcg_env, oprsz, cd, vj); - return true; -} - -static bool gen_cv(DisasContext *ctx, arg_cv *a, - void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32)) -{ - return gen_cv_vl(ctx, a, 16, func); -} - -static bool 
gen_cx(DisasContext *ctx, arg_cv *a, - void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32)) -{ - return gen_cv_vl(ctx, a, 32, func); -} - -static bool gvec_vvv_vl(DisasContext *ctx, arg_vvv *a, - uint32_t oprsz, MemOp mop, - void (*func)(unsigned, uint32_t, uint32_t, - uint32_t, uint32_t, uint32_t)) -{ - uint32_t vd_ofs = vec_full_offset(a->vd); - uint32_t vj_ofs = vec_full_offset(a->vj); - uint32_t vk_ofs = vec_full_offset(a->vk); - - if (!check_vec(ctx, oprsz)) { - return true; - } - - func(mop, vd_ofs, vj_ofs, vk_ofs, oprsz, ctx->vl / 8); - return true; -} - -static bool gvec_vvv(DisasContext *ctx, arg_vvv *a, MemOp mop, - void (*func)(unsigned, uint32_t, uint32_t, - uint32_t, uint32_t, uint32_t)) -{ - return gvec_vvv_vl(ctx, a, 16, mop, func); -} - -static bool gvec_xxx(DisasContext *ctx, arg_vvv *a, MemOp mop, - void (*func)(unsigned, uint32_t, uint32_t, - uint32_t, uint32_t, uint32_t)) -{ - return gvec_vvv_vl(ctx, a, 32, mop, func); -} - -static bool gvec_vv_vl(DisasContext *ctx, arg_vv *a, - uint32_t oprsz, MemOp mop, - void (*func)(unsigned, uint32_t, uint32_t, - uint32_t, uint32_t)) -{ - uint32_t vd_ofs = vec_full_offset(a->vd); - uint32_t vj_ofs = vec_full_offset(a->vj); - - if (!check_vec(ctx, oprsz)) { - return true; - } - - func(mop, vd_ofs, vj_ofs, oprsz, ctx->vl / 8); - return true; -} - - -static bool gvec_vv(DisasContext *ctx, arg_vv *a, MemOp mop, - void (*func)(unsigned, uint32_t, uint32_t, - uint32_t, uint32_t)) -{ - return gvec_vv_vl(ctx, a, 16, mop, func); -} - -static bool gvec_xx(DisasContext *ctx, arg_vv *a, MemOp mop, - void (*func)(unsigned, uint32_t, uint32_t, - uint32_t, uint32_t)) -{ - return gvec_vv_vl(ctx, a, 32, mop, func); -} - -static bool gvec_vv_i_vl(DisasContext *ctx, arg_vv_i *a, - uint32_t oprsz, MemOp mop, - void (*func)(unsigned, uint32_t, uint32_t, - int64_t, uint32_t, uint32_t)) -{ - uint32_t vd_ofs = vec_full_offset(a->vd); - uint32_t vj_ofs = vec_full_offset(a->vj); - - if (!check_vec(ctx, oprsz)) { - return true; - } - - func(mop, vd_ofs, vj_ofs, a->imm, oprsz, ctx->vl / 8); - return true; -} - -static bool gvec_vv_i(DisasContext *ctx, arg_vv_i *a, MemOp mop, - void (*func)(unsigned, uint32_t, uint32_t, - int64_t, uint32_t, uint32_t)) -{ - return gvec_vv_i_vl(ctx, a, 16, mop, func); -} - -static bool gvec_xx_i(DisasContext *ctx, arg_vv_i *a, MemOp mop, - void (*func)(unsigned, uint32_t, uint32_t, - int64_t, uint32_t, uint32_t)) -{ - return gvec_vv_i_vl(ctx,a, 32, mop, func); -} - -static bool gvec_subi_vl(DisasContext *ctx, arg_vv_i *a, - uint32_t oprsz, MemOp mop) -{ - uint32_t vd_ofs = vec_full_offset(a->vd); - uint32_t vj_ofs = vec_full_offset(a->vj); - - if (!check_vec(ctx, oprsz)) { - return true; - } - - tcg_gen_gvec_addi(mop, vd_ofs, vj_ofs, -a->imm, oprsz, ctx->vl / 8); - return true; -} - -static bool gvec_subi(DisasContext *ctx, arg_vv_i *a, MemOp mop) -{ - return gvec_subi_vl(ctx, a, 16, mop); -} - -static bool gvec_xsubi(DisasContext *ctx, arg_vv_i *a, MemOp mop) -{ - return gvec_subi_vl(ctx, a, 32, mop); -} - -TRANS(vadd_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_add) -TRANS(vadd_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_add) -TRANS(vadd_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_add) -TRANS(vadd_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_add) -TRANS(xvadd_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_add) -TRANS(xvadd_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_add) -TRANS(xvadd_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_add) -TRANS(xvadd_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_add) - -static bool gen_vaddsub_q_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz, - 
void (*func)(TCGv_i64, TCGv_i64, TCGv_i64, - TCGv_i64, TCGv_i64, TCGv_i64)) -{ - int i; - TCGv_i64 rh, rl, ah, al, bh, bl; - - if (!check_vec(ctx, oprsz)) { - return true; - } - - rh = tcg_temp_new_i64(); - rl = tcg_temp_new_i64(); - ah = tcg_temp_new_i64(); - al = tcg_temp_new_i64(); - bh = tcg_temp_new_i64(); - bl = tcg_temp_new_i64(); - - for (i = 0; i < oprsz / 16; i++) { - get_vreg64(ah, a->vj, 1 + i * 2); - get_vreg64(al, a->vj, i * 2); - get_vreg64(bh, a->vk, 1 + i * 2); - get_vreg64(bl, a->vk, i * 2); - - func(rl, rh, al, ah, bl, bh); - - set_vreg64(rh, a->vd, 1 + i * 2); - set_vreg64(rl, a->vd, i * 2); - } - return true; -} - -static bool gen_vaddsub_q(DisasContext *ctx, arg_vvv *a, - void (*func)(TCGv_i64, TCGv_i64, TCGv_i64, - TCGv_i64, TCGv_i64, TCGv_i64)) -{ - return gen_vaddsub_q_vl(ctx, a, 16, func); -} - -static bool gen_xvaddsub_q(DisasContext *ctx, arg_vvv *a, - void (*func)(TCGv_i64, TCGv_i64, TCGv_i64, - TCGv_i64, TCGv_i64, TCGv_i64)) -{ - return gen_vaddsub_q_vl(ctx, a, 32, func); -} - -TRANS(vsub_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_sub) -TRANS(vsub_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_sub) -TRANS(vsub_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_sub) -TRANS(vsub_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_sub) -TRANS(xvsub_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_sub) -TRANS(xvsub_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_sub) -TRANS(xvsub_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_sub) -TRANS(xvsub_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_sub) - -TRANS(vadd_q, LSX, gen_vaddsub_q, tcg_gen_add2_i64) -TRANS(vsub_q, LSX, gen_vaddsub_q, tcg_gen_sub2_i64) -TRANS(xvadd_q, LASX, gen_xvaddsub_q, tcg_gen_add2_i64) -TRANS(xvsub_q, LASX, gen_xvaddsub_q, tcg_gen_sub2_i64) - -TRANS(vaddi_bu, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_addi) -TRANS(vaddi_hu, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_addi) -TRANS(vaddi_wu, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_addi) -TRANS(vaddi_du, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_addi) -TRANS(vsubi_bu, LSX, gvec_subi, MO_8) -TRANS(vsubi_hu, LSX, gvec_subi, MO_16) -TRANS(vsubi_wu, LSX, gvec_subi, MO_32) -TRANS(vsubi_du, LSX, gvec_subi, MO_64) -TRANS(xvaddi_bu, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_addi) -TRANS(xvaddi_hu, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_addi) -TRANS(xvaddi_wu, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_addi) -TRANS(xvaddi_du, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_addi) -TRANS(xvsubi_bu, LASX, gvec_xsubi, MO_8) -TRANS(xvsubi_hu, LASX, gvec_xsubi, MO_16) -TRANS(xvsubi_wu, LASX, gvec_xsubi, MO_32) -TRANS(xvsubi_du, LASX, gvec_xsubi, MO_64) - -TRANS(vneg_b, LSX, gvec_vv, MO_8, tcg_gen_gvec_neg) -TRANS(vneg_h, LSX, gvec_vv, MO_16, tcg_gen_gvec_neg) -TRANS(vneg_w, LSX, gvec_vv, MO_32, tcg_gen_gvec_neg) -TRANS(vneg_d, LSX, gvec_vv, MO_64, tcg_gen_gvec_neg) -TRANS(xvneg_b, LASX, gvec_xx, MO_8, tcg_gen_gvec_neg) -TRANS(xvneg_h, LASX, gvec_xx, MO_16, tcg_gen_gvec_neg) -TRANS(xvneg_w, LASX, gvec_xx, MO_32, tcg_gen_gvec_neg) -TRANS(xvneg_d, LASX, gvec_xx, MO_64, tcg_gen_gvec_neg) - -TRANS(vsadd_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_ssadd) -TRANS(vsadd_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_ssadd) -TRANS(vsadd_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_ssadd) -TRANS(vsadd_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_ssadd) -TRANS(vsadd_bu, LSX, gvec_vvv, MO_8, tcg_gen_gvec_usadd) -TRANS(vsadd_hu, LSX, gvec_vvv, MO_16, tcg_gen_gvec_usadd) -TRANS(vsadd_wu, LSX, gvec_vvv, MO_32, tcg_gen_gvec_usadd) -TRANS(vsadd_du, LSX, gvec_vvv, MO_64, tcg_gen_gvec_usadd) -TRANS(vssub_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_sssub) -TRANS(vssub_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_sssub) -TRANS(vssub_w, LSX, gvec_vvv, 
MO_32, tcg_gen_gvec_sssub) -TRANS(vssub_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_sssub) -TRANS(vssub_bu, LSX, gvec_vvv, MO_8, tcg_gen_gvec_ussub) -TRANS(vssub_hu, LSX, gvec_vvv, MO_16, tcg_gen_gvec_ussub) -TRANS(vssub_wu, LSX, gvec_vvv, MO_32, tcg_gen_gvec_ussub) -TRANS(vssub_du, LSX, gvec_vvv, MO_64, tcg_gen_gvec_ussub) - -TRANS(xvsadd_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_ssadd) -TRANS(xvsadd_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_ssadd) -TRANS(xvsadd_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_ssadd) -TRANS(xvsadd_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_ssadd) -TRANS(xvsadd_bu, LASX, gvec_xxx, MO_8, tcg_gen_gvec_usadd) -TRANS(xvsadd_hu, LASX, gvec_xxx, MO_16, tcg_gen_gvec_usadd) -TRANS(xvsadd_wu, LASX, gvec_xxx, MO_32, tcg_gen_gvec_usadd) -TRANS(xvsadd_du, LASX, gvec_xxx, MO_64, tcg_gen_gvec_usadd) -TRANS(xvssub_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_sssub) -TRANS(xvssub_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_sssub) -TRANS(xvssub_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_sssub) -TRANS(xvssub_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_sssub) -TRANS(xvssub_bu, LASX, gvec_xxx, MO_8, tcg_gen_gvec_ussub) -TRANS(xvssub_hu, LASX, gvec_xxx, MO_16, tcg_gen_gvec_ussub) -TRANS(xvssub_wu, LASX, gvec_xxx, MO_32, tcg_gen_gvec_ussub) -TRANS(xvssub_du, LASX, gvec_xxx, MO_64, tcg_gen_gvec_ussub) - -TRANS(vhaddw_h_b, LSX, gen_vvv, gen_helper_vhaddw_h_b) -TRANS(vhaddw_w_h, LSX, gen_vvv, gen_helper_vhaddw_w_h) -TRANS(vhaddw_d_w, LSX, gen_vvv, gen_helper_vhaddw_d_w) -TRANS(vhaddw_q_d, LSX, gen_vvv, gen_helper_vhaddw_q_d) -TRANS(vhaddw_hu_bu, LSX, gen_vvv, gen_helper_vhaddw_hu_bu) -TRANS(vhaddw_wu_hu, LSX, gen_vvv, gen_helper_vhaddw_wu_hu) -TRANS(vhaddw_du_wu, LSX, gen_vvv, gen_helper_vhaddw_du_wu) -TRANS(vhaddw_qu_du, LSX, gen_vvv, gen_helper_vhaddw_qu_du) -TRANS(vhsubw_h_b, LSX, gen_vvv, gen_helper_vhsubw_h_b) -TRANS(vhsubw_w_h, LSX, gen_vvv, gen_helper_vhsubw_w_h) -TRANS(vhsubw_d_w, LSX, gen_vvv, gen_helper_vhsubw_d_w) -TRANS(vhsubw_q_d, LSX, gen_vvv, gen_helper_vhsubw_q_d) -TRANS(vhsubw_hu_bu, LSX, gen_vvv, gen_helper_vhsubw_hu_bu) -TRANS(vhsubw_wu_hu, LSX, gen_vvv, gen_helper_vhsubw_wu_hu) -TRANS(vhsubw_du_wu, LSX, gen_vvv, gen_helper_vhsubw_du_wu) -TRANS(vhsubw_qu_du, LSX, gen_vvv, gen_helper_vhsubw_qu_du) - -TRANS(xvhaddw_h_b, LASX, gen_xxx, gen_helper_vhaddw_h_b) -TRANS(xvhaddw_w_h, LASX, gen_xxx, gen_helper_vhaddw_w_h) -TRANS(xvhaddw_d_w, LASX, gen_xxx, gen_helper_vhaddw_d_w) -TRANS(xvhaddw_q_d, LASX, gen_xxx, gen_helper_vhaddw_q_d) -TRANS(xvhaddw_hu_bu, LASX, gen_xxx, gen_helper_vhaddw_hu_bu) -TRANS(xvhaddw_wu_hu, LASX, gen_xxx, gen_helper_vhaddw_wu_hu) -TRANS(xvhaddw_du_wu, LASX, gen_xxx, gen_helper_vhaddw_du_wu) -TRANS(xvhaddw_qu_du, LASX, gen_xxx, gen_helper_vhaddw_qu_du) -TRANS(xvhsubw_h_b, LASX, gen_xxx, gen_helper_vhsubw_h_b) -TRANS(xvhsubw_w_h, LASX, gen_xxx, gen_helper_vhsubw_w_h) -TRANS(xvhsubw_d_w, LASX, gen_xxx, gen_helper_vhsubw_d_w) -TRANS(xvhsubw_q_d, LASX, gen_xxx, gen_helper_vhsubw_q_d) -TRANS(xvhsubw_hu_bu, LASX, gen_xxx, gen_helper_vhsubw_hu_bu) -TRANS(xvhsubw_wu_hu, LASX, gen_xxx, gen_helper_vhsubw_wu_hu) -TRANS(xvhsubw_du_wu, LASX, gen_xxx, gen_helper_vhsubw_du_wu) -TRANS(xvhsubw_qu_du, LASX, gen_xxx, gen_helper_vhsubw_qu_du) - -static void gen_vaddwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) -{ - TCGv_vec t1, t2; - - int halfbits = 4 << vece; - - t1 = tcg_temp_new_vec_matching(a); - t2 = tcg_temp_new_vec_matching(b); - - /* Sign-extend the even elements from a */ - tcg_gen_shli_vec(vece, t1, a, halfbits); - tcg_gen_sari_vec(vece, t1, t1, halfbits); - - /* Sign-extend the even 
elements from b */ - tcg_gen_shli_vec(vece, t2, b, halfbits); - tcg_gen_sari_vec(vece, t2, t2, halfbits); - - tcg_gen_add_vec(vece, t, t1, t2); -} - -static void gen_vaddwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) -{ - TCGv_i32 t1, t2; - - t1 = tcg_temp_new_i32(); - t2 = tcg_temp_new_i32(); - tcg_gen_ext16s_i32(t1, a); - tcg_gen_ext16s_i32(t2, b); - tcg_gen_add_i32(t, t1, t2); -} - -static void gen_vaddwev_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) -{ - TCGv_i64 t1, t2; - - t1 = tcg_temp_new_i64(); - t2 = tcg_temp_new_i64(); - tcg_gen_ext32s_i64(t1, a); - tcg_gen_ext32s_i64(t2, b); - tcg_gen_add_i64(t, t1, t2); -} - -static void do_vaddwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, - uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0 - }; - static const GVecGen3 op[4] = { - { - .fniv = gen_vaddwev_s, - .fno = gen_helper_vaddwev_h_b, - .opt_opc = vecop_list, - .vece = MO_16 - }, - { - .fni4 = gen_vaddwev_w_h, - .fniv = gen_vaddwev_s, - .fno = gen_helper_vaddwev_w_h, - .opt_opc = vecop_list, - .vece = MO_32 - }, - { - .fni8 = gen_vaddwev_d_w, - .fniv = gen_vaddwev_s, - .fno = gen_helper_vaddwev_d_w, - .opt_opc = vecop_list, - .vece = MO_64 - }, - { - .fno = gen_helper_vaddwev_q_d, - .vece = MO_128 - }, - }; - - tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); -} - -TRANS(vaddwev_h_b, LSX, gvec_vvv, MO_8, do_vaddwev_s) -TRANS(vaddwev_w_h, LSX, gvec_vvv, MO_16, do_vaddwev_s) -TRANS(vaddwev_d_w, LSX, gvec_vvv, MO_32, do_vaddwev_s) -TRANS(vaddwev_q_d, LSX, gvec_vvv, MO_64, do_vaddwev_s) -TRANS(xvaddwev_h_b, LASX, gvec_xxx, MO_8, do_vaddwev_s) -TRANS(xvaddwev_w_h, LASX, gvec_xxx, MO_16, do_vaddwev_s) -TRANS(xvaddwev_d_w, LASX, gvec_xxx, MO_32, do_vaddwev_s) -TRANS(xvaddwev_q_d, LASX, gvec_xxx, MO_64, do_vaddwev_s) - -static void gen_vaddwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) -{ - TCGv_i32 t1, t2; - - t1 = tcg_temp_new_i32(); - t2 = tcg_temp_new_i32(); - tcg_gen_sari_i32(t1, a, 16); - tcg_gen_sari_i32(t2, b, 16); - tcg_gen_add_i32(t, t1, t2); -} - -static void gen_vaddwod_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) -{ - TCGv_i64 t1, t2; - - t1 = tcg_temp_new_i64(); - t2 = tcg_temp_new_i64(); - tcg_gen_sari_i64(t1, a, 32); - tcg_gen_sari_i64(t2, b, 32); - tcg_gen_add_i64(t, t1, t2); -} - -static void gen_vaddwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) -{ - TCGv_vec t1, t2; - - int halfbits = 4 << vece; - - t1 = tcg_temp_new_vec_matching(a); - t2 = tcg_temp_new_vec_matching(b); - - /* Sign-extend the odd elements for vector */ - tcg_gen_sari_vec(vece, t1, a, halfbits); - tcg_gen_sari_vec(vece, t2, b, halfbits); - - tcg_gen_add_vec(vece, t, t1, t2); -} - -static void do_vaddwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, - uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_sari_vec, INDEX_op_add_vec, 0 - }; - static const GVecGen3 op[4] = { - { - .fniv = gen_vaddwod_s, - .fno = gen_helper_vaddwod_h_b, - .opt_opc = vecop_list, - .vece = MO_16 - }, - { - .fni4 = gen_vaddwod_w_h, - .fniv = gen_vaddwod_s, - .fno = gen_helper_vaddwod_w_h, - .opt_opc = vecop_list, - .vece = MO_32 - }, - { - .fni8 = gen_vaddwod_d_w, - .fniv = gen_vaddwod_s, - .fno = gen_helper_vaddwod_d_w, - .opt_opc = vecop_list, - .vece = MO_64 - }, - { - .fno = gen_helper_vaddwod_q_d, - .vece = MO_128 - }, - }; - - tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); -} - -TRANS(vaddwod_h_b, LSX, gvec_vvv, MO_8, do_vaddwod_s) 
-TRANS(vaddwod_w_h, LSX, gvec_vvv, MO_16, do_vaddwod_s) -TRANS(vaddwod_d_w, LSX, gvec_vvv, MO_32, do_vaddwod_s) -TRANS(vaddwod_q_d, LSX, gvec_vvv, MO_64, do_vaddwod_s) -TRANS(xvaddwod_h_b, LASX, gvec_xxx, MO_8, do_vaddwod_s) -TRANS(xvaddwod_w_h, LASX, gvec_xxx, MO_16, do_vaddwod_s) -TRANS(xvaddwod_d_w, LASX, gvec_xxx, MO_32, do_vaddwod_s) -TRANS(xvaddwod_q_d, LASX, gvec_xxx, MO_64, do_vaddwod_s) - - -static void gen_vsubwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) -{ - TCGv_vec t1, t2; - - int halfbits = 4 << vece; - - t1 = tcg_temp_new_vec_matching(a); - t2 = tcg_temp_new_vec_matching(b); - - /* Sign-extend the even elements from a */ - tcg_gen_shli_vec(vece, t1, a, halfbits); - tcg_gen_sari_vec(vece, t1, t1, halfbits); - - /* Sign-extend the even elements from b */ - tcg_gen_shli_vec(vece, t2, b, halfbits); - tcg_gen_sari_vec(vece, t2, t2, halfbits); - - tcg_gen_sub_vec(vece, t, t1, t2); -} - -static void gen_vsubwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) -{ - TCGv_i32 t1, t2; - - t1 = tcg_temp_new_i32(); - t2 = tcg_temp_new_i32(); - tcg_gen_ext16s_i32(t1, a); - tcg_gen_ext16s_i32(t2, b); - tcg_gen_sub_i32(t, t1, t2); -} - -static void gen_vsubwev_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) -{ - TCGv_i64 t1, t2; - - t1 = tcg_temp_new_i64(); - t2 = tcg_temp_new_i64(); - tcg_gen_ext32s_i64(t1, a); - tcg_gen_ext32s_i64(t2, b); - tcg_gen_sub_i64(t, t1, t2); -} - -static void do_vsubwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, - uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_sub_vec, 0 - }; - static const GVecGen3 op[4] = { - { - .fniv = gen_vsubwev_s, - .fno = gen_helper_vsubwev_h_b, - .opt_opc = vecop_list, - .vece = MO_16 - }, - { - .fni4 = gen_vsubwev_w_h, - .fniv = gen_vsubwev_s, - .fno = gen_helper_vsubwev_w_h, - .opt_opc = vecop_list, - .vece = MO_32 - }, - { - .fni8 = gen_vsubwev_d_w, - .fniv = gen_vsubwev_s, - .fno = gen_helper_vsubwev_d_w, - .opt_opc = vecop_list, - .vece = MO_64 - }, - { - .fno = gen_helper_vsubwev_q_d, - .vece = MO_128 - }, - }; - - tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); -} - -TRANS(vsubwev_h_b, LSX, gvec_vvv, MO_8, do_vsubwev_s) -TRANS(vsubwev_w_h, LSX, gvec_vvv, MO_16, do_vsubwev_s) -TRANS(vsubwev_d_w, LSX, gvec_vvv, MO_32, do_vsubwev_s) -TRANS(vsubwev_q_d, LSX, gvec_vvv, MO_64, do_vsubwev_s) -TRANS(xvsubwev_h_b, LASX, gvec_xxx, MO_8, do_vsubwev_s) -TRANS(xvsubwev_w_h, LASX, gvec_xxx, MO_16, do_vsubwev_s) -TRANS(xvsubwev_d_w, LASX, gvec_xxx, MO_32, do_vsubwev_s) -TRANS(xvsubwev_q_d, LASX, gvec_xxx, MO_64, do_vsubwev_s) - -static void gen_vsubwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) -{ - TCGv_vec t1, t2; - - int halfbits = 4 << vece; - - t1 = tcg_temp_new_vec_matching(a); - t2 = tcg_temp_new_vec_matching(b); - - /* Sign-extend the odd elements for vector */ - tcg_gen_sari_vec(vece, t1, a, halfbits); - tcg_gen_sari_vec(vece, t2, b, halfbits); - - tcg_gen_sub_vec(vece, t, t1, t2); -} - -static void gen_vsubwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) -{ - TCGv_i32 t1, t2; - - t1 = tcg_temp_new_i32(); - t2 = tcg_temp_new_i32(); - tcg_gen_sari_i32(t1, a, 16); - tcg_gen_sari_i32(t2, b, 16); - tcg_gen_sub_i32(t, t1, t2); -} - -static void gen_vsubwod_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) -{ - TCGv_i64 t1, t2; - - t1 = tcg_temp_new_i64(); - t2 = tcg_temp_new_i64(); - tcg_gen_sari_i64(t1, a, 32); - tcg_gen_sari_i64(t2, b, 32); - tcg_gen_sub_i64(t, t1, t2); -} - -static void do_vsubwod_s(unsigned vece, uint32_t 
vd_ofs, uint32_t vj_ofs, - uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_sari_vec, INDEX_op_sub_vec, 0 - }; - static const GVecGen3 op[4] = { - { - .fniv = gen_vsubwod_s, - .fno = gen_helper_vsubwod_h_b, - .opt_opc = vecop_list, - .vece = MO_16 - }, - { - .fni4 = gen_vsubwod_w_h, - .fniv = gen_vsubwod_s, - .fno = gen_helper_vsubwod_w_h, - .opt_opc = vecop_list, - .vece = MO_32 - }, - { - .fni8 = gen_vsubwod_d_w, - .fniv = gen_vsubwod_s, - .fno = gen_helper_vsubwod_d_w, - .opt_opc = vecop_list, - .vece = MO_64 - }, - { - .fno = gen_helper_vsubwod_q_d, - .vece = MO_128 - }, - }; - - tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); -} - -TRANS(vsubwod_h_b, LSX, gvec_vvv, MO_8, do_vsubwod_s) -TRANS(vsubwod_w_h, LSX, gvec_vvv, MO_16, do_vsubwod_s) -TRANS(vsubwod_d_w, LSX, gvec_vvv, MO_32, do_vsubwod_s) -TRANS(vsubwod_q_d, LSX, gvec_vvv, MO_64, do_vsubwod_s) -TRANS(xvsubwod_h_b, LASX, gvec_xxx, MO_8, do_vsubwod_s) -TRANS(xvsubwod_w_h, LASX, gvec_xxx, MO_16, do_vsubwod_s) -TRANS(xvsubwod_d_w, LASX, gvec_xxx, MO_32, do_vsubwod_s) -TRANS(xvsubwod_q_d, LASX, gvec_xxx, MO_64, do_vsubwod_s) - -static void gen_vaddwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) -{ - TCGv_vec t1, t2, t3; - - t1 = tcg_temp_new_vec_matching(a); - t2 = tcg_temp_new_vec_matching(b); - t3 = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece)); - tcg_gen_and_vec(vece, t1, a, t3); - tcg_gen_and_vec(vece, t2, b, t3); - tcg_gen_add_vec(vece, t, t1, t2); -} - -static void gen_vaddwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) -{ - TCGv_i32 t1, t2; - - t1 = tcg_temp_new_i32(); - t2 = tcg_temp_new_i32(); - tcg_gen_ext16u_i32(t1, a); - tcg_gen_ext16u_i32(t2, b); - tcg_gen_add_i32(t, t1, t2); -} - -static void gen_vaddwev_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) -{ - TCGv_i64 t1, t2; - - t1 = tcg_temp_new_i64(); - t2 = tcg_temp_new_i64(); - tcg_gen_ext32u_i64(t1, a); - tcg_gen_ext32u_i64(t2, b); - tcg_gen_add_i64(t, t1, t2); -} - -static void do_vaddwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, - uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_add_vec, 0 - }; - static const GVecGen3 op[4] = { - { - .fniv = gen_vaddwev_u, - .fno = gen_helper_vaddwev_h_bu, - .opt_opc = vecop_list, - .vece = MO_16 - }, - { - .fni4 = gen_vaddwev_w_hu, - .fniv = gen_vaddwev_u, - .fno = gen_helper_vaddwev_w_hu, - .opt_opc = vecop_list, - .vece = MO_32 - }, - { - .fni8 = gen_vaddwev_d_wu, - .fniv = gen_vaddwev_u, - .fno = gen_helper_vaddwev_d_wu, - .opt_opc = vecop_list, - .vece = MO_64 - }, - { - .fno = gen_helper_vaddwev_q_du, - .vece = MO_128 - }, - }; - - tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); -} - -TRANS(vaddwev_h_bu, LSX, gvec_vvv, MO_8, do_vaddwev_u) -TRANS(vaddwev_w_hu, LSX, gvec_vvv, MO_16, do_vaddwev_u) -TRANS(vaddwev_d_wu, LSX, gvec_vvv, MO_32, do_vaddwev_u) -TRANS(vaddwev_q_du, LSX, gvec_vvv, MO_64, do_vaddwev_u) -TRANS(xvaddwev_h_bu, LASX, gvec_xxx, MO_8, do_vaddwev_u) -TRANS(xvaddwev_w_hu, LASX, gvec_xxx, MO_16, do_vaddwev_u) -TRANS(xvaddwev_d_wu, LASX, gvec_xxx, MO_32, do_vaddwev_u) -TRANS(xvaddwev_q_du, LASX, gvec_xxx, MO_64, do_vaddwev_u) - -static void gen_vaddwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) -{ - TCGv_vec t1, t2; - - int halfbits = 4 << vece; - - t1 = tcg_temp_new_vec_matching(a); - t2 = tcg_temp_new_vec_matching(b); - - /* Zero-extend the odd elements for vector */ - tcg_gen_shri_vec(vece, t1, a, halfbits); - 
tcg_gen_shri_vec(vece, t2, b, halfbits); - - tcg_gen_add_vec(vece, t, t1, t2); -} - -static void gen_vaddwod_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) -{ - TCGv_i32 t1, t2; - - t1 = tcg_temp_new_i32(); - t2 = tcg_temp_new_i32(); - tcg_gen_shri_i32(t1, a, 16); - tcg_gen_shri_i32(t2, b, 16); - tcg_gen_add_i32(t, t1, t2); -} - -static void gen_vaddwod_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) -{ - TCGv_i64 t1, t2; - - t1 = tcg_temp_new_i64(); - t2 = tcg_temp_new_i64(); - tcg_gen_shri_i64(t1, a, 32); - tcg_gen_shri_i64(t2, b, 32); - tcg_gen_add_i64(t, t1, t2); -} - -static void do_vaddwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, - uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_shri_vec, INDEX_op_add_vec, 0 - }; - static const GVecGen3 op[4] = { - { - .fniv = gen_vaddwod_u, - .fno = gen_helper_vaddwod_h_bu, - .opt_opc = vecop_list, - .vece = MO_16 - }, - { - .fni4 = gen_vaddwod_w_hu, - .fniv = gen_vaddwod_u, - .fno = gen_helper_vaddwod_w_hu, - .opt_opc = vecop_list, - .vece = MO_32 - }, - { - .fni8 = gen_vaddwod_d_wu, - .fniv = gen_vaddwod_u, - .fno = gen_helper_vaddwod_d_wu, - .opt_opc = vecop_list, - .vece = MO_64 - }, - { - .fno = gen_helper_vaddwod_q_du, - .vece = MO_128 - }, - }; - - tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); -} - -TRANS(vaddwod_h_bu, LSX, gvec_vvv, MO_8, do_vaddwod_u) -TRANS(vaddwod_w_hu, LSX, gvec_vvv, MO_16, do_vaddwod_u) -TRANS(vaddwod_d_wu, LSX, gvec_vvv, MO_32, do_vaddwod_u) -TRANS(vaddwod_q_du, LSX, gvec_vvv, MO_64, do_vaddwod_u) -TRANS(xvaddwod_h_bu, LASX, gvec_xxx, MO_8, do_vaddwod_u) -TRANS(xvaddwod_w_hu, LASX, gvec_xxx, MO_16, do_vaddwod_u) -TRANS(xvaddwod_d_wu, LASX, gvec_xxx, MO_32, do_vaddwod_u) -TRANS(xvaddwod_q_du, LASX, gvec_xxx, MO_64, do_vaddwod_u) - -static void gen_vsubwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) -{ - TCGv_vec t1, t2, t3; - - t1 = tcg_temp_new_vec_matching(a); - t2 = tcg_temp_new_vec_matching(b); - t3 = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece)); - tcg_gen_and_vec(vece, t1, a, t3); - tcg_gen_and_vec(vece, t2, b, t3); - tcg_gen_sub_vec(vece, t, t1, t2); -} - -static void gen_vsubwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) -{ - TCGv_i32 t1, t2; - - t1 = tcg_temp_new_i32(); - t2 = tcg_temp_new_i32(); - tcg_gen_ext16u_i32(t1, a); - tcg_gen_ext16u_i32(t2, b); - tcg_gen_sub_i32(t, t1, t2); -} - -static void gen_vsubwev_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) -{ - TCGv_i64 t1, t2; - - t1 = tcg_temp_new_i64(); - t2 = tcg_temp_new_i64(); - tcg_gen_ext32u_i64(t1, a); - tcg_gen_ext32u_i64(t2, b); - tcg_gen_sub_i64(t, t1, t2); -} - -static void do_vsubwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, - uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_sub_vec, 0 - }; - static const GVecGen3 op[4] = { - { - .fniv = gen_vsubwev_u, - .fno = gen_helper_vsubwev_h_bu, - .opt_opc = vecop_list, - .vece = MO_16 - }, - { - .fni4 = gen_vsubwev_w_hu, - .fniv = gen_vsubwev_u, - .fno = gen_helper_vsubwev_w_hu, - .opt_opc = vecop_list, - .vece = MO_32 - }, - { - .fni8 = gen_vsubwev_d_wu, - .fniv = gen_vsubwev_u, - .fno = gen_helper_vsubwev_d_wu, - .opt_opc = vecop_list, - .vece = MO_64 - }, - { - .fno = gen_helper_vsubwev_q_du, - .vece = MO_128 - }, - }; - - tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); -} - -TRANS(vsubwev_h_bu, LSX, gvec_vvv, MO_8, do_vsubwev_u) -TRANS(vsubwev_w_hu, LSX, gvec_vvv, MO_16, do_vsubwev_u) -TRANS(vsubwev_d_wu, LSX, gvec_vvv, MO_32, 
do_vsubwev_u) -TRANS(vsubwev_q_du, LSX, gvec_vvv, MO_64, do_vsubwev_u) -TRANS(xvsubwev_h_bu, LASX, gvec_xxx, MO_8, do_vsubwev_u) -TRANS(xvsubwev_w_hu, LASX, gvec_xxx, MO_16, do_vsubwev_u) -TRANS(xvsubwev_d_wu, LASX, gvec_xxx, MO_32, do_vsubwev_u) -TRANS(xvsubwev_q_du, LASX, gvec_xxx, MO_64, do_vsubwev_u) - -static void gen_vsubwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) -{ - TCGv_vec t1, t2; - - int halfbits = 4 << vece; - - t1 = tcg_temp_new_vec_matching(a); - t2 = tcg_temp_new_vec_matching(b); - - /* Zero-extend the odd elements for vector */ - tcg_gen_shri_vec(vece, t1, a, halfbits); - tcg_gen_shri_vec(vece, t2, b, halfbits); - - tcg_gen_sub_vec(vece, t, t1, t2); -} - -static void gen_vsubwod_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) -{ - TCGv_i32 t1, t2; - - t1 = tcg_temp_new_i32(); - t2 = tcg_temp_new_i32(); - tcg_gen_shri_i32(t1, a, 16); - tcg_gen_shri_i32(t2, b, 16); - tcg_gen_sub_i32(t, t1, t2); -} - -static void gen_vsubwod_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) -{ - TCGv_i64 t1, t2; - - t1 = tcg_temp_new_i64(); - t2 = tcg_temp_new_i64(); - tcg_gen_shri_i64(t1, a, 32); - tcg_gen_shri_i64(t2, b, 32); - tcg_gen_sub_i64(t, t1, t2); -} - -static void do_vsubwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, - uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_shri_vec, INDEX_op_sub_vec, 0 - }; - static const GVecGen3 op[4] = { - { - .fniv = gen_vsubwod_u, - .fno = gen_helper_vsubwod_h_bu, - .opt_opc = vecop_list, - .vece = MO_16 - }, - { - .fni4 = gen_vsubwod_w_hu, - .fniv = gen_vsubwod_u, - .fno = gen_helper_vsubwod_w_hu, - .opt_opc = vecop_list, - .vece = MO_32 - }, - { - .fni8 = gen_vsubwod_d_wu, - .fniv = gen_vsubwod_u, - .fno = gen_helper_vsubwod_d_wu, - .opt_opc = vecop_list, - .vece = MO_64 - }, - { - .fno = gen_helper_vsubwod_q_du, - .vece = MO_128 - }, - }; - - tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); -} - -TRANS(vsubwod_h_bu, LSX, gvec_vvv, MO_8, do_vsubwod_u) -TRANS(vsubwod_w_hu, LSX, gvec_vvv, MO_16, do_vsubwod_u) -TRANS(vsubwod_d_wu, LSX, gvec_vvv, MO_32, do_vsubwod_u) -TRANS(vsubwod_q_du, LSX, gvec_vvv, MO_64, do_vsubwod_u) -TRANS(xvsubwod_h_bu, LASX, gvec_xxx, MO_8, do_vsubwod_u) -TRANS(xvsubwod_w_hu, LASX, gvec_xxx, MO_16, do_vsubwod_u) -TRANS(xvsubwod_d_wu, LASX, gvec_xxx, MO_32, do_vsubwod_u) -TRANS(xvsubwod_q_du, LASX, gvec_xxx, MO_64, do_vsubwod_u) - -static void gen_vaddwev_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) -{ - TCGv_vec t1, t2, t3; - - int halfbits = 4 << vece; - - t1 = tcg_temp_new_vec_matching(a); - t2 = tcg_temp_new_vec_matching(b); - t3 = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, halfbits)); - - /* Zero-extend the even elements from a */ - tcg_gen_and_vec(vece, t1, a, t3); - - /* Sign-extend the even elements from b */ - tcg_gen_shli_vec(vece, t2, b, halfbits); - tcg_gen_sari_vec(vece, t2, t2, halfbits); - - tcg_gen_add_vec(vece, t, t1, t2); -} - -static void gen_vaddwev_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) -{ - TCGv_i32 t1, t2; - - t1 = tcg_temp_new_i32(); - t2 = tcg_temp_new_i32(); - tcg_gen_ext16u_i32(t1, a); - tcg_gen_ext16s_i32(t2, b); - tcg_gen_add_i32(t, t1, t2); -} - -static void gen_vaddwev_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) -{ - TCGv_i64 t1, t2; - - t1 = tcg_temp_new_i64(); - t2 = tcg_temp_new_i64(); - tcg_gen_ext32u_i64(t1, a); - tcg_gen_ext32s_i64(t2, b); - tcg_gen_add_i64(t, t1, t2); -} - -static void do_vaddwev_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, - uint32_t vk_ofs, uint32_t oprsz, 
uint32_t maxsz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0 - }; - static const GVecGen3 op[4] = { - { - .fniv = gen_vaddwev_u_s, - .fno = gen_helper_vaddwev_h_bu_b, - .opt_opc = vecop_list, - .vece = MO_16 - }, - { - .fni4 = gen_vaddwev_w_hu_h, - .fniv = gen_vaddwev_u_s, - .fno = gen_helper_vaddwev_w_hu_h, - .opt_opc = vecop_list, - .vece = MO_32 - }, - { - .fni8 = gen_vaddwev_d_wu_w, - .fniv = gen_vaddwev_u_s, - .fno = gen_helper_vaddwev_d_wu_w, - .opt_opc = vecop_list, - .vece = MO_64 - }, - { - .fno = gen_helper_vaddwev_q_du_d, - .vece = MO_128 - }, - }; - - tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); -} - -TRANS(vaddwev_h_bu_b, LSX, gvec_vvv, MO_8, do_vaddwev_u_s) -TRANS(vaddwev_w_hu_h, LSX, gvec_vvv, MO_16, do_vaddwev_u_s) -TRANS(vaddwev_d_wu_w, LSX, gvec_vvv, MO_32, do_vaddwev_u_s) -TRANS(vaddwev_q_du_d, LSX, gvec_vvv, MO_64, do_vaddwev_u_s) -TRANS(xvaddwev_h_bu_b, LASX, gvec_xxx, MO_8, do_vaddwev_u_s) -TRANS(xvaddwev_w_hu_h, LASX, gvec_xxx, MO_16, do_vaddwev_u_s) -TRANS(xvaddwev_d_wu_w, LASX, gvec_xxx, MO_32, do_vaddwev_u_s) -TRANS(xvaddwev_q_du_d, LASX, gvec_xxx, MO_64, do_vaddwev_u_s) - -static void gen_vaddwod_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) -{ - TCGv_vec t1, t2; - - int halfbits = 4 << vece; - - t1 = tcg_temp_new_vec_matching(a); - t2 = tcg_temp_new_vec_matching(b); - - /* Zero-extend the odd elements from a */ - tcg_gen_shri_vec(vece, t1, a, halfbits); - /* Sign-extend the odd elements from b */ - tcg_gen_sari_vec(vece, t2, b, halfbits); - - tcg_gen_add_vec(vece, t, t1, t2); -} - -static void gen_vaddwod_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) -{ - TCGv_i32 t1, t2; - - t1 = tcg_temp_new_i32(); - t2 = tcg_temp_new_i32(); - tcg_gen_shri_i32(t1, a, 16); - tcg_gen_sari_i32(t2, b, 16); - tcg_gen_add_i32(t, t1, t2); -} - -static void gen_vaddwod_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) -{ - TCGv_i64 t1, t2; - - t1 = tcg_temp_new_i64(); - t2 = tcg_temp_new_i64(); - tcg_gen_shri_i64(t1, a, 32); - tcg_gen_sari_i64(t2, b, 32); - tcg_gen_add_i64(t, t1, t2); -} - -static void do_vaddwod_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, - uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0 - }; - static const GVecGen3 op[4] = { - { - .fniv = gen_vaddwod_u_s, - .fno = gen_helper_vaddwod_h_bu_b, - .opt_opc = vecop_list, - .vece = MO_16 - }, - { - .fni4 = gen_vaddwod_w_hu_h, - .fniv = gen_vaddwod_u_s, - .fno = gen_helper_vaddwod_w_hu_h, - .opt_opc = vecop_list, - .vece = MO_32 - }, - { - .fni8 = gen_vaddwod_d_wu_w, - .fniv = gen_vaddwod_u_s, - .fno = gen_helper_vaddwod_d_wu_w, - .opt_opc = vecop_list, - .vece = MO_64 - }, - { - .fno = gen_helper_vaddwod_q_du_d, - .vece = MO_128 - }, - }; - - tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); -} - -TRANS(vaddwod_h_bu_b, LSX, gvec_vvv, MO_8, do_vaddwod_u_s) -TRANS(vaddwod_w_hu_h, LSX, gvec_vvv, MO_16, do_vaddwod_u_s) -TRANS(vaddwod_d_wu_w, LSX, gvec_vvv, MO_32, do_vaddwod_u_s) -TRANS(vaddwod_q_du_d, LSX, gvec_vvv, MO_64, do_vaddwod_u_s) -TRANS(xvaddwod_h_bu_b, LSX, gvec_xxx, MO_8, do_vaddwod_u_s) -TRANS(xvaddwod_w_hu_h, LSX, gvec_xxx, MO_16, do_vaddwod_u_s) -TRANS(xvaddwod_d_wu_w, LSX, gvec_xxx, MO_32, do_vaddwod_u_s) -TRANS(xvaddwod_q_du_d, LSX, gvec_xxx, MO_64, do_vaddwod_u_s) - -static void do_vavg(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b, - void (*gen_shr_vec)(unsigned, TCGv_vec, - TCGv_vec, int64_t), - 
void (*gen_round_vec)(unsigned, TCGv_vec, - TCGv_vec, TCGv_vec)) -{ - TCGv_vec tmp = tcg_temp_new_vec_matching(t); - gen_round_vec(vece, tmp, a, b); - tcg_gen_and_vec(vece, tmp, tmp, tcg_constant_vec_matching(t, vece, 1)); - gen_shr_vec(vece, a, a, 1); - gen_shr_vec(vece, b, b, 1); - tcg_gen_add_vec(vece, t, a, b); - tcg_gen_add_vec(vece, t, t, tmp); -} - -static void gen_vavg_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) -{ - do_vavg(vece, t, a, b, tcg_gen_sari_vec, tcg_gen_and_vec); -} - -static void gen_vavg_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) -{ - do_vavg(vece, t, a, b, tcg_gen_shri_vec, tcg_gen_and_vec); -} - -static void gen_vavgr_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) -{ - do_vavg(vece, t, a, b, tcg_gen_sari_vec, tcg_gen_or_vec); -} - -static void gen_vavgr_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) -{ - do_vavg(vece, t, a, b, tcg_gen_shri_vec, tcg_gen_or_vec); -} - -static void do_vavg_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, - uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_sari_vec, INDEX_op_add_vec, 0 - }; - static const GVecGen3 op[4] = { - { - .fniv = gen_vavg_s, - .fno = gen_helper_vavg_b, - .opt_opc = vecop_list, - .vece = MO_8 - }, - { - .fniv = gen_vavg_s, - .fno = gen_helper_vavg_h, - .opt_opc = vecop_list, - .vece = MO_16 - }, - { - .fniv = gen_vavg_s, - .fno = gen_helper_vavg_w, - .opt_opc = vecop_list, - .vece = MO_32 - }, - { - .fniv = gen_vavg_s, - .fno = gen_helper_vavg_d, - .opt_opc = vecop_list, - .vece = MO_64 - }, - }; - - tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); -} - -static void do_vavg_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, - uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_shri_vec, INDEX_op_add_vec, 0 - }; - static const GVecGen3 op[4] = { - { - .fniv = gen_vavg_u, - .fno = gen_helper_vavg_bu, - .opt_opc = vecop_list, - .vece = MO_8 - }, - { - .fniv = gen_vavg_u, - .fno = gen_helper_vavg_hu, - .opt_opc = vecop_list, - .vece = MO_16 - }, - { - .fniv = gen_vavg_u, - .fno = gen_helper_vavg_wu, - .opt_opc = vecop_list, - .vece = MO_32 - }, - { - .fniv = gen_vavg_u, - .fno = gen_helper_vavg_du, - .opt_opc = vecop_list, - .vece = MO_64 - }, - }; - - tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); -} - -TRANS(vavg_b, LSX, gvec_vvv, MO_8, do_vavg_s) -TRANS(vavg_h, LSX, gvec_vvv, MO_16, do_vavg_s) -TRANS(vavg_w, LSX, gvec_vvv, MO_32, do_vavg_s) -TRANS(vavg_d, LSX, gvec_vvv, MO_64, do_vavg_s) -TRANS(vavg_bu, LSX, gvec_vvv, MO_8, do_vavg_u) -TRANS(vavg_hu, LSX, gvec_vvv, MO_16, do_vavg_u) -TRANS(vavg_wu, LSX, gvec_vvv, MO_32, do_vavg_u) -TRANS(vavg_du, LSX, gvec_vvv, MO_64, do_vavg_u) -TRANS(xvavg_b, LASX, gvec_xxx, MO_8, do_vavg_s) -TRANS(xvavg_h, LASX, gvec_xxx, MO_16, do_vavg_s) -TRANS(xvavg_w, LASX, gvec_xxx, MO_32, do_vavg_s) -TRANS(xvavg_d, LASX, gvec_xxx, MO_64, do_vavg_s) -TRANS(xvavg_bu, LASX, gvec_xxx, MO_8, do_vavg_u) -TRANS(xvavg_hu, LASX, gvec_xxx, MO_16, do_vavg_u) -TRANS(xvavg_wu, LASX, gvec_xxx, MO_32, do_vavg_u) -TRANS(xvavg_du, LASX, gvec_xxx, MO_64, do_vavg_u) - -static void do_vavgr_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, - uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_sari_vec, INDEX_op_add_vec, 0 - }; - static const GVecGen3 op[4] = { - { - .fniv = gen_vavgr_s, - .fno = gen_helper_vavgr_b, - .opt_opc = vecop_list, - .vece = MO_8 - }, - { - .fniv = 
gen_vavgr_s, - .fno = gen_helper_vavgr_h, - .opt_opc = vecop_list, - .vece = MO_16 - }, - { - .fniv = gen_vavgr_s, - .fno = gen_helper_vavgr_w, - .opt_opc = vecop_list, - .vece = MO_32 - }, - { - .fniv = gen_vavgr_s, - .fno = gen_helper_vavgr_d, - .opt_opc = vecop_list, - .vece = MO_64 - }, - }; - - tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); -} - -static void do_vavgr_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, - uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_shri_vec, INDEX_op_add_vec, 0 - }; - static const GVecGen3 op[4] = { - { - .fniv = gen_vavgr_u, - .fno = gen_helper_vavgr_bu, - .opt_opc = vecop_list, - .vece = MO_8 - }, - { - .fniv = gen_vavgr_u, - .fno = gen_helper_vavgr_hu, - .opt_opc = vecop_list, - .vece = MO_16 - }, - { - .fniv = gen_vavgr_u, - .fno = gen_helper_vavgr_wu, - .opt_opc = vecop_list, - .vece = MO_32 - }, - { - .fniv = gen_vavgr_u, - .fno = gen_helper_vavgr_du, - .opt_opc = vecop_list, - .vece = MO_64 - }, - }; - - tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); -} - -TRANS(vavgr_b, LSX, gvec_vvv, MO_8, do_vavgr_s) -TRANS(vavgr_h, LSX, gvec_vvv, MO_16, do_vavgr_s) -TRANS(vavgr_w, LSX, gvec_vvv, MO_32, do_vavgr_s) -TRANS(vavgr_d, LSX, gvec_vvv, MO_64, do_vavgr_s) -TRANS(vavgr_bu, LSX, gvec_vvv, MO_8, do_vavgr_u) -TRANS(vavgr_hu, LSX, gvec_vvv, MO_16, do_vavgr_u) -TRANS(vavgr_wu, LSX, gvec_vvv, MO_32, do_vavgr_u) -TRANS(vavgr_du, LSX, gvec_vvv, MO_64, do_vavgr_u) -TRANS(xvavgr_b, LASX, gvec_xxx, MO_8, do_vavgr_s) -TRANS(xvavgr_h, LASX, gvec_xxx, MO_16, do_vavgr_s) -TRANS(xvavgr_w, LASX, gvec_xxx, MO_32, do_vavgr_s) -TRANS(xvavgr_d, LASX, gvec_xxx, MO_64, do_vavgr_s) -TRANS(xvavgr_bu, LASX, gvec_xxx, MO_8, do_vavgr_u) -TRANS(xvavgr_hu, LASX, gvec_xxx, MO_16, do_vavgr_u) -TRANS(xvavgr_wu, LASX, gvec_xxx, MO_32, do_vavgr_u) -TRANS(xvavgr_du, LASX, gvec_xxx, MO_64, do_vavgr_u) - -static void gen_vabsd_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) -{ - tcg_gen_smax_vec(vece, t, a, b); - tcg_gen_smin_vec(vece, a, a, b); - tcg_gen_sub_vec(vece, t, t, a); -} - -static void do_vabsd_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, - uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_smax_vec, INDEX_op_smin_vec, INDEX_op_sub_vec, 0 - }; - static const GVecGen3 op[4] = { - { - .fniv = gen_vabsd_s, - .fno = gen_helper_vabsd_b, - .opt_opc = vecop_list, - .vece = MO_8 - }, - { - .fniv = gen_vabsd_s, - .fno = gen_helper_vabsd_h, - .opt_opc = vecop_list, - .vece = MO_16 - }, - { - .fniv = gen_vabsd_s, - .fno = gen_helper_vabsd_w, - .opt_opc = vecop_list, - .vece = MO_32 - }, - { - .fniv = gen_vabsd_s, - .fno = gen_helper_vabsd_d, - .opt_opc = vecop_list, - .vece = MO_64 - }, - }; - - tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); -} - -static void gen_vabsd_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) -{ - tcg_gen_umax_vec(vece, t, a, b); - tcg_gen_umin_vec(vece, a, a, b); - tcg_gen_sub_vec(vece, t, t, a); -} - -static void do_vabsd_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, - uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_umax_vec, INDEX_op_umin_vec, INDEX_op_sub_vec, 0 - }; - static const GVecGen3 op[4] = { - { - .fniv = gen_vabsd_u, - .fno = gen_helper_vabsd_bu, - .opt_opc = vecop_list, - .vece = MO_8 - }, - { - .fniv = gen_vabsd_u, - .fno = gen_helper_vabsd_hu, - .opt_opc = vecop_list, - .vece = MO_16 - }, - { - .fniv = 
gen_vabsd_u, - .fno = gen_helper_vabsd_wu, - .opt_opc = vecop_list, - .vece = MO_32 - }, - { - .fniv = gen_vabsd_u, - .fno = gen_helper_vabsd_du, - .opt_opc = vecop_list, - .vece = MO_64 - }, - }; - - tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); -} - -TRANS(vabsd_b, LSX, gvec_vvv, MO_8, do_vabsd_s) -TRANS(vabsd_h, LSX, gvec_vvv, MO_16, do_vabsd_s) -TRANS(vabsd_w, LSX, gvec_vvv, MO_32, do_vabsd_s) -TRANS(vabsd_d, LSX, gvec_vvv, MO_64, do_vabsd_s) -TRANS(vabsd_bu, LSX, gvec_vvv, MO_8, do_vabsd_u) -TRANS(vabsd_hu, LSX, gvec_vvv, MO_16, do_vabsd_u) -TRANS(vabsd_wu, LSX, gvec_vvv, MO_32, do_vabsd_u) -TRANS(vabsd_du, LSX, gvec_vvv, MO_64, do_vabsd_u) -TRANS(xvabsd_b, LASX, gvec_xxx, MO_8, do_vabsd_s) -TRANS(xvabsd_h, LASX, gvec_xxx, MO_16, do_vabsd_s) -TRANS(xvabsd_w, LASX, gvec_xxx, MO_32, do_vabsd_s) -TRANS(xvabsd_d, LASX, gvec_xxx, MO_64, do_vabsd_s) -TRANS(xvabsd_bu, LASX, gvec_xxx, MO_8, do_vabsd_u) -TRANS(xvabsd_hu, LASX, gvec_xxx, MO_16, do_vabsd_u) -TRANS(xvabsd_wu, LASX, gvec_xxx, MO_32, do_vabsd_u) -TRANS(xvabsd_du, LASX, gvec_xxx, MO_64, do_vabsd_u) - -static void gen_vadda(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) -{ - TCGv_vec t1, t2; - - t1 = tcg_temp_new_vec_matching(a); - t2 = tcg_temp_new_vec_matching(b); - - tcg_gen_abs_vec(vece, t1, a); - tcg_gen_abs_vec(vece, t2, b); - tcg_gen_add_vec(vece, t, t1, t2); -} - -static void do_vadda(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, - uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_abs_vec, INDEX_op_add_vec, 0 - }; - static const GVecGen3 op[4] = { - { - .fniv = gen_vadda, - .fno = gen_helper_vadda_b, - .opt_opc = vecop_list, - .vece = MO_8 - }, - { - .fniv = gen_vadda, - .fno = gen_helper_vadda_h, - .opt_opc = vecop_list, - .vece = MO_16 - }, - { - .fniv = gen_vadda, - .fno = gen_helper_vadda_w, - .opt_opc = vecop_list, - .vece = MO_32 - }, - { - .fniv = gen_vadda, - .fno = gen_helper_vadda_d, - .opt_opc = vecop_list, - .vece = MO_64 - }, - }; - - tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); -} - -TRANS(vadda_b, LSX, gvec_vvv, MO_8, do_vadda) -TRANS(vadda_h, LSX, gvec_vvv, MO_16, do_vadda) -TRANS(vadda_w, LSX, gvec_vvv, MO_32, do_vadda) -TRANS(vadda_d, LSX, gvec_vvv, MO_64, do_vadda) -TRANS(xvadda_b, LASX, gvec_xxx, MO_8, do_vadda) -TRANS(xvadda_h, LASX, gvec_xxx, MO_16, do_vadda) -TRANS(xvadda_w, LASX, gvec_xxx, MO_32, do_vadda) -TRANS(xvadda_d, LASX, gvec_xxx, MO_64, do_vadda) - -TRANS(vmax_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_smax) -TRANS(vmax_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_smax) -TRANS(vmax_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_smax) -TRANS(vmax_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_smax) -TRANS(vmax_bu, LSX, gvec_vvv, MO_8, tcg_gen_gvec_umax) -TRANS(vmax_hu, LSX, gvec_vvv, MO_16, tcg_gen_gvec_umax) -TRANS(vmax_wu, LSX, gvec_vvv, MO_32, tcg_gen_gvec_umax) -TRANS(vmax_du, LSX, gvec_vvv, MO_64, tcg_gen_gvec_umax) -TRANS(xvmax_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_smax) -TRANS(xvmax_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_smax) -TRANS(xvmax_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_smax) -TRANS(xvmax_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_smax) -TRANS(xvmax_bu, LASX, gvec_xxx, MO_8, tcg_gen_gvec_umax) -TRANS(xvmax_hu, LASX, gvec_xxx, MO_16, tcg_gen_gvec_umax) -TRANS(xvmax_wu, LASX, gvec_xxx, MO_32, tcg_gen_gvec_umax) -TRANS(xvmax_du, LASX, gvec_xxx, MO_64, tcg_gen_gvec_umax) - -TRANS(vmin_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_smin) -TRANS(vmin_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_smin) -TRANS(vmin_w, LSX, gvec_vvv, MO_32, 
tcg_gen_gvec_smin) -TRANS(vmin_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_smin) -TRANS(vmin_bu, LSX, gvec_vvv, MO_8, tcg_gen_gvec_umin) -TRANS(vmin_hu, LSX, gvec_vvv, MO_16, tcg_gen_gvec_umin) -TRANS(vmin_wu, LSX, gvec_vvv, MO_32, tcg_gen_gvec_umin) -TRANS(vmin_du, LSX, gvec_vvv, MO_64, tcg_gen_gvec_umin) -TRANS(xvmin_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_smin) -TRANS(xvmin_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_smin) -TRANS(xvmin_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_smin) -TRANS(xvmin_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_smin) -TRANS(xvmin_bu, LASX, gvec_xxx, MO_8, tcg_gen_gvec_umin) -TRANS(xvmin_hu, LASX, gvec_xxx, MO_16, tcg_gen_gvec_umin) -TRANS(xvmin_wu, LASX, gvec_xxx, MO_32, tcg_gen_gvec_umin) -TRANS(xvmin_du, LASX, gvec_xxx, MO_64, tcg_gen_gvec_umin) - -static void gen_vmini_s(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm) -{ - tcg_gen_smin_vec(vece, t, a, tcg_constant_vec_matching(t, vece, imm)); -} - -static void gen_vmini_u(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm) -{ - tcg_gen_umin_vec(vece, t, a, tcg_constant_vec_matching(t, vece, imm)); -} - -static void gen_vmaxi_s(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm) -{ - tcg_gen_smax_vec(vece, t, a, tcg_constant_vec_matching(t, vece, imm)); -} - -static void gen_vmaxi_u(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm) -{ - tcg_gen_umax_vec(vece, t, a, tcg_constant_vec_matching(t, vece, imm)); -} - -static void do_vmini_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, - int64_t imm, uint32_t oprsz, uint32_t maxsz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_smin_vec, 0 - }; - static const GVecGen2i op[4] = { - { - .fniv = gen_vmini_s, - .fnoi = gen_helper_vmini_b, - .opt_opc = vecop_list, - .vece = MO_8 - }, - { - .fniv = gen_vmini_s, - .fnoi = gen_helper_vmini_h, - .opt_opc = vecop_list, - .vece = MO_16 - }, - { - .fniv = gen_vmini_s, - .fnoi = gen_helper_vmini_w, - .opt_opc = vecop_list, - .vece = MO_32 - }, - { - .fniv = gen_vmini_s, - .fnoi = gen_helper_vmini_d, - .opt_opc = vecop_list, - .vece = MO_64 - }, - }; - - tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]); -} - -static void do_vmini_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, - int64_t imm, uint32_t oprsz, uint32_t maxsz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_umin_vec, 0 - }; - static const GVecGen2i op[4] = { - { - .fniv = gen_vmini_u, - .fnoi = gen_helper_vmini_bu, - .opt_opc = vecop_list, - .vece = MO_8 - }, - { - .fniv = gen_vmini_u, - .fnoi = gen_helper_vmini_hu, - .opt_opc = vecop_list, - .vece = MO_16 - }, - { - .fniv = gen_vmini_u, - .fnoi = gen_helper_vmini_wu, - .opt_opc = vecop_list, - .vece = MO_32 - }, - { - .fniv = gen_vmini_u, - .fnoi = gen_helper_vmini_du, - .opt_opc = vecop_list, - .vece = MO_64 - }, - }; - - tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]); -} - -TRANS(vmini_b, LSX, gvec_vv_i, MO_8, do_vmini_s) -TRANS(vmini_h, LSX, gvec_vv_i, MO_16, do_vmini_s) -TRANS(vmini_w, LSX, gvec_vv_i, MO_32, do_vmini_s) -TRANS(vmini_d, LSX, gvec_vv_i, MO_64, do_vmini_s) -TRANS(vmini_bu, LSX, gvec_vv_i, MO_8, do_vmini_u) -TRANS(vmini_hu, LSX, gvec_vv_i, MO_16, do_vmini_u) -TRANS(vmini_wu, LSX, gvec_vv_i, MO_32, do_vmini_u) -TRANS(vmini_du, LSX, gvec_vv_i, MO_64, do_vmini_u) -TRANS(xvmini_b, LASX, gvec_xx_i, MO_8, do_vmini_s) -TRANS(xvmini_h, LASX, gvec_xx_i, MO_16, do_vmini_s) -TRANS(xvmini_w, LASX, gvec_xx_i, MO_32, do_vmini_s) -TRANS(xvmini_d, LASX, gvec_xx_i, MO_64, do_vmini_s) -TRANS(xvmini_bu, LASX, gvec_xx_i, MO_8, do_vmini_u) -TRANS(xvmini_hu, LASX, gvec_xx_i, MO_16, 
do_vmini_u) -TRANS(xvmini_wu, LASX, gvec_xx_i, MO_32, do_vmini_u) -TRANS(xvmini_du, LASX, gvec_xx_i, MO_64, do_vmini_u) - -static void do_vmaxi_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, - int64_t imm, uint32_t oprsz, uint32_t maxsz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_smax_vec, 0 - }; - static const GVecGen2i op[4] = { - { - .fniv = gen_vmaxi_s, - .fnoi = gen_helper_vmaxi_b, - .opt_opc = vecop_list, - .vece = MO_8 - }, - { - .fniv = gen_vmaxi_s, - .fnoi = gen_helper_vmaxi_h, - .opt_opc = vecop_list, - .vece = MO_16 - }, - { - .fniv = gen_vmaxi_s, - .fnoi = gen_helper_vmaxi_w, - .opt_opc = vecop_list, - .vece = MO_32 - }, - { - .fniv = gen_vmaxi_s, - .fnoi = gen_helper_vmaxi_d, - .opt_opc = vecop_list, - .vece = MO_64 - }, - }; - - tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]); -} - -static void do_vmaxi_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, - int64_t imm, uint32_t oprsz, uint32_t maxsz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_umax_vec, 0 - }; - static const GVecGen2i op[4] = { - { - .fniv = gen_vmaxi_u, - .fnoi = gen_helper_vmaxi_bu, - .opt_opc = vecop_list, - .vece = MO_8 - }, - { - .fniv = gen_vmaxi_u, - .fnoi = gen_helper_vmaxi_hu, - .opt_opc = vecop_list, - .vece = MO_16 - }, - { - .fniv = gen_vmaxi_u, - .fnoi = gen_helper_vmaxi_wu, - .opt_opc = vecop_list, - .vece = MO_32 - }, - { - .fniv = gen_vmaxi_u, - .fnoi = gen_helper_vmaxi_du, - .opt_opc = vecop_list, - .vece = MO_64 - }, - }; - - tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]); -} - -TRANS(vmaxi_b, LSX, gvec_vv_i, MO_8, do_vmaxi_s) -TRANS(vmaxi_h, LSX, gvec_vv_i, MO_16, do_vmaxi_s) -TRANS(vmaxi_w, LSX, gvec_vv_i, MO_32, do_vmaxi_s) -TRANS(vmaxi_d, LSX, gvec_vv_i, MO_64, do_vmaxi_s) -TRANS(vmaxi_bu, LSX, gvec_vv_i, MO_8, do_vmaxi_u) -TRANS(vmaxi_hu, LSX, gvec_vv_i, MO_16, do_vmaxi_u) -TRANS(vmaxi_wu, LSX, gvec_vv_i, MO_32, do_vmaxi_u) -TRANS(vmaxi_du, LSX, gvec_vv_i, MO_64, do_vmaxi_u) -TRANS(xvmaxi_b, LASX, gvec_xx_i, MO_8, do_vmaxi_s) -TRANS(xvmaxi_h, LASX, gvec_xx_i, MO_16, do_vmaxi_s) -TRANS(xvmaxi_w, LASX, gvec_xx_i, MO_32, do_vmaxi_s) -TRANS(xvmaxi_d, LASX, gvec_xx_i, MO_64, do_vmaxi_s) -TRANS(xvmaxi_bu, LASX, gvec_xx_i, MO_8, do_vmaxi_u) -TRANS(xvmaxi_hu, LASX, gvec_xx_i, MO_16, do_vmaxi_u) -TRANS(xvmaxi_wu, LASX, gvec_xx_i, MO_32, do_vmaxi_u) -TRANS(xvmaxi_du, LASX, gvec_xx_i, MO_64, do_vmaxi_u) - -TRANS(vmul_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_mul) -TRANS(vmul_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_mul) -TRANS(vmul_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_mul) -TRANS(vmul_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_mul) -TRANS(xvmul_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_mul) -TRANS(xvmul_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_mul) -TRANS(xvmul_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_mul) -TRANS(xvmul_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_mul) - -static void gen_vmuh_w(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) -{ - TCGv_i32 discard = tcg_temp_new_i32(); - tcg_gen_muls2_i32(discard, t, a, b); -} - -static void gen_vmuh_d(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) -{ - TCGv_i64 discard = tcg_temp_new_i64(); - tcg_gen_muls2_i64(discard, t, a, b); -} - -static void do_vmuh_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, - uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) -{ - static const GVecGen3 op[4] = { - { - .fno = gen_helper_vmuh_b, - .vece = MO_8 - }, - { - .fno = gen_helper_vmuh_h, - .vece = MO_16 - }, - { - .fni4 = gen_vmuh_w, - .fno = gen_helper_vmuh_w, - .vece = MO_32 - }, - { - .fni8 = gen_vmuh_d, - .fno = gen_helper_vmuh_d, - .vece = MO_64 - }, 
- }; - - tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); -} - -TRANS(vmuh_b, LSX, gvec_vvv, MO_8, do_vmuh_s) -TRANS(vmuh_h, LSX, gvec_vvv, MO_16, do_vmuh_s) -TRANS(vmuh_w, LSX, gvec_vvv, MO_32, do_vmuh_s) -TRANS(vmuh_d, LSX, gvec_vvv, MO_64, do_vmuh_s) -TRANS(xvmuh_b, LASX, gvec_xxx, MO_8, do_vmuh_s) -TRANS(xvmuh_h, LASX, gvec_xxx, MO_16, do_vmuh_s) -TRANS(xvmuh_w, LASX, gvec_xxx, MO_32, do_vmuh_s) -TRANS(xvmuh_d, LASX, gvec_xxx, MO_64, do_vmuh_s) - -static void gen_vmuh_wu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) -{ - TCGv_i32 discard = tcg_temp_new_i32(); - tcg_gen_mulu2_i32(discard, t, a, b); -} - -static void gen_vmuh_du(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) -{ - TCGv_i64 discard = tcg_temp_new_i64(); - tcg_gen_mulu2_i64(discard, t, a, b); -} - -static void do_vmuh_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, - uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) -{ - static const GVecGen3 op[4] = { - { - .fno = gen_helper_vmuh_bu, - .vece = MO_8 - }, - { - .fno = gen_helper_vmuh_hu, - .vece = MO_16 - }, - { - .fni4 = gen_vmuh_wu, - .fno = gen_helper_vmuh_wu, - .vece = MO_32 - }, - { - .fni8 = gen_vmuh_du, - .fno = gen_helper_vmuh_du, - .vece = MO_64 - }, - }; - - tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); -} - -TRANS(vmuh_bu, LSX, gvec_vvv, MO_8, do_vmuh_u) -TRANS(vmuh_hu, LSX, gvec_vvv, MO_16, do_vmuh_u) -TRANS(vmuh_wu, LSX, gvec_vvv, MO_32, do_vmuh_u) -TRANS(vmuh_du, LSX, gvec_vvv, MO_64, do_vmuh_u) -TRANS(xvmuh_bu, LASX, gvec_xxx, MO_8, do_vmuh_u) -TRANS(xvmuh_hu, LASX, gvec_xxx, MO_16, do_vmuh_u) -TRANS(xvmuh_wu, LASX, gvec_xxx, MO_32, do_vmuh_u) -TRANS(xvmuh_du, LASX, gvec_xxx, MO_64, do_vmuh_u) - -static void gen_vmulwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) -{ - TCGv_vec t1, t2; - int halfbits = 4 << vece; - - t1 = tcg_temp_new_vec_matching(a); - t2 = tcg_temp_new_vec_matching(b); - tcg_gen_shli_vec(vece, t1, a, halfbits); - tcg_gen_sari_vec(vece, t1, t1, halfbits); - tcg_gen_shli_vec(vece, t2, b, halfbits); - tcg_gen_sari_vec(vece, t2, t2, halfbits); - tcg_gen_mul_vec(vece, t, t1, t2); -} - -static void gen_vmulwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) -{ - TCGv_i32 t1, t2; - - t1 = tcg_temp_new_i32(); - t2 = tcg_temp_new_i32(); - tcg_gen_ext16s_i32(t1, a); - tcg_gen_ext16s_i32(t2, b); - tcg_gen_mul_i32(t, t1, t2); -} - -static void gen_vmulwev_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) -{ - TCGv_i64 t1, t2; - - t1 = tcg_temp_new_i64(); - t2 = tcg_temp_new_i64(); - tcg_gen_ext32s_i64(t1, a); - tcg_gen_ext32s_i64(t2, b); - tcg_gen_mul_i64(t, t1, t2); -} - -static void do_vmulwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, - uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_mul_vec, 0 - }; - static const GVecGen3 op[3] = { - { - .fniv = gen_vmulwev_s, - .fno = gen_helper_vmulwev_h_b, - .opt_opc = vecop_list, - .vece = MO_16 - }, - { - .fni4 = gen_vmulwev_w_h, - .fniv = gen_vmulwev_s, - .fno = gen_helper_vmulwev_w_h, - .opt_opc = vecop_list, - .vece = MO_32 - }, - { - .fni8 = gen_vmulwev_d_w, - .fniv = gen_vmulwev_s, - .fno = gen_helper_vmulwev_d_w, - .opt_opc = vecop_list, - .vece = MO_64 - }, - }; - - tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); -} - -TRANS(vmulwev_h_b, LSX, gvec_vvv, MO_8, do_vmulwev_s) -TRANS(vmulwev_w_h, LSX, gvec_vvv, MO_16, do_vmulwev_s) -TRANS(vmulwev_d_w, LSX, gvec_vvv, MO_32, do_vmulwev_s) -TRANS(xvmulwev_h_b, LASX, gvec_xxx, MO_8, do_vmulwev_s) -TRANS(xvmulwev_w_h, LASX, gvec_xxx, MO_16, 
do_vmulwev_s) -TRANS(xvmulwev_d_w, LASX, gvec_xxx, MO_32, do_vmulwev_s) - -static void tcg_gen_mulus2_i64(TCGv_i64 rl, TCGv_i64 rh, - TCGv_i64 arg1, TCGv_i64 arg2) -{ - tcg_gen_mulsu2_i64(rl, rh, arg2, arg1); -} - -static bool gen_vmul_q_vl(DisasContext *ctx, - arg_vvv *a, uint32_t oprsz, int idx1, int idx2, - void (*func)(TCGv_i64, TCGv_i64, - TCGv_i64, TCGv_i64)) -{ - TCGv_i64 rh, rl, arg1, arg2; - int i; - - if (!check_vec(ctx, oprsz)) { - return true; - } - - rh = tcg_temp_new_i64(); - rl = tcg_temp_new_i64(); - arg1 = tcg_temp_new_i64(); - arg2 = tcg_temp_new_i64(); - - for (i = 0; i < oprsz / 16; i++) { - get_vreg64(arg1, a->vj, 2 * i + idx1); - get_vreg64(arg2, a->vk, 2 * i + idx2); - - func(rl, rh, arg1, arg2); - - set_vreg64(rh, a->vd, 2 * i + 1); - set_vreg64(rl, a->vd, 2 * i); - } - - return true; -} - -static bool gen_vmul_q(DisasContext *ctx, arg_vvv *a, int idx1, int idx2, - void (*func)(TCGv_i64, TCGv_i64, - TCGv_i64, TCGv_i64)) -{ - return gen_vmul_q_vl(ctx, a, 16, idx1, idx2, func); -} - -static bool gen_xvmul_q(DisasContext *ctx, arg_vvv *a, int idx1, int idx2, - void (*func)(TCGv_i64, TCGv_i64, - TCGv_i64, TCGv_i64)) -{ - return gen_vmul_q_vl(ctx, a, 32, idx1, idx2, func); -} - -TRANS(vmulwev_q_d, LSX, gen_vmul_q, 0, 0, tcg_gen_muls2_i64) -TRANS(vmulwod_q_d, LSX, gen_vmul_q, 1, 1, tcg_gen_muls2_i64) -TRANS(vmulwev_q_du, LSX, gen_vmul_q, 0, 0, tcg_gen_mulu2_i64) -TRANS(vmulwod_q_du, LSX, gen_vmul_q, 1, 1, tcg_gen_mulu2_i64) -TRANS(vmulwev_q_du_d, LSX, gen_vmul_q, 0, 0, tcg_gen_mulus2_i64) -TRANS(vmulwod_q_du_d, LSX, gen_vmul_q, 1, 1, tcg_gen_mulus2_i64) -TRANS(xvmulwev_q_d, LASX, gen_xvmul_q, 0, 0, tcg_gen_muls2_i64) -TRANS(xvmulwod_q_d, LASX, gen_xvmul_q, 1, 1, tcg_gen_muls2_i64) -TRANS(xvmulwev_q_du, LASX, gen_xvmul_q, 0, 0, tcg_gen_mulu2_i64) -TRANS(xvmulwod_q_du, LASX, gen_xvmul_q, 1, 1, tcg_gen_mulu2_i64) -TRANS(xvmulwev_q_du_d, LASX, gen_xvmul_q, 0, 0, tcg_gen_mulus2_i64) -TRANS(xvmulwod_q_du_d, LASX, gen_xvmul_q, 1, 1, tcg_gen_mulus2_i64) - -static void gen_vmulwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) -{ - TCGv_vec t1, t2; - int halfbits = 4 << vece; - - t1 = tcg_temp_new_vec_matching(a); - t2 = tcg_temp_new_vec_matching(b); - tcg_gen_sari_vec(vece, t1, a, halfbits); - tcg_gen_sari_vec(vece, t2, b, halfbits); - tcg_gen_mul_vec(vece, t, t1, t2); -} - -static void gen_vmulwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) -{ - TCGv_i32 t1, t2; - - t1 = tcg_temp_new_i32(); - t2 = tcg_temp_new_i32(); - tcg_gen_sari_i32(t1, a, 16); - tcg_gen_sari_i32(t2, b, 16); - tcg_gen_mul_i32(t, t1, t2); -} - -static void gen_vmulwod_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) -{ - TCGv_i64 t1, t2; - - t1 = tcg_temp_new_i64(); - t2 = tcg_temp_new_i64(); - tcg_gen_sari_i64(t1, a, 32); - tcg_gen_sari_i64(t2, b, 32); - tcg_gen_mul_i64(t, t1, t2); -} - -static void do_vmulwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, - uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_sari_vec, INDEX_op_mul_vec, 0 - }; - static const GVecGen3 op[3] = { - { - .fniv = gen_vmulwod_s, - .fno = gen_helper_vmulwod_h_b, - .opt_opc = vecop_list, - .vece = MO_16 - }, - { - .fni4 = gen_vmulwod_w_h, - .fniv = gen_vmulwod_s, - .fno = gen_helper_vmulwod_w_h, - .opt_opc = vecop_list, - .vece = MO_32 - }, - { - .fni8 = gen_vmulwod_d_w, - .fniv = gen_vmulwod_s, - .fno = gen_helper_vmulwod_d_w, - .opt_opc = vecop_list, - .vece = MO_64 - }, - }; - - tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); -} - -TRANS(vmulwod_h_b, LSX, gvec_vvv, 
MO_8, do_vmulwod_s) -TRANS(vmulwod_w_h, LSX, gvec_vvv, MO_16, do_vmulwod_s) -TRANS(vmulwod_d_w, LSX, gvec_vvv, MO_32, do_vmulwod_s) -TRANS(xvmulwod_h_b, LASX, gvec_xxx, MO_8, do_vmulwod_s) -TRANS(xvmulwod_w_h, LASX, gvec_xxx, MO_16, do_vmulwod_s) -TRANS(xvmulwod_d_w, LASX, gvec_xxx, MO_32, do_vmulwod_s) - -static void gen_vmulwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) -{ - TCGv_vec t1, t2, mask; - - t1 = tcg_temp_new_vec_matching(a); - t2 = tcg_temp_new_vec_matching(b); - mask = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece)); - tcg_gen_and_vec(vece, t1, a, mask); - tcg_gen_and_vec(vece, t2, b, mask); - tcg_gen_mul_vec(vece, t, t1, t2); -} - -static void gen_vmulwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) -{ - TCGv_i32 t1, t2; - - t1 = tcg_temp_new_i32(); - t2 = tcg_temp_new_i32(); - tcg_gen_ext16u_i32(t1, a); - tcg_gen_ext16u_i32(t2, b); - tcg_gen_mul_i32(t, t1, t2); -} - -static void gen_vmulwev_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) -{ - TCGv_i64 t1, t2; - - t1 = tcg_temp_new_i64(); - t2 = tcg_temp_new_i64(); - tcg_gen_ext32u_i64(t1, a); - tcg_gen_ext32u_i64(t2, b); - tcg_gen_mul_i64(t, t1, t2); -} - -static void do_vmulwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, - uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_mul_vec, 0 - }; - static const GVecGen3 op[3] = { - { - .fniv = gen_vmulwev_u, - .fno = gen_helper_vmulwev_h_bu, - .opt_opc = vecop_list, - .vece = MO_16 - }, - { - .fni4 = gen_vmulwev_w_hu, - .fniv = gen_vmulwev_u, - .fno = gen_helper_vmulwev_w_hu, - .opt_opc = vecop_list, - .vece = MO_32 - }, - { - .fni8 = gen_vmulwev_d_wu, - .fniv = gen_vmulwev_u, - .fno = gen_helper_vmulwev_d_wu, - .opt_opc = vecop_list, - .vece = MO_64 - }, - }; - - tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); -} - -TRANS(vmulwev_h_bu, LSX, gvec_vvv, MO_8, do_vmulwev_u) -TRANS(vmulwev_w_hu, LSX, gvec_vvv, MO_16, do_vmulwev_u) -TRANS(vmulwev_d_wu, LSX, gvec_vvv, MO_32, do_vmulwev_u) -TRANS(xvmulwev_h_bu, LASX, gvec_xxx, MO_8, do_vmulwev_u) -TRANS(xvmulwev_w_hu, LASX, gvec_xxx, MO_16, do_vmulwev_u) -TRANS(xvmulwev_d_wu, LASX, gvec_xxx, MO_32, do_vmulwev_u) - -static void gen_vmulwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) -{ - TCGv_vec t1, t2; - int halfbits = 4 << vece; - - t1 = tcg_temp_new_vec_matching(a); - t2 = tcg_temp_new_vec_matching(b); - tcg_gen_shri_vec(vece, t1, a, halfbits); - tcg_gen_shri_vec(vece, t2, b, halfbits); - tcg_gen_mul_vec(vece, t, t1, t2); -} - -static void gen_vmulwod_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) -{ - TCGv_i32 t1, t2; - - t1 = tcg_temp_new_i32(); - t2 = tcg_temp_new_i32(); - tcg_gen_shri_i32(t1, a, 16); - tcg_gen_shri_i32(t2, b, 16); - tcg_gen_mul_i32(t, t1, t2); -} - -static void gen_vmulwod_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) -{ - TCGv_i64 t1, t2; - - t1 = tcg_temp_new_i64(); - t2 = tcg_temp_new_i64(); - tcg_gen_shri_i64(t1, a, 32); - tcg_gen_shri_i64(t2, b, 32); - tcg_gen_mul_i64(t, t1, t2); -} - -static void do_vmulwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, - uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_shri_vec, INDEX_op_mul_vec, 0 - }; - static const GVecGen3 op[3] = { - { - .fniv = gen_vmulwod_u, - .fno = gen_helper_vmulwod_h_bu, - .opt_opc = vecop_list, - .vece = MO_16 - }, - { - .fni4 = gen_vmulwod_w_hu, - .fniv = gen_vmulwod_u, - .fno = gen_helper_vmulwod_w_hu, - .opt_opc = vecop_list, - .vece = MO_32 - }, - { - .fni8 = gen_vmulwod_d_wu, - .fniv = 
gen_vmulwod_u, - .fno = gen_helper_vmulwod_d_wu, - .opt_opc = vecop_list, - .vece = MO_64 - }, - }; - - tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); -} - -TRANS(vmulwod_h_bu, LSX, gvec_vvv, MO_8, do_vmulwod_u) -TRANS(vmulwod_w_hu, LSX, gvec_vvv, MO_16, do_vmulwod_u) -TRANS(vmulwod_d_wu, LSX, gvec_vvv, MO_32, do_vmulwod_u) -TRANS(xvmulwod_h_bu, LASX, gvec_xxx, MO_8, do_vmulwod_u) -TRANS(xvmulwod_w_hu, LASX, gvec_xxx, MO_16, do_vmulwod_u) -TRANS(xvmulwod_d_wu, LASX, gvec_xxx, MO_32, do_vmulwod_u) - -static void gen_vmulwev_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) -{ - TCGv_vec t1, t2, mask; - int halfbits = 4 << vece; - - t1 = tcg_temp_new_vec_matching(a); - t2 = tcg_temp_new_vec_matching(b); - mask = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece)); - tcg_gen_and_vec(vece, t1, a, mask); - tcg_gen_shli_vec(vece, t2, b, halfbits); - tcg_gen_sari_vec(vece, t2, t2, halfbits); - tcg_gen_mul_vec(vece, t, t1, t2); -} - -static void gen_vmulwev_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) -{ - TCGv_i32 t1, t2; - - t1 = tcg_temp_new_i32(); - t2 = tcg_temp_new_i32(); - tcg_gen_ext16u_i32(t1, a); - tcg_gen_ext16s_i32(t2, b); - tcg_gen_mul_i32(t, t1, t2); -} - -static void gen_vmulwev_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) -{ - TCGv_i64 t1, t2; - - t1 = tcg_temp_new_i64(); - t2 = tcg_temp_new_i64(); - tcg_gen_ext32u_i64(t1, a); - tcg_gen_ext32s_i64(t2, b); - tcg_gen_mul_i64(t, t1, t2); -} - -static void do_vmulwev_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, - uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_mul_vec, 0 - }; - static const GVecGen3 op[3] = { - { - .fniv = gen_vmulwev_u_s, - .fno = gen_helper_vmulwev_h_bu_b, - .opt_opc = vecop_list, - .vece = MO_16 - }, - { - .fni4 = gen_vmulwev_w_hu_h, - .fniv = gen_vmulwev_u_s, - .fno = gen_helper_vmulwev_w_hu_h, - .opt_opc = vecop_list, - .vece = MO_32 - }, - { - .fni8 = gen_vmulwev_d_wu_w, - .fniv = gen_vmulwev_u_s, - .fno = gen_helper_vmulwev_d_wu_w, - .opt_opc = vecop_list, - .vece = MO_64 - }, - }; - - tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); -} - -TRANS(vmulwev_h_bu_b, LSX, gvec_vvv, MO_8, do_vmulwev_u_s) -TRANS(vmulwev_w_hu_h, LSX, gvec_vvv, MO_16, do_vmulwev_u_s) -TRANS(vmulwev_d_wu_w, LSX, gvec_vvv, MO_32, do_vmulwev_u_s) -TRANS(xvmulwev_h_bu_b, LASX, gvec_xxx, MO_8, do_vmulwev_u_s) -TRANS(xvmulwev_w_hu_h, LASX, gvec_xxx, MO_16, do_vmulwev_u_s) -TRANS(xvmulwev_d_wu_w, LASX, gvec_xxx, MO_32, do_vmulwev_u_s) - -static void gen_vmulwod_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) -{ - TCGv_vec t1, t2; - int halfbits = 4 << vece; - - t1 = tcg_temp_new_vec_matching(a); - t2 = tcg_temp_new_vec_matching(b); - tcg_gen_shri_vec(vece, t1, a, halfbits); - tcg_gen_sari_vec(vece, t2, b, halfbits); - tcg_gen_mul_vec(vece, t, t1, t2); -} - -static void gen_vmulwod_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) -{ - TCGv_i32 t1, t2; - - t1 = tcg_temp_new_i32(); - t2 = tcg_temp_new_i32(); - tcg_gen_shri_i32(t1, a, 16); - tcg_gen_sari_i32(t2, b, 16); - tcg_gen_mul_i32(t, t1, t2); -} -static void gen_vmulwod_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) -{ - TCGv_i64 t1, t2; - - t1 = tcg_temp_new_i64(); - t2 = tcg_temp_new_i64(); - tcg_gen_shri_i64(t1, a, 32); - tcg_gen_sari_i64(t2, b, 32); - tcg_gen_mul_i64(t, t1, t2); -} - -static void do_vmulwod_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, - uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) -{ - static const TCGOpcode 
vecop_list[] = { - INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_mul_vec, 0 - }; - static const GVecGen3 op[3] = { - { - .fniv = gen_vmulwod_u_s, - .fno = gen_helper_vmulwod_h_bu_b, - .opt_opc = vecop_list, - .vece = MO_16 - }, - { - .fni4 = gen_vmulwod_w_hu_h, - .fniv = gen_vmulwod_u_s, - .fno = gen_helper_vmulwod_w_hu_h, - .opt_opc = vecop_list, - .vece = MO_32 - }, - { - .fni8 = gen_vmulwod_d_wu_w, - .fniv = gen_vmulwod_u_s, - .fno = gen_helper_vmulwod_d_wu_w, - .opt_opc = vecop_list, - .vece = MO_64 - }, - }; - - tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); -} - -TRANS(vmulwod_h_bu_b, LSX, gvec_vvv, MO_8, do_vmulwod_u_s) -TRANS(vmulwod_w_hu_h, LSX, gvec_vvv, MO_16, do_vmulwod_u_s) -TRANS(vmulwod_d_wu_w, LSX, gvec_vvv, MO_32, do_vmulwod_u_s) -TRANS(xvmulwod_h_bu_b, LASX, gvec_xxx, MO_8, do_vmulwod_u_s) -TRANS(xvmulwod_w_hu_h, LASX, gvec_xxx, MO_16, do_vmulwod_u_s) -TRANS(xvmulwod_d_wu_w, LASX, gvec_xxx, MO_32, do_vmulwod_u_s) - -static void gen_vmadd(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) -{ - TCGv_vec t1; - - t1 = tcg_temp_new_vec_matching(t); - tcg_gen_mul_vec(vece, t1, a, b); - tcg_gen_add_vec(vece, t, t, t1); -} - -static void gen_vmadd_w(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) -{ - TCGv_i32 t1; - - t1 = tcg_temp_new_i32(); - tcg_gen_mul_i32(t1, a, b); - tcg_gen_add_i32(t, t, t1); -} - -static void gen_vmadd_d(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) -{ - TCGv_i64 t1; - - t1 = tcg_temp_new_i64(); - tcg_gen_mul_i64(t1, a, b); - tcg_gen_add_i64(t, t, t1); -} - -static void do_vmadd(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, - uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_mul_vec, INDEX_op_add_vec, 0 - }; - static const GVecGen3 op[4] = { - { - .fniv = gen_vmadd, - .fno = gen_helper_vmadd_b, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_8 - }, - { - .fniv = gen_vmadd, - .fno = gen_helper_vmadd_h, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_16 - }, - { - .fni4 = gen_vmadd_w, - .fniv = gen_vmadd, - .fno = gen_helper_vmadd_w, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_32 - }, - { - .fni8 = gen_vmadd_d, - .fniv = gen_vmadd, - .fno = gen_helper_vmadd_d, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_64 - }, - }; - - tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); -} - -TRANS(vmadd_b, LSX, gvec_vvv, MO_8, do_vmadd) -TRANS(vmadd_h, LSX, gvec_vvv, MO_16, do_vmadd) -TRANS(vmadd_w, LSX, gvec_vvv, MO_32, do_vmadd) -TRANS(vmadd_d, LSX, gvec_vvv, MO_64, do_vmadd) -TRANS(xvmadd_b, LASX, gvec_xxx, MO_8, do_vmadd) -TRANS(xvmadd_h, LASX, gvec_xxx, MO_16, do_vmadd) -TRANS(xvmadd_w, LASX, gvec_xxx, MO_32, do_vmadd) -TRANS(xvmadd_d, LASX, gvec_xxx, MO_64, do_vmadd) - -static void gen_vmsub(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) -{ - TCGv_vec t1; - - t1 = tcg_temp_new_vec_matching(t); - tcg_gen_mul_vec(vece, t1, a, b); - tcg_gen_sub_vec(vece, t, t, t1); -} - -static void gen_vmsub_w(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) -{ - TCGv_i32 t1; - - t1 = tcg_temp_new_i32(); - tcg_gen_mul_i32(t1, a, b); - tcg_gen_sub_i32(t, t, t1); -} - -static void gen_vmsub_d(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) -{ - TCGv_i64 t1; - - t1 = tcg_temp_new_i64(); - tcg_gen_mul_i64(t1, a, b); - tcg_gen_sub_i64(t, t, t1); -} - -static void do_vmsub(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, - uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_mul_vec, INDEX_op_sub_vec, 0 - }; - static const GVecGen3 op[4] = { 
- { - .fniv = gen_vmsub, - .fno = gen_helper_vmsub_b, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_8 - }, - { - .fniv = gen_vmsub, - .fno = gen_helper_vmsub_h, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_16 - }, - { - .fni4 = gen_vmsub_w, - .fniv = gen_vmsub, - .fno = gen_helper_vmsub_w, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_32 - }, - { - .fni8 = gen_vmsub_d, - .fniv = gen_vmsub, - .fno = gen_helper_vmsub_d, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_64 - }, - }; - - tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); -} - -TRANS(vmsub_b, LSX, gvec_vvv, MO_8, do_vmsub) -TRANS(vmsub_h, LSX, gvec_vvv, MO_16, do_vmsub) -TRANS(vmsub_w, LSX, gvec_vvv, MO_32, do_vmsub) -TRANS(vmsub_d, LSX, gvec_vvv, MO_64, do_vmsub) -TRANS(xvmsub_b, LASX, gvec_xxx, MO_8, do_vmsub) -TRANS(xvmsub_h, LASX, gvec_xxx, MO_16, do_vmsub) -TRANS(xvmsub_w, LASX, gvec_xxx, MO_32, do_vmsub) -TRANS(xvmsub_d, LASX, gvec_xxx, MO_64, do_vmsub) - -static void gen_vmaddwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) -{ - TCGv_vec t1, t2, t3; - int halfbits = 4 << vece; - - t1 = tcg_temp_new_vec_matching(a); - t2 = tcg_temp_new_vec_matching(b); - t3 = tcg_temp_new_vec_matching(t); - tcg_gen_shli_vec(vece, t1, a, halfbits); - tcg_gen_sari_vec(vece, t1, t1, halfbits); - tcg_gen_shli_vec(vece, t2, b, halfbits); - tcg_gen_sari_vec(vece, t2, t2, halfbits); - tcg_gen_mul_vec(vece, t3, t1, t2); - tcg_gen_add_vec(vece, t, t, t3); -} - -static void gen_vmaddwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) -{ - TCGv_i32 t1; - - t1 = tcg_temp_new_i32(); - gen_vmulwev_w_h(t1, a, b); - tcg_gen_add_i32(t, t, t1); -} - -static void gen_vmaddwev_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) -{ - TCGv_i64 t1; - - t1 = tcg_temp_new_i64(); - gen_vmulwev_d_w(t1, a, b); - tcg_gen_add_i64(t, t, t1); -} - -static void do_vmaddwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, - uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_shli_vec, INDEX_op_sari_vec, - INDEX_op_mul_vec, INDEX_op_add_vec, 0 - }; - static const GVecGen3 op[3] = { - { - .fniv = gen_vmaddwev_s, - .fno = gen_helper_vmaddwev_h_b, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_16 - }, - { - .fni4 = gen_vmaddwev_w_h, - .fniv = gen_vmaddwev_s, - .fno = gen_helper_vmaddwev_w_h, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_32 - }, - { - .fni8 = gen_vmaddwev_d_w, - .fniv = gen_vmaddwev_s, - .fno = gen_helper_vmaddwev_d_w, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_64 - }, - }; - - tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); -} - -TRANS(vmaddwev_h_b, LSX, gvec_vvv, MO_8, do_vmaddwev_s) -TRANS(vmaddwev_w_h, LSX, gvec_vvv, MO_16, do_vmaddwev_s) -TRANS(vmaddwev_d_w, LSX, gvec_vvv, MO_32, do_vmaddwev_s) -TRANS(xvmaddwev_h_b, LASX, gvec_xxx, MO_8, do_vmaddwev_s) -TRANS(xvmaddwev_w_h, LASX, gvec_xxx, MO_16, do_vmaddwev_s) -TRANS(xvmaddwev_d_w, LASX, gvec_xxx, MO_32, do_vmaddwev_s) - -static bool gen_vmadd_q_vl(DisasContext * ctx, - arg_vvv *a, uint32_t oprsz, int idx1, int idx2, - void (*func)(TCGv_i64, TCGv_i64, - TCGv_i64, TCGv_i64)) -{ - TCGv_i64 rh, rl, arg1, arg2, th, tl; - int i; - - if (!check_vec(ctx, oprsz)) { - return true; - } - - rh = tcg_temp_new_i64(); - rl = tcg_temp_new_i64(); - arg1 = tcg_temp_new_i64(); - arg2 = tcg_temp_new_i64(); - th = tcg_temp_new_i64(); - tl = tcg_temp_new_i64(); - - for (i = 0; i < oprsz / 16; i++) { - get_vreg64(arg1, a->vj, 2 * i + idx1); - get_vreg64(arg2, 
a->vk, 2 * i + idx2); - get_vreg64(rh, a->vd, 2 * i + 1); - get_vreg64(rl, a->vd, 2 * i); - - func(tl, th, arg1, arg2); - tcg_gen_add2_i64(rl, rh, rl, rh, tl, th); - - set_vreg64(rh, a->vd, 2 * i + 1); - set_vreg64(rl, a->vd, 2 * i); - } - - return true; -} - -static bool gen_vmadd_q(DisasContext *ctx, arg_vvv *a, int idx1, int idx2, - void (*func)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64)) -{ - return gen_vmadd_q_vl(ctx, a, 16, idx1, idx2, func); -} - -static bool gen_xvmadd_q(DisasContext *ctx, arg_vvv *a, int idx1, int idx2, - void (*func)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64)) -{ - return gen_vmadd_q_vl(ctx, a, 32, idx1, idx2, func); -} - -TRANS(vmaddwev_q_d, LSX, gen_vmadd_q, 0, 0, tcg_gen_muls2_i64) -TRANS(vmaddwod_q_d, LSX, gen_vmadd_q, 1, 1, tcg_gen_muls2_i64) -TRANS(vmaddwev_q_du, LSX, gen_vmadd_q, 0, 0, tcg_gen_mulu2_i64) -TRANS(vmaddwod_q_du, LSX, gen_vmadd_q, 1, 1, tcg_gen_mulu2_i64) -TRANS(vmaddwev_q_du_d, LSX, gen_vmadd_q, 0, 0, tcg_gen_mulus2_i64) -TRANS(vmaddwod_q_du_d, LSX, gen_vmadd_q, 1, 1, tcg_gen_mulus2_i64) -TRANS(xvmaddwev_q_d, LASX, gen_xvmadd_q, 0, 0, tcg_gen_muls2_i64) -TRANS(xvmaddwod_q_d, LASX, gen_xvmadd_q, 1, 1, tcg_gen_muls2_i64) -TRANS(xvmaddwev_q_du, LASX, gen_xvmadd_q, 0, 0, tcg_gen_mulu2_i64) -TRANS(xvmaddwod_q_du, LASX, gen_xvmadd_q, 1, 1, tcg_gen_mulu2_i64) -TRANS(xvmaddwev_q_du_d, LASX, gen_xvmadd_q, 0, 0, tcg_gen_mulus2_i64) -TRANS(xvmaddwod_q_du_d, LASX, gen_xvmadd_q, 1, 1, tcg_gen_mulus2_i64) - -static void gen_vmaddwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) -{ - TCGv_vec t1, t2, t3; - int halfbits = 4 << vece; - - t1 = tcg_temp_new_vec_matching(a); - t2 = tcg_temp_new_vec_matching(b); - t3 = tcg_temp_new_vec_matching(t); - tcg_gen_sari_vec(vece, t1, a, halfbits); - tcg_gen_sari_vec(vece, t2, b, halfbits); - tcg_gen_mul_vec(vece, t3, t1, t2); - tcg_gen_add_vec(vece, t, t, t3); -} - -static void gen_vmaddwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) -{ - TCGv_i32 t1; - - t1 = tcg_temp_new_i32(); - gen_vmulwod_w_h(t1, a, b); - tcg_gen_add_i32(t, t, t1); -} - -static void gen_vmaddwod_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) -{ - TCGv_i64 t1; - - t1 = tcg_temp_new_i64(); - gen_vmulwod_d_w(t1, a, b); - tcg_gen_add_i64(t, t, t1); -} - -static void do_vmaddwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, - uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_sari_vec, INDEX_op_mul_vec, INDEX_op_add_vec, 0 - }; - static const GVecGen3 op[3] = { - { - .fniv = gen_vmaddwod_s, - .fno = gen_helper_vmaddwod_h_b, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_16 - }, - { - .fni4 = gen_vmaddwod_w_h, - .fniv = gen_vmaddwod_s, - .fno = gen_helper_vmaddwod_w_h, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_32 - }, - { - .fni8 = gen_vmaddwod_d_w, - .fniv = gen_vmaddwod_s, - .fno = gen_helper_vmaddwod_d_w, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_64 - }, - }; - - tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); -} - -TRANS(vmaddwod_h_b, LSX, gvec_vvv, MO_8, do_vmaddwod_s) -TRANS(vmaddwod_w_h, LSX, gvec_vvv, MO_16, do_vmaddwod_s) -TRANS(vmaddwod_d_w, LSX, gvec_vvv, MO_32, do_vmaddwod_s) -TRANS(xvmaddwod_h_b, LASX, gvec_xxx, MO_8, do_vmaddwod_s) -TRANS(xvmaddwod_w_h, LASX, gvec_xxx, MO_16, do_vmaddwod_s) -TRANS(xvmaddwod_d_w, LASX, gvec_xxx, MO_32, do_vmaddwod_s) - -static void gen_vmaddwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) -{ - TCGv_vec t1, t2, mask; - - t1 = tcg_temp_new_vec_matching(t); - t2 = tcg_temp_new_vec_matching(b); 
- mask = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece)); - tcg_gen_and_vec(vece, t1, a, mask); - tcg_gen_and_vec(vece, t2, b, mask); - tcg_gen_mul_vec(vece, t1, t1, t2); - tcg_gen_add_vec(vece, t, t, t1); -} - -static void gen_vmaddwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) -{ - TCGv_i32 t1; - - t1 = tcg_temp_new_i32(); - gen_vmulwev_w_hu(t1, a, b); - tcg_gen_add_i32(t, t, t1); -} - -static void gen_vmaddwev_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) -{ - TCGv_i64 t1; - - t1 = tcg_temp_new_i64(); - gen_vmulwev_d_wu(t1, a, b); - tcg_gen_add_i64(t, t, t1); -} - -static void do_vmaddwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, - uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_mul_vec, INDEX_op_add_vec, 0 - }; - static const GVecGen3 op[3] = { - { - .fniv = gen_vmaddwev_u, - .fno = gen_helper_vmaddwev_h_bu, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_16 - }, - { - .fni4 = gen_vmaddwev_w_hu, - .fniv = gen_vmaddwev_u, - .fno = gen_helper_vmaddwev_w_hu, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_32 - }, - { - .fni8 = gen_vmaddwev_d_wu, - .fniv = gen_vmaddwev_u, - .fno = gen_helper_vmaddwev_d_wu, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_64 - }, - }; - - tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); -} - -TRANS(vmaddwev_h_bu, LSX, gvec_vvv, MO_8, do_vmaddwev_u) -TRANS(vmaddwev_w_hu, LSX, gvec_vvv, MO_16, do_vmaddwev_u) -TRANS(vmaddwev_d_wu, LSX, gvec_vvv, MO_32, do_vmaddwev_u) -TRANS(xvmaddwev_h_bu, LASX, gvec_xxx, MO_8, do_vmaddwev_u) -TRANS(xvmaddwev_w_hu, LASX, gvec_xxx, MO_16, do_vmaddwev_u) -TRANS(xvmaddwev_d_wu, LASX, gvec_xxx, MO_32, do_vmaddwev_u) - -static void gen_vmaddwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) -{ - TCGv_vec t1, t2, t3; - int halfbits = 4 << vece; - - t1 = tcg_temp_new_vec_matching(a); - t2 = tcg_temp_new_vec_matching(b); - t3 = tcg_temp_new_vec_matching(t); - tcg_gen_shri_vec(vece, t1, a, halfbits); - tcg_gen_shri_vec(vece, t2, b, halfbits); - tcg_gen_mul_vec(vece, t3, t1, t2); - tcg_gen_add_vec(vece, t, t, t3); -} - -static void gen_vmaddwod_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) -{ - TCGv_i32 t1; - - t1 = tcg_temp_new_i32(); - gen_vmulwod_w_hu(t1, a, b); - tcg_gen_add_i32(t, t, t1); -} - -static void gen_vmaddwod_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) -{ - TCGv_i64 t1; - - t1 = tcg_temp_new_i64(); - gen_vmulwod_d_wu(t1, a, b); - tcg_gen_add_i64(t, t, t1); -} - -static void do_vmaddwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, - uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_shri_vec, INDEX_op_mul_vec, INDEX_op_add_vec, 0 - }; - static const GVecGen3 op[3] = { - { - .fniv = gen_vmaddwod_u, - .fno = gen_helper_vmaddwod_h_bu, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_16 - }, - { - .fni4 = gen_vmaddwod_w_hu, - .fniv = gen_vmaddwod_u, - .fno = gen_helper_vmaddwod_w_hu, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_32 - }, - { - .fni8 = gen_vmaddwod_d_wu, - .fniv = gen_vmaddwod_u, - .fno = gen_helper_vmaddwod_d_wu, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_64 - }, - }; - - tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); -} - -TRANS(vmaddwod_h_bu, LSX, gvec_vvv, MO_8, do_vmaddwod_u) -TRANS(vmaddwod_w_hu, LSX, gvec_vvv, MO_16, do_vmaddwod_u) -TRANS(vmaddwod_d_wu, LSX, gvec_vvv, MO_32, do_vmaddwod_u) -TRANS(xvmaddwod_h_bu, LASX, gvec_xxx, MO_8, do_vmaddwod_u) 
-TRANS(xvmaddwod_w_hu, LASX, gvec_xxx, MO_16, do_vmaddwod_u) -TRANS(xvmaddwod_d_wu, LASX, gvec_xxx, MO_32, do_vmaddwod_u) - -static void gen_vmaddwev_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) -{ - TCGv_vec t1, t2, mask; - int halfbits = 4 << vece; - - t1 = tcg_temp_new_vec_matching(a); - t2 = tcg_temp_new_vec_matching(b); - mask = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece)); - tcg_gen_and_vec(vece, t1, a, mask); - tcg_gen_shli_vec(vece, t2, b, halfbits); - tcg_gen_sari_vec(vece, t2, t2, halfbits); - tcg_gen_mul_vec(vece, t1, t1, t2); - tcg_gen_add_vec(vece, t, t, t1); -} - -static void gen_vmaddwev_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) -{ - TCGv_i32 t1; - - t1 = tcg_temp_new_i32(); - gen_vmulwev_w_hu_h(t1, a, b); - tcg_gen_add_i32(t, t, t1); -} - -static void gen_vmaddwev_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) -{ - TCGv_i64 t1; - - t1 = tcg_temp_new_i64(); - gen_vmulwev_d_wu_w(t1, a, b); - tcg_gen_add_i64(t, t, t1); -} - -static void do_vmaddwev_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, - uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_shli_vec, INDEX_op_sari_vec, - INDEX_op_mul_vec, INDEX_op_add_vec, 0 - }; - static const GVecGen3 op[3] = { - { - .fniv = gen_vmaddwev_u_s, - .fno = gen_helper_vmaddwev_h_bu_b, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_16 - }, - { - .fni4 = gen_vmaddwev_w_hu_h, - .fniv = gen_vmaddwev_u_s, - .fno = gen_helper_vmaddwev_w_hu_h, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_32 - }, - { - .fni8 = gen_vmaddwev_d_wu_w, - .fniv = gen_vmaddwev_u_s, - .fno = gen_helper_vmaddwev_d_wu_w, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_64 - }, - }; - - tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); -} - -TRANS(vmaddwev_h_bu_b, LSX, gvec_vvv, MO_8, do_vmaddwev_u_s) -TRANS(vmaddwev_w_hu_h, LSX, gvec_vvv, MO_16, do_vmaddwev_u_s) -TRANS(vmaddwev_d_wu_w, LSX, gvec_vvv, MO_32, do_vmaddwev_u_s) -TRANS(xvmaddwev_h_bu_b, LASX, gvec_xxx, MO_8, do_vmaddwev_u_s) -TRANS(xvmaddwev_w_hu_h, LASX, gvec_xxx, MO_16, do_vmaddwev_u_s) -TRANS(xvmaddwev_d_wu_w, LASX, gvec_xxx, MO_32, do_vmaddwev_u_s) - -static void gen_vmaddwod_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) -{ - TCGv_vec t1, t2, t3; - int halfbits = 4 << vece; - - t1 = tcg_temp_new_vec_matching(a); - t2 = tcg_temp_new_vec_matching(b); - t3 = tcg_temp_new_vec_matching(t); - tcg_gen_shri_vec(vece, t1, a, halfbits); - tcg_gen_sari_vec(vece, t2, b, halfbits); - tcg_gen_mul_vec(vece, t3, t1, t2); - tcg_gen_add_vec(vece, t, t, t3); -} - -static void gen_vmaddwod_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) -{ - TCGv_i32 t1; - - t1 = tcg_temp_new_i32(); - gen_vmulwod_w_hu_h(t1, a, b); - tcg_gen_add_i32(t, t, t1); -} - -static void gen_vmaddwod_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) -{ - TCGv_i64 t1; - - t1 = tcg_temp_new_i64(); - gen_vmulwod_d_wu_w(t1, a, b); - tcg_gen_add_i64(t, t, t1); -} - -static void do_vmaddwod_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, - uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_shri_vec, INDEX_op_sari_vec, - INDEX_op_mul_vec, INDEX_op_add_vec, 0 - }; - static const GVecGen3 op[3] = { - { - .fniv = gen_vmaddwod_u_s, - .fno = gen_helper_vmaddwod_h_bu_b, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_16 - }, - { - .fni4 = gen_vmaddwod_w_hu_h, - .fniv = gen_vmaddwod_u_s, - .fno = gen_helper_vmaddwod_w_hu_h, - .load_dest = true, - .opt_opc = vecop_list, 
- .vece = MO_32 - }, - { - .fni8 = gen_vmaddwod_d_wu_w, - .fniv = gen_vmaddwod_u_s, - .fno = gen_helper_vmaddwod_d_wu_w, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_64 - }, - }; - - tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); -} - -TRANS(vmaddwod_h_bu_b, LSX, gvec_vvv, MO_8, do_vmaddwod_u_s) -TRANS(vmaddwod_w_hu_h, LSX, gvec_vvv, MO_16, do_vmaddwod_u_s) -TRANS(vmaddwod_d_wu_w, LSX, gvec_vvv, MO_32, do_vmaddwod_u_s) -TRANS(xvmaddwod_h_bu_b, LASX, gvec_xxx, MO_8, do_vmaddwod_u_s) -TRANS(xvmaddwod_w_hu_h, LASX, gvec_xxx, MO_16, do_vmaddwod_u_s) -TRANS(xvmaddwod_d_wu_w, LASX, gvec_xxx, MO_32, do_vmaddwod_u_s) - -TRANS(vdiv_b, LSX, gen_vvv, gen_helper_vdiv_b) -TRANS(vdiv_h, LSX, gen_vvv, gen_helper_vdiv_h) -TRANS(vdiv_w, LSX, gen_vvv, gen_helper_vdiv_w) -TRANS(vdiv_d, LSX, gen_vvv, gen_helper_vdiv_d) -TRANS(vdiv_bu, LSX, gen_vvv, gen_helper_vdiv_bu) -TRANS(vdiv_hu, LSX, gen_vvv, gen_helper_vdiv_hu) -TRANS(vdiv_wu, LSX, gen_vvv, gen_helper_vdiv_wu) -TRANS(vdiv_du, LSX, gen_vvv, gen_helper_vdiv_du) -TRANS(vmod_b, LSX, gen_vvv, gen_helper_vmod_b) -TRANS(vmod_h, LSX, gen_vvv, gen_helper_vmod_h) -TRANS(vmod_w, LSX, gen_vvv, gen_helper_vmod_w) -TRANS(vmod_d, LSX, gen_vvv, gen_helper_vmod_d) -TRANS(vmod_bu, LSX, gen_vvv, gen_helper_vmod_bu) -TRANS(vmod_hu, LSX, gen_vvv, gen_helper_vmod_hu) -TRANS(vmod_wu, LSX, gen_vvv, gen_helper_vmod_wu) -TRANS(vmod_du, LSX, gen_vvv, gen_helper_vmod_du) -TRANS(xvdiv_b, LASX, gen_xxx, gen_helper_vdiv_b) -TRANS(xvdiv_h, LASX, gen_xxx, gen_helper_vdiv_h) -TRANS(xvdiv_w, LASX, gen_xxx, gen_helper_vdiv_w) -TRANS(xvdiv_d, LASX, gen_xxx, gen_helper_vdiv_d) -TRANS(xvdiv_bu, LASX, gen_xxx, gen_helper_vdiv_bu) -TRANS(xvdiv_hu, LASX, gen_xxx, gen_helper_vdiv_hu) -TRANS(xvdiv_wu, LASX, gen_xxx, gen_helper_vdiv_wu) -TRANS(xvdiv_du, LASX, gen_xxx, gen_helper_vdiv_du) -TRANS(xvmod_b, LASX, gen_xxx, gen_helper_vmod_b) -TRANS(xvmod_h, LASX, gen_xxx, gen_helper_vmod_h) -TRANS(xvmod_w, LASX, gen_xxx, gen_helper_vmod_w) -TRANS(xvmod_d, LASX, gen_xxx, gen_helper_vmod_d) -TRANS(xvmod_bu, LASX, gen_xxx, gen_helper_vmod_bu) -TRANS(xvmod_hu, LASX, gen_xxx, gen_helper_vmod_hu) -TRANS(xvmod_wu, LASX, gen_xxx, gen_helper_vmod_wu) -TRANS(xvmod_du, LASX, gen_xxx, gen_helper_vmod_du) - -static void gen_vsat_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec max) -{ - TCGv_vec min; - - min = tcg_temp_new_vec_matching(t); - tcg_gen_not_vec(vece, min, max); - tcg_gen_smax_vec(vece, t, a, min); - tcg_gen_smin_vec(vece, t, t, max); -} - -static void do_vsat_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, - int64_t imm, uint32_t oprsz, uint32_t maxsz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_smax_vec, INDEX_op_smin_vec, 0 - }; - static const GVecGen2s op[4] = { - { - .fniv = gen_vsat_s, - .fno = gen_helper_vsat_b, - .opt_opc = vecop_list, - .vece = MO_8 - }, - { - .fniv = gen_vsat_s, - .fno = gen_helper_vsat_h, - .opt_opc = vecop_list, - .vece = MO_16 - }, - { - .fniv = gen_vsat_s, - .fno = gen_helper_vsat_w, - .opt_opc = vecop_list, - .vece = MO_32 - }, - { - .fniv = gen_vsat_s, - .fno = gen_helper_vsat_d, - .opt_opc = vecop_list, - .vece = MO_64 - }, - }; - - tcg_gen_gvec_2s(vd_ofs, vj_ofs, oprsz, maxsz, - tcg_constant_i64((1ll<< imm) -1), &op[vece]); -} - -TRANS(vsat_b, LSX, gvec_vv_i, MO_8, do_vsat_s) -TRANS(vsat_h, LSX, gvec_vv_i, MO_16, do_vsat_s) -TRANS(vsat_w, LSX, gvec_vv_i, MO_32, do_vsat_s) -TRANS(vsat_d, LSX, gvec_vv_i, MO_64, do_vsat_s) -TRANS(xvsat_b, LASX, gvec_xx_i, MO_8, do_vsat_s) -TRANS(xvsat_h, LASX, gvec_xx_i, MO_16, do_vsat_s) 
-TRANS(xvsat_w, LASX, gvec_xx_i, MO_32, do_vsat_s) -TRANS(xvsat_d, LASX, gvec_xx_i, MO_64, do_vsat_s) - -static void gen_vsat_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec max) -{ - tcg_gen_umin_vec(vece, t, a, max); -} - -static void do_vsat_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, - int64_t imm, uint32_t oprsz, uint32_t maxsz) -{ - uint64_t max; - static const TCGOpcode vecop_list[] = { - INDEX_op_umin_vec, 0 - }; - static const GVecGen2s op[4] = { - { - .fniv = gen_vsat_u, - .fno = gen_helper_vsat_bu, - .opt_opc = vecop_list, - .vece = MO_8 - }, - { - .fniv = gen_vsat_u, - .fno = gen_helper_vsat_hu, - .opt_opc = vecop_list, - .vece = MO_16 - }, - { - .fniv = gen_vsat_u, - .fno = gen_helper_vsat_wu, - .opt_opc = vecop_list, - .vece = MO_32 - }, - { - .fniv = gen_vsat_u, - .fno = gen_helper_vsat_du, - .opt_opc = vecop_list, - .vece = MO_64 - }, - }; - - max = (imm == 0x3f) ? UINT64_MAX : (1ull << (imm + 1)) - 1; - tcg_gen_gvec_2s(vd_ofs, vj_ofs, oprsz, maxsz, - tcg_constant_i64(max), &op[vece]); -} - -TRANS(vsat_bu, LSX, gvec_vv_i, MO_8, do_vsat_u) -TRANS(vsat_hu, LSX, gvec_vv_i, MO_16, do_vsat_u) -TRANS(vsat_wu, LSX, gvec_vv_i, MO_32, do_vsat_u) -TRANS(vsat_du, LSX, gvec_vv_i, MO_64, do_vsat_u) -TRANS(xvsat_bu, LASX, gvec_xx_i, MO_8, do_vsat_u) -TRANS(xvsat_hu, LASX, gvec_xx_i, MO_16, do_vsat_u) -TRANS(xvsat_wu, LASX, gvec_xx_i, MO_32, do_vsat_u) -TRANS(xvsat_du, LASX, gvec_xx_i, MO_64, do_vsat_u) - -TRANS(vexth_h_b, LSX, gen_vv, gen_helper_vexth_h_b) -TRANS(vexth_w_h, LSX, gen_vv, gen_helper_vexth_w_h) -TRANS(vexth_d_w, LSX, gen_vv, gen_helper_vexth_d_w) -TRANS(vexth_q_d, LSX, gen_vv, gen_helper_vexth_q_d) -TRANS(vexth_hu_bu, LSX, gen_vv, gen_helper_vexth_hu_bu) -TRANS(vexth_wu_hu, LSX, gen_vv, gen_helper_vexth_wu_hu) -TRANS(vexth_du_wu, LSX, gen_vv, gen_helper_vexth_du_wu) -TRANS(vexth_qu_du, LSX, gen_vv, gen_helper_vexth_qu_du) -TRANS(xvexth_h_b, LASX, gen_xx, gen_helper_vexth_h_b) -TRANS(xvexth_w_h, LASX, gen_xx, gen_helper_vexth_w_h) -TRANS(xvexth_d_w, LASX, gen_xx, gen_helper_vexth_d_w) -TRANS(xvexth_q_d, LASX, gen_xx, gen_helper_vexth_q_d) -TRANS(xvexth_hu_bu, LASX, gen_xx, gen_helper_vexth_hu_bu) -TRANS(xvexth_wu_hu, LASX, gen_xx, gen_helper_vexth_wu_hu) -TRANS(xvexth_du_wu, LASX, gen_xx, gen_helper_vexth_du_wu) -TRANS(xvexth_qu_du, LASX, gen_xx, gen_helper_vexth_qu_du) - -TRANS(vext2xv_h_b, LASX, gen_xx, gen_helper_vext2xv_h_b) -TRANS(vext2xv_w_b, LASX, gen_xx, gen_helper_vext2xv_w_b) -TRANS(vext2xv_d_b, LASX, gen_xx, gen_helper_vext2xv_d_b) -TRANS(vext2xv_w_h, LASX, gen_xx, gen_helper_vext2xv_w_h) -TRANS(vext2xv_d_h, LASX, gen_xx, gen_helper_vext2xv_d_h) -TRANS(vext2xv_d_w, LASX, gen_xx, gen_helper_vext2xv_d_w) -TRANS(vext2xv_hu_bu, LASX, gen_xx, gen_helper_vext2xv_hu_bu) -TRANS(vext2xv_wu_bu, LASX, gen_xx, gen_helper_vext2xv_wu_bu) -TRANS(vext2xv_du_bu, LASX, gen_xx, gen_helper_vext2xv_du_bu) -TRANS(vext2xv_wu_hu, LASX, gen_xx, gen_helper_vext2xv_wu_hu) -TRANS(vext2xv_du_hu, LASX, gen_xx, gen_helper_vext2xv_du_hu) -TRANS(vext2xv_du_wu, LASX, gen_xx, gen_helper_vext2xv_du_wu) - -static void gen_vsigncov(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) -{ - TCGv_vec t1, zero; - - t1 = tcg_temp_new_vec_matching(t); - zero = tcg_constant_vec_matching(t, vece, 0); - - tcg_gen_neg_vec(vece, t1, b); - tcg_gen_cmpsel_vec(TCG_COND_LT, vece, t, a, zero, t1, b); - tcg_gen_cmpsel_vec(TCG_COND_EQ, vece, t, a, zero, zero, t); -} - -static void do_vsigncov(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, - uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) -{ - static const 
TCGOpcode vecop_list[] = { - INDEX_op_neg_vec, INDEX_op_cmpsel_vec, 0 - }; - static const GVecGen3 op[4] = { - { - .fniv = gen_vsigncov, - .fno = gen_helper_vsigncov_b, - .opt_opc = vecop_list, - .vece = MO_8 - }, - { - .fniv = gen_vsigncov, - .fno = gen_helper_vsigncov_h, - .opt_opc = vecop_list, - .vece = MO_16 - }, - { - .fniv = gen_vsigncov, - .fno = gen_helper_vsigncov_w, - .opt_opc = vecop_list, - .vece = MO_32 - }, - { - .fniv = gen_vsigncov, - .fno = gen_helper_vsigncov_d, - .opt_opc = vecop_list, - .vece = MO_64 - }, - }; - - tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); -} - -TRANS(vsigncov_b, LSX, gvec_vvv, MO_8, do_vsigncov) -TRANS(vsigncov_h, LSX, gvec_vvv, MO_16, do_vsigncov) -TRANS(vsigncov_w, LSX, gvec_vvv, MO_32, do_vsigncov) -TRANS(vsigncov_d, LSX, gvec_vvv, MO_64, do_vsigncov) -TRANS(xvsigncov_b, LASX, gvec_xxx, MO_8, do_vsigncov) -TRANS(xvsigncov_h, LASX, gvec_xxx, MO_16, do_vsigncov) -TRANS(xvsigncov_w, LASX, gvec_xxx, MO_32, do_vsigncov) -TRANS(xvsigncov_d, LASX, gvec_xxx, MO_64, do_vsigncov) - -TRANS(vmskltz_b, LSX, gen_vv, gen_helper_vmskltz_b) -TRANS(vmskltz_h, LSX, gen_vv, gen_helper_vmskltz_h) -TRANS(vmskltz_w, LSX, gen_vv, gen_helper_vmskltz_w) -TRANS(vmskltz_d, LSX, gen_vv, gen_helper_vmskltz_d) -TRANS(vmskgez_b, LSX, gen_vv, gen_helper_vmskgez_b) -TRANS(vmsknz_b, LSX, gen_vv, gen_helper_vmsknz_b) -TRANS(xvmskltz_b, LASX, gen_xx, gen_helper_vmskltz_b) -TRANS(xvmskltz_h, LASX, gen_xx, gen_helper_vmskltz_h) -TRANS(xvmskltz_w, LASX, gen_xx, gen_helper_vmskltz_w) -TRANS(xvmskltz_d, LASX, gen_xx, gen_helper_vmskltz_d) -TRANS(xvmskgez_b, LASX, gen_xx, gen_helper_vmskgez_b) -TRANS(xvmsknz_b, LASX, gen_xx, gen_helper_vmsknz_b) - -#define EXPAND_BYTE(bit) ((uint64_t)(bit ? 0xff : 0)) - -static uint64_t vldi_get_value(DisasContext *ctx, uint32_t imm) -{ - int mode; - uint64_t data, t; - - /* - * imm bit [11:8] is mode, mode value is 0-12. - * other values are invalid. 
- */ - mode = (imm >> 8) & 0xf; - t = imm & 0xff; - switch (mode) { - case 0: - /* data: {2{24'0, imm[7:0]}} */ - data = (t << 32) | t ; - break; - case 1: - /* data: {2{16'0, imm[7:0], 8'0}} */ - data = (t << 24) | (t << 8); - break; - case 2: - /* data: {2{8'0, imm[7:0], 16'0}} */ - data = (t << 48) | (t << 16); - break; - case 3: - /* data: {2{imm[7:0], 24'0}} */ - data = (t << 56) | (t << 24); - break; - case 4: - /* data: {4{8'0, imm[7:0]}} */ - data = (t << 48) | (t << 32) | (t << 16) | t; - break; - case 5: - /* data: {4{imm[7:0], 8'0}} */ - data = (t << 56) |(t << 40) | (t << 24) | (t << 8); - break; - case 6: - /* data: {2{16'0, imm[7:0], 8'1}} */ - data = (t << 40) | ((uint64_t)0xff << 32) | (t << 8) | 0xff; - break; - case 7: - /* data: {2{8'0, imm[7:0], 16'1}} */ - data = (t << 48) | ((uint64_t)0xffff << 32) | (t << 16) | 0xffff; - break; - case 8: - /* data: {8{imm[7:0]}} */ - data =(t << 56) | (t << 48) | (t << 40) | (t << 32) | - (t << 24) | (t << 16) | (t << 8) | t; - break; - case 9: - /* data: {{8{imm[7]}, ..., 8{imm[0]}}} */ - { - uint64_t b0,b1,b2,b3,b4,b5,b6,b7; - b0 = t& 0x1; - b1 = (t & 0x2) >> 1; - b2 = (t & 0x4) >> 2; - b3 = (t & 0x8) >> 3; - b4 = (t & 0x10) >> 4; - b5 = (t & 0x20) >> 5; - b6 = (t & 0x40) >> 6; - b7 = (t & 0x80) >> 7; - data = (EXPAND_BYTE(b7) << 56) | - (EXPAND_BYTE(b6) << 48) | - (EXPAND_BYTE(b5) << 40) | - (EXPAND_BYTE(b4) << 32) | - (EXPAND_BYTE(b3) << 24) | - (EXPAND_BYTE(b2) << 16) | - (EXPAND_BYTE(b1) << 8) | - EXPAND_BYTE(b0); - } - break; - case 10: - /* data: {2{imm[7], ~imm[6], {5{imm[6]}}, imm[5:0], 19'0}} */ - { - uint64_t b6, b7; - uint64_t t0, t1; - b6 = (imm & 0x40) >> 6; - b7 = (imm & 0x80) >> 7; - t0 = (imm & 0x3f); - t1 = (b7 << 6) | ((1-b6) << 5) | (uint64_t)(b6 ? 0x1f : 0); - data = (t1 << 57) | (t0 << 51) | (t1 << 25) | (t0 << 19); - } - break; - case 11: - /* data: {32'0, imm[7], ~{imm[6]}, 5{imm[6]}, imm[5:0], 19'0} */ - { - uint64_t b6,b7; - uint64_t t0, t1; - b6 = (imm & 0x40) >> 6; - b7 = (imm & 0x80) >> 7; - t0 = (imm & 0x3f); - t1 = (b7 << 6) | ((1-b6) << 5) | (b6 ? 0x1f : 0); - data = (t1 << 25) | (t0 << 19); - } - break; - case 12: - /* data: {imm[7], ~imm[6], 8{imm[6]}, imm[5:0], 48'0} */ - { - uint64_t b6,b7; - uint64_t t0, t1; - b6 = (imm & 0x40) >> 6; - b7 = (imm & 0x80) >> 7; - t0 = (imm & 0x3f); - t1 = (b7 << 9) | ((1-b6) << 8) | (b6 ? 
0xff : 0); - data = (t1 << 54) | (t0 << 48); - } - break; - default: - generate_exception(ctx, EXCCODE_INE); - g_assert_not_reached(); - } - return data; -} - -static bool gen_vldi(DisasContext *ctx, arg_vldi *a, uint32_t oprsz) -{ - int sel, vece; - uint64_t value; - - if (!check_vec(ctx, oprsz)) { - return true; - } - - sel = (a->imm >> 12) & 0x1; - - if (sel) { - value = vldi_get_value(ctx, a->imm); - vece = MO_64; - } else { - value = ((int32_t)(a->imm << 22)) >> 22; - vece = (a->imm >> 10) & 0x3; - } - - tcg_gen_gvec_dup_i64(vece, vec_full_offset(a->vd), oprsz, ctx->vl/8, - tcg_constant_i64(value)); - return true; -} - -TRANS(vldi, LSX, gen_vldi, 16) -TRANS(xvldi, LASX, gen_vldi, 32) - -static bool gen_vandn_v(DisasContext *ctx, arg_vvv *a, uint32_t oprsz) -{ - uint32_t vd_ofs, vj_ofs, vk_ofs; - - if (!check_vec(ctx, oprsz)) { - return true; - } - - vd_ofs = vec_full_offset(a->vd); - vj_ofs = vec_full_offset(a->vj); - vk_ofs = vec_full_offset(a->vk); - - tcg_gen_gvec_andc(MO_64, vd_ofs, vk_ofs, vj_ofs, oprsz, ctx->vl / 8); - return true; -} - -static void gen_vnori(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm) -{ - TCGv_vec t1; - - t1 = tcg_constant_vec_matching(t, vece, imm); - tcg_gen_nor_vec(vece, t, a, t1); -} - -static void gen_vnori_b(TCGv_i64 t, TCGv_i64 a, int64_t imm) -{ - tcg_gen_movi_i64(t, dup_const(MO_8, imm)); - tcg_gen_nor_i64(t, a, t); -} - -static void do_vnori_b(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, - int64_t imm, uint32_t oprsz, uint32_t maxsz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_nor_vec, 0 - }; - static const GVecGen2i op = { - .fni8 = gen_vnori_b, - .fniv = gen_vnori, - .fnoi = gen_helper_vnori_b, - .opt_opc = vecop_list, - .vece = MO_8 - }; - - tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op); -} - -TRANS(vand_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_and) -TRANS(vor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_or) -TRANS(vxor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_xor) -TRANS(vnor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_nor) -TRANS(vandn_v, LSX, gen_vandn_v, 16) -TRANS(vorn_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_orc) -TRANS(vandi_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_andi) -TRANS(vori_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_ori) -TRANS(vxori_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_xori) -TRANS(vnori_b, LSX, gvec_vv_i, MO_8, do_vnori_b) -TRANS(xvand_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_and) -TRANS(xvor_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_or) -TRANS(xvxor_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_xor) -TRANS(xvnor_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_nor) -TRANS(xvandn_v, LASX, gen_vandn_v, 32) -TRANS(xvorn_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_orc) -TRANS(xvandi_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_andi) -TRANS(xvori_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_ori) -TRANS(xvxori_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_xori) -TRANS(xvnori_b, LASX, gvec_xx_i, MO_8, do_vnori_b) - -TRANS(vsll_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_shlv) -TRANS(vsll_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_shlv) -TRANS(vsll_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_shlv) -TRANS(vsll_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_shlv) -TRANS(vslli_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_shli) -TRANS(vslli_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_shli) -TRANS(vslli_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_shli) -TRANS(vslli_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_shli) -TRANS(xvsll_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_shlv) -TRANS(xvsll_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_shlv) -TRANS(xvsll_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_shlv) -TRANS(xvsll_d, LASX, gvec_xxx, MO_64, 
tcg_gen_gvec_shlv) -TRANS(xvslli_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_shli) -TRANS(xvslli_h, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_shli) -TRANS(xvslli_w, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_shli) -TRANS(xvslli_d, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_shli) - -TRANS(vsrl_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_shrv) -TRANS(vsrl_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_shrv) -TRANS(vsrl_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_shrv) -TRANS(vsrl_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_shrv) -TRANS(vsrli_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_shri) -TRANS(vsrli_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_shri) -TRANS(vsrli_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_shri) -TRANS(vsrli_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_shri) -TRANS(xvsrl_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_shrv) -TRANS(xvsrl_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_shrv) -TRANS(xvsrl_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_shrv) -TRANS(xvsrl_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_shrv) -TRANS(xvsrli_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_shri) -TRANS(xvsrli_h, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_shri) -TRANS(xvsrli_w, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_shri) -TRANS(xvsrli_d, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_shri) - -TRANS(vsra_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_sarv) -TRANS(vsra_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_sarv) -TRANS(vsra_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_sarv) -TRANS(vsra_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_sarv) -TRANS(vsrai_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_sari) -TRANS(vsrai_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_sari) -TRANS(vsrai_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_sari) -TRANS(vsrai_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_sari) -TRANS(xvsra_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_sarv) -TRANS(xvsra_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_sarv) -TRANS(xvsra_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_sarv) -TRANS(xvsra_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_sarv) -TRANS(xvsrai_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_sari) -TRANS(xvsrai_h, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_sari) -TRANS(xvsrai_w, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_sari) -TRANS(xvsrai_d, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_sari) - -TRANS(vrotr_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_rotrv) -TRANS(vrotr_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_rotrv) -TRANS(vrotr_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_rotrv) -TRANS(vrotr_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_rotrv) -TRANS(vrotri_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_rotri) -TRANS(vrotri_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_rotri) -TRANS(vrotri_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_rotri) -TRANS(vrotri_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_rotri) -TRANS(xvrotr_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_rotrv) -TRANS(xvrotr_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_rotrv) -TRANS(xvrotr_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_rotrv) -TRANS(xvrotr_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_rotrv) -TRANS(xvrotri_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_rotri) -TRANS(xvrotri_h, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_rotri) -TRANS(xvrotri_w, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_rotri) -TRANS(xvrotri_d, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_rotri) - -TRANS(vsllwil_h_b, LSX, gen_vv_i, gen_helper_vsllwil_h_b) -TRANS(vsllwil_w_h, LSX, gen_vv_i, gen_helper_vsllwil_w_h) -TRANS(vsllwil_d_w, LSX, gen_vv_i, gen_helper_vsllwil_d_w) -TRANS(vextl_q_d, LSX, gen_vv, gen_helper_vextl_q_d) -TRANS(vsllwil_hu_bu, LSX, gen_vv_i, gen_helper_vsllwil_hu_bu) -TRANS(vsllwil_wu_hu, LSX, gen_vv_i, gen_helper_vsllwil_wu_hu) -TRANS(vsllwil_du_wu, LSX, gen_vv_i, gen_helper_vsllwil_du_wu) -TRANS(vextl_qu_du, LSX, gen_vv, gen_helper_vextl_qu_du) 
-TRANS(xvsllwil_h_b, LASX, gen_xx_i, gen_helper_vsllwil_h_b) -TRANS(xvsllwil_w_h, LASX, gen_xx_i, gen_helper_vsllwil_w_h) -TRANS(xvsllwil_d_w, LASX, gen_xx_i, gen_helper_vsllwil_d_w) -TRANS(xvextl_q_d, LASX, gen_xx, gen_helper_vextl_q_d) -TRANS(xvsllwil_hu_bu, LASX, gen_xx_i, gen_helper_vsllwil_hu_bu) -TRANS(xvsllwil_wu_hu, LASX, gen_xx_i, gen_helper_vsllwil_wu_hu) -TRANS(xvsllwil_du_wu, LASX, gen_xx_i, gen_helper_vsllwil_du_wu) -TRANS(xvextl_qu_du, LASX, gen_xx, gen_helper_vextl_qu_du) - -TRANS(vsrlr_b, LSX, gen_vvv, gen_helper_vsrlr_b) -TRANS(vsrlr_h, LSX, gen_vvv, gen_helper_vsrlr_h) -TRANS(vsrlr_w, LSX, gen_vvv, gen_helper_vsrlr_w) -TRANS(vsrlr_d, LSX, gen_vvv, gen_helper_vsrlr_d) -TRANS(vsrlri_b, LSX, gen_vv_i, gen_helper_vsrlri_b) -TRANS(vsrlri_h, LSX, gen_vv_i, gen_helper_vsrlri_h) -TRANS(vsrlri_w, LSX, gen_vv_i, gen_helper_vsrlri_w) -TRANS(vsrlri_d, LSX, gen_vv_i, gen_helper_vsrlri_d) -TRANS(xvsrlr_b, LASX, gen_xxx, gen_helper_vsrlr_b) -TRANS(xvsrlr_h, LASX, gen_xxx, gen_helper_vsrlr_h) -TRANS(xvsrlr_w, LASX, gen_xxx, gen_helper_vsrlr_w) -TRANS(xvsrlr_d, LASX, gen_xxx, gen_helper_vsrlr_d) -TRANS(xvsrlri_b, LASX, gen_xx_i, gen_helper_vsrlri_b) -TRANS(xvsrlri_h, LASX, gen_xx_i, gen_helper_vsrlri_h) -TRANS(xvsrlri_w, LASX, gen_xx_i, gen_helper_vsrlri_w) -TRANS(xvsrlri_d, LASX, gen_xx_i, gen_helper_vsrlri_d) - -TRANS(vsrar_b, LSX, gen_vvv, gen_helper_vsrar_b) -TRANS(vsrar_h, LSX, gen_vvv, gen_helper_vsrar_h) -TRANS(vsrar_w, LSX, gen_vvv, gen_helper_vsrar_w) -TRANS(vsrar_d, LSX, gen_vvv, gen_helper_vsrar_d) -TRANS(vsrari_b, LSX, gen_vv_i, gen_helper_vsrari_b) -TRANS(vsrari_h, LSX, gen_vv_i, gen_helper_vsrari_h) -TRANS(vsrari_w, LSX, gen_vv_i, gen_helper_vsrari_w) -TRANS(vsrari_d, LSX, gen_vv_i, gen_helper_vsrari_d) -TRANS(xvsrar_b, LASX, gen_xxx, gen_helper_vsrar_b) -TRANS(xvsrar_h, LASX, gen_xxx, gen_helper_vsrar_h) -TRANS(xvsrar_w, LASX, gen_xxx, gen_helper_vsrar_w) -TRANS(xvsrar_d, LASX, gen_xxx, gen_helper_vsrar_d) -TRANS(xvsrari_b, LASX, gen_xx_i, gen_helper_vsrari_b) -TRANS(xvsrari_h, LASX, gen_xx_i, gen_helper_vsrari_h) -TRANS(xvsrari_w, LASX, gen_xx_i, gen_helper_vsrari_w) -TRANS(xvsrari_d, LASX, gen_xx_i, gen_helper_vsrari_d) - -TRANS(vsrln_b_h, LSX, gen_vvv, gen_helper_vsrln_b_h) -TRANS(vsrln_h_w, LSX, gen_vvv, gen_helper_vsrln_h_w) -TRANS(vsrln_w_d, LSX, gen_vvv, gen_helper_vsrln_w_d) -TRANS(vsran_b_h, LSX, gen_vvv, gen_helper_vsran_b_h) -TRANS(vsran_h_w, LSX, gen_vvv, gen_helper_vsran_h_w) -TRANS(vsran_w_d, LSX, gen_vvv, gen_helper_vsran_w_d) -TRANS(xvsrln_b_h, LASX, gen_xxx, gen_helper_vsrln_b_h) -TRANS(xvsrln_h_w, LASX, gen_xxx, gen_helper_vsrln_h_w) -TRANS(xvsrln_w_d, LASX, gen_xxx, gen_helper_vsrln_w_d) -TRANS(xvsran_b_h, LASX, gen_xxx, gen_helper_vsran_b_h) -TRANS(xvsran_h_w, LASX, gen_xxx, gen_helper_vsran_h_w) -TRANS(xvsran_w_d, LASX, gen_xxx, gen_helper_vsran_w_d) - -TRANS(vsrlni_b_h, LSX, gen_vv_i, gen_helper_vsrlni_b_h) -TRANS(vsrlni_h_w, LSX, gen_vv_i, gen_helper_vsrlni_h_w) -TRANS(vsrlni_w_d, LSX, gen_vv_i, gen_helper_vsrlni_w_d) -TRANS(vsrlni_d_q, LSX, gen_vv_i, gen_helper_vsrlni_d_q) -TRANS(vsrani_b_h, LSX, gen_vv_i, gen_helper_vsrani_b_h) -TRANS(vsrani_h_w, LSX, gen_vv_i, gen_helper_vsrani_h_w) -TRANS(vsrani_w_d, LSX, gen_vv_i, gen_helper_vsrani_w_d) -TRANS(vsrani_d_q, LSX, gen_vv_i, gen_helper_vsrani_d_q) -TRANS(xvsrlni_b_h, LASX, gen_xx_i, gen_helper_vsrlni_b_h) -TRANS(xvsrlni_h_w, LASX, gen_xx_i, gen_helper_vsrlni_h_w) -TRANS(xvsrlni_w_d, LASX, gen_xx_i, gen_helper_vsrlni_w_d) -TRANS(xvsrlni_d_q, LASX, gen_xx_i, gen_helper_vsrlni_d_q) -TRANS(xvsrani_b_h, 
LASX, gen_xx_i, gen_helper_vsrani_b_h) -TRANS(xvsrani_h_w, LASX, gen_xx_i, gen_helper_vsrani_h_w) -TRANS(xvsrani_w_d, LASX, gen_xx_i, gen_helper_vsrani_w_d) -TRANS(xvsrani_d_q, LASX, gen_xx_i, gen_helper_vsrani_d_q) - -TRANS(vsrlrn_b_h, LSX, gen_vvv, gen_helper_vsrlrn_b_h) -TRANS(vsrlrn_h_w, LSX, gen_vvv, gen_helper_vsrlrn_h_w) -TRANS(vsrlrn_w_d, LSX, gen_vvv, gen_helper_vsrlrn_w_d) -TRANS(vsrarn_b_h, LSX, gen_vvv, gen_helper_vsrarn_b_h) -TRANS(vsrarn_h_w, LSX, gen_vvv, gen_helper_vsrarn_h_w) -TRANS(vsrarn_w_d, LSX, gen_vvv, gen_helper_vsrarn_w_d) -TRANS(xvsrlrn_b_h, LASX, gen_xxx, gen_helper_vsrlrn_b_h) -TRANS(xvsrlrn_h_w, LASX, gen_xxx, gen_helper_vsrlrn_h_w) -TRANS(xvsrlrn_w_d, LASX, gen_xxx, gen_helper_vsrlrn_w_d) -TRANS(xvsrarn_b_h, LASX, gen_xxx, gen_helper_vsrarn_b_h) -TRANS(xvsrarn_h_w, LASX, gen_xxx, gen_helper_vsrarn_h_w) -TRANS(xvsrarn_w_d, LASX, gen_xxx, gen_helper_vsrarn_w_d) - -TRANS(vsrlrni_b_h, LSX, gen_vv_i, gen_helper_vsrlrni_b_h) -TRANS(vsrlrni_h_w, LSX, gen_vv_i, gen_helper_vsrlrni_h_w) -TRANS(vsrlrni_w_d, LSX, gen_vv_i, gen_helper_vsrlrni_w_d) -TRANS(vsrlrni_d_q, LSX, gen_vv_i, gen_helper_vsrlrni_d_q) -TRANS(vsrarni_b_h, LSX, gen_vv_i, gen_helper_vsrarni_b_h) -TRANS(vsrarni_h_w, LSX, gen_vv_i, gen_helper_vsrarni_h_w) -TRANS(vsrarni_w_d, LSX, gen_vv_i, gen_helper_vsrarni_w_d) -TRANS(vsrarni_d_q, LSX, gen_vv_i, gen_helper_vsrarni_d_q) -TRANS(xvsrlrni_b_h, LASX, gen_xx_i, gen_helper_vsrlrni_b_h) -TRANS(xvsrlrni_h_w, LASX, gen_xx_i, gen_helper_vsrlrni_h_w) -TRANS(xvsrlrni_w_d, LASX, gen_xx_i, gen_helper_vsrlrni_w_d) -TRANS(xvsrlrni_d_q, LASX, gen_xx_i, gen_helper_vsrlrni_d_q) -TRANS(xvsrarni_b_h, LASX, gen_xx_i, gen_helper_vsrarni_b_h) -TRANS(xvsrarni_h_w, LASX, gen_xx_i, gen_helper_vsrarni_h_w) -TRANS(xvsrarni_w_d, LASX, gen_xx_i, gen_helper_vsrarni_w_d) -TRANS(xvsrarni_d_q, LASX, gen_xx_i, gen_helper_vsrarni_d_q) - -TRANS(vssrln_b_h, LSX, gen_vvv, gen_helper_vssrln_b_h) -TRANS(vssrln_h_w, LSX, gen_vvv, gen_helper_vssrln_h_w) -TRANS(vssrln_w_d, LSX, gen_vvv, gen_helper_vssrln_w_d) -TRANS(vssran_b_h, LSX, gen_vvv, gen_helper_vssran_b_h) -TRANS(vssran_h_w, LSX, gen_vvv, gen_helper_vssran_h_w) -TRANS(vssran_w_d, LSX, gen_vvv, gen_helper_vssran_w_d) -TRANS(vssrln_bu_h, LSX, gen_vvv, gen_helper_vssrln_bu_h) -TRANS(vssrln_hu_w, LSX, gen_vvv, gen_helper_vssrln_hu_w) -TRANS(vssrln_wu_d, LSX, gen_vvv, gen_helper_vssrln_wu_d) -TRANS(vssran_bu_h, LSX, gen_vvv, gen_helper_vssran_bu_h) -TRANS(vssran_hu_w, LSX, gen_vvv, gen_helper_vssran_hu_w) -TRANS(vssran_wu_d, LSX, gen_vvv, gen_helper_vssran_wu_d) -TRANS(xvssrln_b_h, LASX, gen_xxx, gen_helper_vssrln_b_h) -TRANS(xvssrln_h_w, LASX, gen_xxx, gen_helper_vssrln_h_w) -TRANS(xvssrln_w_d, LASX, gen_xxx, gen_helper_vssrln_w_d) -TRANS(xvssran_b_h, LASX, gen_xxx, gen_helper_vssran_b_h) -TRANS(xvssran_h_w, LASX, gen_xxx, gen_helper_vssran_h_w) -TRANS(xvssran_w_d, LASX, gen_xxx, gen_helper_vssran_w_d) -TRANS(xvssrln_bu_h, LASX, gen_xxx, gen_helper_vssrln_bu_h) -TRANS(xvssrln_hu_w, LASX, gen_xxx, gen_helper_vssrln_hu_w) -TRANS(xvssrln_wu_d, LASX, gen_xxx, gen_helper_vssrln_wu_d) -TRANS(xvssran_bu_h, LASX, gen_xxx, gen_helper_vssran_bu_h) -TRANS(xvssran_hu_w, LASX, gen_xxx, gen_helper_vssran_hu_w) -TRANS(xvssran_wu_d, LASX, gen_xxx, gen_helper_vssran_wu_d) - -TRANS(vssrlni_b_h, LSX, gen_vv_i, gen_helper_vssrlni_b_h) -TRANS(vssrlni_h_w, LSX, gen_vv_i, gen_helper_vssrlni_h_w) -TRANS(vssrlni_w_d, LSX, gen_vv_i, gen_helper_vssrlni_w_d) -TRANS(vssrlni_d_q, LSX, gen_vv_i, gen_helper_vssrlni_d_q) -TRANS(vssrani_b_h, LSX, gen_vv_i, gen_helper_vssrani_b_h) 
-TRANS(vssrani_h_w, LSX, gen_vv_i, gen_helper_vssrani_h_w) -TRANS(vssrani_w_d, LSX, gen_vv_i, gen_helper_vssrani_w_d) -TRANS(vssrani_d_q, LSX, gen_vv_i, gen_helper_vssrani_d_q) -TRANS(vssrlni_bu_h, LSX, gen_vv_i, gen_helper_vssrlni_bu_h) -TRANS(vssrlni_hu_w, LSX, gen_vv_i, gen_helper_vssrlni_hu_w) -TRANS(vssrlni_wu_d, LSX, gen_vv_i, gen_helper_vssrlni_wu_d) -TRANS(vssrlni_du_q, LSX, gen_vv_i, gen_helper_vssrlni_du_q) -TRANS(vssrani_bu_h, LSX, gen_vv_i, gen_helper_vssrani_bu_h) -TRANS(vssrani_hu_w, LSX, gen_vv_i, gen_helper_vssrani_hu_w) -TRANS(vssrani_wu_d, LSX, gen_vv_i, gen_helper_vssrani_wu_d) -TRANS(vssrani_du_q, LSX, gen_vv_i, gen_helper_vssrani_du_q) -TRANS(xvssrlni_b_h, LASX, gen_xx_i, gen_helper_vssrlni_b_h) -TRANS(xvssrlni_h_w, LASX, gen_xx_i, gen_helper_vssrlni_h_w) -TRANS(xvssrlni_w_d, LASX, gen_xx_i, gen_helper_vssrlni_w_d) -TRANS(xvssrlni_d_q, LASX, gen_xx_i, gen_helper_vssrlni_d_q) -TRANS(xvssrani_b_h, LASX, gen_xx_i, gen_helper_vssrani_b_h) -TRANS(xvssrani_h_w, LASX, gen_xx_i, gen_helper_vssrani_h_w) -TRANS(xvssrani_w_d, LASX, gen_xx_i, gen_helper_vssrani_w_d) -TRANS(xvssrani_d_q, LASX, gen_xx_i, gen_helper_vssrani_d_q) -TRANS(xvssrlni_bu_h, LASX, gen_xx_i, gen_helper_vssrlni_bu_h) -TRANS(xvssrlni_hu_w, LASX, gen_xx_i, gen_helper_vssrlni_hu_w) -TRANS(xvssrlni_wu_d, LASX, gen_xx_i, gen_helper_vssrlni_wu_d) -TRANS(xvssrlni_du_q, LASX, gen_xx_i, gen_helper_vssrlni_du_q) -TRANS(xvssrani_bu_h, LASX, gen_xx_i, gen_helper_vssrani_bu_h) -TRANS(xvssrani_hu_w, LASX, gen_xx_i, gen_helper_vssrani_hu_w) -TRANS(xvssrani_wu_d, LASX, gen_xx_i, gen_helper_vssrani_wu_d) -TRANS(xvssrani_du_q, LASX, gen_xx_i, gen_helper_vssrani_du_q) - -TRANS(vssrlrn_b_h, LSX, gen_vvv, gen_helper_vssrlrn_b_h) -TRANS(vssrlrn_h_w, LSX, gen_vvv, gen_helper_vssrlrn_h_w) -TRANS(vssrlrn_w_d, LSX, gen_vvv, gen_helper_vssrlrn_w_d) -TRANS(vssrarn_b_h, LSX, gen_vvv, gen_helper_vssrarn_b_h) -TRANS(vssrarn_h_w, LSX, gen_vvv, gen_helper_vssrarn_h_w) -TRANS(vssrarn_w_d, LSX, gen_vvv, gen_helper_vssrarn_w_d) -TRANS(vssrlrn_bu_h, LSX, gen_vvv, gen_helper_vssrlrn_bu_h) -TRANS(vssrlrn_hu_w, LSX, gen_vvv, gen_helper_vssrlrn_hu_w) -TRANS(vssrlrn_wu_d, LSX, gen_vvv, gen_helper_vssrlrn_wu_d) -TRANS(vssrarn_bu_h, LSX, gen_vvv, gen_helper_vssrarn_bu_h) -TRANS(vssrarn_hu_w, LSX, gen_vvv, gen_helper_vssrarn_hu_w) -TRANS(vssrarn_wu_d, LSX, gen_vvv, gen_helper_vssrarn_wu_d) -TRANS(xvssrlrn_b_h, LASX, gen_xxx, gen_helper_vssrlrn_b_h) -TRANS(xvssrlrn_h_w, LASX, gen_xxx, gen_helper_vssrlrn_h_w) -TRANS(xvssrlrn_w_d, LASX, gen_xxx, gen_helper_vssrlrn_w_d) -TRANS(xvssrarn_b_h, LASX, gen_xxx, gen_helper_vssrarn_b_h) -TRANS(xvssrarn_h_w, LASX, gen_xxx, gen_helper_vssrarn_h_w) -TRANS(xvssrarn_w_d, LASX, gen_xxx, gen_helper_vssrarn_w_d) -TRANS(xvssrlrn_bu_h, LASX, gen_xxx, gen_helper_vssrlrn_bu_h) -TRANS(xvssrlrn_hu_w, LASX, gen_xxx, gen_helper_vssrlrn_hu_w) -TRANS(xvssrlrn_wu_d, LASX, gen_xxx, gen_helper_vssrlrn_wu_d) -TRANS(xvssrarn_bu_h, LASX, gen_xxx, gen_helper_vssrarn_bu_h) -TRANS(xvssrarn_hu_w, LASX, gen_xxx, gen_helper_vssrarn_hu_w) -TRANS(xvssrarn_wu_d, LASX, gen_xxx, gen_helper_vssrarn_wu_d) - -TRANS(vssrlrni_b_h, LSX, gen_vv_i, gen_helper_vssrlrni_b_h) -TRANS(vssrlrni_h_w, LSX, gen_vv_i, gen_helper_vssrlrni_h_w) -TRANS(vssrlrni_w_d, LSX, gen_vv_i, gen_helper_vssrlrni_w_d) -TRANS(vssrlrni_d_q, LSX, gen_vv_i, gen_helper_vssrlrni_d_q) -TRANS(vssrarni_b_h, LSX, gen_vv_i, gen_helper_vssrarni_b_h) -TRANS(vssrarni_h_w, LSX, gen_vv_i, gen_helper_vssrarni_h_w) -TRANS(vssrarni_w_d, LSX, gen_vv_i, gen_helper_vssrarni_w_d) -TRANS(vssrarni_d_q, LSX, 
gen_vv_i, gen_helper_vssrarni_d_q) -TRANS(vssrlrni_bu_h, LSX, gen_vv_i, gen_helper_vssrlrni_bu_h) -TRANS(vssrlrni_hu_w, LSX, gen_vv_i, gen_helper_vssrlrni_hu_w) -TRANS(vssrlrni_wu_d, LSX, gen_vv_i, gen_helper_vssrlrni_wu_d) -TRANS(vssrlrni_du_q, LSX, gen_vv_i, gen_helper_vssrlrni_du_q) -TRANS(vssrarni_bu_h, LSX, gen_vv_i, gen_helper_vssrarni_bu_h) -TRANS(vssrarni_hu_w, LSX, gen_vv_i, gen_helper_vssrarni_hu_w) -TRANS(vssrarni_wu_d, LSX, gen_vv_i, gen_helper_vssrarni_wu_d) -TRANS(vssrarni_du_q, LSX, gen_vv_i, gen_helper_vssrarni_du_q) -TRANS(xvssrlrni_b_h, LASX, gen_xx_i, gen_helper_vssrlrni_b_h) -TRANS(xvssrlrni_h_w, LASX, gen_xx_i, gen_helper_vssrlrni_h_w) -TRANS(xvssrlrni_w_d, LASX, gen_xx_i, gen_helper_vssrlrni_w_d) -TRANS(xvssrlrni_d_q, LASX, gen_xx_i, gen_helper_vssrlrni_d_q) -TRANS(xvssrarni_b_h, LASX, gen_xx_i, gen_helper_vssrarni_b_h) -TRANS(xvssrarni_h_w, LASX, gen_xx_i, gen_helper_vssrarni_h_w) -TRANS(xvssrarni_w_d, LASX, gen_xx_i, gen_helper_vssrarni_w_d) -TRANS(xvssrarni_d_q, LASX, gen_xx_i, gen_helper_vssrarni_d_q) -TRANS(xvssrlrni_bu_h, LASX, gen_xx_i, gen_helper_vssrlrni_bu_h) -TRANS(xvssrlrni_hu_w, LASX, gen_xx_i, gen_helper_vssrlrni_hu_w) -TRANS(xvssrlrni_wu_d, LASX, gen_xx_i, gen_helper_vssrlrni_wu_d) -TRANS(xvssrlrni_du_q, LASX, gen_xx_i, gen_helper_vssrlrni_du_q) -TRANS(xvssrarni_bu_h, LASX, gen_xx_i, gen_helper_vssrarni_bu_h) -TRANS(xvssrarni_hu_w, LASX, gen_xx_i, gen_helper_vssrarni_hu_w) -TRANS(xvssrarni_wu_d, LASX, gen_xx_i, gen_helper_vssrarni_wu_d) -TRANS(xvssrarni_du_q, LASX, gen_xx_i, gen_helper_vssrarni_du_q) - -TRANS(vclo_b, LSX, gen_vv, gen_helper_vclo_b) -TRANS(vclo_h, LSX, gen_vv, gen_helper_vclo_h) -TRANS(vclo_w, LSX, gen_vv, gen_helper_vclo_w) -TRANS(vclo_d, LSX, gen_vv, gen_helper_vclo_d) -TRANS(vclz_b, LSX, gen_vv, gen_helper_vclz_b) -TRANS(vclz_h, LSX, gen_vv, gen_helper_vclz_h) -TRANS(vclz_w, LSX, gen_vv, gen_helper_vclz_w) -TRANS(vclz_d, LSX, gen_vv, gen_helper_vclz_d) -TRANS(xvclo_b, LASX, gen_xx, gen_helper_vclo_b) -TRANS(xvclo_h, LASX, gen_xx, gen_helper_vclo_h) -TRANS(xvclo_w, LASX, gen_xx, gen_helper_vclo_w) -TRANS(xvclo_d, LASX, gen_xx, gen_helper_vclo_d) -TRANS(xvclz_b, LASX, gen_xx, gen_helper_vclz_b) -TRANS(xvclz_h, LASX, gen_xx, gen_helper_vclz_h) -TRANS(xvclz_w, LASX, gen_xx, gen_helper_vclz_w) -TRANS(xvclz_d, LASX, gen_xx, gen_helper_vclz_d) - -TRANS(vpcnt_b, LSX, gen_vv, gen_helper_vpcnt_b) -TRANS(vpcnt_h, LSX, gen_vv, gen_helper_vpcnt_h) -TRANS(vpcnt_w, LSX, gen_vv, gen_helper_vpcnt_w) -TRANS(vpcnt_d, LSX, gen_vv, gen_helper_vpcnt_d) -TRANS(xvpcnt_b, LASX, gen_xx, gen_helper_vpcnt_b) -TRANS(xvpcnt_h, LASX, gen_xx, gen_helper_vpcnt_h) -TRANS(xvpcnt_w, LASX, gen_xx, gen_helper_vpcnt_w) -TRANS(xvpcnt_d, LASX, gen_xx, gen_helper_vpcnt_d) - -static void do_vbit(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b, - void (*func)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec)) -{ - TCGv_vec mask, lsh, t1, one; - - lsh = tcg_temp_new_vec_matching(t); - t1 = tcg_temp_new_vec_matching(t); - mask = tcg_constant_vec_matching(t, vece, (8 << vece) - 1); - one = tcg_constant_vec_matching(t, vece, 1); - - tcg_gen_and_vec(vece, lsh, b, mask); - tcg_gen_shlv_vec(vece, t1, one, lsh); - func(vece, t, a, t1); -} - -static void gen_vbitclr(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) -{ - do_vbit(vece, t, a, b, tcg_gen_andc_vec); -} - -static void gen_vbitset(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) -{ - do_vbit(vece, t, a, b, tcg_gen_or_vec); -} - -static void gen_vbitrev(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) -{ - do_vbit(vece, t, a, b, 
tcg_gen_xor_vec); -} - -static void do_vbitclr(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, - uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_shlv_vec, INDEX_op_andc_vec, 0 - }; - static const GVecGen3 op[4] = { - { - .fniv = gen_vbitclr, - .fno = gen_helper_vbitclr_b, - .opt_opc = vecop_list, - .vece = MO_8 - }, - { - .fniv = gen_vbitclr, - .fno = gen_helper_vbitclr_h, - .opt_opc = vecop_list, - .vece = MO_16 - }, - { - .fniv = gen_vbitclr, - .fno = gen_helper_vbitclr_w, - .opt_opc = vecop_list, - .vece = MO_32 - }, - { - .fniv = gen_vbitclr, - .fno = gen_helper_vbitclr_d, - .opt_opc = vecop_list, - .vece = MO_64 - }, - }; - - tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); -} - -TRANS(vbitclr_b, LSX, gvec_vvv, MO_8, do_vbitclr) -TRANS(vbitclr_h, LSX, gvec_vvv, MO_16, do_vbitclr) -TRANS(vbitclr_w, LSX, gvec_vvv, MO_32, do_vbitclr) -TRANS(vbitclr_d, LSX, gvec_vvv, MO_64, do_vbitclr) -TRANS(xvbitclr_b, LASX, gvec_xxx, MO_8, do_vbitclr) -TRANS(xvbitclr_h, LASX, gvec_xxx, MO_16, do_vbitclr) -TRANS(xvbitclr_w, LASX, gvec_xxx, MO_32, do_vbitclr) -TRANS(xvbitclr_d, LASX, gvec_xxx, MO_64, do_vbitclr) - -static void do_vbiti(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm, - void (*func)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec)) -{ - int lsh; - TCGv_vec t1, one; - - lsh = imm & ((8 << vece) -1); - t1 = tcg_temp_new_vec_matching(t); - one = tcg_constant_vec_matching(t, vece, 1); - - tcg_gen_shli_vec(vece, t1, one, lsh); - func(vece, t, a, t1); -} - -static void gen_vbitclri(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm) -{ - do_vbiti(vece, t, a, imm, tcg_gen_andc_vec); -} - -static void gen_vbitseti(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm) -{ - do_vbiti(vece, t, a, imm, tcg_gen_or_vec); -} - -static void gen_vbitrevi(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm) -{ - do_vbiti(vece, t, a, imm, tcg_gen_xor_vec); -} - -static void do_vbitclri(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, - int64_t imm, uint32_t oprsz, uint32_t maxsz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_shli_vec, INDEX_op_andc_vec, 0 - }; - static const GVecGen2i op[4] = { - { - .fniv = gen_vbitclri, - .fnoi = gen_helper_vbitclri_b, - .opt_opc = vecop_list, - .vece = MO_8 - }, - { - .fniv = gen_vbitclri, - .fnoi = gen_helper_vbitclri_h, - .opt_opc = vecop_list, - .vece = MO_16 - }, - { - .fniv = gen_vbitclri, - .fnoi = gen_helper_vbitclri_w, - .opt_opc = vecop_list, - .vece = MO_32 - }, - { - .fniv = gen_vbitclri, - .fnoi = gen_helper_vbitclri_d, - .opt_opc = vecop_list, - .vece = MO_64 - }, - }; - - tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]); -} - -TRANS(vbitclri_b, LSX, gvec_vv_i, MO_8, do_vbitclri) -TRANS(vbitclri_h, LSX, gvec_vv_i, MO_16, do_vbitclri) -TRANS(vbitclri_w, LSX, gvec_vv_i, MO_32, do_vbitclri) -TRANS(vbitclri_d, LSX, gvec_vv_i, MO_64, do_vbitclri) -TRANS(xvbitclri_b, LASX, gvec_xx_i, MO_8, do_vbitclri) -TRANS(xvbitclri_h, LASX, gvec_xx_i, MO_16, do_vbitclri) -TRANS(xvbitclri_w, LASX, gvec_xx_i, MO_32, do_vbitclri) -TRANS(xvbitclri_d, LASX, gvec_xx_i, MO_64, do_vbitclri) - -static void do_vbitset(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, - uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_shlv_vec, 0 - }; - static const GVecGen3 op[4] = { - { - .fniv = gen_vbitset, - .fno = gen_helper_vbitset_b, - .opt_opc = vecop_list, - .vece = MO_8 - }, - { - .fniv = gen_vbitset, - .fno = gen_helper_vbitset_h, - .opt_opc = 
vecop_list, - .vece = MO_16 - }, - { - .fniv = gen_vbitset, - .fno = gen_helper_vbitset_w, - .opt_opc = vecop_list, - .vece = MO_32 - }, - { - .fniv = gen_vbitset, - .fno = gen_helper_vbitset_d, - .opt_opc = vecop_list, - .vece = MO_64 - }, - }; - - tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); -} - -TRANS(vbitset_b, LSX, gvec_vvv, MO_8, do_vbitset) -TRANS(vbitset_h, LSX, gvec_vvv, MO_16, do_vbitset) -TRANS(vbitset_w, LSX, gvec_vvv, MO_32, do_vbitset) -TRANS(vbitset_d, LSX, gvec_vvv, MO_64, do_vbitset) -TRANS(xvbitset_b, LASX, gvec_xxx, MO_8, do_vbitset) -TRANS(xvbitset_h, LASX, gvec_xxx, MO_16, do_vbitset) -TRANS(xvbitset_w, LASX, gvec_xxx, MO_32, do_vbitset) -TRANS(xvbitset_d, LASX, gvec_xxx, MO_64, do_vbitset) - -static void do_vbitseti(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, - int64_t imm, uint32_t oprsz, uint32_t maxsz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_shli_vec, 0 - }; - static const GVecGen2i op[4] = { - { - .fniv = gen_vbitseti, - .fnoi = gen_helper_vbitseti_b, - .opt_opc = vecop_list, - .vece = MO_8 - }, - { - .fniv = gen_vbitseti, - .fnoi = gen_helper_vbitseti_h, - .opt_opc = vecop_list, - .vece = MO_16 - }, - { - .fniv = gen_vbitseti, - .fnoi = gen_helper_vbitseti_w, - .opt_opc = vecop_list, - .vece = MO_32 - }, - { - .fniv = gen_vbitseti, - .fnoi = gen_helper_vbitseti_d, - .opt_opc = vecop_list, - .vece = MO_64 - }, - }; - - tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]); -} - -TRANS(vbitseti_b, LSX, gvec_vv_i, MO_8, do_vbitseti) -TRANS(vbitseti_h, LSX, gvec_vv_i, MO_16, do_vbitseti) -TRANS(vbitseti_w, LSX, gvec_vv_i, MO_32, do_vbitseti) -TRANS(vbitseti_d, LSX, gvec_vv_i, MO_64, do_vbitseti) -TRANS(xvbitseti_b, LASX, gvec_xx_i, MO_8, do_vbitseti) -TRANS(xvbitseti_h, LASX, gvec_xx_i, MO_16, do_vbitseti) -TRANS(xvbitseti_w, LASX, gvec_xx_i, MO_32, do_vbitseti) -TRANS(xvbitseti_d, LASX, gvec_xx_i, MO_64, do_vbitseti) - -static void do_vbitrev(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, - uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_shlv_vec, 0 - }; - static const GVecGen3 op[4] = { - { - .fniv = gen_vbitrev, - .fno = gen_helper_vbitrev_b, - .opt_opc = vecop_list, - .vece = MO_8 - }, - { - .fniv = gen_vbitrev, - .fno = gen_helper_vbitrev_h, - .opt_opc = vecop_list, - .vece = MO_16 - }, - { - .fniv = gen_vbitrev, - .fno = gen_helper_vbitrev_w, - .opt_opc = vecop_list, - .vece = MO_32 - }, - { - .fniv = gen_vbitrev, - .fno = gen_helper_vbitrev_d, - .opt_opc = vecop_list, - .vece = MO_64 - }, - }; - - tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); -} - -TRANS(vbitrev_b, LSX, gvec_vvv, MO_8, do_vbitrev) -TRANS(vbitrev_h, LSX, gvec_vvv, MO_16, do_vbitrev) -TRANS(vbitrev_w, LSX, gvec_vvv, MO_32, do_vbitrev) -TRANS(vbitrev_d, LSX, gvec_vvv, MO_64, do_vbitrev) -TRANS(xvbitrev_b, LASX, gvec_xxx, MO_8, do_vbitrev) -TRANS(xvbitrev_h, LASX, gvec_xxx, MO_16, do_vbitrev) -TRANS(xvbitrev_w, LASX, gvec_xxx, MO_32, do_vbitrev) -TRANS(xvbitrev_d, LASX, gvec_xxx, MO_64, do_vbitrev) - -static void do_vbitrevi(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, - int64_t imm, uint32_t oprsz, uint32_t maxsz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_shli_vec, 0 - }; - static const GVecGen2i op[4] = { - { - .fniv = gen_vbitrevi, - .fnoi = gen_helper_vbitrevi_b, - .opt_opc = vecop_list, - .vece = MO_8 - }, - { - .fniv = gen_vbitrevi, - .fnoi = gen_helper_vbitrevi_h, - .opt_opc = vecop_list, - .vece = MO_16 - }, - { - .fniv = gen_vbitrevi, - .fnoi = 
gen_helper_vbitrevi_w, - .opt_opc = vecop_list, - .vece = MO_32 - }, - { - .fniv = gen_vbitrevi, - .fnoi = gen_helper_vbitrevi_d, - .opt_opc = vecop_list, - .vece = MO_64 - }, - }; - - tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]); -} - -TRANS(vbitrevi_b, LSX, gvec_vv_i, MO_8, do_vbitrevi) -TRANS(vbitrevi_h, LSX, gvec_vv_i, MO_16, do_vbitrevi) -TRANS(vbitrevi_w, LSX, gvec_vv_i, MO_32, do_vbitrevi) -TRANS(vbitrevi_d, LSX, gvec_vv_i, MO_64, do_vbitrevi) -TRANS(xvbitrevi_b, LASX, gvec_xx_i, MO_8, do_vbitrevi) -TRANS(xvbitrevi_h, LASX, gvec_xx_i, MO_16, do_vbitrevi) -TRANS(xvbitrevi_w, LASX, gvec_xx_i, MO_32, do_vbitrevi) -TRANS(xvbitrevi_d, LASX, gvec_xx_i, MO_64, do_vbitrevi) - -TRANS(vfrstp_b, LSX, gen_vvv, gen_helper_vfrstp_b) -TRANS(vfrstp_h, LSX, gen_vvv, gen_helper_vfrstp_h) -TRANS(vfrstpi_b, LSX, gen_vv_i, gen_helper_vfrstpi_b) -TRANS(vfrstpi_h, LSX, gen_vv_i, gen_helper_vfrstpi_h) -TRANS(xvfrstp_b, LASX, gen_xxx, gen_helper_vfrstp_b) -TRANS(xvfrstp_h, LASX, gen_xxx, gen_helper_vfrstp_h) -TRANS(xvfrstpi_b, LASX, gen_xx_i, gen_helper_vfrstpi_b) -TRANS(xvfrstpi_h, LASX, gen_xx_i, gen_helper_vfrstpi_h) - -TRANS(vfadd_s, LSX, gen_vvv_ptr, gen_helper_vfadd_s) -TRANS(vfadd_d, LSX, gen_vvv_ptr, gen_helper_vfadd_d) -TRANS(vfsub_s, LSX, gen_vvv_ptr, gen_helper_vfsub_s) -TRANS(vfsub_d, LSX, gen_vvv_ptr, gen_helper_vfsub_d) -TRANS(vfmul_s, LSX, gen_vvv_ptr, gen_helper_vfmul_s) -TRANS(vfmul_d, LSX, gen_vvv_ptr, gen_helper_vfmul_d) -TRANS(vfdiv_s, LSX, gen_vvv_ptr, gen_helper_vfdiv_s) -TRANS(vfdiv_d, LSX, gen_vvv_ptr, gen_helper_vfdiv_d) -TRANS(xvfadd_s, LASX, gen_xxx_ptr, gen_helper_vfadd_s) -TRANS(xvfadd_d, LASX, gen_xxx_ptr, gen_helper_vfadd_d) -TRANS(xvfsub_s, LASX, gen_xxx_ptr, gen_helper_vfsub_s) -TRANS(xvfsub_d, LASX, gen_xxx_ptr, gen_helper_vfsub_d) -TRANS(xvfmul_s, LASX, gen_xxx_ptr, gen_helper_vfmul_s) -TRANS(xvfmul_d, LASX, gen_xxx_ptr, gen_helper_vfmul_d) -TRANS(xvfdiv_s, LASX, gen_xxx_ptr, gen_helper_vfdiv_s) -TRANS(xvfdiv_d, LASX, gen_xxx_ptr, gen_helper_vfdiv_d) - -TRANS(vfmadd_s, LSX, gen_vvvv_ptr, gen_helper_vfmadd_s) -TRANS(vfmadd_d, LSX, gen_vvvv_ptr, gen_helper_vfmadd_d) -TRANS(vfmsub_s, LSX, gen_vvvv_ptr, gen_helper_vfmsub_s) -TRANS(vfmsub_d, LSX, gen_vvvv_ptr, gen_helper_vfmsub_d) -TRANS(vfnmadd_s, LSX, gen_vvvv_ptr, gen_helper_vfnmadd_s) -TRANS(vfnmadd_d, LSX, gen_vvvv_ptr, gen_helper_vfnmadd_d) -TRANS(vfnmsub_s, LSX, gen_vvvv_ptr, gen_helper_vfnmsub_s) -TRANS(vfnmsub_d, LSX, gen_vvvv_ptr, gen_helper_vfnmsub_d) -TRANS(xvfmadd_s, LASX, gen_xxxx_ptr, gen_helper_vfmadd_s) -TRANS(xvfmadd_d, LASX, gen_xxxx_ptr, gen_helper_vfmadd_d) -TRANS(xvfmsub_s, LASX, gen_xxxx_ptr, gen_helper_vfmsub_s) -TRANS(xvfmsub_d, LASX, gen_xxxx_ptr, gen_helper_vfmsub_d) -TRANS(xvfnmadd_s, LASX, gen_xxxx_ptr, gen_helper_vfnmadd_s) -TRANS(xvfnmadd_d, LASX, gen_xxxx_ptr, gen_helper_vfnmadd_d) -TRANS(xvfnmsub_s, LASX, gen_xxxx_ptr, gen_helper_vfnmsub_s) -TRANS(xvfnmsub_d, LASX, gen_xxxx_ptr, gen_helper_vfnmsub_d) - -TRANS(vfmax_s, LSX, gen_vvv_ptr, gen_helper_vfmax_s) -TRANS(vfmax_d, LSX, gen_vvv_ptr, gen_helper_vfmax_d) -TRANS(vfmin_s, LSX, gen_vvv_ptr, gen_helper_vfmin_s) -TRANS(vfmin_d, LSX, gen_vvv_ptr, gen_helper_vfmin_d) -TRANS(xvfmax_s, LASX, gen_xxx_ptr, gen_helper_vfmax_s) -TRANS(xvfmax_d, LASX, gen_xxx_ptr, gen_helper_vfmax_d) -TRANS(xvfmin_s, LASX, gen_xxx_ptr, gen_helper_vfmin_s) -TRANS(xvfmin_d, LASX, gen_xxx_ptr, gen_helper_vfmin_d) - -TRANS(vfmaxa_s, LSX, gen_vvv_ptr, gen_helper_vfmaxa_s) -TRANS(vfmaxa_d, LSX, gen_vvv_ptr, gen_helper_vfmaxa_d) -TRANS(vfmina_s, LSX, 
gen_vvv_ptr, gen_helper_vfmina_s) -TRANS(vfmina_d, LSX, gen_vvv_ptr, gen_helper_vfmina_d) -TRANS(xvfmaxa_s, LASX, gen_xxx_ptr, gen_helper_vfmaxa_s) -TRANS(xvfmaxa_d, LASX, gen_xxx_ptr, gen_helper_vfmaxa_d) -TRANS(xvfmina_s, LASX, gen_xxx_ptr, gen_helper_vfmina_s) -TRANS(xvfmina_d, LASX, gen_xxx_ptr, gen_helper_vfmina_d) - -TRANS(vflogb_s, LSX, gen_vv_ptr, gen_helper_vflogb_s) -TRANS(vflogb_d, LSX, gen_vv_ptr, gen_helper_vflogb_d) -TRANS(xvflogb_s, LASX, gen_xx_ptr, gen_helper_vflogb_s) -TRANS(xvflogb_d, LASX, gen_xx_ptr, gen_helper_vflogb_d) - -TRANS(vfclass_s, LSX, gen_vv_ptr, gen_helper_vfclass_s) -TRANS(vfclass_d, LSX, gen_vv_ptr, gen_helper_vfclass_d) -TRANS(xvfclass_s, LASX, gen_xx_ptr, gen_helper_vfclass_s) -TRANS(xvfclass_d, LASX, gen_xx_ptr, gen_helper_vfclass_d) - -TRANS(vfsqrt_s, LSX, gen_vv_ptr, gen_helper_vfsqrt_s) -TRANS(vfsqrt_d, LSX, gen_vv_ptr, gen_helper_vfsqrt_d) -TRANS(vfrecip_s, LSX, gen_vv_ptr, gen_helper_vfrecip_s) -TRANS(vfrecip_d, LSX, gen_vv_ptr, gen_helper_vfrecip_d) -TRANS(vfrsqrt_s, LSX, gen_vv_ptr, gen_helper_vfrsqrt_s) -TRANS(vfrsqrt_d, LSX, gen_vv_ptr, gen_helper_vfrsqrt_d) -TRANS(xvfsqrt_s, LASX, gen_xx_ptr, gen_helper_vfsqrt_s) -TRANS(xvfsqrt_d, LASX, gen_xx_ptr, gen_helper_vfsqrt_d) -TRANS(xvfrecip_s, LASX, gen_xx_ptr, gen_helper_vfrecip_s) -TRANS(xvfrecip_d, LASX, gen_xx_ptr, gen_helper_vfrecip_d) -TRANS(xvfrsqrt_s, LASX, gen_xx_ptr, gen_helper_vfrsqrt_s) -TRANS(xvfrsqrt_d, LASX, gen_xx_ptr, gen_helper_vfrsqrt_d) - -TRANS(vfcvtl_s_h, LSX, gen_vv_ptr, gen_helper_vfcvtl_s_h) -TRANS(vfcvth_s_h, LSX, gen_vv_ptr, gen_helper_vfcvth_s_h) -TRANS(vfcvtl_d_s, LSX, gen_vv_ptr, gen_helper_vfcvtl_d_s) -TRANS(vfcvth_d_s, LSX, gen_vv_ptr, gen_helper_vfcvth_d_s) -TRANS(vfcvt_h_s, LSX, gen_vvv_ptr, gen_helper_vfcvt_h_s) -TRANS(vfcvt_s_d, LSX, gen_vvv_ptr, gen_helper_vfcvt_s_d) -TRANS(xvfcvtl_s_h, LASX, gen_xx_ptr, gen_helper_vfcvtl_s_h) -TRANS(xvfcvth_s_h, LASX, gen_xx_ptr, gen_helper_vfcvth_s_h) -TRANS(xvfcvtl_d_s, LASX, gen_xx_ptr, gen_helper_vfcvtl_d_s) -TRANS(xvfcvth_d_s, LASX, gen_xx_ptr, gen_helper_vfcvth_d_s) -TRANS(xvfcvt_h_s, LASX, gen_xxx_ptr, gen_helper_vfcvt_h_s) -TRANS(xvfcvt_s_d, LASX, gen_xxx_ptr, gen_helper_vfcvt_s_d) - -TRANS(vfrintrne_s, LSX, gen_vv_ptr, gen_helper_vfrintrne_s) -TRANS(vfrintrne_d, LSX, gen_vv_ptr, gen_helper_vfrintrne_d) -TRANS(vfrintrz_s, LSX, gen_vv_ptr, gen_helper_vfrintrz_s) -TRANS(vfrintrz_d, LSX, gen_vv_ptr, gen_helper_vfrintrz_d) -TRANS(vfrintrp_s, LSX, gen_vv_ptr, gen_helper_vfrintrp_s) -TRANS(vfrintrp_d, LSX, gen_vv_ptr, gen_helper_vfrintrp_d) -TRANS(vfrintrm_s, LSX, gen_vv_ptr, gen_helper_vfrintrm_s) -TRANS(vfrintrm_d, LSX, gen_vv_ptr, gen_helper_vfrintrm_d) -TRANS(vfrint_s, LSX, gen_vv_ptr, gen_helper_vfrint_s) -TRANS(vfrint_d, LSX, gen_vv_ptr, gen_helper_vfrint_d) -TRANS(xvfrintrne_s, LASX, gen_xx_ptr, gen_helper_vfrintrne_s) -TRANS(xvfrintrne_d, LASX, gen_xx_ptr, gen_helper_vfrintrne_d) -TRANS(xvfrintrz_s, LASX, gen_xx_ptr, gen_helper_vfrintrz_s) -TRANS(xvfrintrz_d, LASX, gen_xx_ptr, gen_helper_vfrintrz_d) -TRANS(xvfrintrp_s, LASX, gen_xx_ptr, gen_helper_vfrintrp_s) -TRANS(xvfrintrp_d, LASX, gen_xx_ptr, gen_helper_vfrintrp_d) -TRANS(xvfrintrm_s, LASX, gen_xx_ptr, gen_helper_vfrintrm_s) -TRANS(xvfrintrm_d, LASX, gen_xx_ptr, gen_helper_vfrintrm_d) -TRANS(xvfrint_s, LASX, gen_xx_ptr, gen_helper_vfrint_s) -TRANS(xvfrint_d, LASX, gen_xx_ptr, gen_helper_vfrint_d) - -TRANS(vftintrne_w_s, LSX, gen_vv_ptr, gen_helper_vftintrne_w_s) -TRANS(vftintrne_l_d, LSX, gen_vv_ptr, gen_helper_vftintrne_l_d) -TRANS(vftintrz_w_s, LSX, gen_vv_ptr, 
gen_helper_vftintrz_w_s) -TRANS(vftintrz_l_d, LSX, gen_vv_ptr, gen_helper_vftintrz_l_d) -TRANS(vftintrp_w_s, LSX, gen_vv_ptr, gen_helper_vftintrp_w_s) -TRANS(vftintrp_l_d, LSX, gen_vv_ptr, gen_helper_vftintrp_l_d) -TRANS(vftintrm_w_s, LSX, gen_vv_ptr, gen_helper_vftintrm_w_s) -TRANS(vftintrm_l_d, LSX, gen_vv_ptr, gen_helper_vftintrm_l_d) -TRANS(vftint_w_s, LSX, gen_vv_ptr, gen_helper_vftint_w_s) -TRANS(vftint_l_d, LSX, gen_vv_ptr, gen_helper_vftint_l_d) -TRANS(vftintrz_wu_s, LSX, gen_vv_ptr, gen_helper_vftintrz_wu_s) -TRANS(vftintrz_lu_d, LSX, gen_vv_ptr, gen_helper_vftintrz_lu_d) -TRANS(vftint_wu_s, LSX, gen_vv_ptr, gen_helper_vftint_wu_s) -TRANS(vftint_lu_d, LSX, gen_vv_ptr, gen_helper_vftint_lu_d) -TRANS(vftintrne_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrne_w_d) -TRANS(vftintrz_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrz_w_d) -TRANS(vftintrp_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrp_w_d) -TRANS(vftintrm_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrm_w_d) -TRANS(vftint_w_d, LSX, gen_vvv_ptr, gen_helper_vftint_w_d) -TRANS(vftintrnel_l_s, LSX, gen_vv_ptr, gen_helper_vftintrnel_l_s) -TRANS(vftintrneh_l_s, LSX, gen_vv_ptr, gen_helper_vftintrneh_l_s) -TRANS(vftintrzl_l_s, LSX, gen_vv_ptr, gen_helper_vftintrzl_l_s) -TRANS(vftintrzh_l_s, LSX, gen_vv_ptr, gen_helper_vftintrzh_l_s) -TRANS(vftintrpl_l_s, LSX, gen_vv_ptr, gen_helper_vftintrpl_l_s) -TRANS(vftintrph_l_s, LSX, gen_vv_ptr, gen_helper_vftintrph_l_s) -TRANS(vftintrml_l_s, LSX, gen_vv_ptr, gen_helper_vftintrml_l_s) -TRANS(vftintrmh_l_s, LSX, gen_vv_ptr, gen_helper_vftintrmh_l_s) -TRANS(vftintl_l_s, LSX, gen_vv_ptr, gen_helper_vftintl_l_s) -TRANS(vftinth_l_s, LSX, gen_vv_ptr, gen_helper_vftinth_l_s) -TRANS(xvftintrne_w_s, LASX, gen_xx_ptr, gen_helper_vftintrne_w_s) -TRANS(xvftintrne_l_d, LASX, gen_xx_ptr, gen_helper_vftintrne_l_d) -TRANS(xvftintrz_w_s, LASX, gen_xx_ptr, gen_helper_vftintrz_w_s) -TRANS(xvftintrz_l_d, LASX, gen_xx_ptr, gen_helper_vftintrz_l_d) -TRANS(xvftintrp_w_s, LASX, gen_xx_ptr, gen_helper_vftintrp_w_s) -TRANS(xvftintrp_l_d, LASX, gen_xx_ptr, gen_helper_vftintrp_l_d) -TRANS(xvftintrm_w_s, LASX, gen_xx_ptr, gen_helper_vftintrm_w_s) -TRANS(xvftintrm_l_d, LASX, gen_xx_ptr, gen_helper_vftintrm_l_d) -TRANS(xvftint_w_s, LASX, gen_xx_ptr, gen_helper_vftint_w_s) -TRANS(xvftint_l_d, LASX, gen_xx_ptr, gen_helper_vftint_l_d) -TRANS(xvftintrz_wu_s, LASX, gen_xx_ptr, gen_helper_vftintrz_wu_s) -TRANS(xvftintrz_lu_d, LASX, gen_xx_ptr, gen_helper_vftintrz_lu_d) -TRANS(xvftint_wu_s, LASX, gen_xx_ptr, gen_helper_vftint_wu_s) -TRANS(xvftint_lu_d, LASX, gen_xx_ptr, gen_helper_vftint_lu_d) -TRANS(xvftintrne_w_d, LASX, gen_xxx_ptr, gen_helper_vftintrne_w_d) -TRANS(xvftintrz_w_d, LASX, gen_xxx_ptr, gen_helper_vftintrz_w_d) -TRANS(xvftintrp_w_d, LASX, gen_xxx_ptr, gen_helper_vftintrp_w_d) -TRANS(xvftintrm_w_d, LASX, gen_xxx_ptr, gen_helper_vftintrm_w_d) -TRANS(xvftint_w_d, LASX, gen_xxx_ptr, gen_helper_vftint_w_d) -TRANS(xvftintrnel_l_s, LASX, gen_xx_ptr, gen_helper_vftintrnel_l_s) -TRANS(xvftintrneh_l_s, LASX, gen_xx_ptr, gen_helper_vftintrneh_l_s) -TRANS(xvftintrzl_l_s, LASX, gen_xx_ptr, gen_helper_vftintrzl_l_s) -TRANS(xvftintrzh_l_s, LASX, gen_xx_ptr, gen_helper_vftintrzh_l_s) -TRANS(xvftintrpl_l_s, LASX, gen_xx_ptr, gen_helper_vftintrpl_l_s) -TRANS(xvftintrph_l_s, LASX, gen_xx_ptr, gen_helper_vftintrph_l_s) -TRANS(xvftintrml_l_s, LASX, gen_xx_ptr, gen_helper_vftintrml_l_s) -TRANS(xvftintrmh_l_s, LASX, gen_xx_ptr, gen_helper_vftintrmh_l_s) -TRANS(xvftintl_l_s, LASX, gen_xx_ptr, gen_helper_vftintl_l_s) -TRANS(xvftinth_l_s, LASX, gen_xx_ptr, 
gen_helper_vftinth_l_s) - -TRANS(vffint_s_w, LSX, gen_vv_ptr, gen_helper_vffint_s_w) -TRANS(vffint_d_l, LSX, gen_vv_ptr, gen_helper_vffint_d_l) -TRANS(vffint_s_wu, LSX, gen_vv_ptr, gen_helper_vffint_s_wu) -TRANS(vffint_d_lu, LSX, gen_vv_ptr, gen_helper_vffint_d_lu) -TRANS(vffintl_d_w, LSX, gen_vv_ptr, gen_helper_vffintl_d_w) -TRANS(vffinth_d_w, LSX, gen_vv_ptr, gen_helper_vffinth_d_w) -TRANS(vffint_s_l, LSX, gen_vvv_ptr, gen_helper_vffint_s_l) -TRANS(xvffint_s_w, LASX, gen_xx_ptr, gen_helper_vffint_s_w) -TRANS(xvffint_d_l, LASX, gen_xx_ptr, gen_helper_vffint_d_l) -TRANS(xvffint_s_wu, LASX, gen_xx_ptr, gen_helper_vffint_s_wu) -TRANS(xvffint_d_lu, LASX, gen_xx_ptr, gen_helper_vffint_d_lu) -TRANS(xvffintl_d_w, LASX, gen_xx_ptr, gen_helper_vffintl_d_w) -TRANS(xvffinth_d_w, LASX, gen_xx_ptr, gen_helper_vffinth_d_w) -TRANS(xvffint_s_l, LASX, gen_xxx_ptr, gen_helper_vffint_s_l) - -static bool do_cmp_vl(DisasContext *ctx, arg_vvv *a, - uint32_t oprsz, MemOp mop, TCGCond cond) -{ - uint32_t vd_ofs, vj_ofs, vk_ofs; - - if (!check_vec(ctx, oprsz)) { - return true; - } - - vd_ofs = vec_full_offset(a->vd); - vj_ofs = vec_full_offset(a->vj); - vk_ofs = vec_full_offset(a->vk); - - tcg_gen_gvec_cmp(cond, mop, vd_ofs, vj_ofs, vk_ofs, oprsz, ctx->vl / 8); - return true; -} - -static bool do_cmp(DisasContext *ctx, arg_vvv *a, - MemOp mop, TCGCond cond) -{ - return do_cmp_vl(ctx, a, 16, mop, cond); -} - -static bool do_xcmp(DisasContext *ctx, arg_vvv *a, - MemOp mop, TCGCond cond) -{ - return do_cmp_vl(ctx, a, 32, mop, cond); -} - -static bool do_cmpi_vl(DisasContext *ctx, arg_vv_i *a, - uint32_t oprsz, MemOp mop, TCGCond cond) -{ - uint32_t vd_ofs, vj_ofs; - - if (!check_vec(ctx, oprsz)) { - return true; - } - - vd_ofs = vec_full_offset(a->vd); - vj_ofs = vec_full_offset(a->vj); - - tcg_gen_gvec_cmpi(cond, mop, vd_ofs, vj_ofs, a->imm, oprsz, ctx->vl / 8); - return true; -} - -static bool do_cmpi(DisasContext *ctx, arg_vv_i *a, - MemOp mop, TCGCond cond) -{ - return do_cmpi_vl(ctx, a, 16, mop, cond); -} - -static bool do_xcmpi(DisasContext *ctx, arg_vv_i *a, - MemOp mop, TCGCond cond) -{ - return do_cmpi_vl(ctx, a, 32, mop, cond); -} - -TRANS(vseq_b, LSX, do_cmp, MO_8, TCG_COND_EQ) -TRANS(vseq_h, LSX, do_cmp, MO_16, TCG_COND_EQ) -TRANS(vseq_w, LSX, do_cmp, MO_32, TCG_COND_EQ) -TRANS(vseq_d, LSX, do_cmp, MO_64, TCG_COND_EQ) -TRANS(vseqi_b, LSX, do_cmpi, MO_8, TCG_COND_EQ) -TRANS(vseqi_h, LSX, do_cmpi, MO_16, TCG_COND_EQ) -TRANS(vseqi_w, LSX, do_cmpi, MO_32, TCG_COND_EQ) -TRANS(vseqi_d, LSX, do_cmpi, MO_64, TCG_COND_EQ) -TRANS(xvseq_b, LASX, do_xcmp, MO_8, TCG_COND_EQ) -TRANS(xvseq_h, LASX, do_xcmp, MO_16, TCG_COND_EQ) -TRANS(xvseq_w, LASX, do_xcmp, MO_32, TCG_COND_EQ) -TRANS(xvseq_d, LASX, do_xcmp, MO_64, TCG_COND_EQ) -TRANS(xvseqi_b, LASX, do_xcmpi, MO_8, TCG_COND_EQ) -TRANS(xvseqi_h, LASX, do_xcmpi, MO_16, TCG_COND_EQ) -TRANS(xvseqi_w, LASX, do_xcmpi, MO_32, TCG_COND_EQ) -TRANS(xvseqi_d, LASX, do_xcmpi, MO_64, TCG_COND_EQ) - -TRANS(vsle_b, LSX, do_cmp, MO_8, TCG_COND_LE) -TRANS(vsle_h, LSX, do_cmp, MO_16, TCG_COND_LE) -TRANS(vsle_w, LSX, do_cmp, MO_32, TCG_COND_LE) -TRANS(vsle_d, LSX, do_cmp, MO_64, TCG_COND_LE) -TRANS(vslei_b, LSX, do_cmpi, MO_8, TCG_COND_LE) -TRANS(vslei_h, LSX, do_cmpi, MO_16, TCG_COND_LE) -TRANS(vslei_w, LSX, do_cmpi, MO_32, TCG_COND_LE) -TRANS(vslei_d, LSX, do_cmpi, MO_64, TCG_COND_LE) -TRANS(vsle_bu, LSX, do_cmp, MO_8, TCG_COND_LEU) -TRANS(vsle_hu, LSX, do_cmp, MO_16, TCG_COND_LEU) -TRANS(vsle_wu, LSX, do_cmp, MO_32, TCG_COND_LEU) -TRANS(vsle_du, LSX, do_cmp, MO_64, TCG_COND_LEU) 
-TRANS(vslei_bu, LSX, do_cmpi, MO_8, TCG_COND_LEU) -TRANS(vslei_hu, LSX, do_cmpi, MO_16, TCG_COND_LEU) -TRANS(vslei_wu, LSX, do_cmpi, MO_32, TCG_COND_LEU) -TRANS(vslei_du, LSX, do_cmpi, MO_64, TCG_COND_LEU) -TRANS(xvsle_b, LASX, do_xcmp, MO_8, TCG_COND_LE) -TRANS(xvsle_h, LASX, do_xcmp, MO_16, TCG_COND_LE) -TRANS(xvsle_w, LASX, do_xcmp, MO_32, TCG_COND_LE) -TRANS(xvsle_d, LASX, do_xcmp, MO_64, TCG_COND_LE) -TRANS(xvslei_b, LASX, do_xcmpi, MO_8, TCG_COND_LE) -TRANS(xvslei_h, LASX, do_xcmpi, MO_16, TCG_COND_LE) -TRANS(xvslei_w, LASX, do_xcmpi, MO_32, TCG_COND_LE) -TRANS(xvslei_d, LASX, do_xcmpi, MO_64, TCG_COND_LE) -TRANS(xvsle_bu, LASX, do_xcmp, MO_8, TCG_COND_LEU) -TRANS(xvsle_hu, LASX, do_xcmp, MO_16, TCG_COND_LEU) -TRANS(xvsle_wu, LASX, do_xcmp, MO_32, TCG_COND_LEU) -TRANS(xvsle_du, LASX, do_xcmp, MO_64, TCG_COND_LEU) -TRANS(xvslei_bu, LASX, do_xcmpi, MO_8, TCG_COND_LEU) -TRANS(xvslei_hu, LASX, do_xcmpi, MO_16, TCG_COND_LEU) -TRANS(xvslei_wu, LASX, do_xcmpi, MO_32, TCG_COND_LEU) -TRANS(xvslei_du, LASX, do_xcmpi, MO_64, TCG_COND_LEU) - -TRANS(vslt_b, LSX, do_cmp, MO_8, TCG_COND_LT) -TRANS(vslt_h, LSX, do_cmp, MO_16, TCG_COND_LT) -TRANS(vslt_w, LSX, do_cmp, MO_32, TCG_COND_LT) -TRANS(vslt_d, LSX, do_cmp, MO_64, TCG_COND_LT) -TRANS(vslti_b, LSX, do_cmpi, MO_8, TCG_COND_LT) -TRANS(vslti_h, LSX, do_cmpi, MO_16, TCG_COND_LT) -TRANS(vslti_w, LSX, do_cmpi, MO_32, TCG_COND_LT) -TRANS(vslti_d, LSX, do_cmpi, MO_64, TCG_COND_LT) -TRANS(vslt_bu, LSX, do_cmp, MO_8, TCG_COND_LTU) -TRANS(vslt_hu, LSX, do_cmp, MO_16, TCG_COND_LTU) -TRANS(vslt_wu, LSX, do_cmp, MO_32, TCG_COND_LTU) -TRANS(vslt_du, LSX, do_cmp, MO_64, TCG_COND_LTU) -TRANS(vslti_bu, LSX, do_cmpi, MO_8, TCG_COND_LTU) -TRANS(vslti_hu, LSX, do_cmpi, MO_16, TCG_COND_LTU) -TRANS(vslti_wu, LSX, do_cmpi, MO_32, TCG_COND_LTU) -TRANS(vslti_du, LSX, do_cmpi, MO_64, TCG_COND_LTU) -TRANS(xvslt_b, LASX, do_xcmp, MO_8, TCG_COND_LT) -TRANS(xvslt_h, LASX, do_xcmp, MO_16, TCG_COND_LT) -TRANS(xvslt_w, LASX, do_xcmp, MO_32, TCG_COND_LT) -TRANS(xvslt_d, LASX, do_xcmp, MO_64, TCG_COND_LT) -TRANS(xvslti_b, LASX, do_xcmpi, MO_8, TCG_COND_LT) -TRANS(xvslti_h, LASX, do_xcmpi, MO_16, TCG_COND_LT) -TRANS(xvslti_w, LASX, do_xcmpi, MO_32, TCG_COND_LT) -TRANS(xvslti_d, LASX, do_xcmpi, MO_64, TCG_COND_LT) -TRANS(xvslt_bu, LASX, do_xcmp, MO_8, TCG_COND_LTU) -TRANS(xvslt_hu, LASX, do_xcmp, MO_16, TCG_COND_LTU) -TRANS(xvslt_wu, LASX, do_xcmp, MO_32, TCG_COND_LTU) -TRANS(xvslt_du, LASX, do_xcmp, MO_64, TCG_COND_LTU) -TRANS(xvslti_bu, LASX, do_xcmpi, MO_8, TCG_COND_LTU) -TRANS(xvslti_hu, LASX, do_xcmpi, MO_16, TCG_COND_LTU) -TRANS(xvslti_wu, LASX, do_xcmpi, MO_32, TCG_COND_LTU) -TRANS(xvslti_du, LASX, do_xcmpi, MO_64, TCG_COND_LTU) - -static bool do_vfcmp_cond_s(DisasContext *ctx, arg_vvv_fcond *a, uint32_t sz) -{ - uint32_t flags; - void (*fn)(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32); - TCGv_i32 vd = tcg_constant_i32(a->vd); - TCGv_i32 vj = tcg_constant_i32(a->vj); - TCGv_i32 vk = tcg_constant_i32(a->vk); - TCGv_i32 oprsz = tcg_constant_i32(sz); - - if (!check_vec(ctx, sz)) { - return true; - } - - fn = (a->fcond & 1 ? 
gen_helper_vfcmp_s_s : gen_helper_vfcmp_c_s); - flags = get_fcmp_flags(a->fcond >> 1); - fn(tcg_env, oprsz, vd, vj, vk, tcg_constant_i32(flags)); - - return true; -} - -static bool do_vfcmp_cond_d(DisasContext *ctx, arg_vvv_fcond *a, uint32_t sz) -{ - uint32_t flags; - void (*fn)(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32); - TCGv_i32 vd = tcg_constant_i32(a->vd); - TCGv_i32 vj = tcg_constant_i32(a->vj); - TCGv_i32 vk = tcg_constant_i32(a->vk); - TCGv_i32 oprsz = tcg_constant_i32(sz); - - if (!check_vec(ctx, sz)) { - return true; - } - - fn = (a->fcond & 1 ? gen_helper_vfcmp_s_d : gen_helper_vfcmp_c_d); - flags = get_fcmp_flags(a->fcond >> 1); - fn(tcg_env, oprsz, vd, vj, vk, tcg_constant_i32(flags)); - - return true; -} - -TRANS(vfcmp_cond_s, LSX, do_vfcmp_cond_s, 16) -TRANS(vfcmp_cond_d, LSX, do_vfcmp_cond_d, 16) -TRANS(xvfcmp_cond_s, LASX, do_vfcmp_cond_s, 32) -TRANS(xvfcmp_cond_d, LASX, do_vfcmp_cond_d, 32) - -static bool do_vbitsel_v(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz) -{ - if (!check_vec(ctx, oprsz)) { - return true; - } - - tcg_gen_gvec_bitsel(MO_64, vec_full_offset(a->vd), vec_full_offset(a->va), - vec_full_offset(a->vk), vec_full_offset(a->vj), - oprsz, ctx->vl / 8); - return true; -} - -TRANS(vbitsel_v, LSX, do_vbitsel_v, 16) -TRANS(xvbitsel_v, LASX, do_vbitsel_v, 32) - -static void gen_vbitseli(unsigned vece, TCGv_vec a, TCGv_vec b, int64_t imm) -{ - tcg_gen_bitsel_vec(vece, a, a, tcg_constant_vec_matching(a, vece, imm), b); -} - -static bool do_vbitseli_b(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz) -{ - static const GVecGen2i op = { - .fniv = gen_vbitseli, - .fnoi = gen_helper_vbitseli_b, - .vece = MO_8, - .load_dest = true - }; - - if (!check_vec(ctx, oprsz)) { - return true; - } - - tcg_gen_gvec_2i(vec_full_offset(a->vd), vec_full_offset(a->vj), - oprsz, ctx->vl / 8, a->imm , &op); - return true; -} - -TRANS(vbitseli_b, LSX, do_vbitseli_b, 16) -TRANS(xvbitseli_b, LASX, do_vbitseli_b, 32) - -#define VSET(NAME, COND) \ -static bool trans_## NAME (DisasContext *ctx, arg_cv *a) \ -{ \ - TCGv_i64 t1, al, ah; \ - \ - al = tcg_temp_new_i64(); \ - ah = tcg_temp_new_i64(); \ - t1 = tcg_temp_new_i64(); \ - \ - get_vreg64(ah, a->vj, 1); \ - get_vreg64(al, a->vj, 0); \ - \ - if (!avail_LSX(ctx)) { \ - return false; \ - } \ - \ - if (!check_vec(ctx, 16)) { \ - return true; \ - } \ - \ - tcg_gen_or_i64(t1, al, ah); \ - tcg_gen_setcondi_i64(COND, t1, t1, 0); \ - tcg_gen_st8_tl(t1, tcg_env, offsetof(CPULoongArchState, cf[a->cd & 0x7])); \ - \ - return true; \ -} - -VSET(vseteqz_v, TCG_COND_EQ) -VSET(vsetnez_v, TCG_COND_NE) - -TRANS(vsetanyeqz_b, LSX, gen_cv, gen_helper_vsetanyeqz_b) -TRANS(vsetanyeqz_h, LSX, gen_cv, gen_helper_vsetanyeqz_h) -TRANS(vsetanyeqz_w, LSX, gen_cv, gen_helper_vsetanyeqz_w) -TRANS(vsetanyeqz_d, LSX, gen_cv, gen_helper_vsetanyeqz_d) -TRANS(vsetallnez_b, LSX, gen_cv, gen_helper_vsetallnez_b) -TRANS(vsetallnez_h, LSX, gen_cv, gen_helper_vsetallnez_h) -TRANS(vsetallnez_w, LSX, gen_cv, gen_helper_vsetallnez_w) -TRANS(vsetallnez_d, LSX, gen_cv, gen_helper_vsetallnez_d) - -#define XVSET(NAME, COND) \ -static bool trans_## NAME(DisasContext *ctx, arg_cv * a) \ -{ \ - TCGv_i64 t1, t2, d[4]; \ - \ - d[0] = tcg_temp_new_i64(); \ - d[1] = tcg_temp_new_i64(); \ - d[2] = tcg_temp_new_i64(); \ - d[3] = tcg_temp_new_i64(); \ - t1 = tcg_temp_new_i64(); \ - t2 = tcg_temp_new_i64(); \ - \ - get_vreg64(d[0], a->vj, 0); \ - get_vreg64(d[1], a->vj, 1); \ - get_vreg64(d[2], a->vj, 2); \ - get_vreg64(d[3], a->vj, 3); \ - \ - if (!avail_LASX(ctx)) { \ - return 
false; \ - } \ - \ - if (!check_vec(ctx, 32)) { \ - return true; \ - } \ - \ - tcg_gen_or_i64(t1, d[0], d[1]); \ - tcg_gen_or_i64(t2, d[2], d[3]); \ - tcg_gen_or_i64(t1, t2, t1); \ - tcg_gen_setcondi_i64(COND, t1, t1, 0); \ - tcg_gen_st8_tl(t1, tcg_env, offsetof(CPULoongArchState, cf[a->cd & 0x7])); \ - \ - return true; \ -} - -XVSET(xvseteqz_v, TCG_COND_EQ) -XVSET(xvsetnez_v, TCG_COND_NE) - -TRANS(xvsetanyeqz_b, LASX, gen_cx, gen_helper_vsetanyeqz_b) -TRANS(xvsetanyeqz_h, LASX, gen_cx, gen_helper_vsetanyeqz_h) -TRANS(xvsetanyeqz_w, LASX, gen_cx, gen_helper_vsetanyeqz_w) -TRANS(xvsetanyeqz_d, LASX, gen_cx, gen_helper_vsetanyeqz_d) -TRANS(xvsetallnez_b, LASX, gen_cx, gen_helper_vsetallnez_b) -TRANS(xvsetallnez_h, LASX, gen_cx, gen_helper_vsetallnez_h) -TRANS(xvsetallnez_w, LASX, gen_cx, gen_helper_vsetallnez_w) -TRANS(xvsetallnez_d, LASX, gen_cx, gen_helper_vsetallnez_d) - -static bool gen_g2v_vl(DisasContext *ctx, arg_vr_i *a, uint32_t oprsz, MemOp mop, - void (*func)(TCGv, TCGv_ptr, tcg_target_long)) -{ - TCGv src = gpr_src(ctx, a->rj, EXT_NONE); - - if (!check_vec(ctx, oprsz)) { - return true; - } - - func(src, tcg_env, vec_reg_offset(a->vd, a->imm, mop)); - - return true; -} - -static bool gen_g2v(DisasContext *ctx, arg_vr_i *a, MemOp mop, - void (*func)(TCGv, TCGv_ptr, tcg_target_long)) -{ - return gen_g2v_vl(ctx, a, 16, mop, func); -} - -static bool gen_g2x(DisasContext *ctx, arg_vr_i *a, MemOp mop, - void (*func)(TCGv, TCGv_ptr, tcg_target_long)) -{ - return gen_g2v_vl(ctx, a, 32, mop, func); -} - -TRANS(vinsgr2vr_b, LSX, gen_g2v, MO_8, tcg_gen_st8_i64) -TRANS(vinsgr2vr_h, LSX, gen_g2v, MO_16, tcg_gen_st16_i64) -TRANS(vinsgr2vr_w, LSX, gen_g2v, MO_32, tcg_gen_st32_i64) -TRANS(vinsgr2vr_d, LSX, gen_g2v, MO_64, tcg_gen_st_i64) -TRANS(xvinsgr2vr_w, LASX, gen_g2x, MO_32, tcg_gen_st32_i64) -TRANS(xvinsgr2vr_d, LASX, gen_g2x, MO_64, tcg_gen_st_i64) - -static bool gen_v2g_vl(DisasContext *ctx, arg_rv_i *a, uint32_t oprsz, MemOp mop, - void (*func)(TCGv, TCGv_ptr, tcg_target_long)) -{ - TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE); - - if (!check_vec(ctx, oprsz)) { - return true; - } - - func(dst, tcg_env, vec_reg_offset(a->vj, a->imm, mop)); - - return true; -} - -static bool gen_v2g(DisasContext *ctx, arg_rv_i *a, MemOp mop, - void (*func)(TCGv, TCGv_ptr, tcg_target_long)) -{ - return gen_v2g_vl(ctx, a, 16, mop, func); -} - -static bool gen_x2g(DisasContext *ctx, arg_rv_i *a, MemOp mop, - void (*func)(TCGv, TCGv_ptr, tcg_target_long)) -{ - return gen_v2g_vl(ctx, a, 32, mop, func); -} - -TRANS(vpickve2gr_b, LSX, gen_v2g, MO_8, tcg_gen_ld8s_i64) -TRANS(vpickve2gr_h, LSX, gen_v2g, MO_16, tcg_gen_ld16s_i64) -TRANS(vpickve2gr_w, LSX, gen_v2g, MO_32, tcg_gen_ld32s_i64) -TRANS(vpickve2gr_d, LSX, gen_v2g, MO_64, tcg_gen_ld_i64) -TRANS(vpickve2gr_bu, LSX, gen_v2g, MO_8, tcg_gen_ld8u_i64) -TRANS(vpickve2gr_hu, LSX, gen_v2g, MO_16, tcg_gen_ld16u_i64) -TRANS(vpickve2gr_wu, LSX, gen_v2g, MO_32, tcg_gen_ld32u_i64) -TRANS(vpickve2gr_du, LSX, gen_v2g, MO_64, tcg_gen_ld_i64) -TRANS(xvpickve2gr_w, LASX, gen_x2g, MO_32, tcg_gen_ld32s_i64) -TRANS(xvpickve2gr_d, LASX, gen_x2g, MO_64, tcg_gen_ld_i64) -TRANS(xvpickve2gr_wu, LASX, gen_x2g, MO_32, tcg_gen_ld32u_i64) -TRANS(xvpickve2gr_du, LASX, gen_x2g, MO_64, tcg_gen_ld_i64) - -static bool gvec_dup_vl(DisasContext *ctx, arg_vr *a, - uint32_t oprsz, MemOp mop) -{ - TCGv src = gpr_src(ctx, a->rj, EXT_NONE); - - if (!check_vec(ctx, oprsz)) { - return true; - } - - tcg_gen_gvec_dup_i64(mop, vec_full_offset(a->vd), - oprsz, ctx->vl/8, src); - return true; -} - -static 
bool gvec_dup(DisasContext *ctx, arg_vr *a, MemOp mop) -{ - return gvec_dup_vl(ctx, a, 16, mop); -} - -static bool gvec_dupx(DisasContext *ctx, arg_vr *a, MemOp mop) -{ - return gvec_dup_vl(ctx, a, 32, mop); -} - -TRANS(vreplgr2vr_b, LSX, gvec_dup, MO_8) -TRANS(vreplgr2vr_h, LSX, gvec_dup, MO_16) -TRANS(vreplgr2vr_w, LSX, gvec_dup, MO_32) -TRANS(vreplgr2vr_d, LSX, gvec_dup, MO_64) -TRANS(xvreplgr2vr_b, LASX, gvec_dupx, MO_8) -TRANS(xvreplgr2vr_h, LASX, gvec_dupx, MO_16) -TRANS(xvreplgr2vr_w, LASX, gvec_dupx, MO_32) -TRANS(xvreplgr2vr_d, LASX, gvec_dupx, MO_64) - -static bool trans_vreplvei_b(DisasContext *ctx, arg_vv_i *a) -{ - if (!avail_LSX(ctx)) { - return false; - } - - if (!check_vec(ctx, 16)) { - return true; - } - - tcg_gen_gvec_dup_mem(MO_8,vec_full_offset(a->vd), - offsetof(CPULoongArchState, - fpr[a->vj].vreg.B((a->imm))), - 16, ctx->vl/8); - return true; -} - -static bool trans_vreplvei_h(DisasContext *ctx, arg_vv_i *a) -{ - if (!avail_LSX(ctx)) { - return false; - } - - if (!check_vec(ctx, 16)) { - return true; - } - - tcg_gen_gvec_dup_mem(MO_16, vec_full_offset(a->vd), - offsetof(CPULoongArchState, - fpr[a->vj].vreg.H((a->imm))), - 16, ctx->vl/8); - return true; -} -static bool trans_vreplvei_w(DisasContext *ctx, arg_vv_i *a) -{ - if (!avail_LSX(ctx)) { - return false; - } - - if (!check_vec(ctx, 16)) { - return true; - } - - tcg_gen_gvec_dup_mem(MO_32, vec_full_offset(a->vd), - offsetof(CPULoongArchState, - fpr[a->vj].vreg.W((a->imm))), - 16, ctx->vl/8); - return true; -} -static bool trans_vreplvei_d(DisasContext *ctx, arg_vv_i *a) -{ - if (!avail_LSX(ctx)) { - return false; - } - - if (!check_vec(ctx, 16)) { - return true; - } - - tcg_gen_gvec_dup_mem(MO_64, vec_full_offset(a->vd), - offsetof(CPULoongArchState, - fpr[a->vj].vreg.D((a->imm))), - 16, ctx->vl/8); - return true; -} - -static bool gen_vreplve_vl(DisasContext *ctx, arg_vvr *a, - uint32_t oprsz, int vece, int bit, - void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long)) -{ - int i; - TCGv_i64 t0 = tcg_temp_new_i64(); - TCGv_ptr t1 = tcg_temp_new_ptr(); - TCGv_i64 t2 = tcg_temp_new_i64(); - - if (!check_vec(ctx, oprsz)) { - return true; - } - - tcg_gen_andi_i64(t0, gpr_src(ctx, a->rk, EXT_NONE), (LSX_LEN / bit) - 1); - tcg_gen_shli_i64(t0, t0, vece); - if (HOST_BIG_ENDIAN) { - tcg_gen_xori_i64(t0, t0, vece << ((LSX_LEN / bit) - 1)); - } - - tcg_gen_trunc_i64_ptr(t1, t0); - tcg_gen_add_ptr(t1, t1, tcg_env); - - for (i = 0; i < oprsz; i += 16) { - func(t2, t1, vec_full_offset(a->vj) + i); - tcg_gen_gvec_dup_i64(vece, vec_full_offset(a->vd) + i, 16, 16, t2); - } - - return true; -} - -static bool gen_vreplve(DisasContext *ctx, arg_vvr *a, int vece, int bit, - void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long)) -{ - return gen_vreplve_vl(ctx, a, 16, vece, bit, func); -} - -static bool gen_xvreplve(DisasContext *ctx, arg_vvr *a, int vece, int bit, - void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long)) -{ - return gen_vreplve_vl(ctx, a, 32, vece, bit, func); -} - -TRANS(vreplve_b, LSX, gen_vreplve, MO_8, 8, tcg_gen_ld8u_i64) -TRANS(vreplve_h, LSX, gen_vreplve, MO_16, 16, tcg_gen_ld16u_i64) -TRANS(vreplve_w, LSX, gen_vreplve, MO_32, 32, tcg_gen_ld32u_i64) -TRANS(vreplve_d, LSX, gen_vreplve, MO_64, 64, tcg_gen_ld_i64) -TRANS(xvreplve_b, LASX, gen_xvreplve, MO_8, 8, tcg_gen_ld8u_i64) -TRANS(xvreplve_h, LASX, gen_xvreplve, MO_16, 16, tcg_gen_ld16u_i64) -TRANS(xvreplve_w, LASX, gen_xvreplve, MO_32, 32, tcg_gen_ld32u_i64) -TRANS(xvreplve_d, LASX, gen_xvreplve, MO_64, 64, tcg_gen_ld_i64) - -static bool gen_xvrepl128(DisasContext *ctx, 
arg_vv_i *a, MemOp mop) -{ - int i; - - if (!check_vec(ctx, 32)) { - return true; - } - - for (i = 0; i < 32; i += 16) { - tcg_gen_gvec_dup_mem(mop, vec_full_offset(a->vd) + i, - vec_reg_offset(a->vj, a->imm, mop) + i, 16, 16); - - } - return true; -} - -TRANS(xvrepl128vei_b, LASX, gen_xvrepl128, MO_8) -TRANS(xvrepl128vei_h, LASX, gen_xvrepl128, MO_16) -TRANS(xvrepl128vei_w, LASX, gen_xvrepl128, MO_32) -TRANS(xvrepl128vei_d, LASX, gen_xvrepl128, MO_64) - -static bool gen_xvreplve0(DisasContext *ctx, arg_vv *a, MemOp mop) -{ - if (!check_vec(ctx, 32)) { - return true; - } - - tcg_gen_gvec_dup_mem(mop, vec_full_offset(a->vd), - vec_full_offset(a->vj), 32, 32); - return true; -} - -TRANS(xvreplve0_b, LASX, gen_xvreplve0, MO_8) -TRANS(xvreplve0_h, LASX, gen_xvreplve0, MO_16) -TRANS(xvreplve0_w, LASX, gen_xvreplve0, MO_32) -TRANS(xvreplve0_d, LASX, gen_xvreplve0, MO_64) -TRANS(xvreplve0_q, LASX, gen_xvreplve0, MO_128) - -TRANS(xvinsve0_w, LASX, gen_xx_i, gen_helper_xvinsve0_w) -TRANS(xvinsve0_d, LASX, gen_xx_i, gen_helper_xvinsve0_d) - -TRANS(xvpickve_w, LASX, gen_xx_i, gen_helper_xvpickve_w) -TRANS(xvpickve_d, LASX, gen_xx_i, gen_helper_xvpickve_d) - -static bool do_vbsll_v(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz) -{ - int i, ofs; - - if (!check_vec(ctx, oprsz)) { - return true; - } - - for (i = 0; i < oprsz / 16; i++) { - TCGv desthigh = tcg_temp_new_i64(); - TCGv destlow = tcg_temp_new_i64(); - TCGv high = tcg_temp_new_i64(); - TCGv low = tcg_temp_new_i64(); - - get_vreg64(low, a->vj, 2 * i); - - ofs = ((a->imm) & 0xf) * 8; - if (ofs < 64) { - get_vreg64(high, a->vj, 2 * i + 1); - tcg_gen_extract2_i64(desthigh, low, high, 64 - ofs); - tcg_gen_shli_i64(destlow, low, ofs); - } else { - tcg_gen_shli_i64(desthigh, low, ofs - 64); - destlow = tcg_constant_i64(0); - } - set_vreg64(desthigh, a->vd, 2 * i + 1); - set_vreg64(destlow, a->vd, 2 * i); - } - - return true; -} - -static bool do_vbsrl_v(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz) -{ - int i, ofs; - - if (!check_vec(ctx, 32)) { - return true; - } - - for (i = 0; i < oprsz / 16; i++) { - TCGv desthigh = tcg_temp_new_i64(); - TCGv destlow = tcg_temp_new_i64(); - TCGv high = tcg_temp_new_i64(); - TCGv low = tcg_temp_new_i64(); - get_vreg64(high, a->vj, 2 * i + 1); - - ofs = ((a->imm) & 0xf) * 8; - if (ofs < 64) { - get_vreg64(low, a->vj, 2 * i); - tcg_gen_extract2_i64(destlow, low, high, ofs); - tcg_gen_shri_i64(desthigh, high, ofs); - } else { - tcg_gen_shri_i64(destlow, high, ofs - 64); - desthigh = tcg_constant_i64(0); - } - set_vreg64(desthigh, a->vd, 2 * i + 1); - set_vreg64(destlow, a->vd, 2 * i); - } - - return true; -} - -TRANS(vbsll_v, LSX, do_vbsll_v, 16) -TRANS(vbsrl_v, LSX, do_vbsrl_v, 16) -TRANS(xvbsll_v, LASX, do_vbsll_v, 32) -TRANS(xvbsrl_v, LASX, do_vbsrl_v, 32) - -TRANS(vpackev_b, LSX, gen_vvv, gen_helper_vpackev_b) -TRANS(vpackev_h, LSX, gen_vvv, gen_helper_vpackev_h) -TRANS(vpackev_w, LSX, gen_vvv, gen_helper_vpackev_w) -TRANS(vpackev_d, LSX, gen_vvv, gen_helper_vpackev_d) -TRANS(vpackod_b, LSX, gen_vvv, gen_helper_vpackod_b) -TRANS(vpackod_h, LSX, gen_vvv, gen_helper_vpackod_h) -TRANS(vpackod_w, LSX, gen_vvv, gen_helper_vpackod_w) -TRANS(vpackod_d, LSX, gen_vvv, gen_helper_vpackod_d) -TRANS(xvpackev_b, LASX, gen_xxx, gen_helper_vpackev_b) -TRANS(xvpackev_h, LASX, gen_xxx, gen_helper_vpackev_h) -TRANS(xvpackev_w, LASX, gen_xxx, gen_helper_vpackev_w) -TRANS(xvpackev_d, LASX, gen_xxx, gen_helper_vpackev_d) -TRANS(xvpackod_b, LASX, gen_xxx, gen_helper_vpackod_b) -TRANS(xvpackod_h, LASX, gen_xxx, gen_helper_vpackod_h) 
-TRANS(xvpackod_w, LASX, gen_xxx, gen_helper_vpackod_w)
-TRANS(xvpackod_d, LASX, gen_xxx, gen_helper_vpackod_d)
-
-TRANS(vpickev_b, LSX, gen_vvv, gen_helper_vpickev_b)
-TRANS(vpickev_h, LSX, gen_vvv, gen_helper_vpickev_h)
-TRANS(vpickev_w, LSX, gen_vvv, gen_helper_vpickev_w)
-TRANS(vpickev_d, LSX, gen_vvv, gen_helper_vpickev_d)
-TRANS(vpickod_b, LSX, gen_vvv, gen_helper_vpickod_b)
-TRANS(vpickod_h, LSX, gen_vvv, gen_helper_vpickod_h)
-TRANS(vpickod_w, LSX, gen_vvv, gen_helper_vpickod_w)
-TRANS(vpickod_d, LSX, gen_vvv, gen_helper_vpickod_d)
-TRANS(xvpickev_b, LASX, gen_xxx, gen_helper_vpickev_b)
-TRANS(xvpickev_h, LASX, gen_xxx, gen_helper_vpickev_h)
-TRANS(xvpickev_w, LASX, gen_xxx, gen_helper_vpickev_w)
-TRANS(xvpickev_d, LASX, gen_xxx, gen_helper_vpickev_d)
-TRANS(xvpickod_b, LASX, gen_xxx, gen_helper_vpickod_b)
-TRANS(xvpickod_h, LASX, gen_xxx, gen_helper_vpickod_h)
-TRANS(xvpickod_w, LASX, gen_xxx, gen_helper_vpickod_w)
-TRANS(xvpickod_d, LASX, gen_xxx, gen_helper_vpickod_d)
-
-TRANS(vilvl_b, LSX, gen_vvv, gen_helper_vilvl_b)
-TRANS(vilvl_h, LSX, gen_vvv, gen_helper_vilvl_h)
-TRANS(vilvl_w, LSX, gen_vvv, gen_helper_vilvl_w)
-TRANS(vilvl_d, LSX, gen_vvv, gen_helper_vilvl_d)
-TRANS(vilvh_b, LSX, gen_vvv, gen_helper_vilvh_b)
-TRANS(vilvh_h, LSX, gen_vvv, gen_helper_vilvh_h)
-TRANS(vilvh_w, LSX, gen_vvv, gen_helper_vilvh_w)
-TRANS(vilvh_d, LSX, gen_vvv, gen_helper_vilvh_d)
-TRANS(xvilvl_b, LASX, gen_xxx, gen_helper_vilvl_b)
-TRANS(xvilvl_h, LASX, gen_xxx, gen_helper_vilvl_h)
-TRANS(xvilvl_w, LASX, gen_xxx, gen_helper_vilvl_w)
-TRANS(xvilvl_d, LASX, gen_xxx, gen_helper_vilvl_d)
-TRANS(xvilvh_b, LASX, gen_xxx, gen_helper_vilvh_b)
-TRANS(xvilvh_h, LASX, gen_xxx, gen_helper_vilvh_h)
-TRANS(xvilvh_w, LASX, gen_xxx, gen_helper_vilvh_w)
-TRANS(xvilvh_d, LASX, gen_xxx, gen_helper_vilvh_d)
-
-TRANS(vshuf_b, LSX, gen_vvvv, gen_helper_vshuf_b)
-TRANS(vshuf_h, LSX, gen_vvv, gen_helper_vshuf_h)
-TRANS(vshuf_w, LSX, gen_vvv, gen_helper_vshuf_w)
-TRANS(vshuf_d, LSX, gen_vvv, gen_helper_vshuf_d)
-TRANS(xvshuf_b, LASX, gen_xxxx, gen_helper_vshuf_b)
-TRANS(xvshuf_h, LASX, gen_xxx, gen_helper_vshuf_h)
-TRANS(xvshuf_w, LASX, gen_xxx, gen_helper_vshuf_w)
-TRANS(xvshuf_d, LASX, gen_xxx, gen_helper_vshuf_d)
-TRANS(vshuf4i_b, LSX, gen_vv_i, gen_helper_vshuf4i_b)
-TRANS(vshuf4i_h, LSX, gen_vv_i, gen_helper_vshuf4i_h)
-TRANS(vshuf4i_w, LSX, gen_vv_i, gen_helper_vshuf4i_w)
-TRANS(vshuf4i_d, LSX, gen_vv_i, gen_helper_vshuf4i_d)
-TRANS(xvshuf4i_b, LASX, gen_xx_i, gen_helper_vshuf4i_b)
-TRANS(xvshuf4i_h, LASX, gen_xx_i, gen_helper_vshuf4i_h)
-TRANS(xvshuf4i_w, LASX, gen_xx_i, gen_helper_vshuf4i_w)
-TRANS(xvshuf4i_d, LASX, gen_xx_i, gen_helper_vshuf4i_d)
-
-TRANS(xvperm_w, LASX, gen_xxx, gen_helper_vperm_w)
-TRANS(vpermi_w, LSX, gen_vv_i, gen_helper_vpermi_w)
-TRANS(xvpermi_w, LASX, gen_xx_i, gen_helper_vpermi_w)
-TRANS(xvpermi_d, LASX, gen_xx_i, gen_helper_vpermi_d)
-TRANS(xvpermi_q, LASX, gen_xx_i, gen_helper_vpermi_q)
-
-TRANS(vextrins_b, LSX, gen_vv_i, gen_helper_vextrins_b)
-TRANS(vextrins_h, LSX, gen_vv_i, gen_helper_vextrins_h)
-TRANS(vextrins_w, LSX, gen_vv_i, gen_helper_vextrins_w)
-TRANS(vextrins_d, LSX, gen_vv_i, gen_helper_vextrins_d)
-TRANS(xvextrins_b, LASX, gen_xx_i, gen_helper_vextrins_b)
-TRANS(xvextrins_h, LASX, gen_xx_i, gen_helper_vextrins_h)
-TRANS(xvextrins_w, LASX, gen_xx_i, gen_helper_vextrins_w)
-TRANS(xvextrins_d, LASX, gen_xx_i, gen_helper_vextrins_d)
-
-static bool trans_vld(DisasContext *ctx, arg_vr_i *a)
-{
-    TCGv addr;
-    TCGv_i64 rl, rh;
-    TCGv_i128 val;
-
-    if (!avail_LSX(ctx)) {
-        return
false; - } - - if (!check_vec(ctx, 16)) { - return true; - } - - addr = gpr_src(ctx, a->rj, EXT_NONE); - val = tcg_temp_new_i128(); - rl = tcg_temp_new_i64(); - rh = tcg_temp_new_i64(); - - addr = make_address_i(ctx, addr, a->imm); - - tcg_gen_qemu_ld_i128(val, addr, ctx->mem_idx, MO_128 | MO_TE); - tcg_gen_extr_i128_i64(rl, rh, val); - set_vreg64(rh, a->vd, 1); - set_vreg64(rl, a->vd, 0); - - return true; -} - -static bool trans_vst(DisasContext *ctx, arg_vr_i *a) -{ - TCGv addr; - TCGv_i128 val; - TCGv_i64 ah, al; - - if (!avail_LSX(ctx)) { - return false; - } - - if (!check_vec(ctx, 16)) { - return true; - } - - addr = gpr_src(ctx, a->rj, EXT_NONE); - val = tcg_temp_new_i128(); - ah = tcg_temp_new_i64(); - al = tcg_temp_new_i64(); - - addr = make_address_i(ctx, addr, a->imm); - - get_vreg64(ah, a->vd, 1); - get_vreg64(al, a->vd, 0); - tcg_gen_concat_i64_i128(val, al, ah); - tcg_gen_qemu_st_i128(val, addr, ctx->mem_idx, MO_128 | MO_TE); - - return true; -} - -static bool trans_vldx(DisasContext *ctx, arg_vrr *a) -{ - TCGv addr, src1, src2; - TCGv_i64 rl, rh; - TCGv_i128 val; - - if (!avail_LSX(ctx)) { - return false; - } - - if (!check_vec(ctx, 16)) { - return true; - } - - src1 = gpr_src(ctx, a->rj, EXT_NONE); - src2 = gpr_src(ctx, a->rk, EXT_NONE); - val = tcg_temp_new_i128(); - rl = tcg_temp_new_i64(); - rh = tcg_temp_new_i64(); - - addr = make_address_x(ctx, src1, src2); - tcg_gen_qemu_ld_i128(val, addr, ctx->mem_idx, MO_128 | MO_TE); - tcg_gen_extr_i128_i64(rl, rh, val); - set_vreg64(rh, a->vd, 1); - set_vreg64(rl, a->vd, 0); - - return true; -} - -static bool trans_vstx(DisasContext *ctx, arg_vrr *a) -{ - TCGv addr, src1, src2; - TCGv_i64 ah, al; - TCGv_i128 val; - - if (!avail_LSX(ctx)) { - return false; - } - - if (!check_vec(ctx, 16)) { - return true; - } - - src1 = gpr_src(ctx, a->rj, EXT_NONE); - src2 = gpr_src(ctx, a->rk, EXT_NONE); - val = tcg_temp_new_i128(); - ah = tcg_temp_new_i64(); - al = tcg_temp_new_i64(); - - addr = make_address_x(ctx, src1, src2); - get_vreg64(ah, a->vd, 1); - get_vreg64(al, a->vd, 0); - tcg_gen_concat_i64_i128(val, al, ah); - tcg_gen_qemu_st_i128(val, addr, ctx->mem_idx, MO_128 | MO_TE); - - return true; -} - -static bool do_vldrepl_vl(DisasContext *ctx, arg_vr_i *a, - uint32_t oprsz, MemOp mop) -{ - TCGv addr; - TCGv_i64 val; - - if (!check_vec(ctx, oprsz)) { - return true; - } - - addr = gpr_src(ctx, a->rj, EXT_NONE); - val = tcg_temp_new_i64(); - - addr = make_address_i(ctx, addr, a->imm); - - tcg_gen_qemu_ld_i64(val, addr, ctx->mem_idx, mop); - tcg_gen_gvec_dup_i64(mop, vec_full_offset(a->vd), oprsz, ctx->vl / 8, val); - - return true; -} - -static bool do_vldrepl(DisasContext *ctx, arg_vr_i *a, MemOp mop) -{ - return do_vldrepl_vl(ctx, a, 16, mop); -} - -static bool do_xvldrepl(DisasContext *ctx, arg_vr_i *a, MemOp mop) -{ - return do_vldrepl_vl(ctx, a, 32, mop); -} - -TRANS(vldrepl_b, LSX, do_vldrepl, MO_8) -TRANS(vldrepl_h, LSX, do_vldrepl, MO_16) -TRANS(vldrepl_w, LSX, do_vldrepl, MO_32) -TRANS(vldrepl_d, LSX, do_vldrepl, MO_64) -TRANS(xvldrepl_b, LASX, do_xvldrepl, MO_8) -TRANS(xvldrepl_h, LASX, do_xvldrepl, MO_16) -TRANS(xvldrepl_w, LASX, do_xvldrepl, MO_32) -TRANS(xvldrepl_d, LASX, do_xvldrepl, MO_64) - -static bool do_vstelm_vl(DisasContext *ctx, - arg_vr_ii *a, uint32_t oprsz, MemOp mop) -{ - TCGv addr; - TCGv_i64 val; - - if (!check_vec(ctx, oprsz)) { - return true; - } - - addr = gpr_src(ctx, a->rj, EXT_NONE); - val = tcg_temp_new_i64(); - - addr = make_address_i(ctx, addr, a->imm); - tcg_gen_ld_i64(val, tcg_env, 
vec_reg_offset(a->vd, a->imm2, mop)); - tcg_gen_qemu_st_i64(val, addr, ctx->mem_idx, mop); - return true; -} - -static bool do_vstelm(DisasContext *ctx, arg_vr_ii *a, MemOp mop) -{ - return do_vstelm_vl(ctx, a, 16, mop); -} - -static bool do_xvstelm(DisasContext *ctx, arg_vr_ii *a, MemOp mop) -{ - return do_vstelm_vl(ctx, a, 32, mop); -} - -TRANS(vstelm_b, LSX, do_vstelm, MO_8) -TRANS(vstelm_h, LSX, do_vstelm, MO_16) -TRANS(vstelm_w, LSX, do_vstelm, MO_32) -TRANS(vstelm_d, LSX, do_vstelm, MO_64) -TRANS(xvstelm_b, LASX, do_xvstelm, MO_8) -TRANS(xvstelm_h, LASX, do_xvstelm, MO_16) -TRANS(xvstelm_w, LASX, do_xvstelm, MO_32) -TRANS(xvstelm_d, LASX, do_xvstelm, MO_64) - -static bool gen_lasx_memory(DisasContext *ctx, arg_vr_i *a, - void (*func)(DisasContext *, int, TCGv)) -{ - TCGv addr = gpr_src(ctx, a->rj, EXT_NONE); - TCGv temp = NULL; - - if (!check_vec(ctx, 32)) { - return true; - } - - if (a->imm) { - temp = tcg_temp_new(); - tcg_gen_addi_tl(temp, addr, a->imm); - addr = temp; - } - - func(ctx, a->vd, addr); - return true; -} - -static void gen_xvld(DisasContext *ctx, int vreg, TCGv addr) -{ - int i; - TCGv temp = tcg_temp_new(); - TCGv dest = tcg_temp_new(); - - tcg_gen_qemu_ld_i64(dest, addr, ctx->mem_idx, MO_TEUQ); - set_vreg64(dest, vreg, 0); - - for (i = 1; i < 4; i++) { - tcg_gen_addi_tl(temp, addr, 8 * i); - tcg_gen_qemu_ld_i64(dest, temp, ctx->mem_idx, MO_TEUQ); - set_vreg64(dest, vreg, i); - } -} - -static void gen_xvst(DisasContext * ctx, int vreg, TCGv addr) -{ - int i; - TCGv temp = tcg_temp_new(); - TCGv dest = tcg_temp_new(); - - get_vreg64(dest, vreg, 0); - tcg_gen_qemu_st_i64(dest, addr, ctx->mem_idx, MO_TEUQ); - - for (i = 1; i < 4; i++) { - tcg_gen_addi_tl(temp, addr, 8 * i); - get_vreg64(dest, vreg, i); - tcg_gen_qemu_st_i64(dest, temp, ctx->mem_idx, MO_TEUQ); - } -} - -TRANS(xvld, LASX, gen_lasx_memory, gen_xvld) -TRANS(xvst, LASX, gen_lasx_memory, gen_xvst) - -static bool gen_lasx_memoryx(DisasContext *ctx, arg_vrr *a, - void (*func)(DisasContext*, int, TCGv)) -{ - TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); - TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE); - TCGv addr = tcg_temp_new(); - - if (!check_vec(ctx, 32)) { - return true; - } - - tcg_gen_add_tl(addr, src1, src2); - func(ctx, a->vd, addr); - - return true; -} - -TRANS(xvldx, LASX, gen_lasx_memoryx, gen_xvld) -TRANS(xvstx, LASX, gen_lasx_memoryx, gen_xvst) diff --git a/target/loongarch/iocsr_helper.c b/target/loongarch/iocsr_helper.c deleted file mode 100644 index 6cd01d5f09..0000000000 --- a/target/loongarch/iocsr_helper.c +++ /dev/null @@ -1,68 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (c) 2021 Loongson Technology Corporation Limited - * - * Helpers for IOCSR reads/writes - */ - -#include "qemu/osdep.h" -#include "cpu.h" -#include "qemu/host-utils.h" -#include "exec/helper-proto.h" -#include "exec/exec-all.h" -#include "exec/cpu_ldst.h" - -#define GET_MEMTXATTRS(cas) \ - ((MemTxAttrs){.requester_id = env_cpu(cas)->cpu_index}) - -uint64_t helper_iocsrrd_b(CPULoongArchState *env, target_ulong r_addr) -{ - return address_space_ldub(&env->address_space_iocsr, r_addr, - GET_MEMTXATTRS(env), NULL); -} - -uint64_t helper_iocsrrd_h(CPULoongArchState *env, target_ulong r_addr) -{ - return address_space_lduw(&env->address_space_iocsr, r_addr, - GET_MEMTXATTRS(env), NULL); -} - -uint64_t helper_iocsrrd_w(CPULoongArchState *env, target_ulong r_addr) -{ - return address_space_ldl(&env->address_space_iocsr, r_addr, - GET_MEMTXATTRS(env), NULL); -} - -uint64_t helper_iocsrrd_d(CPULoongArchState 
*env, target_ulong r_addr) -{ - return address_space_ldq(&env->address_space_iocsr, r_addr, - GET_MEMTXATTRS(env), NULL); -} - -void helper_iocsrwr_b(CPULoongArchState *env, target_ulong w_addr, - target_ulong val) -{ - address_space_stb(&env->address_space_iocsr, w_addr, - val, GET_MEMTXATTRS(env), NULL); -} - -void helper_iocsrwr_h(CPULoongArchState *env, target_ulong w_addr, - target_ulong val) -{ - address_space_stw(&env->address_space_iocsr, w_addr, - val, GET_MEMTXATTRS(env), NULL); -} - -void helper_iocsrwr_w(CPULoongArchState *env, target_ulong w_addr, - target_ulong val) -{ - address_space_stl(&env->address_space_iocsr, w_addr, - val, GET_MEMTXATTRS(env), NULL); -} - -void helper_iocsrwr_d(CPULoongArchState *env, target_ulong w_addr, - target_ulong val) -{ - address_space_stq(&env->address_space_iocsr, w_addr, - val, GET_MEMTXATTRS(env), NULL); -} diff --git a/target/loongarch/meson.build b/target/loongarch/meson.build index b3a0fb12fb..e84e4c51f4 100644 --- a/target/loongarch/meson.build +++ b/target/loongarch/meson.build @@ -5,29 +5,16 @@ loongarch_ss.add(files( 'cpu.c', 'gdbstub.c', )) -loongarch_tcg_ss = ss.source_set() -loongarch_tcg_ss.add(gen) -loongarch_tcg_ss.add(files( - 'fpu_helper.c', - 'op_helper.c', - 'translate.c', - 'vec_helper.c', -)) -loongarch_tcg_ss.add(zlib) loongarch_system_ss = ss.source_set() loongarch_system_ss.add(files( 'loongarch-qmp-cmds.c', 'machine.c', - 'tlb_helper.c', - 'constant_timer.c', - 'csr_helper.c', - 'iocsr_helper.c', )) common_ss.add(when: 'CONFIG_LOONGARCH_DIS', if_true: [files('disas.c'), gen]) -loongarch_ss.add_all(when: 'CONFIG_TCG', if_true: [loongarch_tcg_ss]) +subdir('tcg') target_arch += {'loongarch': loongarch_ss} target_system_arch += {'loongarch': loongarch_system_ss} diff --git a/target/loongarch/op_helper.c b/target/loongarch/op_helper.c deleted file mode 100644 index fe79c62fa4..0000000000 --- a/target/loongarch/op_helper.c +++ /dev/null @@ -1,140 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * LoongArch emulation helpers for QEMU. 
- *
- * Copyright (c) 2021 Loongson Technology Corporation Limited
- */
-
-#include "qemu/osdep.h"
-#include "qemu/log.h"
-#include "cpu.h"
-#include "qemu/host-utils.h"
-#include "exec/helper-proto.h"
-#include "exec/exec-all.h"
-#include "exec/cpu_ldst.h"
-#include "internals.h"
-#include "qemu/crc32c.h"
-#include <zlib.h>
-#include "cpu-csr.h"
-
-/* Exceptions helpers */
-void helper_raise_exception(CPULoongArchState *env, uint32_t exception)
-{
-    do_raise_exception(env, exception, GETPC());
-}
-
-target_ulong helper_bitrev_w(target_ulong rj)
-{
-    return (int32_t)revbit32(rj);
-}
-
-target_ulong helper_bitrev_d(target_ulong rj)
-{
-    return revbit64(rj);
-}
-
-target_ulong helper_bitswap(target_ulong v)
-{
-    v = ((v >> 1) & (target_ulong)0x5555555555555555ULL) |
-        ((v & (target_ulong)0x5555555555555555ULL) << 1);
-    v = ((v >> 2) & (target_ulong)0x3333333333333333ULL) |
-        ((v & (target_ulong)0x3333333333333333ULL) << 2);
-    v = ((v >> 4) & (target_ulong)0x0F0F0F0F0F0F0F0FULL) |
-        ((v & (target_ulong)0x0F0F0F0F0F0F0F0FULL) << 4);
-    return v;
-}
-
-/* loongarch assert op */
-void helper_asrtle_d(CPULoongArchState *env, target_ulong rj, target_ulong rk)
-{
-    if (rj > rk) {
-        env->CSR_BADV = rj;
-        do_raise_exception(env, EXCCODE_BCE, GETPC());
-    }
-}
-
-void helper_asrtgt_d(CPULoongArchState *env, target_ulong rj, target_ulong rk)
-{
-    if (rj <= rk) {
-        env->CSR_BADV = rj;
-        do_raise_exception(env, EXCCODE_BCE, GETPC());
-    }
-}
-
-target_ulong helper_crc32(target_ulong val, target_ulong m, uint64_t sz)
-{
-    uint8_t buf[8];
-    target_ulong mask = ((sz * 8) == 64) ? -1ULL : ((1ULL << (sz * 8)) - 1);
-
-    m &= mask;
-    stq_le_p(buf, m);
-    return (int32_t) (crc32(val ^ 0xffffffff, buf, sz) ^ 0xffffffff);
-}
-
-target_ulong helper_crc32c(target_ulong val, target_ulong m, uint64_t sz)
-{
-    uint8_t buf[8];
-    target_ulong mask = ((sz * 8) == 64) ? -1ULL : ((1ULL << (sz * 8)) - 1);
-    m &= mask;
-    stq_le_p(buf, m);
-    return (int32_t) (crc32c(val, buf, sz) ^ 0xffffffff);
-}
-
-target_ulong helper_cpucfg(CPULoongArchState *env, target_ulong rj)
-{
-    return rj >= ARRAY_SIZE(env->cpucfg) ?
0 : env->cpucfg[rj]; -} - -uint64_t helper_rdtime_d(CPULoongArchState *env) -{ -#ifdef CONFIG_USER_ONLY - return cpu_get_host_ticks(); -#else - uint64_t plv; - LoongArchCPU *cpu = env_archcpu(env); - - plv = FIELD_EX64(env->CSR_CRMD, CSR_CRMD, PLV); - if (extract64(env->CSR_MISC, R_CSR_MISC_DRDTL_SHIFT + plv, 1)) { - do_raise_exception(env, EXCCODE_IPE, GETPC()); - } - - return cpu_loongarch_get_constant_timer_counter(cpu); -#endif -} - -#ifndef CONFIG_USER_ONLY -void helper_ertn(CPULoongArchState *env) -{ - uint64_t csr_pplv, csr_pie; - if (FIELD_EX64(env->CSR_TLBRERA, CSR_TLBRERA, ISTLBR)) { - csr_pplv = FIELD_EX64(env->CSR_TLBRPRMD, CSR_TLBRPRMD, PPLV); - csr_pie = FIELD_EX64(env->CSR_TLBRPRMD, CSR_TLBRPRMD, PIE); - - env->CSR_TLBRERA = FIELD_DP64(env->CSR_TLBRERA, CSR_TLBRERA, ISTLBR, 0); - env->CSR_CRMD = FIELD_DP64(env->CSR_CRMD, CSR_CRMD, DA, 0); - env->CSR_CRMD = FIELD_DP64(env->CSR_CRMD, CSR_CRMD, PG, 1); - set_pc(env, env->CSR_TLBRERA); - qemu_log_mask(CPU_LOG_INT, "%s: TLBRERA " TARGET_FMT_lx "\n", - __func__, env->CSR_TLBRERA); - } else { - csr_pplv = FIELD_EX64(env->CSR_PRMD, CSR_PRMD, PPLV); - csr_pie = FIELD_EX64(env->CSR_PRMD, CSR_PRMD, PIE); - - set_pc(env, env->CSR_ERA); - qemu_log_mask(CPU_LOG_INT, "%s: ERA " TARGET_FMT_lx "\n", - __func__, env->CSR_ERA); - } - env->CSR_CRMD = FIELD_DP64(env->CSR_CRMD, CSR_CRMD, PLV, csr_pplv); - env->CSR_CRMD = FIELD_DP64(env->CSR_CRMD, CSR_CRMD, IE, csr_pie); - - env->lladdr = 1; -} - -void helper_idle(CPULoongArchState *env) -{ - CPUState *cs = env_cpu(env); - - cs->halted = 1; - do_raise_exception(env, EXCP_HLT, 0); -} -#endif diff --git a/target/loongarch/tcg/constant_timer.c b/target/loongarch/tcg/constant_timer.c new file mode 100644 index 0000000000..1851f53fd6 --- /dev/null +++ b/target/loongarch/tcg/constant_timer.c @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * QEMU LoongArch constant timer support + * + * Copyright (c) 2021 Loongson Technology Corporation Limited + */ + +#include "qemu/osdep.h" +#include "qemu/timer.h" +#include "cpu.h" +#include "internals.h" +#include "cpu-csr.h" + +#define TIMER_PERIOD 10 /* 10 ns period for 100 MHz frequency */ +#define CONSTANT_TIMER_TICK_MASK 0xfffffffffffcUL +#define CONSTANT_TIMER_ENABLE 0x1UL + +uint64_t cpu_loongarch_get_constant_timer_counter(LoongArchCPU *cpu) +{ + return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) / TIMER_PERIOD; +} + +uint64_t cpu_loongarch_get_constant_timer_ticks(LoongArchCPU *cpu) +{ + uint64_t now, expire; + + now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + expire = timer_expire_time_ns(&cpu->timer); + + return (expire - now) / TIMER_PERIOD; +} + +void cpu_loongarch_store_constant_timer_config(LoongArchCPU *cpu, + uint64_t value) +{ + CPULoongArchState *env = &cpu->env; + uint64_t now, next; + + env->CSR_TCFG = value; + if (value & CONSTANT_TIMER_ENABLE) { + now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + next = now + (value & CONSTANT_TIMER_TICK_MASK) * TIMER_PERIOD; + timer_mod(&cpu->timer, next); + } else { + timer_del(&cpu->timer); + } +} + +void loongarch_constant_timer_cb(void *opaque) +{ + LoongArchCPU *cpu = opaque; + CPULoongArchState *env = &cpu->env; + uint64_t now, next; + + if (FIELD_EX64(env->CSR_TCFG, CSR_TCFG, PERIODIC)) { + now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + next = now + (env->CSR_TCFG & CONSTANT_TIMER_TICK_MASK) * TIMER_PERIOD; + timer_mod(&cpu->timer, next); + } else { + env->CSR_TCFG = FIELD_DP64(env->CSR_TCFG, CSR_TCFG, EN, 0); + } + + loongarch_cpu_set_irq(opaque, IRQ_TIMER, 1); +} diff --git 
a/target/loongarch/tcg/csr_helper.c b/target/loongarch/tcg/csr_helper.c new file mode 100644 index 0000000000..55341551a5 --- /dev/null +++ b/target/loongarch/tcg/csr_helper.c @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * LoongArch emulation helpers for CSRs + * + * Copyright (c) 2021 Loongson Technology Corporation Limited + */ + +#include "qemu/osdep.h" +#include "qemu/main-loop.h" +#include "cpu.h" +#include "internals.h" +#include "qemu/host-utils.h" +#include "exec/helper-proto.h" +#include "exec/exec-all.h" +#include "exec/cpu_ldst.h" +#include "hw/irq.h" +#include "cpu-csr.h" + +target_ulong helper_csrrd_pgd(CPULoongArchState *env) +{ + int64_t v; + + if (env->CSR_TLBRERA & 0x1) { + v = env->CSR_TLBRBADV; + } else { + v = env->CSR_BADV; + } + + if ((v >> 63) & 0x1) { + v = env->CSR_PGDH; + } else { + v = env->CSR_PGDL; + } + + return v; +} + +target_ulong helper_csrrd_cpuid(CPULoongArchState *env) +{ + LoongArchCPU *lac = env_archcpu(env); + + env->CSR_CPUID = CPU(lac)->cpu_index; + + return env->CSR_CPUID; +} + +target_ulong helper_csrrd_tval(CPULoongArchState *env) +{ + LoongArchCPU *cpu = env_archcpu(env); + + return cpu_loongarch_get_constant_timer_ticks(cpu); +} + +target_ulong helper_csrwr_estat(CPULoongArchState *env, target_ulong val) +{ + int64_t old_v = env->CSR_ESTAT; + + /* Only IS[1:0] can be written */ + env->CSR_ESTAT = deposit64(env->CSR_ESTAT, 0, 2, val); + + return old_v; +} + +target_ulong helper_csrwr_asid(CPULoongArchState *env, target_ulong val) +{ + int64_t old_v = env->CSR_ASID; + + /* Only ASID filed of CSR_ASID can be written */ + env->CSR_ASID = deposit64(env->CSR_ASID, 0, 10, val); + if (old_v != env->CSR_ASID) { + tlb_flush(env_cpu(env)); + } + return old_v; +} + +target_ulong helper_csrwr_tcfg(CPULoongArchState *env, target_ulong val) +{ + LoongArchCPU *cpu = env_archcpu(env); + int64_t old_v = env->CSR_TCFG; + + cpu_loongarch_store_constant_timer_config(cpu, val); + + return old_v; +} + +target_ulong helper_csrwr_ticlr(CPULoongArchState *env, target_ulong val) +{ + LoongArchCPU *cpu = env_archcpu(env); + int64_t old_v = 0; + + if (val & 0x1) { + qemu_mutex_lock_iothread(); + loongarch_cpu_set_irq(cpu, IRQ_TIMER, 0); + qemu_mutex_unlock_iothread(); + } + return old_v; +} diff --git a/target/loongarch/tcg/fpu_helper.c b/target/loongarch/tcg/fpu_helper.c new file mode 100644 index 0000000000..f6753c5875 --- /dev/null +++ b/target/loongarch/tcg/fpu_helper.c @@ -0,0 +1,879 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * LoongArch float point emulation helpers for QEMU + * + * Copyright (c) 2021 Loongson Technology Corporation Limited + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "exec/helper-proto.h" +#include "exec/exec-all.h" +#include "exec/cpu_ldst.h" +#include "fpu/softfloat.h" +#include "internals.h" + +static inline uint64_t nanbox_s(float32 fp) +{ + return fp | MAKE_64BIT_MASK(32, 32); +} + +/* Convert loongarch rounding mode in fcsr0 to IEEE library */ +static const FloatRoundMode ieee_rm[4] = { + float_round_nearest_even, + float_round_to_zero, + float_round_up, + float_round_down +}; + +void restore_fp_status(CPULoongArchState *env) +{ + set_float_rounding_mode(ieee_rm[(env->fcsr0 >> FCSR0_RM) & 0x3], + &env->fp_status); + set_flush_to_zero(0, &env->fp_status); +} + +int ieee_ex_to_loongarch(int xcpt) +{ + int ret = 0; + if (xcpt & float_flag_invalid) { + ret |= FP_INVALID; + } + if (xcpt & float_flag_overflow) { + ret |= FP_OVERFLOW; + } + if (xcpt & float_flag_underflow) { + ret |= 
FP_UNDERFLOW; + } + if (xcpt & float_flag_divbyzero) { + ret |= FP_DIV0; + } + if (xcpt & float_flag_inexact) { + ret |= FP_INEXACT; + } + return ret; +} + +static void update_fcsr0_mask(CPULoongArchState *env, uintptr_t pc, int mask) +{ + int flags = get_float_exception_flags(&env->fp_status); + + set_float_exception_flags(0, &env->fp_status); + + flags &= ~mask; + + if (!flags) { + SET_FP_CAUSE(env->fcsr0, flags); + return; + } else { + flags = ieee_ex_to_loongarch(flags); + SET_FP_CAUSE(env->fcsr0, flags); + } + + if (GET_FP_ENABLES(env->fcsr0) & flags) { + do_raise_exception(env, EXCCODE_FPE, pc); + } else { + UPDATE_FP_FLAGS(env->fcsr0, flags); + } +} + +static void update_fcsr0(CPULoongArchState *env, uintptr_t pc) +{ + update_fcsr0_mask(env, pc, 0); +} + +uint64_t helper_fadd_s(CPULoongArchState *env, uint64_t fj, uint64_t fk) +{ + uint64_t fd; + + fd = nanbox_s(float32_add((uint32_t)fj, (uint32_t)fk, &env->fp_status)); + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t helper_fadd_d(CPULoongArchState *env, uint64_t fj, uint64_t fk) +{ + uint64_t fd; + + fd = float64_add(fj, fk, &env->fp_status); + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t helper_fsub_s(CPULoongArchState *env, uint64_t fj, uint64_t fk) +{ + uint64_t fd; + + fd = nanbox_s(float32_sub((uint32_t)fj, (uint32_t)fk, &env->fp_status)); + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t helper_fsub_d(CPULoongArchState *env, uint64_t fj, uint64_t fk) +{ + uint64_t fd; + + fd = float64_sub(fj, fk, &env->fp_status); + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t helper_fmul_s(CPULoongArchState *env, uint64_t fj, uint64_t fk) +{ + uint64_t fd; + + fd = nanbox_s(float32_mul((uint32_t)fj, (uint32_t)fk, &env->fp_status)); + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t helper_fmul_d(CPULoongArchState *env, uint64_t fj, uint64_t fk) +{ + uint64_t fd; + + fd = float64_mul(fj, fk, &env->fp_status); + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t helper_fdiv_s(CPULoongArchState *env, uint64_t fj, uint64_t fk) +{ + uint64_t fd; + + fd = nanbox_s(float32_div((uint32_t)fj, (uint32_t)fk, &env->fp_status)); + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t helper_fdiv_d(CPULoongArchState *env, uint64_t fj, uint64_t fk) +{ + uint64_t fd; + + fd = float64_div(fj, fk, &env->fp_status); + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t helper_fmax_s(CPULoongArchState *env, uint64_t fj, uint64_t fk) +{ + uint64_t fd; + + fd = nanbox_s(float32_maxnum((uint32_t)fj, (uint32_t)fk, &env->fp_status)); + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t helper_fmax_d(CPULoongArchState *env, uint64_t fj, uint64_t fk) +{ + uint64_t fd; + + fd = float64_maxnum(fj, fk, &env->fp_status); + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t helper_fmin_s(CPULoongArchState *env, uint64_t fj, uint64_t fk) +{ + uint64_t fd; + + fd = nanbox_s(float32_minnum((uint32_t)fj, (uint32_t)fk, &env->fp_status)); + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t helper_fmin_d(CPULoongArchState *env, uint64_t fj, uint64_t fk) +{ + uint64_t fd; + + fd = float64_minnum(fj, fk, &env->fp_status); + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t helper_fmaxa_s(CPULoongArchState *env, uint64_t fj, uint64_t fk) +{ + uint64_t fd; + + fd = nanbox_s(float32_maxnummag((uint32_t)fj, + (uint32_t)fk, &env->fp_status)); + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t helper_fmaxa_d(CPULoongArchState *env, uint64_t fj, uint64_t fk) +{ + uint64_t fd; + + fd = 
float64_maxnummag(fj, fk, &env->fp_status); + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t helper_fmina_s(CPULoongArchState *env, uint64_t fj, uint64_t fk) +{ + uint64_t fd; + + fd = nanbox_s(float32_minnummag((uint32_t)fj, + (uint32_t)fk, &env->fp_status)); + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t helper_fmina_d(CPULoongArchState *env, uint64_t fj, uint64_t fk) +{ + uint64_t fd; + + fd = float64_minnummag(fj, fk, &env->fp_status); + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t helper_fscaleb_s(CPULoongArchState *env, uint64_t fj, uint64_t fk) +{ + uint64_t fd; + int32_t n = (int32_t)fk; + + fd = nanbox_s(float32_scalbn((uint32_t)fj, + n > 0x200 ? 0x200 : + n < -0x200 ? -0x200 : n, + &env->fp_status)); + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t helper_fscaleb_d(CPULoongArchState *env, uint64_t fj, uint64_t fk) +{ + uint64_t fd; + int64_t n = (int64_t)fk; + + fd = float64_scalbn(fj, + n > 0x1000 ? 0x1000 : + n < -0x1000 ? -0x1000 : n, + &env->fp_status); + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t helper_fsqrt_s(CPULoongArchState *env, uint64_t fj) +{ + uint64_t fd; + + fd = nanbox_s(float32_sqrt((uint32_t)fj, &env->fp_status)); + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t helper_fsqrt_d(CPULoongArchState *env, uint64_t fj) +{ + uint64_t fd; + + fd = float64_sqrt(fj, &env->fp_status); + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t helper_frecip_s(CPULoongArchState *env, uint64_t fj) +{ + uint64_t fd; + + fd = nanbox_s(float32_div(float32_one, (uint32_t)fj, &env->fp_status)); + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t helper_frecip_d(CPULoongArchState *env, uint64_t fj) +{ + uint64_t fd; + + fd = float64_div(float64_one, fj, &env->fp_status); + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t helper_frsqrt_s(CPULoongArchState *env, uint64_t fj) +{ + uint64_t fd; + uint32_t fp; + + fp = float32_sqrt((uint32_t)fj, &env->fp_status); + fd = nanbox_s(float32_div(float32_one, fp, &env->fp_status)); + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t helper_frsqrt_d(CPULoongArchState *env, uint64_t fj) +{ + uint64_t fp, fd; + + fp = float64_sqrt(fj, &env->fp_status); + fd = float64_div(float64_one, fp, &env->fp_status); + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t helper_flogb_s(CPULoongArchState *env, uint64_t fj) +{ + uint64_t fd; + uint32_t fp; + float_status *status = &env->fp_status; + FloatRoundMode old_mode = get_float_rounding_mode(status); + + set_float_rounding_mode(float_round_down, status); + fp = float32_log2((uint32_t)fj, status); + fd = nanbox_s(float32_round_to_int(fp, status)); + set_float_rounding_mode(old_mode, status); + update_fcsr0_mask(env, GETPC(), float_flag_inexact); + return fd; +} + +uint64_t helper_flogb_d(CPULoongArchState *env, uint64_t fj) +{ + uint64_t fd; + float_status *status = &env->fp_status; + FloatRoundMode old_mode = get_float_rounding_mode(status); + + set_float_rounding_mode(float_round_down, status); + fd = float64_log2(fj, status); + fd = float64_round_to_int(fd, status); + set_float_rounding_mode(old_mode, status); + update_fcsr0_mask(env, GETPC(), float_flag_inexact); + return fd; +} + +uint64_t helper_fclass_s(CPULoongArchState *env, uint64_t fj) +{ + float32 f = fj; + bool sign = float32_is_neg(f); + + if (float32_is_infinity(f)) { + return sign ? 1 << 2 : 1 << 6; + } else if (float32_is_zero(f)) { + return sign ? 1 << 5 : 1 << 9; + } else if (float32_is_zero_or_denormal(f)) { + return sign ? 
1 << 4 : 1 << 8; + } else if (float32_is_any_nan(f)) { + float_status s = { }; /* for snan_bit_is_one */ + return float32_is_quiet_nan(f, &s) ? 1 << 1 : 1 << 0; + } else { + return sign ? 1 << 3 : 1 << 7; + } +} + +uint64_t helper_fclass_d(CPULoongArchState *env, uint64_t fj) +{ + float64 f = fj; + bool sign = float64_is_neg(f); + + if (float64_is_infinity(f)) { + return sign ? 1 << 2 : 1 << 6; + } else if (float64_is_zero(f)) { + return sign ? 1 << 5 : 1 << 9; + } else if (float64_is_zero_or_denormal(f)) { + return sign ? 1 << 4 : 1 << 8; + } else if (float64_is_any_nan(f)) { + float_status s = { }; /* for snan_bit_is_one */ + return float64_is_quiet_nan(f, &s) ? 1 << 1 : 1 << 0; + } else { + return sign ? 1 << 3 : 1 << 7; + } +} + +uint64_t helper_fmuladd_s(CPULoongArchState *env, uint64_t fj, + uint64_t fk, uint64_t fa, uint32_t flag) +{ + uint64_t fd; + + fd = nanbox_s(float32_muladd((uint32_t)fj, (uint32_t)fk, + (uint32_t)fa, flag, &env->fp_status)); + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t helper_fmuladd_d(CPULoongArchState *env, uint64_t fj, + uint64_t fk, uint64_t fa, uint32_t flag) +{ + uint64_t fd; + + fd = float64_muladd(fj, fk, fa, flag, &env->fp_status); + update_fcsr0(env, GETPC()); + return fd; +} + +static uint64_t fcmp_common(CPULoongArchState *env, FloatRelation cmp, + uint32_t flags) +{ + bool ret; + + switch (cmp) { + case float_relation_less: + ret = (flags & FCMP_LT); + break; + case float_relation_equal: + ret = (flags & FCMP_EQ); + break; + case float_relation_greater: + ret = (flags & FCMP_GT); + break; + case float_relation_unordered: + ret = (flags & FCMP_UN); + break; + default: + g_assert_not_reached(); + } + update_fcsr0(env, GETPC()); + + return ret; +} + +/* fcmp_cXXX_s */ +uint64_t helper_fcmp_c_s(CPULoongArchState *env, uint64_t fj, + uint64_t fk, uint32_t flags) +{ + FloatRelation cmp = float32_compare_quiet((uint32_t)fj, + (uint32_t)fk, &env->fp_status); + return fcmp_common(env, cmp, flags); +} + +/* fcmp_sXXX_s */ +uint64_t helper_fcmp_s_s(CPULoongArchState *env, uint64_t fj, + uint64_t fk, uint32_t flags) +{ + FloatRelation cmp = float32_compare((uint32_t)fj, + (uint32_t)fk, &env->fp_status); + return fcmp_common(env, cmp, flags); +} + +/* fcmp_cXXX_d */ +uint64_t helper_fcmp_c_d(CPULoongArchState *env, uint64_t fj, + uint64_t fk, uint32_t flags) +{ + FloatRelation cmp = float64_compare_quiet(fj, fk, &env->fp_status); + return fcmp_common(env, cmp, flags); +} + +/* fcmp_sXXX_d */ +uint64_t helper_fcmp_s_d(CPULoongArchState *env, uint64_t fj, + uint64_t fk, uint32_t flags) +{ + FloatRelation cmp = float64_compare(fj, fk, &env->fp_status); + return fcmp_common(env, cmp, flags); +} + +/* floating point conversion */ +uint64_t helper_fcvt_s_d(CPULoongArchState *env, uint64_t fj) +{ + uint64_t fd; + + fd = nanbox_s(float64_to_float32(fj, &env->fp_status)); + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t helper_fcvt_d_s(CPULoongArchState *env, uint64_t fj) +{ + uint64_t fd; + + fd = float32_to_float64((uint32_t)fj, &env->fp_status); + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t helper_ffint_s_w(CPULoongArchState *env, uint64_t fj) +{ + uint64_t fd; + + fd = nanbox_s(int32_to_float32((int32_t)fj, &env->fp_status)); + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t helper_ffint_s_l(CPULoongArchState *env, uint64_t fj) +{ + uint64_t fd; + + fd = nanbox_s(int64_to_float32(fj, &env->fp_status)); + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t helper_ffint_d_w(CPULoongArchState *env, uint64_t fj) +{ + 
uint64_t fd; + + fd = int32_to_float64((int32_t)fj, &env->fp_status); + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t helper_ffint_d_l(CPULoongArchState *env, uint64_t fj) +{ + uint64_t fd; + + fd = int64_to_float64(fj, &env->fp_status); + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t helper_frint_s(CPULoongArchState *env, uint64_t fj) +{ + uint64_t fd; + + fd = (uint64_t)(float32_round_to_int((uint32_t)fj, &env->fp_status)); + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t helper_frint_d(CPULoongArchState *env, uint64_t fj) +{ + uint64_t fd; + + fd = float64_round_to_int(fj, &env->fp_status); + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t helper_ftintrm_l_d(CPULoongArchState *env, uint64_t fj) +{ + uint64_t fd; + FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); + + set_float_rounding_mode(float_round_down, &env->fp_status); + fd = float64_to_int64(fj, &env->fp_status); + set_float_rounding_mode(old_mode, &env->fp_status); + + if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { + if (float64_is_any_nan(fj)) { + fd = 0; + } + } + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t helper_ftintrm_l_s(CPULoongArchState *env, uint64_t fj) +{ + uint64_t fd; + FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); + + set_float_rounding_mode(float_round_down, &env->fp_status); + fd = float32_to_int64((uint32_t)fj, &env->fp_status); + set_float_rounding_mode(old_mode, &env->fp_status); + + if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { + if (float32_is_any_nan((uint32_t)fj)) { + fd = 0; + } + } + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t helper_ftintrm_w_d(CPULoongArchState *env, uint64_t fj) +{ + uint64_t fd; + FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); + + set_float_rounding_mode(float_round_down, &env->fp_status); + fd = (uint64_t)float64_to_int32(fj, &env->fp_status); + set_float_rounding_mode(old_mode, &env->fp_status); + + if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { + if (float64_is_any_nan(fj)) { + fd = 0; + } + } + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t helper_ftintrm_w_s(CPULoongArchState *env, uint64_t fj) +{ + uint64_t fd; + FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); + + set_float_rounding_mode(float_round_down, &env->fp_status); + fd = (uint64_t)float32_to_int32((uint32_t)fj, &env->fp_status); + set_float_rounding_mode(old_mode, &env->fp_status); + + if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { + if (float32_is_any_nan((uint32_t)fj)) { + fd = 0; + } + } + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t helper_ftintrp_l_d(CPULoongArchState *env, uint64_t fj) +{ + uint64_t fd; + FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); + + set_float_rounding_mode(float_round_up, &env->fp_status); + fd = float64_to_int64(fj, &env->fp_status); + set_float_rounding_mode(old_mode, &env->fp_status); + + if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { + if (float64_is_any_nan(fj)) { + fd = 0; + } + } + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t helper_ftintrp_l_s(CPULoongArchState *env, uint64_t fj) +{ + uint64_t fd; + FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); + + set_float_rounding_mode(float_round_up, &env->fp_status); + fd = float32_to_int64((uint32_t)fj, &env->fp_status); + set_float_rounding_mode(old_mode, &env->fp_status); + + if 
(get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { + if (float32_is_any_nan((uint32_t)fj)) { + fd = 0; + } + } + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t helper_ftintrp_w_d(CPULoongArchState *env, uint64_t fj) +{ + uint64_t fd; + FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); + + set_float_rounding_mode(float_round_up, &env->fp_status); + fd = (uint64_t)float64_to_int32(fj, &env->fp_status); + set_float_rounding_mode(old_mode, &env->fp_status); + + if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { + if (float64_is_any_nan(fj)) { + fd = 0; + } + } + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t helper_ftintrp_w_s(CPULoongArchState *env, uint64_t fj) +{ + uint64_t fd; + FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); + + set_float_rounding_mode(float_round_up, &env->fp_status); + fd = (uint64_t)float32_to_int32((uint32_t)fj, &env->fp_status); + set_float_rounding_mode(old_mode, &env->fp_status); + + if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { + if (float32_is_any_nan((uint32_t)fj)) { + fd = 0; + } + } + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t helper_ftintrz_l_d(CPULoongArchState *env, uint64_t fj) +{ + uint64_t fd; + FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); + + fd = float64_to_int64_round_to_zero(fj, &env->fp_status); + set_float_rounding_mode(old_mode, &env->fp_status); + + if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { + if (float64_is_any_nan(fj)) { + fd = 0; + } + } + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t helper_ftintrz_l_s(CPULoongArchState *env, uint64_t fj) +{ + uint64_t fd; + FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); + + fd = float32_to_int64_round_to_zero((uint32_t)fj, &env->fp_status); + set_float_rounding_mode(old_mode, &env->fp_status); + + if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { + if (float32_is_any_nan((uint32_t)fj)) { + fd = 0; + } + } + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t helper_ftintrz_w_d(CPULoongArchState *env, uint64_t fj) +{ + uint64_t fd; + FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); + + fd = (uint64_t)float64_to_int32_round_to_zero(fj, &env->fp_status); + set_float_rounding_mode(old_mode, &env->fp_status); + + if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { + if (float64_is_any_nan(fj)) { + fd = 0; + } + } + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t helper_ftintrz_w_s(CPULoongArchState *env, uint64_t fj) +{ + uint32_t fd; + FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); + + fd = float32_to_int32_round_to_zero((uint32_t)fj, &env->fp_status); + set_float_rounding_mode(old_mode, &env->fp_status); + + if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { + if (float32_is_any_nan((uint32_t)fj)) { + fd = 0; + } + } + update_fcsr0(env, GETPC()); + return (uint64_t)fd; +} + +uint64_t helper_ftintrne_l_d(CPULoongArchState *env, uint64_t fj) +{ + uint64_t fd; + FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); + + set_float_rounding_mode(float_round_nearest_even, &env->fp_status); + fd = float64_to_int64(fj, &env->fp_status); + set_float_rounding_mode(old_mode, &env->fp_status); + + if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { + if (float64_is_any_nan(fj)) { + fd = 0; + } + } + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t 
helper_ftintrne_l_s(CPULoongArchState *env, uint64_t fj) +{ + uint64_t fd; + FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); + + set_float_rounding_mode(float_round_nearest_even, &env->fp_status); + fd = float32_to_int64((uint32_t)fj, &env->fp_status); + set_float_rounding_mode(old_mode, &env->fp_status); + + if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { + if (float32_is_any_nan((uint32_t)fj)) { + fd = 0; + } + } + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t helper_ftintrne_w_d(CPULoongArchState *env, uint64_t fj) +{ + uint64_t fd; + FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); + + set_float_rounding_mode(float_round_nearest_even, &env->fp_status); + fd = (uint64_t)float64_to_int32(fj, &env->fp_status); + set_float_rounding_mode(old_mode, &env->fp_status); + + if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { + if (float64_is_any_nan(fj)) { + fd = 0; + } + } + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t helper_ftintrne_w_s(CPULoongArchState *env, uint64_t fj) +{ + uint32_t fd; + FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); + + set_float_rounding_mode(float_round_nearest_even, &env->fp_status); + fd = float32_to_int32((uint32_t)fj, &env->fp_status); + set_float_rounding_mode(old_mode, &env->fp_status); + + if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { + if (float32_is_any_nan((uint32_t)fj)) { + fd = 0; + } + } + update_fcsr0(env, GETPC()); + return (uint64_t)fd; +} + +uint64_t helper_ftint_l_d(CPULoongArchState *env, uint64_t fj) +{ + uint64_t fd; + + fd = float64_to_int64(fj, &env->fp_status); + if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { + if (float64_is_any_nan(fj)) { + fd = 0; + } + } + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t helper_ftint_l_s(CPULoongArchState *env, uint64_t fj) +{ + uint64_t fd; + + fd = float32_to_int64((uint32_t)fj, &env->fp_status); + if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { + if (float32_is_any_nan((uint32_t)fj)) { + fd = 0; + } + } + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t helper_ftint_w_s(CPULoongArchState *env, uint64_t fj) +{ + uint64_t fd; + + fd = (uint64_t)float32_to_int32((uint32_t)fj, &env->fp_status); + if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { + if (float32_is_any_nan((uint32_t)fj)) { + fd = 0; + } + } + update_fcsr0(env, GETPC()); + return fd; +} + +uint64_t helper_ftint_w_d(CPULoongArchState *env, uint64_t fj) +{ + uint64_t fd; + + fd = (uint64_t)float64_to_int32(fj, &env->fp_status); + if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { + if (float64_is_any_nan(fj)) { + fd = 0; + } + } + update_fcsr0(env, GETPC()); + return fd; +} + +void helper_set_rounding_mode(CPULoongArchState *env) +{ + set_float_rounding_mode(ieee_rm[(env->fcsr0 >> FCSR0_RM) & 0x3], + &env->fp_status); +} diff --git a/target/loongarch/tcg/insn_trans/trans_arith.c.inc b/target/loongarch/tcg/insn_trans/trans_arith.c.inc new file mode 100644 index 0000000000..2be057e932 --- /dev/null +++ b/target/loongarch/tcg/insn_trans/trans_arith.c.inc @@ -0,0 +1,304 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2021 Loongson Technology Corporation Limited + */ + +static bool gen_rrr(DisasContext *ctx, arg_rrr *a, + DisasExtend src1_ext, DisasExtend src2_ext, + DisasExtend dst_ext, void (*func)(TCGv, TCGv, TCGv)) +{ + TCGv dest = gpr_dst(ctx, a->rd, dst_ext); + 
TCGv src1 = gpr_src(ctx, a->rj, src1_ext); + TCGv src2 = gpr_src(ctx, a->rk, src2_ext); + + func(dest, src1, src2); + gen_set_gpr(a->rd, dest, dst_ext); + + return true; +} + +static bool gen_rri_v(DisasContext *ctx, arg_rr_i *a, + DisasExtend src_ext, DisasExtend dst_ext, + void (*func)(TCGv, TCGv, TCGv)) +{ + TCGv dest = gpr_dst(ctx, a->rd, dst_ext); + TCGv src1 = gpr_src(ctx, a->rj, src_ext); + TCGv src2 = tcg_constant_tl(a->imm); + + func(dest, src1, src2); + gen_set_gpr(a->rd, dest, dst_ext); + + return true; +} + +static bool gen_rri_c(DisasContext *ctx, arg_rr_i *a, + DisasExtend src_ext, DisasExtend dst_ext, + void (*func)(TCGv, TCGv, target_long)) +{ + TCGv dest = gpr_dst(ctx, a->rd, dst_ext); + TCGv src1 = gpr_src(ctx, a->rj, src_ext); + + func(dest, src1, a->imm); + gen_set_gpr(a->rd, dest, dst_ext); + + return true; +} + +static bool gen_rrr_sa(DisasContext *ctx, arg_rrr_sa *a, + DisasExtend src_ext, DisasExtend dst_ext, + void (*func)(TCGv, TCGv, TCGv, target_long)) +{ + TCGv dest = gpr_dst(ctx, a->rd, dst_ext); + TCGv src1 = gpr_src(ctx, a->rj, src_ext); + TCGv src2 = gpr_src(ctx, a->rk, src_ext); + + func(dest, src1, src2, a->sa); + gen_set_gpr(a->rd, dest, dst_ext); + + return true; +} + +static bool trans_lu12i_w(DisasContext *ctx, arg_lu12i_w *a) +{ + TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); + + tcg_gen_movi_tl(dest, a->imm << 12); + gen_set_gpr(a->rd, dest, EXT_NONE); + + return true; +} + +static bool gen_pc(DisasContext *ctx, arg_r_i *a, + target_ulong (*func)(target_ulong, int)) +{ + TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); + target_ulong addr = make_address_pc(ctx, func(ctx->base.pc_next, a->imm)); + + tcg_gen_movi_tl(dest, addr); + gen_set_gpr(a->rd, dest, EXT_NONE); + + return true; +} + +static void gen_slt(TCGv dest, TCGv src1, TCGv src2) +{ + tcg_gen_setcond_tl(TCG_COND_LT, dest, src1, src2); +} + +static void gen_sltu(TCGv dest, TCGv src1, TCGv src2) +{ + tcg_gen_setcond_tl(TCG_COND_LTU, dest, src1, src2); +} + +static void gen_mulh_w(TCGv dest, TCGv src1, TCGv src2) +{ + tcg_gen_mul_i64(dest, src1, src2); + tcg_gen_sari_i64(dest, dest, 32); +} + +static void gen_mulh_d(TCGv dest, TCGv src1, TCGv src2) +{ + TCGv discard = tcg_temp_new(); + tcg_gen_muls2_tl(discard, dest, src1, src2); +} + +static void gen_mulh_du(TCGv dest, TCGv src1, TCGv src2) +{ + TCGv discard = tcg_temp_new(); + tcg_gen_mulu2_tl(discard, dest, src1, src2); +} + +static void prep_divisor_d(TCGv ret, TCGv src1, TCGv src2) +{ + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv zero = tcg_constant_tl(0); + + /* + * If min / -1, set the divisor to 1. + * This avoids potential host overflow trap and produces min. + * If x / 0, set the divisor to 1. + * This avoids potential host overflow trap; + * the required result is undefined. + */ + tcg_gen_setcondi_tl(TCG_COND_EQ, ret, src1, INT64_MIN); + tcg_gen_setcondi_tl(TCG_COND_EQ, t0, src2, -1); + tcg_gen_setcondi_tl(TCG_COND_EQ, t1, src2, 0); + tcg_gen_and_tl(ret, ret, t0); + tcg_gen_or_tl(ret, ret, t1); + tcg_gen_movcond_tl(TCG_COND_NE, ret, ret, zero, ret, src2); +} + +static void prep_divisor_du(TCGv ret, TCGv src2) +{ + TCGv zero = tcg_constant_tl(0); + TCGv one = tcg_constant_tl(1); + + /* + * If x / 0, set the divisor to 1. + * This avoids potential host overflow trap; + * the required result is undefined. 
+ */ + tcg_gen_movcond_tl(TCG_COND_EQ, ret, src2, zero, one, src2); +} + +static void gen_div_d(TCGv dest, TCGv src1, TCGv src2) +{ + TCGv t0 = tcg_temp_new(); + prep_divisor_d(t0, src1, src2); + tcg_gen_div_tl(dest, src1, t0); +} + +static void gen_rem_d(TCGv dest, TCGv src1, TCGv src2) +{ + TCGv t0 = tcg_temp_new(); + prep_divisor_d(t0, src1, src2); + tcg_gen_rem_tl(dest, src1, t0); +} + +static void gen_div_du(TCGv dest, TCGv src1, TCGv src2) +{ + TCGv t0 = tcg_temp_new(); + prep_divisor_du(t0, src2); + tcg_gen_divu_tl(dest, src1, t0); +} + +static void gen_rem_du(TCGv dest, TCGv src1, TCGv src2) +{ + TCGv t0 = tcg_temp_new(); + prep_divisor_du(t0, src2); + tcg_gen_remu_tl(dest, src1, t0); +} + +static void gen_div_w(TCGv dest, TCGv src1, TCGv src2) +{ + TCGv t0 = tcg_temp_new(); + /* We need not check for integer overflow for div_w. */ + prep_divisor_du(t0, src2); + tcg_gen_div_tl(dest, src1, t0); +} + +static void gen_rem_w(TCGv dest, TCGv src1, TCGv src2) +{ + TCGv t0 = tcg_temp_new(); + /* We need not check for integer overflow for rem_w. */ + prep_divisor_du(t0, src2); + tcg_gen_rem_tl(dest, src1, t0); +} + +static void gen_alsl(TCGv dest, TCGv src1, TCGv src2, target_long sa) +{ + TCGv t0 = tcg_temp_new(); + tcg_gen_shli_tl(t0, src1, sa); + tcg_gen_add_tl(dest, t0, src2); +} + +static bool trans_lu32i_d(DisasContext *ctx, arg_lu32i_d *a) +{ + TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); + TCGv src1 = gpr_src(ctx, a->rd, EXT_NONE); + TCGv src2 = tcg_constant_tl(a->imm); + + if (!avail_64(ctx)) { + return false; + } + + tcg_gen_deposit_tl(dest, src1, src2, 32, 32); + gen_set_gpr(a->rd, dest, EXT_NONE); + + return true; +} + +static bool trans_lu52i_d(DisasContext *ctx, arg_lu52i_d *a) +{ + TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); + TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); + TCGv src2 = tcg_constant_tl(a->imm); + + if (!avail_64(ctx)) { + return false; + } + + tcg_gen_deposit_tl(dest, src1, src2, 52, 12); + gen_set_gpr(a->rd, dest, EXT_NONE); + + return true; +} + +static target_ulong gen_pcaddi(target_ulong pc, int imm) +{ + return pc + (imm << 2); +} + +static target_ulong gen_pcalau12i(target_ulong pc, int imm) +{ + return (pc + (imm << 12)) & ~0xfff; +} + +static target_ulong gen_pcaddu12i(target_ulong pc, int imm) +{ + return pc + (imm << 12); +} + +static target_ulong gen_pcaddu18i(target_ulong pc, int imm) +{ + return pc + ((target_ulong)(imm) << 18); +} + +static bool trans_addu16i_d(DisasContext *ctx, arg_addu16i_d *a) +{ + TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); + TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); + + if (!avail_64(ctx)) { + return false; + } + + tcg_gen_addi_tl(dest, src1, a->imm << 16); + gen_set_gpr(a->rd, dest, EXT_NONE); + + return true; +} + +TRANS(add_w, ALL, gen_rrr, EXT_NONE, EXT_NONE, EXT_SIGN, tcg_gen_add_tl) +TRANS(add_d, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, tcg_gen_add_tl) +TRANS(sub_w, ALL, gen_rrr, EXT_NONE, EXT_NONE, EXT_SIGN, tcg_gen_sub_tl) +TRANS(sub_d, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, tcg_gen_sub_tl) +TRANS(and, ALL, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, tcg_gen_and_tl) +TRANS(or, ALL, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, tcg_gen_or_tl) +TRANS(xor, ALL, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, tcg_gen_xor_tl) +TRANS(nor, ALL, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, tcg_gen_nor_tl) +TRANS(andn, ALL, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, tcg_gen_andc_tl) +TRANS(orn, ALL, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, tcg_gen_orc_tl) +TRANS(slt, ALL, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_slt) +TRANS(sltu, ALL, gen_rrr, EXT_NONE, 
EXT_NONE, EXT_NONE, gen_sltu) +TRANS(mul_w, ALL, gen_rrr, EXT_SIGN, EXT_SIGN, EXT_SIGN, tcg_gen_mul_tl) +TRANS(mul_d, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, tcg_gen_mul_tl) +TRANS(mulh_w, ALL, gen_rrr, EXT_SIGN, EXT_SIGN, EXT_NONE, gen_mulh_w) +TRANS(mulh_wu, ALL, gen_rrr, EXT_ZERO, EXT_ZERO, EXT_NONE, gen_mulh_w) +TRANS(mulh_d, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_mulh_d) +TRANS(mulh_du, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_mulh_du) +TRANS(mulw_d_w, 64, gen_rrr, EXT_SIGN, EXT_SIGN, EXT_NONE, tcg_gen_mul_tl) +TRANS(mulw_d_wu, 64, gen_rrr, EXT_ZERO, EXT_ZERO, EXT_NONE, tcg_gen_mul_tl) +TRANS(div_w, ALL, gen_rrr, EXT_SIGN, EXT_SIGN, EXT_SIGN, gen_div_w) +TRANS(mod_w, ALL, gen_rrr, EXT_SIGN, EXT_SIGN, EXT_SIGN, gen_rem_w) +TRANS(div_wu, ALL, gen_rrr, EXT_ZERO, EXT_ZERO, EXT_SIGN, gen_div_du) +TRANS(mod_wu, ALL, gen_rrr, EXT_ZERO, EXT_ZERO, EXT_SIGN, gen_rem_du) +TRANS(div_d, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_div_d) +TRANS(mod_d, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_rem_d) +TRANS(div_du, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_div_du) +TRANS(mod_du, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_rem_du) +TRANS(slti, ALL, gen_rri_v, EXT_NONE, EXT_NONE, gen_slt) +TRANS(sltui, ALL, gen_rri_v, EXT_NONE, EXT_NONE, gen_sltu) +TRANS(addi_w, ALL, gen_rri_c, EXT_NONE, EXT_SIGN, tcg_gen_addi_tl) +TRANS(addi_d, 64, gen_rri_c, EXT_NONE, EXT_NONE, tcg_gen_addi_tl) +TRANS(alsl_w, ALL, gen_rrr_sa, EXT_NONE, EXT_SIGN, gen_alsl) +TRANS(alsl_wu, 64, gen_rrr_sa, EXT_NONE, EXT_ZERO, gen_alsl) +TRANS(alsl_d, 64, gen_rrr_sa, EXT_NONE, EXT_NONE, gen_alsl) +TRANS(pcaddi, ALL, gen_pc, gen_pcaddi) +TRANS(pcalau12i, ALL, gen_pc, gen_pcalau12i) +TRANS(pcaddu12i, ALL, gen_pc, gen_pcaddu12i) +TRANS(pcaddu18i, 64, gen_pc, gen_pcaddu18i) +TRANS(andi, ALL, gen_rri_c, EXT_NONE, EXT_NONE, tcg_gen_andi_tl) +TRANS(ori, ALL, gen_rri_c, EXT_NONE, EXT_NONE, tcg_gen_ori_tl) +TRANS(xori, ALL, gen_rri_c, EXT_NONE, EXT_NONE, tcg_gen_xori_tl) diff --git a/target/loongarch/tcg/insn_trans/trans_atomic.c.inc b/target/loongarch/tcg/insn_trans/trans_atomic.c.inc new file mode 100644 index 0000000000..80c2e286fd --- /dev/null +++ b/target/loongarch/tcg/insn_trans/trans_atomic.c.inc @@ -0,0 +1,111 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2021 Loongson Technology Corporation Limited + */ + +static bool gen_ll(DisasContext *ctx, arg_rr_i *a, MemOp mop) +{ + TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); + TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); + TCGv t0 = make_address_i(ctx, src1, a->imm); + + tcg_gen_qemu_ld_i64(dest, t0, ctx->mem_idx, mop); + tcg_gen_st_tl(t0, tcg_env, offsetof(CPULoongArchState, lladdr)); + tcg_gen_st_tl(dest, tcg_env, offsetof(CPULoongArchState, llval)); + gen_set_gpr(a->rd, dest, EXT_NONE); + + return true; +} + +static bool gen_sc(DisasContext *ctx, arg_rr_i *a, MemOp mop) +{ + TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); + TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); + TCGv src2 = gpr_src(ctx, a->rd, EXT_NONE); + TCGv t0 = tcg_temp_new(); + TCGv val = tcg_temp_new(); + + TCGLabel *l1 = gen_new_label(); + TCGLabel *done = gen_new_label(); + + tcg_gen_addi_tl(t0, src1, a->imm); + tcg_gen_brcond_tl(TCG_COND_EQ, t0, cpu_lladdr, l1); + tcg_gen_movi_tl(dest, 0); + tcg_gen_br(done); + + gen_set_label(l1); + tcg_gen_mov_tl(val, src2); + /* generate cmpxchg */ + tcg_gen_atomic_cmpxchg_tl(t0, cpu_lladdr, cpu_llval, + val, ctx->mem_idx, mop); + tcg_gen_setcond_tl(TCG_COND_EQ, dest, t0, cpu_llval); + gen_set_label(done); + gen_set_gpr(a->rd, dest, 
EXT_NONE); + + return true; +} + +static bool gen_am(DisasContext *ctx, arg_rrr *a, + void (*func)(TCGv, TCGv, TCGv, TCGArg, MemOp), + MemOp mop) +{ + TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); + TCGv addr = gpr_src(ctx, a->rj, EXT_NONE); + TCGv val = gpr_src(ctx, a->rk, EXT_NONE); + + if (a->rd != 0 && (a->rj == a->rd || a->rk == a->rd)) { + qemu_log_mask(LOG_GUEST_ERROR, + "Warning: source register overlaps destination register" + "in atomic insn at pc=0x" TARGET_FMT_lx "\n", + ctx->base.pc_next - 4); + return false; + } + + addr = make_address_i(ctx, addr, 0); + + func(dest, addr, val, ctx->mem_idx, mop); + gen_set_gpr(a->rd, dest, EXT_NONE); + + return true; +} + +TRANS(ll_w, ALL, gen_ll, MO_TESL) +TRANS(sc_w, ALL, gen_sc, MO_TESL) +TRANS(ll_d, 64, gen_ll, MO_TEUQ) +TRANS(sc_d, 64, gen_sc, MO_TEUQ) +TRANS(amswap_w, LAM, gen_am, tcg_gen_atomic_xchg_tl, MO_TESL) +TRANS(amswap_d, LAM, gen_am, tcg_gen_atomic_xchg_tl, MO_TEUQ) +TRANS(amadd_w, LAM, gen_am, tcg_gen_atomic_fetch_add_tl, MO_TESL) +TRANS(amadd_d, LAM, gen_am, tcg_gen_atomic_fetch_add_tl, MO_TEUQ) +TRANS(amand_w, LAM, gen_am, tcg_gen_atomic_fetch_and_tl, MO_TESL) +TRANS(amand_d, LAM, gen_am, tcg_gen_atomic_fetch_and_tl, MO_TEUQ) +TRANS(amor_w, LAM, gen_am, tcg_gen_atomic_fetch_or_tl, MO_TESL) +TRANS(amor_d, LAM, gen_am, tcg_gen_atomic_fetch_or_tl, MO_TEUQ) +TRANS(amxor_w, LAM, gen_am, tcg_gen_atomic_fetch_xor_tl, MO_TESL) +TRANS(amxor_d, LAM, gen_am, tcg_gen_atomic_fetch_xor_tl, MO_TEUQ) +TRANS(ammax_w, LAM, gen_am, tcg_gen_atomic_fetch_smax_tl, MO_TESL) +TRANS(ammax_d, LAM, gen_am, tcg_gen_atomic_fetch_smax_tl, MO_TEUQ) +TRANS(ammin_w, LAM, gen_am, tcg_gen_atomic_fetch_smin_tl, MO_TESL) +TRANS(ammin_d, LAM, gen_am, tcg_gen_atomic_fetch_smin_tl, MO_TEUQ) +TRANS(ammax_wu, LAM, gen_am, tcg_gen_atomic_fetch_umax_tl, MO_TESL) +TRANS(ammax_du, LAM, gen_am, tcg_gen_atomic_fetch_umax_tl, MO_TEUQ) +TRANS(ammin_wu, LAM, gen_am, tcg_gen_atomic_fetch_umin_tl, MO_TESL) +TRANS(ammin_du, LAM, gen_am, tcg_gen_atomic_fetch_umin_tl, MO_TEUQ) +TRANS(amswap_db_w, LAM, gen_am, tcg_gen_atomic_xchg_tl, MO_TESL) +TRANS(amswap_db_d, LAM, gen_am, tcg_gen_atomic_xchg_tl, MO_TEUQ) +TRANS(amadd_db_w, LAM, gen_am, tcg_gen_atomic_fetch_add_tl, MO_TESL) +TRANS(amadd_db_d, LAM, gen_am, tcg_gen_atomic_fetch_add_tl, MO_TEUQ) +TRANS(amand_db_w, LAM, gen_am, tcg_gen_atomic_fetch_and_tl, MO_TESL) +TRANS(amand_db_d, LAM, gen_am, tcg_gen_atomic_fetch_and_tl, MO_TEUQ) +TRANS(amor_db_w, LAM, gen_am, tcg_gen_atomic_fetch_or_tl, MO_TESL) +TRANS(amor_db_d, LAM, gen_am, tcg_gen_atomic_fetch_or_tl, MO_TEUQ) +TRANS(amxor_db_w, LAM, gen_am, tcg_gen_atomic_fetch_xor_tl, MO_TESL) +TRANS(amxor_db_d, LAM, gen_am, tcg_gen_atomic_fetch_xor_tl, MO_TEUQ) +TRANS(ammax_db_w, LAM, gen_am, tcg_gen_atomic_fetch_smax_tl, MO_TESL) +TRANS(ammax_db_d, LAM, gen_am, tcg_gen_atomic_fetch_smax_tl, MO_TEUQ) +TRANS(ammin_db_w, LAM, gen_am, tcg_gen_atomic_fetch_smin_tl, MO_TESL) +TRANS(ammin_db_d, LAM, gen_am, tcg_gen_atomic_fetch_smin_tl, MO_TEUQ) +TRANS(ammax_db_wu, LAM, gen_am, tcg_gen_atomic_fetch_umax_tl, MO_TESL) +TRANS(ammax_db_du, LAM, gen_am, tcg_gen_atomic_fetch_umax_tl, MO_TEUQ) +TRANS(ammin_db_wu, LAM, gen_am, tcg_gen_atomic_fetch_umin_tl, MO_TESL) +TRANS(ammin_db_du, LAM, gen_am, tcg_gen_atomic_fetch_umin_tl, MO_TEUQ) diff --git a/target/loongarch/tcg/insn_trans/trans_bit.c.inc b/target/loongarch/tcg/insn_trans/trans_bit.c.inc new file mode 100644 index 0000000000..ee5fa003ce --- /dev/null +++ b/target/loongarch/tcg/insn_trans/trans_bit.c.inc @@ -0,0 +1,208 @@ +/* 
SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2021 Loongson Technology Corporation Limited + */ + +static bool gen_rr(DisasContext *ctx, arg_rr *a, + DisasExtend src_ext, DisasExtend dst_ext, + void (*func)(TCGv, TCGv)) +{ + TCGv dest = gpr_dst(ctx, a->rd, dst_ext); + TCGv src1 = gpr_src(ctx, a->rj, src_ext); + + func(dest, src1); + gen_set_gpr(a->rd, dest, dst_ext); + + return true; +} + +static void gen_bytepick_w(TCGv dest, TCGv src1, TCGv src2, target_long sa) +{ + tcg_gen_concat_tl_i64(dest, src1, src2); + tcg_gen_sextract_i64(dest, dest, (32 - sa * 8), 32); +} + +static void gen_bytepick_d(TCGv dest, TCGv src1, TCGv src2, target_long sa) +{ + tcg_gen_extract2_i64(dest, src1, src2, (64 - sa * 8)); +} + +static bool gen_bstrins(DisasContext *ctx, arg_rr_ms_ls *a, + DisasExtend dst_ext) +{ + TCGv src1 = gpr_src(ctx, a->rd, EXT_NONE); + TCGv src2 = gpr_src(ctx, a->rj, EXT_NONE); + TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); + + if (a->ls > a->ms) { + return false; + } + + tcg_gen_deposit_tl(dest, src1, src2, a->ls, a->ms - a->ls + 1); + gen_set_gpr(a->rd, dest, dst_ext); + return true; +} + +static bool gen_bstrpick(DisasContext *ctx, arg_rr_ms_ls *a, + DisasExtend dst_ext) +{ + TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); + TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); + + if (a->ls > a->ms) { + return false; + } + + tcg_gen_extract_tl(dest, src1, a->ls, a->ms - a->ls + 1); + gen_set_gpr(a->rd, dest, dst_ext); + return true; +} + +static void gen_clz_w(TCGv dest, TCGv src1) +{ + tcg_gen_clzi_tl(dest, src1, TARGET_LONG_BITS); + tcg_gen_subi_tl(dest, dest, TARGET_LONG_BITS - 32); +} + +static void gen_clo_w(TCGv dest, TCGv src1) +{ + tcg_gen_not_tl(dest, src1); + tcg_gen_ext32u_tl(dest, dest); + gen_clz_w(dest, dest); +} + +static void gen_ctz_w(TCGv dest, TCGv src1) +{ + tcg_gen_ori_tl(dest, src1, (target_ulong)MAKE_64BIT_MASK(32, 32)); + tcg_gen_ctzi_tl(dest, dest, TARGET_LONG_BITS); +} + +static void gen_cto_w(TCGv dest, TCGv src1) +{ + tcg_gen_not_tl(dest, src1); + gen_ctz_w(dest, dest); +} + +static void gen_clz_d(TCGv dest, TCGv src1) +{ + tcg_gen_clzi_i64(dest, src1, TARGET_LONG_BITS); +} + +static void gen_clo_d(TCGv dest, TCGv src1) +{ + tcg_gen_not_tl(dest, src1); + gen_clz_d(dest, dest); +} + +static void gen_ctz_d(TCGv dest, TCGv src1) +{ + tcg_gen_ctzi_tl(dest, src1, TARGET_LONG_BITS); +} + +static void gen_cto_d(TCGv dest, TCGv src1) +{ + tcg_gen_not_tl(dest, src1); + gen_ctz_d(dest, dest); +} + +static void gen_revb_2w(TCGv dest, TCGv src1) +{ + tcg_gen_bswap64_i64(dest, src1); + tcg_gen_rotri_i64(dest, dest, 32); +} + +static void gen_revb_2h(TCGv dest, TCGv src1) +{ + TCGv mask = tcg_constant_tl(0x00FF00FF); + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + + tcg_gen_shri_tl(t0, src1, 8); + tcg_gen_and_tl(t0, t0, mask); + tcg_gen_and_tl(t1, src1, mask); + tcg_gen_shli_tl(t1, t1, 8); + tcg_gen_or_tl(dest, t0, t1); +} + +static void gen_revb_4h(TCGv dest, TCGv src1) +{ + TCGv mask = tcg_constant_tl(0x00FF00FF00FF00FFULL); + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + + tcg_gen_shri_tl(t0, src1, 8); + tcg_gen_and_tl(t0, t0, mask); + tcg_gen_and_tl(t1, src1, mask); + tcg_gen_shli_tl(t1, t1, 8); + tcg_gen_or_tl(dest, t0, t1); +} + +static void gen_revh_2w(TCGv dest, TCGv src1) +{ + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + TCGv_i64 mask = tcg_constant_i64(0x0000ffff0000ffffull); + + tcg_gen_shri_i64(t0, src1, 16); + tcg_gen_and_i64(t1, src1, mask); + tcg_gen_and_i64(t0, t0, mask); + tcg_gen_shli_i64(t1, t1, 16); 
+ tcg_gen_or_i64(dest, t1, t0); +} + +static void gen_revh_d(TCGv dest, TCGv src1) +{ + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv mask = tcg_constant_tl(0x0000FFFF0000FFFFULL); + + tcg_gen_shri_tl(t1, src1, 16); + tcg_gen_and_tl(t1, t1, mask); + tcg_gen_and_tl(t0, src1, mask); + tcg_gen_shli_tl(t0, t0, 16); + tcg_gen_or_tl(t0, t0, t1); + tcg_gen_rotri_tl(dest, t0, 32); +} + +static void gen_maskeqz(TCGv dest, TCGv src1, TCGv src2) +{ + TCGv zero = tcg_constant_tl(0); + + tcg_gen_movcond_tl(TCG_COND_EQ, dest, src2, zero, zero, src1); +} + +static void gen_masknez(TCGv dest, TCGv src1, TCGv src2) +{ + TCGv zero = tcg_constant_tl(0); + + tcg_gen_movcond_tl(TCG_COND_NE, dest, src2, zero, zero, src1); +} + +TRANS(ext_w_h, ALL, gen_rr, EXT_NONE, EXT_NONE, tcg_gen_ext16s_tl) +TRANS(ext_w_b, ALL, gen_rr, EXT_NONE, EXT_NONE, tcg_gen_ext8s_tl) +TRANS(clo_w, ALL, gen_rr, EXT_NONE, EXT_NONE, gen_clo_w) +TRANS(clz_w, ALL, gen_rr, EXT_ZERO, EXT_NONE, gen_clz_w) +TRANS(cto_w, ALL, gen_rr, EXT_NONE, EXT_NONE, gen_cto_w) +TRANS(ctz_w, ALL, gen_rr, EXT_NONE, EXT_NONE, gen_ctz_w) +TRANS(clo_d, 64, gen_rr, EXT_NONE, EXT_NONE, gen_clo_d) +TRANS(clz_d, 64, gen_rr, EXT_NONE, EXT_NONE, gen_clz_d) +TRANS(cto_d, 64, gen_rr, EXT_NONE, EXT_NONE, gen_cto_d) +TRANS(ctz_d, 64, gen_rr, EXT_NONE, EXT_NONE, gen_ctz_d) +TRANS(revb_2h, ALL, gen_rr, EXT_NONE, EXT_SIGN, gen_revb_2h) +TRANS(revb_4h, 64, gen_rr, EXT_NONE, EXT_NONE, gen_revb_4h) +TRANS(revb_2w, 64, gen_rr, EXT_NONE, EXT_NONE, gen_revb_2w) +TRANS(revb_d, 64, gen_rr, EXT_NONE, EXT_NONE, tcg_gen_bswap64_i64) +TRANS(revh_2w, 64, gen_rr, EXT_NONE, EXT_NONE, gen_revh_2w) +TRANS(revh_d, 64, gen_rr, EXT_NONE, EXT_NONE, gen_revh_d) +TRANS(bitrev_4b, ALL, gen_rr, EXT_ZERO, EXT_SIGN, gen_helper_bitswap) +TRANS(bitrev_8b, 64, gen_rr, EXT_NONE, EXT_NONE, gen_helper_bitswap) +TRANS(bitrev_w, ALL, gen_rr, EXT_NONE, EXT_SIGN, gen_helper_bitrev_w) +TRANS(bitrev_d, 64, gen_rr, EXT_NONE, EXT_NONE, gen_helper_bitrev_d) +TRANS(maskeqz, ALL, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_maskeqz) +TRANS(masknez, ALL, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_masknez) +TRANS(bytepick_w, ALL, gen_rrr_sa, EXT_NONE, EXT_NONE, gen_bytepick_w) +TRANS(bytepick_d, 64, gen_rrr_sa, EXT_NONE, EXT_NONE, gen_bytepick_d) +TRANS(bstrins_w, ALL, gen_bstrins, EXT_SIGN) +TRANS(bstrins_d, 64, gen_bstrins, EXT_NONE) +TRANS(bstrpick_w, ALL, gen_bstrpick, EXT_SIGN) +TRANS(bstrpick_d, 64, gen_bstrpick, EXT_NONE) diff --git a/target/loongarch/tcg/insn_trans/trans_branch.c.inc b/target/loongarch/tcg/insn_trans/trans_branch.c.inc new file mode 100644 index 0000000000..221e5159db --- /dev/null +++ b/target/loongarch/tcg/insn_trans/trans_branch.c.inc @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2021 Loongson Technology Corporation Limited + */ + +static bool trans_b(DisasContext *ctx, arg_b *a) +{ + gen_goto_tb(ctx, 0, ctx->base.pc_next + a->offs); + ctx->base.is_jmp = DISAS_NORETURN; + return true; +} + +static bool trans_bl(DisasContext *ctx, arg_bl *a) +{ + tcg_gen_movi_tl(cpu_gpr[1], make_address_pc(ctx, ctx->base.pc_next + 4)); + gen_goto_tb(ctx, 0, ctx->base.pc_next + a->offs); + ctx->base.is_jmp = DISAS_NORETURN; + return true; +} + +static bool trans_jirl(DisasContext *ctx, arg_jirl *a) +{ + TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); + TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); + + TCGv addr = make_address_i(ctx, src1, a->imm); + tcg_gen_mov_tl(cpu_pc, addr); + tcg_gen_movi_tl(dest, make_address_pc(ctx, ctx->base.pc_next + 4)); + 
gen_set_gpr(a->rd, dest, EXT_NONE); + tcg_gen_lookup_and_goto_ptr(); + ctx->base.is_jmp = DISAS_NORETURN; + return true; +} + +static void gen_bc(DisasContext *ctx, TCGv src1, TCGv src2, + target_long offs, TCGCond cond) +{ + TCGLabel *l = gen_new_label(); + tcg_gen_brcond_tl(cond, src1, src2, l); + gen_goto_tb(ctx, 1, ctx->base.pc_next + 4); + gen_set_label(l); + gen_goto_tb(ctx, 0, ctx->base.pc_next + offs); + ctx->base.is_jmp = DISAS_NORETURN; +} + +static bool gen_rr_bc(DisasContext *ctx, arg_rr_offs *a, TCGCond cond) +{ + TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); + TCGv src2 = gpr_src(ctx, a->rd, EXT_NONE); + + gen_bc(ctx, src1, src2, a->offs, cond); + return true; +} + +static bool gen_rz_bc(DisasContext *ctx, arg_r_offs *a, TCGCond cond) +{ + TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); + TCGv src2 = tcg_constant_tl(0); + + gen_bc(ctx, src1, src2, a->offs, cond); + return true; +} + +static bool gen_cz_bc(DisasContext *ctx, arg_c_offs *a, TCGCond cond) +{ + TCGv src1 = tcg_temp_new(); + TCGv src2 = tcg_constant_tl(0); + + tcg_gen_ld8u_tl(src1, tcg_env, + offsetof(CPULoongArchState, cf[a->cj])); + gen_bc(ctx, src1, src2, a->offs, cond); + return true; +} + +TRANS(beq, ALL, gen_rr_bc, TCG_COND_EQ) +TRANS(bne, ALL, gen_rr_bc, TCG_COND_NE) +TRANS(blt, ALL, gen_rr_bc, TCG_COND_LT) +TRANS(bge, ALL, gen_rr_bc, TCG_COND_GE) +TRANS(bltu, ALL, gen_rr_bc, TCG_COND_LTU) +TRANS(bgeu, ALL, gen_rr_bc, TCG_COND_GEU) +TRANS(beqz, ALL, gen_rz_bc, TCG_COND_EQ) +TRANS(bnez, ALL, gen_rz_bc, TCG_COND_NE) +TRANS(bceqz, 64, gen_cz_bc, TCG_COND_EQ) +TRANS(bcnez, 64, gen_cz_bc, TCG_COND_NE) diff --git a/target/loongarch/tcg/insn_trans/trans_extra.c.inc b/target/loongarch/tcg/insn_trans/trans_extra.c.inc new file mode 100644 index 0000000000..cfa361fecf --- /dev/null +++ b/target/loongarch/tcg/insn_trans/trans_extra.c.inc @@ -0,0 +1,107 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2021 Loongson Technology Corporation Limited + */ + +static bool trans_break(DisasContext *ctx, arg_break *a) +{ + generate_exception(ctx, EXCCODE_BRK); + return true; +} + +static bool trans_syscall(DisasContext *ctx, arg_syscall *a) +{ + generate_exception(ctx, EXCCODE_SYS); + return true; +} + +static bool trans_asrtle_d(DisasContext *ctx, arg_asrtle_d * a) +{ + TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); + TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE); + + if (!avail_64(ctx)) { + return false; + } + + gen_helper_asrtle_d(tcg_env, src1, src2); + return true; +} + +static bool trans_asrtgt_d(DisasContext *ctx, arg_asrtgt_d * a) +{ + TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); + TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE); + + if (!avail_64(ctx)) { + return false; + } + + gen_helper_asrtgt_d(tcg_env, src1, src2); + return true; +} + +static bool gen_rdtime(DisasContext *ctx, arg_rr *a, + bool word, bool high) +{ + TCGv dst1 = gpr_dst(ctx, a->rd, EXT_NONE); + TCGv dst2 = gpr_dst(ctx, a->rj, EXT_NONE); + + translator_io_start(&ctx->base); + gen_helper_rdtime_d(dst1, tcg_env); + if (word) { + tcg_gen_sextract_tl(dst1, dst1, high ? 
32 : 0, 32); + } + tcg_gen_ld_i64(dst2, tcg_env, offsetof(CPULoongArchState, CSR_TID)); + + return true; +} + +static bool trans_rdtimel_w(DisasContext *ctx, arg_rdtimel_w *a) +{ + return gen_rdtime(ctx, a, 1, 0); +} + +static bool trans_rdtimeh_w(DisasContext *ctx, arg_rdtimeh_w *a) +{ + return gen_rdtime(ctx, a, 1, 1); +} + +static bool trans_rdtime_d(DisasContext *ctx, arg_rdtime_d *a) +{ + return gen_rdtime(ctx, a, 0, 0); +} + +static bool trans_cpucfg(DisasContext *ctx, arg_cpucfg *a) +{ + TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); + TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); + + gen_helper_cpucfg(dest, tcg_env, src1); + gen_set_gpr(a->rd, dest, EXT_NONE); + + return true; +} + +static bool gen_crc(DisasContext *ctx, arg_rrr *a, + void (*func)(TCGv, TCGv, TCGv, TCGv), + TCGv tsz) +{ + TCGv dest = gpr_dst(ctx, a->rd, EXT_SIGN); + TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); + TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE); + + func(dest, src2, src1, tsz); + gen_set_gpr(a->rd, dest, EXT_SIGN); + + return true; +} + +TRANS(crc_w_b_w, 64, gen_crc, gen_helper_crc32, tcg_constant_tl(1)) +TRANS(crc_w_h_w, 64, gen_crc, gen_helper_crc32, tcg_constant_tl(2)) +TRANS(crc_w_w_w, 64, gen_crc, gen_helper_crc32, tcg_constant_tl(4)) +TRANS(crc_w_d_w, 64, gen_crc, gen_helper_crc32, tcg_constant_tl(8)) +TRANS(crcc_w_b_w, 64, gen_crc, gen_helper_crc32c, tcg_constant_tl(1)) +TRANS(crcc_w_h_w, 64, gen_crc, gen_helper_crc32c, tcg_constant_tl(2)) +TRANS(crcc_w_w_w, 64, gen_crc, gen_helper_crc32c, tcg_constant_tl(4)) +TRANS(crcc_w_d_w, 64, gen_crc, gen_helper_crc32c, tcg_constant_tl(8)) diff --git a/target/loongarch/tcg/insn_trans/trans_farith.c.inc b/target/loongarch/tcg/insn_trans/trans_farith.c.inc new file mode 100644 index 0000000000..f4a0dea727 --- /dev/null +++ b/target/loongarch/tcg/insn_trans/trans_farith.c.inc @@ -0,0 +1,207 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2021 Loongson Technology Corporation Limited + */ + +#ifndef CONFIG_USER_ONLY +#define CHECK_FPE do { \ + if ((ctx->base.tb->flags & HW_FLAGS_EUEN_FPE) == 0) { \ + generate_exception(ctx, EXCCODE_FPD); \ + return true; \ + } \ +} while (0) +#else +#define CHECK_FPE +#endif + +static bool gen_fff(DisasContext *ctx, arg_fff *a, + void (*func)(TCGv, TCGv_env, TCGv, TCGv)) +{ + TCGv dest = get_fpr(ctx, a->fd); + TCGv src1 = get_fpr(ctx, a->fj); + TCGv src2 = get_fpr(ctx, a->fk); + + CHECK_FPE; + + func(dest, tcg_env, src1, src2); + set_fpr(a->fd, dest); + + return true; +} + +static bool gen_ff(DisasContext *ctx, arg_ff *a, + void (*func)(TCGv, TCGv_env, TCGv)) +{ + TCGv dest = get_fpr(ctx, a->fd); + TCGv src = get_fpr(ctx, a->fj); + + CHECK_FPE; + + func(dest, tcg_env, src); + set_fpr(a->fd, dest); + + return true; +} + +static bool gen_muladd(DisasContext *ctx, arg_ffff *a, + void (*func)(TCGv, TCGv_env, TCGv, TCGv, TCGv, TCGv_i32), + int flag) +{ + TCGv_i32 tflag = tcg_constant_i32(flag); + TCGv dest = get_fpr(ctx, a->fd); + TCGv src1 = get_fpr(ctx, a->fj); + TCGv src2 = get_fpr(ctx, a->fk); + TCGv src3 = get_fpr(ctx, a->fa); + + CHECK_FPE; + + func(dest, tcg_env, src1, src2, src3, tflag); + set_fpr(a->fd, dest); + + return true; +} + +static bool trans_fcopysign_s(DisasContext *ctx, arg_fcopysign_s *a) +{ + TCGv dest = get_fpr(ctx, a->fd); + TCGv src1 = get_fpr(ctx, a->fk); + TCGv src2 = get_fpr(ctx, a->fj); + + if (!avail_FP_SP(ctx)) { + return false; + } + + CHECK_FPE; + + tcg_gen_deposit_i64(dest, src1, src2, 0, 31); + set_fpr(a->fd, dest); + + return true; +} + +static bool trans_fcopysign_d(DisasContext 
*ctx, arg_fcopysign_d *a) +{ + TCGv dest = get_fpr(ctx, a->fd); + TCGv src1 = get_fpr(ctx, a->fk); + TCGv src2 = get_fpr(ctx, a->fj); + + if (!avail_FP_DP(ctx)) { + return false; + } + + CHECK_FPE; + + tcg_gen_deposit_i64(dest, src1, src2, 0, 63); + set_fpr(a->fd, dest); + + return true; +} + +static bool trans_fabs_s(DisasContext *ctx, arg_fabs_s *a) +{ + TCGv dest = get_fpr(ctx, a->fd); + TCGv src = get_fpr(ctx, a->fj); + + if (!avail_FP_SP(ctx)) { + return false; + } + + CHECK_FPE; + + tcg_gen_andi_i64(dest, src, MAKE_64BIT_MASK(0, 31)); + gen_nanbox_s(dest, dest); + set_fpr(a->fd, dest); + + return true; +} + +static bool trans_fabs_d(DisasContext *ctx, arg_fabs_d *a) +{ + TCGv dest = get_fpr(ctx, a->fd); + TCGv src = get_fpr(ctx, a->fj); + + if (!avail_FP_DP(ctx)) { + return false; + } + + CHECK_FPE; + + tcg_gen_andi_i64(dest, src, MAKE_64BIT_MASK(0, 63)); + set_fpr(a->fd, dest); + + return true; +} + +static bool trans_fneg_s(DisasContext *ctx, arg_fneg_s *a) +{ + TCGv dest = get_fpr(ctx, a->fd); + TCGv src = get_fpr(ctx, a->fj); + + if (!avail_FP_SP(ctx)) { + return false; + } + + CHECK_FPE; + + tcg_gen_xori_i64(dest, src, 0x80000000); + gen_nanbox_s(dest, dest); + set_fpr(a->fd, dest); + + return true; +} + +static bool trans_fneg_d(DisasContext *ctx, arg_fneg_d *a) +{ + TCGv dest = get_fpr(ctx, a->fd); + TCGv src = get_fpr(ctx, a->fj); + + if (!avail_FP_DP(ctx)) { + return false; + } + + CHECK_FPE; + + tcg_gen_xori_i64(dest, src, 0x8000000000000000LL); + set_fpr(a->fd, dest); + + return true; +} + +TRANS(fadd_s, FP_SP, gen_fff, gen_helper_fadd_s) +TRANS(fadd_d, FP_DP, gen_fff, gen_helper_fadd_d) +TRANS(fsub_s, FP_SP, gen_fff, gen_helper_fsub_s) +TRANS(fsub_d, FP_DP, gen_fff, gen_helper_fsub_d) +TRANS(fmul_s, FP_SP, gen_fff, gen_helper_fmul_s) +TRANS(fmul_d, FP_DP, gen_fff, gen_helper_fmul_d) +TRANS(fdiv_s, FP_SP, gen_fff, gen_helper_fdiv_s) +TRANS(fdiv_d, FP_DP, gen_fff, gen_helper_fdiv_d) +TRANS(fmax_s, FP_SP, gen_fff, gen_helper_fmax_s) +TRANS(fmax_d, FP_DP, gen_fff, gen_helper_fmax_d) +TRANS(fmin_s, FP_SP, gen_fff, gen_helper_fmin_s) +TRANS(fmin_d, FP_DP, gen_fff, gen_helper_fmin_d) +TRANS(fmaxa_s, FP_SP, gen_fff, gen_helper_fmaxa_s) +TRANS(fmaxa_d, FP_DP, gen_fff, gen_helper_fmaxa_d) +TRANS(fmina_s, FP_SP, gen_fff, gen_helper_fmina_s) +TRANS(fmina_d, FP_DP, gen_fff, gen_helper_fmina_d) +TRANS(fscaleb_s, FP_SP, gen_fff, gen_helper_fscaleb_s) +TRANS(fscaleb_d, FP_DP, gen_fff, gen_helper_fscaleb_d) +TRANS(fsqrt_s, FP_SP, gen_ff, gen_helper_fsqrt_s) +TRANS(fsqrt_d, FP_DP, gen_ff, gen_helper_fsqrt_d) +TRANS(frecip_s, FP_SP, gen_ff, gen_helper_frecip_s) +TRANS(frecip_d, FP_DP, gen_ff, gen_helper_frecip_d) +TRANS(frsqrt_s, FP_SP, gen_ff, gen_helper_frsqrt_s) +TRANS(frsqrt_d, FP_DP, gen_ff, gen_helper_frsqrt_d) +TRANS(flogb_s, FP_SP, gen_ff, gen_helper_flogb_s) +TRANS(flogb_d, FP_DP, gen_ff, gen_helper_flogb_d) +TRANS(fclass_s, FP_SP, gen_ff, gen_helper_fclass_s) +TRANS(fclass_d, FP_DP, gen_ff, gen_helper_fclass_d) +TRANS(fmadd_s, FP_SP, gen_muladd, gen_helper_fmuladd_s, 0) +TRANS(fmadd_d, FP_DP, gen_muladd, gen_helper_fmuladd_d, 0) +TRANS(fmsub_s, FP_SP, gen_muladd, gen_helper_fmuladd_s, float_muladd_negate_c) +TRANS(fmsub_d, FP_DP, gen_muladd, gen_helper_fmuladd_d, float_muladd_negate_c) +TRANS(fnmadd_s, FP_SP, gen_muladd, gen_helper_fmuladd_s, float_muladd_negate_result) +TRANS(fnmadd_d, FP_DP, gen_muladd, gen_helper_fmuladd_d, float_muladd_negate_result) +TRANS(fnmsub_s, FP_SP, gen_muladd, gen_helper_fmuladd_s, + float_muladd_negate_c | float_muladd_negate_result) +TRANS(fnmsub_d, 
FP_DP, gen_muladd, gen_helper_fmuladd_d, + float_muladd_negate_c | float_muladd_negate_result) diff --git a/target/loongarch/tcg/insn_trans/trans_fcmp.c.inc b/target/loongarch/tcg/insn_trans/trans_fcmp.c.inc new file mode 100644 index 0000000000..3babf69e4a --- /dev/null +++ b/target/loongarch/tcg/insn_trans/trans_fcmp.c.inc @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2021 Loongson Technology Corporation Limited + */ + +/* bit0(signaling/quiet) bit1(lt) bit2(eq) bit3(un) bit4(neq) */ +static uint32_t get_fcmp_flags(int cond) +{ + uint32_t flags = 0; + + if (cond & 0x1) { + flags |= FCMP_LT; + } + if (cond & 0x2) { + flags |= FCMP_EQ; + } + if (cond & 0x4) { + flags |= FCMP_UN; + } + if (cond & 0x8) { + flags |= FCMP_GT | FCMP_LT; + } + return flags; +} + +static bool trans_fcmp_cond_s(DisasContext *ctx, arg_fcmp_cond_s *a) +{ + TCGv var, src1, src2; + uint32_t flags; + void (*fn)(TCGv, TCGv_env, TCGv, TCGv, TCGv_i32); + + if (!avail_FP_SP(ctx)) { + return false; + } + + CHECK_FPE; + + var = tcg_temp_new(); + src1 = get_fpr(ctx, a->fj); + src2 = get_fpr(ctx, a->fk); + fn = (a->fcond & 1 ? gen_helper_fcmp_s_s : gen_helper_fcmp_c_s); + flags = get_fcmp_flags(a->fcond >> 1); + + fn(var, tcg_env, src1, src2, tcg_constant_i32(flags)); + + tcg_gen_st8_tl(var, tcg_env, offsetof(CPULoongArchState, cf[a->cd])); + return true; +} + +static bool trans_fcmp_cond_d(DisasContext *ctx, arg_fcmp_cond_d *a) +{ + TCGv var, src1, src2; + uint32_t flags; + void (*fn)(TCGv, TCGv_env, TCGv, TCGv, TCGv_i32); + + if (!avail_FP_DP(ctx)) { + return false; + } + + CHECK_FPE; + + var = tcg_temp_new(); + src1 = get_fpr(ctx, a->fj); + src2 = get_fpr(ctx, a->fk); + fn = (a->fcond & 1 ? gen_helper_fcmp_s_d : gen_helper_fcmp_c_d); + flags = get_fcmp_flags(a->fcond >> 1); + + fn(var, tcg_env, src1, src2, tcg_constant_i32(flags)); + + tcg_gen_st8_tl(var, tcg_env, offsetof(CPULoongArchState, cf[a->cd])); + return true; +} diff --git a/target/loongarch/tcg/insn_trans/trans_fcnv.c.inc b/target/loongarch/tcg/insn_trans/trans_fcnv.c.inc new file mode 100644 index 0000000000..833c059d6d --- /dev/null +++ b/target/loongarch/tcg/insn_trans/trans_fcnv.c.inc @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2021 Loongson Technology Corporation Limited + */ + +TRANS(fcvt_s_d, FP_DP, gen_ff, gen_helper_fcvt_s_d) +TRANS(fcvt_d_s, FP_DP, gen_ff, gen_helper_fcvt_d_s) +TRANS(ftintrm_w_s, FP_SP, gen_ff, gen_helper_ftintrm_w_s) +TRANS(ftintrm_w_d, FP_DP, gen_ff, gen_helper_ftintrm_w_d) +TRANS(ftintrm_l_s, FP_SP, gen_ff, gen_helper_ftintrm_l_s) +TRANS(ftintrm_l_d, FP_DP, gen_ff, gen_helper_ftintrm_l_d) +TRANS(ftintrp_w_s, FP_SP, gen_ff, gen_helper_ftintrp_w_s) +TRANS(ftintrp_w_d, FP_DP, gen_ff, gen_helper_ftintrp_w_d) +TRANS(ftintrp_l_s, FP_SP, gen_ff, gen_helper_ftintrp_l_s) +TRANS(ftintrp_l_d, FP_DP, gen_ff, gen_helper_ftintrp_l_d) +TRANS(ftintrz_w_s, FP_SP, gen_ff, gen_helper_ftintrz_w_s) +TRANS(ftintrz_w_d, FP_DP, gen_ff, gen_helper_ftintrz_w_d) +TRANS(ftintrz_l_s, FP_SP, gen_ff, gen_helper_ftintrz_l_s) +TRANS(ftintrz_l_d, FP_DP, gen_ff, gen_helper_ftintrz_l_d) +TRANS(ftintrne_w_s, FP_SP, gen_ff, gen_helper_ftintrne_w_s) +TRANS(ftintrne_w_d, FP_DP, gen_ff, gen_helper_ftintrne_w_d) +TRANS(ftintrne_l_s, FP_SP, gen_ff, gen_helper_ftintrne_l_s) +TRANS(ftintrne_l_d, FP_DP, gen_ff, gen_helper_ftintrne_l_d) +TRANS(ftint_w_s, FP_SP, gen_ff, gen_helper_ftint_w_s) +TRANS(ftint_w_d, FP_DP, gen_ff, gen_helper_ftint_w_d) +TRANS(ftint_l_s, FP_SP, gen_ff, 
gen_helper_ftint_l_s) +TRANS(ftint_l_d, FP_DP, gen_ff, gen_helper_ftint_l_d) +TRANS(ffint_s_w, FP_SP, gen_ff, gen_helper_ffint_s_w) +TRANS(ffint_s_l, FP_SP, gen_ff, gen_helper_ffint_s_l) +TRANS(ffint_d_w, FP_DP, gen_ff, gen_helper_ffint_d_w) +TRANS(ffint_d_l, FP_DP, gen_ff, gen_helper_ffint_d_l) +TRANS(frint_s, FP_SP, gen_ff, gen_helper_frint_s) +TRANS(frint_d, FP_DP, gen_ff, gen_helper_frint_d) diff --git a/target/loongarch/tcg/insn_trans/trans_fmemory.c.inc b/target/loongarch/tcg/insn_trans/trans_fmemory.c.inc new file mode 100644 index 0000000000..13452bc7e5 --- /dev/null +++ b/target/loongarch/tcg/insn_trans/trans_fmemory.c.inc @@ -0,0 +1,158 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2021 Loongson Technology Corporation Limited + */ + +static void maybe_nanbox_load(TCGv freg, MemOp mop) +{ + if ((mop & MO_SIZE) == MO_32) { + gen_nanbox_s(freg, freg); + } +} + +static bool gen_fload_i(DisasContext *ctx, arg_fr_i *a, MemOp mop) +{ + TCGv addr = gpr_src(ctx, a->rj, EXT_NONE); + TCGv dest = get_fpr(ctx, a->fd); + + CHECK_FPE; + + addr = make_address_i(ctx, addr, a->imm); + + tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, mop); + maybe_nanbox_load(dest, mop); + set_fpr(a->fd, dest); + + return true; +} + +static bool gen_fstore_i(DisasContext *ctx, arg_fr_i *a, MemOp mop) +{ + TCGv addr = gpr_src(ctx, a->rj, EXT_NONE); + TCGv src = get_fpr(ctx, a->fd); + + CHECK_FPE; + + addr = make_address_i(ctx, addr, a->imm); + + tcg_gen_qemu_st_tl(src, addr, ctx->mem_idx, mop); + + return true; +} + +static bool gen_floadx(DisasContext *ctx, arg_frr *a, MemOp mop) +{ + TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); + TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE); + TCGv dest = get_fpr(ctx, a->fd); + TCGv addr; + + CHECK_FPE; + + addr = make_address_x(ctx, src1, src2); + tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, mop); + maybe_nanbox_load(dest, mop); + set_fpr(a->fd, dest); + + return true; +} + +static bool gen_fstorex(DisasContext *ctx, arg_frr *a, MemOp mop) +{ + TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); + TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE); + TCGv src3 = get_fpr(ctx, a->fd); + TCGv addr; + + CHECK_FPE; + + addr = make_address_x(ctx, src1, src2); + tcg_gen_qemu_st_tl(src3, addr, ctx->mem_idx, mop); + + return true; +} + +static bool gen_fload_gt(DisasContext *ctx, arg_frr *a, MemOp mop) +{ + TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); + TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE); + TCGv dest = get_fpr(ctx, a->fd); + TCGv addr; + + CHECK_FPE; + + gen_helper_asrtgt_d(tcg_env, src1, src2); + addr = make_address_x(ctx, src1, src2); + tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, mop); + maybe_nanbox_load(dest, mop); + set_fpr(a->fd, dest); + + return true; +} + +static bool gen_fstore_gt(DisasContext *ctx, arg_frr *a, MemOp mop) +{ + TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); + TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE); + TCGv src3 = get_fpr(ctx, a->fd); + TCGv addr; + + CHECK_FPE; + + gen_helper_asrtgt_d(tcg_env, src1, src2); + addr = make_address_x(ctx, src1, src2); + tcg_gen_qemu_st_tl(src3, addr, ctx->mem_idx, mop); + + return true; +} + +static bool gen_fload_le(DisasContext *ctx, arg_frr *a, MemOp mop) +{ + TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); + TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE); + TCGv dest = get_fpr(ctx, a->fd); + TCGv addr; + + CHECK_FPE; + + gen_helper_asrtle_d(tcg_env, src1, src2); + addr = make_address_x(ctx, src1, src2); + tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, mop); + maybe_nanbox_load(dest, mop); + set_fpr(a->fd, dest); + + return true; +} + 
+static bool gen_fstore_le(DisasContext *ctx, arg_frr *a, MemOp mop) +{ + TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); + TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE); + TCGv src3 = get_fpr(ctx, a->fd); + TCGv addr; + + CHECK_FPE; + + gen_helper_asrtle_d(tcg_env, src1, src2); + addr = make_address_x(ctx, src1, src2); + tcg_gen_qemu_st_tl(src3, addr, ctx->mem_idx, mop); + + return true; +} + +TRANS(fld_s, FP_SP, gen_fload_i, MO_TEUL) +TRANS(fst_s, FP_SP, gen_fstore_i, MO_TEUL) +TRANS(fld_d, FP_DP, gen_fload_i, MO_TEUQ) +TRANS(fst_d, FP_DP, gen_fstore_i, MO_TEUQ) +TRANS(fldx_s, FP_SP, gen_floadx, MO_TEUL) +TRANS(fldx_d, FP_DP, gen_floadx, MO_TEUQ) +TRANS(fstx_s, FP_SP, gen_fstorex, MO_TEUL) +TRANS(fstx_d, FP_DP, gen_fstorex, MO_TEUQ) +TRANS(fldgt_s, FP_SP, gen_fload_gt, MO_TEUL) +TRANS(fldgt_d, FP_DP, gen_fload_gt, MO_TEUQ) +TRANS(fldle_s, FP_SP, gen_fload_le, MO_TEUL) +TRANS(fldle_d, FP_DP, gen_fload_le, MO_TEUQ) +TRANS(fstgt_s, FP_SP, gen_fstore_gt, MO_TEUL) +TRANS(fstgt_d, FP_DP, gen_fstore_gt, MO_TEUQ) +TRANS(fstle_s, FP_SP, gen_fstore_le, MO_TEUL) +TRANS(fstle_d, FP_DP, gen_fstore_le, MO_TEUQ) diff --git a/target/loongarch/tcg/insn_trans/trans_fmov.c.inc b/target/loongarch/tcg/insn_trans/trans_fmov.c.inc new file mode 100644 index 0000000000..5cbd9d3f34 --- /dev/null +++ b/target/loongarch/tcg/insn_trans/trans_fmov.c.inc @@ -0,0 +1,224 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2021 Loongson Technology Corporation Limited + */ + +static const uint32_t fcsr_mask[4] = { + UINT32_MAX, FCSR0_M1, FCSR0_M2, FCSR0_M3 +}; + +static bool trans_fsel(DisasContext *ctx, arg_fsel *a) +{ + TCGv zero = tcg_constant_tl(0); + TCGv dest = get_fpr(ctx, a->fd); + TCGv src1 = get_fpr(ctx, a->fj); + TCGv src2 = get_fpr(ctx, a->fk); + TCGv cond; + + if (!avail_FP(ctx)) { + return false; + } + + CHECK_FPE; + + cond = tcg_temp_new(); + tcg_gen_ld8u_tl(cond, tcg_env, offsetof(CPULoongArchState, cf[a->ca])); + tcg_gen_movcond_tl(TCG_COND_EQ, dest, cond, zero, src1, src2); + set_fpr(a->fd, dest); + + return true; +} + +static bool gen_f2f(DisasContext *ctx, arg_ff *a, + void (*func)(TCGv, TCGv), bool nanbox) +{ + TCGv dest = get_fpr(ctx, a->fd); + TCGv src = get_fpr(ctx, a->fj); + + CHECK_FPE; + + func(dest, src); + if (nanbox) { + gen_nanbox_s(dest, dest); + } + set_fpr(a->fd, dest); + + return true; +} + +static bool gen_r2f(DisasContext *ctx, arg_fr *a, + void (*func)(TCGv, TCGv)) +{ + TCGv src = gpr_src(ctx, a->rj, EXT_NONE); + TCGv dest = get_fpr(ctx, a->fd); + + if (!avail_FP(ctx)) { + return false; + } + + CHECK_FPE; + + func(dest, src); + set_fpr(a->fd, dest); + + return true; +} + +static bool gen_f2r(DisasContext *ctx, arg_rf *a, + void (*func)(TCGv, TCGv)) +{ + TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); + TCGv src = get_fpr(ctx, a->fj); + + if (!avail_FP(ctx)) { + return false; + } + + CHECK_FPE; + + func(dest, src); + gen_set_gpr(a->rd, dest, EXT_NONE); + + return true; +} + +static bool trans_movgr2fcsr(DisasContext *ctx, arg_movgr2fcsr *a) +{ + uint32_t mask = fcsr_mask[a->fcsrd]; + TCGv Rj = gpr_src(ctx, a->rj, EXT_NONE); + + if (!avail_FP(ctx)) { + return false; + } + + CHECK_FPE; + + if (mask == UINT32_MAX) { + tcg_gen_st32_i64(Rj, tcg_env, offsetof(CPULoongArchState, fcsr0)); + } else { + TCGv_i32 fcsr0 = tcg_temp_new_i32(); + TCGv_i32 temp = tcg_temp_new_i32(); + + tcg_gen_ld_i32(fcsr0, tcg_env, offsetof(CPULoongArchState, fcsr0)); + tcg_gen_extrl_i64_i32(temp, Rj); + tcg_gen_andi_i32(temp, temp, mask); + tcg_gen_andi_i32(fcsr0, fcsr0, ~mask); + tcg_gen_or_i32(fcsr0, 
fcsr0, temp); + tcg_gen_st_i32(fcsr0, tcg_env, offsetof(CPULoongArchState, fcsr0)); + } + + /* + * Install the new rounding mode to fpu_status, if changed. + * Note that FCSR3 is exactly the rounding mode field. + */ + if (mask & FCSR0_M3) { + gen_helper_set_rounding_mode(tcg_env); + } + return true; +} + +static bool trans_movfcsr2gr(DisasContext *ctx, arg_movfcsr2gr *a) +{ + TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); + + if (!avail_FP(ctx)) { + return false; + } + + CHECK_FPE; + + tcg_gen_ld32u_i64(dest, tcg_env, offsetof(CPULoongArchState, fcsr0)); + tcg_gen_andi_i64(dest, dest, fcsr_mask[a->fcsrs]); + gen_set_gpr(a->rd, dest, EXT_NONE); + + return true; +} + +static void gen_movgr2fr_w(TCGv dest, TCGv src) +{ + tcg_gen_deposit_i64(dest, dest, src, 0, 32); +} + +static void gen_movgr2frh_w(TCGv dest, TCGv src) +{ + tcg_gen_deposit_i64(dest, dest, src, 32, 32); +} + +static void gen_movfrh2gr_s(TCGv dest, TCGv src) +{ + tcg_gen_sextract_tl(dest, src, 32, 32); +} + +static bool trans_movfr2cf(DisasContext *ctx, arg_movfr2cf *a) +{ + TCGv t0; + TCGv src = get_fpr(ctx, a->fj); + + if (!avail_FP(ctx)) { + return false; + } + + CHECK_FPE; + + t0 = tcg_temp_new(); + tcg_gen_andi_tl(t0, src, 0x1); + tcg_gen_st8_tl(t0, tcg_env, offsetof(CPULoongArchState, cf[a->cd & 0x7])); + + return true; +} + +static bool trans_movcf2fr(DisasContext *ctx, arg_movcf2fr *a) +{ + TCGv dest = get_fpr(ctx, a->fd); + + if (!avail_FP(ctx)) { + return false; + } + + CHECK_FPE; + + tcg_gen_ld8u_tl(dest, tcg_env, + offsetof(CPULoongArchState, cf[a->cj & 0x7])); + set_fpr(a->fd, dest); + + return true; +} + +static bool trans_movgr2cf(DisasContext *ctx, arg_movgr2cf *a) +{ + TCGv t0; + + if (!avail_FP(ctx)) { + return false; + } + + CHECK_FPE; + + t0 = tcg_temp_new(); + tcg_gen_andi_tl(t0, gpr_src(ctx, a->rj, EXT_NONE), 0x1); + tcg_gen_st8_tl(t0, tcg_env, offsetof(CPULoongArchState, cf[a->cd & 0x7])); + + return true; +} + +static bool trans_movcf2gr(DisasContext *ctx, arg_movcf2gr *a) +{ + if (!avail_FP(ctx)) { + return false; + } + + CHECK_FPE; + + tcg_gen_ld8u_tl(gpr_dst(ctx, a->rd, EXT_NONE), tcg_env, + offsetof(CPULoongArchState, cf[a->cj & 0x7])); + return true; +} + +TRANS(fmov_s, FP_SP, gen_f2f, tcg_gen_mov_tl, true) +TRANS(fmov_d, FP_DP, gen_f2f, tcg_gen_mov_tl, false) +TRANS(movgr2fr_w, FP_SP, gen_r2f, gen_movgr2fr_w) +TRANS(movgr2fr_d, 64, gen_r2f, tcg_gen_mov_tl) +TRANS(movgr2frh_w, FP_DP, gen_r2f, gen_movgr2frh_w) +TRANS(movfr2gr_s, FP_SP, gen_f2r, tcg_gen_ext32s_tl) +TRANS(movfr2gr_d, 64, gen_f2r, tcg_gen_mov_tl) +TRANS(movfrh2gr_s, FP_DP, gen_f2r, gen_movfrh2gr_s) diff --git a/target/loongarch/tcg/insn_trans/trans_memory.c.inc b/target/loongarch/tcg/insn_trans/trans_memory.c.inc new file mode 100644 index 0000000000..42f4e74012 --- /dev/null +++ b/target/loongarch/tcg/insn_trans/trans_memory.c.inc @@ -0,0 +1,194 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2021 Loongson Technology Corporation Limited + */ + +static bool gen_load(DisasContext *ctx, arg_rr_i *a, MemOp mop) +{ + TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); + TCGv addr = gpr_src(ctx, a->rj, EXT_NONE); + + addr = make_address_i(ctx, addr, a->imm); + + tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, mop); + gen_set_gpr(a->rd, dest, EXT_NONE); + return true; +} + +static bool gen_store(DisasContext *ctx, arg_rr_i *a, MemOp mop) +{ + TCGv data = gpr_src(ctx, a->rd, EXT_NONE); + TCGv addr = gpr_src(ctx, a->rj, EXT_NONE); + + addr = make_address_i(ctx, addr, a->imm); + + tcg_gen_qemu_st_tl(data, addr, ctx->mem_idx, mop); + 
return true; +} + +static bool gen_loadx(DisasContext *ctx, arg_rrr *a, MemOp mop) +{ + TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); + TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); + TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE); + TCGv addr = make_address_x(ctx, src1, src2); + + tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, mop); + gen_set_gpr(a->rd, dest, EXT_NONE); + + return true; +} + +static bool gen_storex(DisasContext *ctx, arg_rrr *a, MemOp mop) +{ + TCGv data = gpr_src(ctx, a->rd, EXT_NONE); + TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); + TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE); + TCGv addr = make_address_x(ctx, src1, src2); + + tcg_gen_qemu_st_tl(data, addr, ctx->mem_idx, mop); + + return true; +} + +static bool gen_load_gt(DisasContext *ctx, arg_rrr *a, MemOp mop) +{ + TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); + TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); + TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE); + + gen_helper_asrtgt_d(tcg_env, src1, src2); + src1 = make_address_i(ctx, src1, 0); + tcg_gen_qemu_ld_tl(dest, src1, ctx->mem_idx, mop); + gen_set_gpr(a->rd, dest, EXT_NONE); + + return true; +} + +static bool gen_load_le(DisasContext *ctx, arg_rrr *a, MemOp mop) +{ + TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); + TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); + TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE); + + gen_helper_asrtle_d(tcg_env, src1, src2); + src1 = make_address_i(ctx, src1, 0); + tcg_gen_qemu_ld_tl(dest, src1, ctx->mem_idx, mop); + gen_set_gpr(a->rd, dest, EXT_NONE); + + return true; +} + +static bool gen_store_gt(DisasContext *ctx, arg_rrr *a, MemOp mop) +{ + TCGv data = gpr_src(ctx, a->rd, EXT_NONE); + TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); + TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE); + + gen_helper_asrtgt_d(tcg_env, src1, src2); + src1 = make_address_i(ctx, src1, 0); + tcg_gen_qemu_st_tl(data, src1, ctx->mem_idx, mop); + + return true; +} + +static bool gen_store_le(DisasContext *ctx, arg_rrr *a, MemOp mop) +{ + TCGv data = gpr_src(ctx, a->rd, EXT_NONE); + TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); + TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE); + + gen_helper_asrtle_d(tcg_env, src1, src2); + src1 = make_address_i(ctx, src1, 0); + tcg_gen_qemu_st_tl(data, src1, ctx->mem_idx, mop); + + return true; +} + +static bool trans_preld(DisasContext *ctx, arg_preld *a) +{ + return true; +} + +static bool trans_preldx(DisasContext *ctx, arg_preldx * a) +{ + return true; +} + +static bool trans_dbar(DisasContext *ctx, arg_dbar * a) +{ + tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL); + return true; +} + +static bool trans_ibar(DisasContext *ctx, arg_ibar *a) +{ + ctx->base.is_jmp = DISAS_STOP; + return true; +} + +static bool gen_ldptr(DisasContext *ctx, arg_rr_i *a, MemOp mop) +{ + TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); + TCGv addr = gpr_src(ctx, a->rj, EXT_NONE); + + addr = make_address_i(ctx, addr, a->imm); + + tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, mop); + gen_set_gpr(a->rd, dest, EXT_NONE); + return true; +} + +static bool gen_stptr(DisasContext *ctx, arg_rr_i *a, MemOp mop) +{ + TCGv data = gpr_src(ctx, a->rd, EXT_NONE); + TCGv addr = gpr_src(ctx, a->rj, EXT_NONE); + + addr = make_address_i(ctx, addr, a->imm); + + tcg_gen_qemu_st_tl(data, addr, ctx->mem_idx, mop); + return true; +} + +TRANS(ld_b, ALL, gen_load, MO_SB) +TRANS(ld_h, ALL, gen_load, MO_TESW) +TRANS(ld_w, ALL, gen_load, MO_TESL) +TRANS(ld_d, 64, gen_load, MO_TEUQ) +TRANS(st_b, ALL, gen_store, MO_UB) +TRANS(st_h, ALL, gen_store, MO_TEUW) +TRANS(st_w, ALL, gen_store, MO_TEUL) +TRANS(st_d, 64, gen_store, MO_TEUQ) +TRANS(ld_bu, 
ALL, gen_load, MO_UB) +TRANS(ld_hu, ALL, gen_load, MO_TEUW) +TRANS(ld_wu, 64, gen_load, MO_TEUL) +TRANS(ldx_b, 64, gen_loadx, MO_SB) +TRANS(ldx_h, 64, gen_loadx, MO_TESW) +TRANS(ldx_w, 64, gen_loadx, MO_TESL) +TRANS(ldx_d, 64, gen_loadx, MO_TEUQ) +TRANS(stx_b, 64, gen_storex, MO_UB) +TRANS(stx_h, 64, gen_storex, MO_TEUW) +TRANS(stx_w, 64, gen_storex, MO_TEUL) +TRANS(stx_d, 64, gen_storex, MO_TEUQ) +TRANS(ldx_bu, 64, gen_loadx, MO_UB) +TRANS(ldx_hu, 64, gen_loadx, MO_TEUW) +TRANS(ldx_wu, 64, gen_loadx, MO_TEUL) +TRANS(ldptr_w, 64, gen_ldptr, MO_TESL) +TRANS(stptr_w, 64, gen_stptr, MO_TEUL) +TRANS(ldptr_d, 64, gen_ldptr, MO_TEUQ) +TRANS(stptr_d, 64, gen_stptr, MO_TEUQ) +TRANS(ldgt_b, 64, gen_load_gt, MO_SB) +TRANS(ldgt_h, 64, gen_load_gt, MO_TESW) +TRANS(ldgt_w, 64, gen_load_gt, MO_TESL) +TRANS(ldgt_d, 64, gen_load_gt, MO_TEUQ) +TRANS(ldle_b, 64, gen_load_le, MO_SB) +TRANS(ldle_h, 64, gen_load_le, MO_TESW) +TRANS(ldle_w, 64, gen_load_le, MO_TESL) +TRANS(ldle_d, 64, gen_load_le, MO_TEUQ) +TRANS(stgt_b, 64, gen_store_gt, MO_UB) +TRANS(stgt_h, 64, gen_store_gt, MO_TEUW) +TRANS(stgt_w, 64, gen_store_gt, MO_TEUL) +TRANS(stgt_d, 64, gen_store_gt, MO_TEUQ) +TRANS(stle_b, 64, gen_store_le, MO_UB) +TRANS(stle_h, 64, gen_store_le, MO_TEUW) +TRANS(stle_w, 64, gen_store_le, MO_TEUL) +TRANS(stle_d, 64, gen_store_le, MO_TEUQ) diff --git a/target/loongarch/tcg/insn_trans/trans_privileged.c.inc b/target/loongarch/tcg/insn_trans/trans_privileged.c.inc new file mode 100644 index 0000000000..01d457212b --- /dev/null +++ b/target/loongarch/tcg/insn_trans/trans_privileged.c.inc @@ -0,0 +1,498 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2021 Loongson Technology Corporation Limited + * + * LoongArch translation routines for the privileged instructions. 
+ */ + +#include "cpu-csr.h" + +#ifdef CONFIG_USER_ONLY + +#define GEN_FALSE_TRANS(name) \ +static bool trans_##name(DisasContext *ctx, arg_##name * a) \ +{ \ + return false; \ +} + +GEN_FALSE_TRANS(csrrd) +GEN_FALSE_TRANS(csrwr) +GEN_FALSE_TRANS(csrxchg) +GEN_FALSE_TRANS(iocsrrd_b) +GEN_FALSE_TRANS(iocsrrd_h) +GEN_FALSE_TRANS(iocsrrd_w) +GEN_FALSE_TRANS(iocsrrd_d) +GEN_FALSE_TRANS(iocsrwr_b) +GEN_FALSE_TRANS(iocsrwr_h) +GEN_FALSE_TRANS(iocsrwr_w) +GEN_FALSE_TRANS(iocsrwr_d) +GEN_FALSE_TRANS(tlbsrch) +GEN_FALSE_TRANS(tlbrd) +GEN_FALSE_TRANS(tlbwr) +GEN_FALSE_TRANS(tlbfill) +GEN_FALSE_TRANS(tlbclr) +GEN_FALSE_TRANS(tlbflush) +GEN_FALSE_TRANS(invtlb) +GEN_FALSE_TRANS(cacop) +GEN_FALSE_TRANS(ldpte) +GEN_FALSE_TRANS(lddir) +GEN_FALSE_TRANS(ertn) +GEN_FALSE_TRANS(dbcl) +GEN_FALSE_TRANS(idle) + +#else + +typedef void (*GenCSRRead)(TCGv dest, TCGv_ptr env); +typedef void (*GenCSRWrite)(TCGv dest, TCGv_ptr env, TCGv src); + +typedef struct { + int offset; + int flags; + GenCSRRead readfn; + GenCSRWrite writefn; +} CSRInfo; + +enum { + CSRFL_READONLY = (1 << 0), + CSRFL_EXITTB = (1 << 1), + CSRFL_IO = (1 << 2), +}; + +#define CSR_OFF_FUNCS(NAME, FL, RD, WR) \ + [LOONGARCH_CSR_##NAME] = { \ + .offset = offsetof(CPULoongArchState, CSR_##NAME), \ + .flags = FL, .readfn = RD, .writefn = WR \ + } + +#define CSR_OFF_ARRAY(NAME, N) \ + [LOONGARCH_CSR_##NAME(N)] = { \ + .offset = offsetof(CPULoongArchState, CSR_##NAME[N]), \ + .flags = 0, .readfn = NULL, .writefn = NULL \ + } + +#define CSR_OFF_FLAGS(NAME, FL) \ + CSR_OFF_FUNCS(NAME, FL, NULL, NULL) + +#define CSR_OFF(NAME) \ + CSR_OFF_FLAGS(NAME, 0) + +static const CSRInfo csr_info[] = { + CSR_OFF_FLAGS(CRMD, CSRFL_EXITTB), + CSR_OFF(PRMD), + CSR_OFF_FLAGS(EUEN, CSRFL_EXITTB), + CSR_OFF_FLAGS(MISC, CSRFL_READONLY), + CSR_OFF(ECFG), + CSR_OFF_FUNCS(ESTAT, CSRFL_EXITTB, NULL, gen_helper_csrwr_estat), + CSR_OFF(ERA), + CSR_OFF(BADV), + CSR_OFF_FLAGS(BADI, CSRFL_READONLY), + CSR_OFF(EENTRY), + CSR_OFF(TLBIDX), + CSR_OFF(TLBEHI), + CSR_OFF(TLBELO0), + CSR_OFF(TLBELO1), + CSR_OFF_FUNCS(ASID, CSRFL_EXITTB, NULL, gen_helper_csrwr_asid), + CSR_OFF(PGDL), + CSR_OFF(PGDH), + CSR_OFF_FUNCS(PGD, CSRFL_READONLY, gen_helper_csrrd_pgd, NULL), + CSR_OFF(PWCL), + CSR_OFF(PWCH), + CSR_OFF(STLBPS), + CSR_OFF(RVACFG), + CSR_OFF_FUNCS(CPUID, CSRFL_READONLY, gen_helper_csrrd_cpuid, NULL), + CSR_OFF_FLAGS(PRCFG1, CSRFL_READONLY), + CSR_OFF_FLAGS(PRCFG2, CSRFL_READONLY), + CSR_OFF_FLAGS(PRCFG3, CSRFL_READONLY), + CSR_OFF_ARRAY(SAVE, 0), + CSR_OFF_ARRAY(SAVE, 1), + CSR_OFF_ARRAY(SAVE, 2), + CSR_OFF_ARRAY(SAVE, 3), + CSR_OFF_ARRAY(SAVE, 4), + CSR_OFF_ARRAY(SAVE, 5), + CSR_OFF_ARRAY(SAVE, 6), + CSR_OFF_ARRAY(SAVE, 7), + CSR_OFF_ARRAY(SAVE, 8), + CSR_OFF_ARRAY(SAVE, 9), + CSR_OFF_ARRAY(SAVE, 10), + CSR_OFF_ARRAY(SAVE, 11), + CSR_OFF_ARRAY(SAVE, 12), + CSR_OFF_ARRAY(SAVE, 13), + CSR_OFF_ARRAY(SAVE, 14), + CSR_OFF_ARRAY(SAVE, 15), + CSR_OFF(TID), + CSR_OFF_FUNCS(TCFG, CSRFL_IO, NULL, gen_helper_csrwr_tcfg), + CSR_OFF_FUNCS(TVAL, CSRFL_READONLY | CSRFL_IO, gen_helper_csrrd_tval, NULL), + CSR_OFF(CNTC), + CSR_OFF_FUNCS(TICLR, CSRFL_IO, NULL, gen_helper_csrwr_ticlr), + CSR_OFF(LLBCTL), + CSR_OFF(IMPCTL1), + CSR_OFF(IMPCTL2), + CSR_OFF(TLBRENTRY), + CSR_OFF(TLBRBADV), + CSR_OFF(TLBRERA), + CSR_OFF(TLBRSAVE), + CSR_OFF(TLBRELO0), + CSR_OFF(TLBRELO1), + CSR_OFF(TLBREHI), + CSR_OFF(TLBRPRMD), + CSR_OFF(MERRCTL), + CSR_OFF(MERRINFO1), + CSR_OFF(MERRINFO2), + CSR_OFF(MERRENTRY), + CSR_OFF(MERRERA), + CSR_OFF(MERRSAVE), + CSR_OFF(CTAG), + CSR_OFF_ARRAY(DMW, 0), + CSR_OFF_ARRAY(DMW, 1), + 
CSR_OFF_ARRAY(DMW, 2), + CSR_OFF_ARRAY(DMW, 3), + CSR_OFF(DBG), + CSR_OFF(DERA), + CSR_OFF(DSAVE), +}; + +static bool check_plv(DisasContext *ctx) +{ + if (ctx->plv == MMU_PLV_USER) { + generate_exception(ctx, EXCCODE_IPE); + return true; + } + return false; +} + +static const CSRInfo *get_csr(unsigned csr_num) +{ + const CSRInfo *csr; + + if (csr_num >= ARRAY_SIZE(csr_info)) { + return NULL; + } + csr = &csr_info[csr_num]; + if (csr->offset == 0) { + return NULL; + } + return csr; +} + +static bool check_csr_flags(DisasContext *ctx, const CSRInfo *csr, bool write) +{ + if ((csr->flags & CSRFL_READONLY) && write) { + return false; + } + if ((csr->flags & CSRFL_IO) && translator_io_start(&ctx->base)) { + ctx->base.is_jmp = DISAS_EXIT_UPDATE; + } else if ((csr->flags & CSRFL_EXITTB) && write) { + ctx->base.is_jmp = DISAS_EXIT_UPDATE; + } + return true; +} + +static bool trans_csrrd(DisasContext *ctx, arg_csrrd *a) +{ + TCGv dest; + const CSRInfo *csr; + + if (check_plv(ctx)) { + return false; + } + csr = get_csr(a->csr); + if (csr == NULL) { + /* CSR is undefined: read as 0. */ + dest = tcg_constant_tl(0); + } else { + check_csr_flags(ctx, csr, false); + dest = gpr_dst(ctx, a->rd, EXT_NONE); + if (csr->readfn) { + csr->readfn(dest, tcg_env); + } else { + tcg_gen_ld_tl(dest, tcg_env, csr->offset); + } + } + gen_set_gpr(a->rd, dest, EXT_NONE); + return true; +} + +static bool trans_csrwr(DisasContext *ctx, arg_csrwr *a) +{ + TCGv dest, src1; + const CSRInfo *csr; + + if (check_plv(ctx)) { + return false; + } + csr = get_csr(a->csr); + if (csr == NULL) { + /* CSR is undefined: write ignored, read old_value as 0. */ + gen_set_gpr(a->rd, tcg_constant_tl(0), EXT_NONE); + return true; + } + if (!check_csr_flags(ctx, csr, true)) { + /* CSR is readonly: trap. */ + return false; + } + src1 = gpr_src(ctx, a->rd, EXT_NONE); + if (csr->writefn) { + dest = gpr_dst(ctx, a->rd, EXT_NONE); + csr->writefn(dest, tcg_env, src1); + } else { + dest = tcg_temp_new(); + tcg_gen_ld_tl(dest, tcg_env, csr->offset); + tcg_gen_st_tl(src1, tcg_env, csr->offset); + } + gen_set_gpr(a->rd, dest, EXT_NONE); + return true; +} + +static bool trans_csrxchg(DisasContext *ctx, arg_csrxchg *a) +{ + TCGv src1, mask, oldv, newv, temp; + const CSRInfo *csr; + + if (check_plv(ctx)) { + return false; + } + csr = get_csr(a->csr); + if (csr == NULL) { + /* CSR is undefined: write ignored, read old_value as 0. */ + gen_set_gpr(a->rd, tcg_constant_tl(0), EXT_NONE); + return true; + } + + if (!check_csr_flags(ctx, csr, true)) { + /* CSR is readonly: trap. */ + return false; + } + + /* So far only readonly csrs have readfn. 
*/ + assert(csr->readfn == NULL); + + src1 = gpr_src(ctx, a->rd, EXT_NONE); + mask = gpr_src(ctx, a->rj, EXT_NONE); + oldv = tcg_temp_new(); + newv = tcg_temp_new(); + temp = tcg_temp_new(); + + tcg_gen_ld_tl(oldv, tcg_env, csr->offset); + tcg_gen_and_tl(newv, src1, mask); + tcg_gen_andc_tl(temp, oldv, mask); + tcg_gen_or_tl(newv, newv, temp); + + if (csr->writefn) { + csr->writefn(oldv, tcg_env, newv); + } else { + tcg_gen_st_tl(newv, tcg_env, csr->offset); + } + gen_set_gpr(a->rd, oldv, EXT_NONE); + return true; +} + +static bool gen_iocsrrd(DisasContext *ctx, arg_rr *a, + void (*func)(TCGv, TCGv_ptr, TCGv)) +{ + TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); + TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); + + if (check_plv(ctx)) { + return false; + } + func(dest, tcg_env, src1); + return true; +} + +static bool gen_iocsrwr(DisasContext *ctx, arg_rr *a, + void (*func)(TCGv_ptr, TCGv, TCGv)) +{ + TCGv val = gpr_src(ctx, a->rd, EXT_NONE); + TCGv addr = gpr_src(ctx, a->rj, EXT_NONE); + + if (check_plv(ctx)) { + return false; + } + func(tcg_env, addr, val); + return true; +} + +TRANS(iocsrrd_b, IOCSR, gen_iocsrrd, gen_helper_iocsrrd_b) +TRANS(iocsrrd_h, IOCSR, gen_iocsrrd, gen_helper_iocsrrd_h) +TRANS(iocsrrd_w, IOCSR, gen_iocsrrd, gen_helper_iocsrrd_w) +TRANS(iocsrrd_d, IOCSR, gen_iocsrrd, gen_helper_iocsrrd_d) +TRANS(iocsrwr_b, IOCSR, gen_iocsrwr, gen_helper_iocsrwr_b) +TRANS(iocsrwr_h, IOCSR, gen_iocsrwr, gen_helper_iocsrwr_h) +TRANS(iocsrwr_w, IOCSR, gen_iocsrwr, gen_helper_iocsrwr_w) +TRANS(iocsrwr_d, IOCSR, gen_iocsrwr, gen_helper_iocsrwr_d) + +static void check_mmu_idx(DisasContext *ctx) +{ + if (ctx->mem_idx != MMU_IDX_DA) { + tcg_gen_movi_tl(cpu_pc, ctx->base.pc_next + 4); + ctx->base.is_jmp = DISAS_EXIT; + } +} + +static bool trans_tlbsrch(DisasContext *ctx, arg_tlbsrch *a) +{ + if (check_plv(ctx)) { + return false; + } + gen_helper_tlbsrch(tcg_env); + return true; +} + +static bool trans_tlbrd(DisasContext *ctx, arg_tlbrd *a) +{ + if (check_plv(ctx)) { + return false; + } + gen_helper_tlbrd(tcg_env); + return true; +} + +static bool trans_tlbwr(DisasContext *ctx, arg_tlbwr *a) +{ + if (check_plv(ctx)) { + return false; + } + gen_helper_tlbwr(tcg_env); + check_mmu_idx(ctx); + return true; +} + +static bool trans_tlbfill(DisasContext *ctx, arg_tlbfill *a) +{ + if (check_plv(ctx)) { + return false; + } + gen_helper_tlbfill(tcg_env); + check_mmu_idx(ctx); + return true; +} + +static bool trans_tlbclr(DisasContext *ctx, arg_tlbclr *a) +{ + if (check_plv(ctx)) { + return false; + } + gen_helper_tlbclr(tcg_env); + check_mmu_idx(ctx); + return true; +} + +static bool trans_tlbflush(DisasContext *ctx, arg_tlbflush *a) +{ + if (check_plv(ctx)) { + return false; + } + gen_helper_tlbflush(tcg_env); + check_mmu_idx(ctx); + return true; +} + +static bool trans_invtlb(DisasContext *ctx, arg_invtlb *a) +{ + TCGv rj = gpr_src(ctx, a->rj, EXT_NONE); + TCGv rk = gpr_src(ctx, a->rk, EXT_NONE); + + if (check_plv(ctx)) { + return false; + } + + switch (a->imm) { + case 0: + case 1: + gen_helper_invtlb_all(tcg_env); + break; + case 2: + gen_helper_invtlb_all_g(tcg_env, tcg_constant_i32(1)); + break; + case 3: + gen_helper_invtlb_all_g(tcg_env, tcg_constant_i32(0)); + break; + case 4: + gen_helper_invtlb_all_asid(tcg_env, rj); + break; + case 5: + gen_helper_invtlb_page_asid(tcg_env, rj, rk); + break; + case 6: + gen_helper_invtlb_page_asid_or_g(tcg_env, rj, rk); + break; + default: + return false; + } + ctx->base.is_jmp = DISAS_STOP; + return true; +} + +static bool trans_cacop(DisasContext *ctx, arg_cacop *a) 
+{ + /* Treat the cacop as a nop */ + if (check_plv(ctx)) { + return false; + } + return true; +} + +static bool trans_ldpte(DisasContext *ctx, arg_ldpte *a) +{ + TCGv_i32 mem_idx = tcg_constant_i32(ctx->mem_idx); + TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); + + if (!avail_LSPW(ctx)) { + return true; + } + + if (check_plv(ctx)) { + return false; + } + gen_helper_ldpte(tcg_env, src1, tcg_constant_tl(a->imm), mem_idx); + return true; +} + +static bool trans_lddir(DisasContext *ctx, arg_lddir *a) +{ + TCGv_i32 mem_idx = tcg_constant_i32(ctx->mem_idx); + TCGv src = gpr_src(ctx, a->rj, EXT_NONE); + TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); + + if (!avail_LSPW(ctx)) { + return true; + } + + if (check_plv(ctx)) { + return false; + } + gen_helper_lddir(dest, tcg_env, src, tcg_constant_tl(a->imm), mem_idx); + return true; +} + +static bool trans_ertn(DisasContext *ctx, arg_ertn *a) +{ + if (check_plv(ctx)) { + return false; + } + gen_helper_ertn(tcg_env); + ctx->base.is_jmp = DISAS_EXIT; + return true; +} + +static bool trans_dbcl(DisasContext *ctx, arg_dbcl *a) +{ + if (check_plv(ctx)) { + return false; + } + generate_exception(ctx, EXCCODE_DBP); + return true; +} + +static bool trans_idle(DisasContext *ctx, arg_idle *a) +{ + if (check_plv(ctx)) { + return false; + } + + tcg_gen_movi_tl(cpu_pc, ctx->base.pc_next + 4); + gen_helper_idle(tcg_env); + ctx->base.is_jmp = DISAS_NORETURN; + return true; +} +#endif diff --git a/target/loongarch/tcg/insn_trans/trans_shift.c.inc b/target/loongarch/tcg/insn_trans/trans_shift.c.inc new file mode 100644 index 0000000000..2f4bd6ff28 --- /dev/null +++ b/target/loongarch/tcg/insn_trans/trans_shift.c.inc @@ -0,0 +1,99 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2021 Loongson Technology Corporation Limited + */ + +static void gen_sll_w(TCGv dest, TCGv src1, TCGv src2) +{ + TCGv t0 = tcg_temp_new(); + tcg_gen_andi_tl(t0, src2, 0x1f); + tcg_gen_shl_tl(dest, src1, t0); +} + +static void gen_srl_w(TCGv dest, TCGv src1, TCGv src2) +{ + TCGv t0 = tcg_temp_new(); + tcg_gen_andi_tl(t0, src2, 0x1f); + tcg_gen_shr_tl(dest, src1, t0); +} + +static void gen_sra_w(TCGv dest, TCGv src1, TCGv src2) +{ + TCGv t0 = tcg_temp_new(); + tcg_gen_andi_tl(t0, src2, 0x1f); + tcg_gen_sar_tl(dest, src1, t0); +} + +static void gen_sll_d(TCGv dest, TCGv src1, TCGv src2) +{ + TCGv t0 = tcg_temp_new(); + tcg_gen_andi_tl(t0, src2, 0x3f); + tcg_gen_shl_tl(dest, src1, t0); +} + +static void gen_srl_d(TCGv dest, TCGv src1, TCGv src2) +{ + TCGv t0 = tcg_temp_new(); + tcg_gen_andi_tl(t0, src2, 0x3f); + tcg_gen_shr_tl(dest, src1, t0); +} + +static void gen_sra_d(TCGv dest, TCGv src1, TCGv src2) +{ + TCGv t0 = tcg_temp_new(); + tcg_gen_andi_tl(t0, src2, 0x3f); + tcg_gen_sar_tl(dest, src1, t0); +} + +static void gen_rotr_w(TCGv dest, TCGv src1, TCGv src2) +{ + TCGv_i32 t1 = tcg_temp_new_i32(); + TCGv_i32 t2 = tcg_temp_new_i32(); + TCGv t0 = tcg_temp_new(); + + tcg_gen_andi_tl(t0, src2, 0x1f); + + tcg_gen_trunc_tl_i32(t1, src1); + tcg_gen_trunc_tl_i32(t2, t0); + + tcg_gen_rotr_i32(t1, t1, t2); + tcg_gen_ext_i32_tl(dest, t1); +} + +static void gen_rotr_d(TCGv dest, TCGv src1, TCGv src2) +{ + TCGv t0 = tcg_temp_new(); + tcg_gen_andi_tl(t0, src2, 0x3f); + tcg_gen_rotr_tl(dest, src1, t0); +} + +static bool trans_srai_w(DisasContext *ctx, arg_srai_w *a) +{ + TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); + TCGv src1 = gpr_src(ctx, a->rj, EXT_ZERO); + + if (!avail_64(ctx)) { + return false; + } + + tcg_gen_sextract_tl(dest, src1, a->imm, 32 - a->imm); + gen_set_gpr(a->rd, dest, 
EXT_NONE); + + return true; +} + +TRANS(sll_w, ALL, gen_rrr, EXT_ZERO, EXT_NONE, EXT_SIGN, gen_sll_w) +TRANS(srl_w, ALL, gen_rrr, EXT_ZERO, EXT_NONE, EXT_SIGN, gen_srl_w) +TRANS(sra_w, ALL, gen_rrr, EXT_SIGN, EXT_NONE, EXT_SIGN, gen_sra_w) +TRANS(sll_d, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_sll_d) +TRANS(srl_d, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_srl_d) +TRANS(sra_d, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_sra_d) +TRANS(rotr_w, 64, gen_rrr, EXT_ZERO, EXT_NONE, EXT_SIGN, gen_rotr_w) +TRANS(rotr_d, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_rotr_d) +TRANS(slli_w, ALL, gen_rri_c, EXT_NONE, EXT_SIGN, tcg_gen_shli_tl) +TRANS(slli_d, 64, gen_rri_c, EXT_NONE, EXT_NONE, tcg_gen_shli_tl) +TRANS(srli_w, ALL, gen_rri_c, EXT_ZERO, EXT_SIGN, tcg_gen_shri_tl) +TRANS(srli_d, 64, gen_rri_c, EXT_NONE, EXT_NONE, tcg_gen_shri_tl) +TRANS(srai_d, 64, gen_rri_c, EXT_NONE, EXT_NONE, tcg_gen_sari_tl) +TRANS(rotri_w, 64, gen_rri_v, EXT_NONE, EXT_NONE, gen_rotr_w) +TRANS(rotri_d, 64, gen_rri_c, EXT_NONE, EXT_NONE, tcg_gen_rotri_tl) diff --git a/target/loongarch/tcg/insn_trans/trans_vec.c.inc b/target/loongarch/tcg/insn_trans/trans_vec.c.inc new file mode 100644 index 0000000000..92b1d22e28 --- /dev/null +++ b/target/loongarch/tcg/insn_trans/trans_vec.c.inc @@ -0,0 +1,5511 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * LoongArch vector translate functions + * Copyright (c) 2022-2023 Loongson Technology Corporation Limited + */ + +static bool check_vec(DisasContext *ctx, uint32_t oprsz) +{ + if ((oprsz == 16) && ((ctx->base.tb->flags & HW_FLAGS_EUEN_SXE) == 0)) { + generate_exception(ctx, EXCCODE_SXD); + return false; + } + + if ((oprsz == 32) && ((ctx->base.tb->flags & HW_FLAGS_EUEN_ASXE) == 0)) { + generate_exception(ctx, EXCCODE_ASXD); + return false; + } + + return true; +} + +static bool gen_vvvv_ptr_vl(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz, + gen_helper_gvec_4_ptr *fn) +{ + if (!check_vec(ctx, oprsz)) { + return true; + } + + tcg_gen_gvec_4_ptr(vec_full_offset(a->vd), + vec_full_offset(a->vj), + vec_full_offset(a->vk), + vec_full_offset(a->va), + tcg_env, + oprsz, ctx->vl / 8, 0, fn); + return true; +} + +static bool gen_vvvv_ptr(DisasContext *ctx, arg_vvvv *a, + gen_helper_gvec_4_ptr *fn) +{ + return gen_vvvv_ptr_vl(ctx, a, 16, fn); +} + +static bool gen_xxxx_ptr(DisasContext *ctx, arg_vvvv *a, + gen_helper_gvec_4_ptr *fn) +{ + return gen_vvvv_ptr_vl(ctx, a, 32, fn); +} + +static bool gen_vvvv_vl(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz, + gen_helper_gvec_4 *fn) +{ + if (!check_vec(ctx, oprsz)) { + return true; + } + + tcg_gen_gvec_4_ool(vec_full_offset(a->vd), + vec_full_offset(a->vj), + vec_full_offset(a->vk), + vec_full_offset(a->va), + oprsz, ctx->vl / 8, 0, fn); + return true; +} + +static bool gen_vvvv(DisasContext *ctx, arg_vvvv *a, + gen_helper_gvec_4 *fn) +{ + return gen_vvvv_vl(ctx, a, 16, fn); +} + +static bool gen_xxxx(DisasContext *ctx, arg_vvvv *a, + gen_helper_gvec_4 *fn) +{ + return gen_vvvv_vl(ctx, a, 32, fn); +} + +static bool gen_vvv_ptr_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz, + gen_helper_gvec_3_ptr *fn) +{ + if (!check_vec(ctx, oprsz)) { + return true; + } + tcg_gen_gvec_3_ptr(vec_full_offset(a->vd), + vec_full_offset(a->vj), + vec_full_offset(a->vk), + tcg_env, + oprsz, ctx->vl / 8, 0, fn); + return true; +} + +static bool gen_vvv_ptr(DisasContext *ctx, arg_vvv *a, + gen_helper_gvec_3_ptr *fn) +{ + return gen_vvv_ptr_vl(ctx, a, 16, fn); +} + +static bool gen_xxx_ptr(DisasContext *ctx, arg_vvv *a, + gen_helper_gvec_3_ptr 
*fn) +{ + return gen_vvv_ptr_vl(ctx, a, 32, fn); +} + +static bool gen_vvv_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz, + gen_helper_gvec_3 *fn) +{ + if (!check_vec(ctx, oprsz)) { + return true; + } + + tcg_gen_gvec_3_ool(vec_full_offset(a->vd), + vec_full_offset(a->vj), + vec_full_offset(a->vk), + oprsz, ctx->vl / 8, 0, fn); + return true; +} + +static bool gen_vvv(DisasContext *ctx, arg_vvv *a, gen_helper_gvec_3 *fn) +{ + return gen_vvv_vl(ctx, a, 16, fn); +} + +static bool gen_xxx(DisasContext *ctx, arg_vvv *a, gen_helper_gvec_3 *fn) +{ + return gen_vvv_vl(ctx, a, 32, fn); +} + +static bool gen_vv_ptr_vl(DisasContext *ctx, arg_vv *a, uint32_t oprsz, + gen_helper_gvec_2_ptr *fn) +{ + if (!check_vec(ctx, oprsz)) { + return true; + } + + tcg_gen_gvec_2_ptr(vec_full_offset(a->vd), + vec_full_offset(a->vj), + tcg_env, + oprsz, ctx->vl / 8, 0, fn); + return true; +} + +static bool gen_vv_ptr(DisasContext *ctx, arg_vv *a, + gen_helper_gvec_2_ptr *fn) +{ + return gen_vv_ptr_vl(ctx, a, 16, fn); +} + +static bool gen_xx_ptr(DisasContext *ctx, arg_vv *a, + gen_helper_gvec_2_ptr *fn) +{ + return gen_vv_ptr_vl(ctx, a, 32, fn); +} + +static bool gen_vv_vl(DisasContext *ctx, arg_vv *a, uint32_t oprsz, + gen_helper_gvec_2 *fn) +{ + if (!check_vec(ctx, oprsz)) { + return true; + } + + tcg_gen_gvec_2_ool(vec_full_offset(a->vd), + vec_full_offset(a->vj), + oprsz, ctx->vl / 8, 0, fn); + return true; +} + +static bool gen_vv(DisasContext *ctx, arg_vv *a, gen_helper_gvec_2 *fn) +{ + return gen_vv_vl(ctx, a, 16, fn); +} + +static bool gen_xx(DisasContext *ctx, arg_vv *a, gen_helper_gvec_2 *fn) +{ + return gen_vv_vl(ctx, a, 32, fn); +} + +static bool gen_vv_i_vl(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz, + gen_helper_gvec_2i *fn) +{ + if (!check_vec(ctx, oprsz)) { + return true; + } + + tcg_gen_gvec_2i_ool(vec_full_offset(a->vd), + vec_full_offset(a->vj), + tcg_constant_i64(a->imm), + oprsz, ctx->vl / 8, 0, fn); + return true; +} + +static bool gen_vv_i(DisasContext *ctx, arg_vv_i *a, gen_helper_gvec_2i *fn) +{ + return gen_vv_i_vl(ctx, a, 16, fn); +} + +static bool gen_xx_i(DisasContext *ctx, arg_vv_i *a, gen_helper_gvec_2i *fn) +{ + return gen_vv_i_vl(ctx, a, 32, fn); +} + +static bool gen_cv_vl(DisasContext *ctx, arg_cv *a, uint32_t sz, + void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32)) +{ + if (!check_vec(ctx, sz)) { + return true; + } + + TCGv_i32 vj = tcg_constant_i32(a->vj); + TCGv_i32 cd = tcg_constant_i32(a->cd); + TCGv_i32 oprsz = tcg_constant_i32(sz); + + func(tcg_env, oprsz, cd, vj); + return true; +} + +static bool gen_cv(DisasContext *ctx, arg_cv *a, + void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32)) +{ + return gen_cv_vl(ctx, a, 16, func); +} + +static bool gen_cx(DisasContext *ctx, arg_cv *a, + void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32)) +{ + return gen_cv_vl(ctx, a, 32, func); +} + +static bool gvec_vvv_vl(DisasContext *ctx, arg_vvv *a, + uint32_t oprsz, MemOp mop, + void (*func)(unsigned, uint32_t, uint32_t, + uint32_t, uint32_t, uint32_t)) +{ + uint32_t vd_ofs = vec_full_offset(a->vd); + uint32_t vj_ofs = vec_full_offset(a->vj); + uint32_t vk_ofs = vec_full_offset(a->vk); + + if (!check_vec(ctx, oprsz)) { + return true; + } + + func(mop, vd_ofs, vj_ofs, vk_ofs, oprsz, ctx->vl / 8); + return true; +} + +static bool gvec_vvv(DisasContext *ctx, arg_vvv *a, MemOp mop, + void (*func)(unsigned, uint32_t, uint32_t, + uint32_t, uint32_t, uint32_t)) +{ + return gvec_vvv_vl(ctx, a, 16, mop, func); +} + +static bool gvec_xxx(DisasContext *ctx, arg_vvv *a, MemOp mop, + void 
(*func)(unsigned, uint32_t, uint32_t, + uint32_t, uint32_t, uint32_t)) +{ + return gvec_vvv_vl(ctx, a, 32, mop, func); +} + +static bool gvec_vv_vl(DisasContext *ctx, arg_vv *a, + uint32_t oprsz, MemOp mop, + void (*func)(unsigned, uint32_t, uint32_t, + uint32_t, uint32_t)) +{ + uint32_t vd_ofs = vec_full_offset(a->vd); + uint32_t vj_ofs = vec_full_offset(a->vj); + + if (!check_vec(ctx, oprsz)) { + return true; + } + + func(mop, vd_ofs, vj_ofs, oprsz, ctx->vl / 8); + return true; +} + + +static bool gvec_vv(DisasContext *ctx, arg_vv *a, MemOp mop, + void (*func)(unsigned, uint32_t, uint32_t, + uint32_t, uint32_t)) +{ + return gvec_vv_vl(ctx, a, 16, mop, func); +} + +static bool gvec_xx(DisasContext *ctx, arg_vv *a, MemOp mop, + void (*func)(unsigned, uint32_t, uint32_t, + uint32_t, uint32_t)) +{ + return gvec_vv_vl(ctx, a, 32, mop, func); +} + +static bool gvec_vv_i_vl(DisasContext *ctx, arg_vv_i *a, + uint32_t oprsz, MemOp mop, + void (*func)(unsigned, uint32_t, uint32_t, + int64_t, uint32_t, uint32_t)) +{ + uint32_t vd_ofs = vec_full_offset(a->vd); + uint32_t vj_ofs = vec_full_offset(a->vj); + + if (!check_vec(ctx, oprsz)) { + return true; + } + + func(mop, vd_ofs, vj_ofs, a->imm, oprsz, ctx->vl / 8); + return true; +} + +static bool gvec_vv_i(DisasContext *ctx, arg_vv_i *a, MemOp mop, + void (*func)(unsigned, uint32_t, uint32_t, + int64_t, uint32_t, uint32_t)) +{ + return gvec_vv_i_vl(ctx, a, 16, mop, func); +} + +static bool gvec_xx_i(DisasContext *ctx, arg_vv_i *a, MemOp mop, + void (*func)(unsigned, uint32_t, uint32_t, + int64_t, uint32_t, uint32_t)) +{ + return gvec_vv_i_vl(ctx,a, 32, mop, func); +} + +static bool gvec_subi_vl(DisasContext *ctx, arg_vv_i *a, + uint32_t oprsz, MemOp mop) +{ + uint32_t vd_ofs = vec_full_offset(a->vd); + uint32_t vj_ofs = vec_full_offset(a->vj); + + if (!check_vec(ctx, oprsz)) { + return true; + } + + tcg_gen_gvec_addi(mop, vd_ofs, vj_ofs, -a->imm, oprsz, ctx->vl / 8); + return true; +} + +static bool gvec_subi(DisasContext *ctx, arg_vv_i *a, MemOp mop) +{ + return gvec_subi_vl(ctx, a, 16, mop); +} + +static bool gvec_xsubi(DisasContext *ctx, arg_vv_i *a, MemOp mop) +{ + return gvec_subi_vl(ctx, a, 32, mop); +} + +TRANS(vadd_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_add) +TRANS(vadd_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_add) +TRANS(vadd_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_add) +TRANS(vadd_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_add) +TRANS(xvadd_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_add) +TRANS(xvadd_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_add) +TRANS(xvadd_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_add) +TRANS(xvadd_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_add) + +static bool gen_vaddsub_q_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz, + void (*func)(TCGv_i64, TCGv_i64, TCGv_i64, + TCGv_i64, TCGv_i64, TCGv_i64)) +{ + int i; + TCGv_i64 rh, rl, ah, al, bh, bl; + + if (!check_vec(ctx, oprsz)) { + return true; + } + + rh = tcg_temp_new_i64(); + rl = tcg_temp_new_i64(); + ah = tcg_temp_new_i64(); + al = tcg_temp_new_i64(); + bh = tcg_temp_new_i64(); + bl = tcg_temp_new_i64(); + + for (i = 0; i < oprsz / 16; i++) { + get_vreg64(ah, a->vj, 1 + i * 2); + get_vreg64(al, a->vj, i * 2); + get_vreg64(bh, a->vk, 1 + i * 2); + get_vreg64(bl, a->vk, i * 2); + + func(rl, rh, al, ah, bl, bh); + + set_vreg64(rh, a->vd, 1 + i * 2); + set_vreg64(rl, a->vd, i * 2); + } + return true; +} + +static bool gen_vaddsub_q(DisasContext *ctx, arg_vvv *a, + void (*func)(TCGv_i64, TCGv_i64, TCGv_i64, + TCGv_i64, TCGv_i64, TCGv_i64)) +{ + return gen_vaddsub_q_vl(ctx, a, 16, func); +} + 
+static bool gen_xvaddsub_q(DisasContext *ctx, arg_vvv *a, + void (*func)(TCGv_i64, TCGv_i64, TCGv_i64, + TCGv_i64, TCGv_i64, TCGv_i64)) +{ + return gen_vaddsub_q_vl(ctx, a, 32, func); +} + +TRANS(vsub_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_sub) +TRANS(vsub_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_sub) +TRANS(vsub_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_sub) +TRANS(vsub_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_sub) +TRANS(xvsub_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_sub) +TRANS(xvsub_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_sub) +TRANS(xvsub_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_sub) +TRANS(xvsub_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_sub) + +TRANS(vadd_q, LSX, gen_vaddsub_q, tcg_gen_add2_i64) +TRANS(vsub_q, LSX, gen_vaddsub_q, tcg_gen_sub2_i64) +TRANS(xvadd_q, LASX, gen_xvaddsub_q, tcg_gen_add2_i64) +TRANS(xvsub_q, LASX, gen_xvaddsub_q, tcg_gen_sub2_i64) + +TRANS(vaddi_bu, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_addi) +TRANS(vaddi_hu, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_addi) +TRANS(vaddi_wu, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_addi) +TRANS(vaddi_du, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_addi) +TRANS(vsubi_bu, LSX, gvec_subi, MO_8) +TRANS(vsubi_hu, LSX, gvec_subi, MO_16) +TRANS(vsubi_wu, LSX, gvec_subi, MO_32) +TRANS(vsubi_du, LSX, gvec_subi, MO_64) +TRANS(xvaddi_bu, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_addi) +TRANS(xvaddi_hu, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_addi) +TRANS(xvaddi_wu, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_addi) +TRANS(xvaddi_du, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_addi) +TRANS(xvsubi_bu, LASX, gvec_xsubi, MO_8) +TRANS(xvsubi_hu, LASX, gvec_xsubi, MO_16) +TRANS(xvsubi_wu, LASX, gvec_xsubi, MO_32) +TRANS(xvsubi_du, LASX, gvec_xsubi, MO_64) + +TRANS(vneg_b, LSX, gvec_vv, MO_8, tcg_gen_gvec_neg) +TRANS(vneg_h, LSX, gvec_vv, MO_16, tcg_gen_gvec_neg) +TRANS(vneg_w, LSX, gvec_vv, MO_32, tcg_gen_gvec_neg) +TRANS(vneg_d, LSX, gvec_vv, MO_64, tcg_gen_gvec_neg) +TRANS(xvneg_b, LASX, gvec_xx, MO_8, tcg_gen_gvec_neg) +TRANS(xvneg_h, LASX, gvec_xx, MO_16, tcg_gen_gvec_neg) +TRANS(xvneg_w, LASX, gvec_xx, MO_32, tcg_gen_gvec_neg) +TRANS(xvneg_d, LASX, gvec_xx, MO_64, tcg_gen_gvec_neg) + +TRANS(vsadd_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_ssadd) +TRANS(vsadd_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_ssadd) +TRANS(vsadd_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_ssadd) +TRANS(vsadd_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_ssadd) +TRANS(vsadd_bu, LSX, gvec_vvv, MO_8, tcg_gen_gvec_usadd) +TRANS(vsadd_hu, LSX, gvec_vvv, MO_16, tcg_gen_gvec_usadd) +TRANS(vsadd_wu, LSX, gvec_vvv, MO_32, tcg_gen_gvec_usadd) +TRANS(vsadd_du, LSX, gvec_vvv, MO_64, tcg_gen_gvec_usadd) +TRANS(vssub_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_sssub) +TRANS(vssub_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_sssub) +TRANS(vssub_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_sssub) +TRANS(vssub_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_sssub) +TRANS(vssub_bu, LSX, gvec_vvv, MO_8, tcg_gen_gvec_ussub) +TRANS(vssub_hu, LSX, gvec_vvv, MO_16, tcg_gen_gvec_ussub) +TRANS(vssub_wu, LSX, gvec_vvv, MO_32, tcg_gen_gvec_ussub) +TRANS(vssub_du, LSX, gvec_vvv, MO_64, tcg_gen_gvec_ussub) + +TRANS(xvsadd_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_ssadd) +TRANS(xvsadd_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_ssadd) +TRANS(xvsadd_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_ssadd) +TRANS(xvsadd_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_ssadd) +TRANS(xvsadd_bu, LASX, gvec_xxx, MO_8, tcg_gen_gvec_usadd) +TRANS(xvsadd_hu, LASX, gvec_xxx, MO_16, tcg_gen_gvec_usadd) +TRANS(xvsadd_wu, LASX, gvec_xxx, MO_32, tcg_gen_gvec_usadd) +TRANS(xvsadd_du, LASX, gvec_xxx, MO_64, tcg_gen_gvec_usadd) +TRANS(xvssub_b, LASX, 
gvec_xxx, MO_8, tcg_gen_gvec_sssub) +TRANS(xvssub_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_sssub) +TRANS(xvssub_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_sssub) +TRANS(xvssub_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_sssub) +TRANS(xvssub_bu, LASX, gvec_xxx, MO_8, tcg_gen_gvec_ussub) +TRANS(xvssub_hu, LASX, gvec_xxx, MO_16, tcg_gen_gvec_ussub) +TRANS(xvssub_wu, LASX, gvec_xxx, MO_32, tcg_gen_gvec_ussub) +TRANS(xvssub_du, LASX, gvec_xxx, MO_64, tcg_gen_gvec_ussub) + +TRANS(vhaddw_h_b, LSX, gen_vvv, gen_helper_vhaddw_h_b) +TRANS(vhaddw_w_h, LSX, gen_vvv, gen_helper_vhaddw_w_h) +TRANS(vhaddw_d_w, LSX, gen_vvv, gen_helper_vhaddw_d_w) +TRANS(vhaddw_q_d, LSX, gen_vvv, gen_helper_vhaddw_q_d) +TRANS(vhaddw_hu_bu, LSX, gen_vvv, gen_helper_vhaddw_hu_bu) +TRANS(vhaddw_wu_hu, LSX, gen_vvv, gen_helper_vhaddw_wu_hu) +TRANS(vhaddw_du_wu, LSX, gen_vvv, gen_helper_vhaddw_du_wu) +TRANS(vhaddw_qu_du, LSX, gen_vvv, gen_helper_vhaddw_qu_du) +TRANS(vhsubw_h_b, LSX, gen_vvv, gen_helper_vhsubw_h_b) +TRANS(vhsubw_w_h, LSX, gen_vvv, gen_helper_vhsubw_w_h) +TRANS(vhsubw_d_w, LSX, gen_vvv, gen_helper_vhsubw_d_w) +TRANS(vhsubw_q_d, LSX, gen_vvv, gen_helper_vhsubw_q_d) +TRANS(vhsubw_hu_bu, LSX, gen_vvv, gen_helper_vhsubw_hu_bu) +TRANS(vhsubw_wu_hu, LSX, gen_vvv, gen_helper_vhsubw_wu_hu) +TRANS(vhsubw_du_wu, LSX, gen_vvv, gen_helper_vhsubw_du_wu) +TRANS(vhsubw_qu_du, LSX, gen_vvv, gen_helper_vhsubw_qu_du) + +TRANS(xvhaddw_h_b, LASX, gen_xxx, gen_helper_vhaddw_h_b) +TRANS(xvhaddw_w_h, LASX, gen_xxx, gen_helper_vhaddw_w_h) +TRANS(xvhaddw_d_w, LASX, gen_xxx, gen_helper_vhaddw_d_w) +TRANS(xvhaddw_q_d, LASX, gen_xxx, gen_helper_vhaddw_q_d) +TRANS(xvhaddw_hu_bu, LASX, gen_xxx, gen_helper_vhaddw_hu_bu) +TRANS(xvhaddw_wu_hu, LASX, gen_xxx, gen_helper_vhaddw_wu_hu) +TRANS(xvhaddw_du_wu, LASX, gen_xxx, gen_helper_vhaddw_du_wu) +TRANS(xvhaddw_qu_du, LASX, gen_xxx, gen_helper_vhaddw_qu_du) +TRANS(xvhsubw_h_b, LASX, gen_xxx, gen_helper_vhsubw_h_b) +TRANS(xvhsubw_w_h, LASX, gen_xxx, gen_helper_vhsubw_w_h) +TRANS(xvhsubw_d_w, LASX, gen_xxx, gen_helper_vhsubw_d_w) +TRANS(xvhsubw_q_d, LASX, gen_xxx, gen_helper_vhsubw_q_d) +TRANS(xvhsubw_hu_bu, LASX, gen_xxx, gen_helper_vhsubw_hu_bu) +TRANS(xvhsubw_wu_hu, LASX, gen_xxx, gen_helper_vhsubw_wu_hu) +TRANS(xvhsubw_du_wu, LASX, gen_xxx, gen_helper_vhsubw_du_wu) +TRANS(xvhsubw_qu_du, LASX, gen_xxx, gen_helper_vhsubw_qu_du) + +static void gen_vaddwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t1, t2; + + int halfbits = 4 << vece; + + t1 = tcg_temp_new_vec_matching(a); + t2 = tcg_temp_new_vec_matching(b); + + /* Sign-extend the even elements from a */ + tcg_gen_shli_vec(vece, t1, a, halfbits); + tcg_gen_sari_vec(vece, t1, t1, halfbits); + + /* Sign-extend the even elements from b */ + tcg_gen_shli_vec(vece, t2, b, halfbits); + tcg_gen_sari_vec(vece, t2, t2, halfbits); + + tcg_gen_add_vec(vece, t, t1, t2); +} + +static void gen_vaddwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t1, t2; + + t1 = tcg_temp_new_i32(); + t2 = tcg_temp_new_i32(); + tcg_gen_ext16s_i32(t1, a); + tcg_gen_ext16s_i32(t2, b); + tcg_gen_add_i32(t, t1, t2); +} + +static void gen_vaddwev_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t1, t2; + + t1 = tcg_temp_new_i64(); + t2 = tcg_temp_new_i64(); + tcg_gen_ext32s_i64(t1, a); + tcg_gen_ext32s_i64(t2, b); + tcg_gen_add_i64(t, t1, t2); +} + +static void do_vaddwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_shli_vec, INDEX_op_sari_vec, 
INDEX_op_add_vec, 0 + }; + static const GVecGen3 op[4] = { + { + .fniv = gen_vaddwev_s, + .fno = gen_helper_vaddwev_h_b, + .opt_opc = vecop_list, + .vece = MO_16 + }, + { + .fni4 = gen_vaddwev_w_h, + .fniv = gen_vaddwev_s, + .fno = gen_helper_vaddwev_w_h, + .opt_opc = vecop_list, + .vece = MO_32 + }, + { + .fni8 = gen_vaddwev_d_w, + .fniv = gen_vaddwev_s, + .fno = gen_helper_vaddwev_d_w, + .opt_opc = vecop_list, + .vece = MO_64 + }, + { + .fno = gen_helper_vaddwev_q_d, + .vece = MO_128 + }, + }; + + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); +} + +TRANS(vaddwev_h_b, LSX, gvec_vvv, MO_8, do_vaddwev_s) +TRANS(vaddwev_w_h, LSX, gvec_vvv, MO_16, do_vaddwev_s) +TRANS(vaddwev_d_w, LSX, gvec_vvv, MO_32, do_vaddwev_s) +TRANS(vaddwev_q_d, LSX, gvec_vvv, MO_64, do_vaddwev_s) +TRANS(xvaddwev_h_b, LASX, gvec_xxx, MO_8, do_vaddwev_s) +TRANS(xvaddwev_w_h, LASX, gvec_xxx, MO_16, do_vaddwev_s) +TRANS(xvaddwev_d_w, LASX, gvec_xxx, MO_32, do_vaddwev_s) +TRANS(xvaddwev_q_d, LASX, gvec_xxx, MO_64, do_vaddwev_s) + +static void gen_vaddwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t1, t2; + + t1 = tcg_temp_new_i32(); + t2 = tcg_temp_new_i32(); + tcg_gen_sari_i32(t1, a, 16); + tcg_gen_sari_i32(t2, b, 16); + tcg_gen_add_i32(t, t1, t2); +} + +static void gen_vaddwod_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t1, t2; + + t1 = tcg_temp_new_i64(); + t2 = tcg_temp_new_i64(); + tcg_gen_sari_i64(t1, a, 32); + tcg_gen_sari_i64(t2, b, 32); + tcg_gen_add_i64(t, t1, t2); +} + +static void gen_vaddwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t1, t2; + + int halfbits = 4 << vece; + + t1 = tcg_temp_new_vec_matching(a); + t2 = tcg_temp_new_vec_matching(b); + + /* Sign-extend the odd elements for vector */ + tcg_gen_sari_vec(vece, t1, a, halfbits); + tcg_gen_sari_vec(vece, t2, b, halfbits); + + tcg_gen_add_vec(vece, t, t1, t2); +} + +static void do_vaddwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_sari_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen3 op[4] = { + { + .fniv = gen_vaddwod_s, + .fno = gen_helper_vaddwod_h_b, + .opt_opc = vecop_list, + .vece = MO_16 + }, + { + .fni4 = gen_vaddwod_w_h, + .fniv = gen_vaddwod_s, + .fno = gen_helper_vaddwod_w_h, + .opt_opc = vecop_list, + .vece = MO_32 + }, + { + .fni8 = gen_vaddwod_d_w, + .fniv = gen_vaddwod_s, + .fno = gen_helper_vaddwod_d_w, + .opt_opc = vecop_list, + .vece = MO_64 + }, + { + .fno = gen_helper_vaddwod_q_d, + .vece = MO_128 + }, + }; + + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); +} + +TRANS(vaddwod_h_b, LSX, gvec_vvv, MO_8, do_vaddwod_s) +TRANS(vaddwod_w_h, LSX, gvec_vvv, MO_16, do_vaddwod_s) +TRANS(vaddwod_d_w, LSX, gvec_vvv, MO_32, do_vaddwod_s) +TRANS(vaddwod_q_d, LSX, gvec_vvv, MO_64, do_vaddwod_s) +TRANS(xvaddwod_h_b, LASX, gvec_xxx, MO_8, do_vaddwod_s) +TRANS(xvaddwod_w_h, LASX, gvec_xxx, MO_16, do_vaddwod_s) +TRANS(xvaddwod_d_w, LASX, gvec_xxx, MO_32, do_vaddwod_s) +TRANS(xvaddwod_q_d, LASX, gvec_xxx, MO_64, do_vaddwod_s) + + +static void gen_vsubwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t1, t2; + + int halfbits = 4 << vece; + + t1 = tcg_temp_new_vec_matching(a); + t2 = tcg_temp_new_vec_matching(b); + + /* Sign-extend the even elements from a */ + tcg_gen_shli_vec(vece, t1, a, halfbits); + tcg_gen_sari_vec(vece, t1, t1, halfbits); + + /* Sign-extend the even elements from b */ + tcg_gen_shli_vec(vece, t2, b, halfbits); + 
tcg_gen_sari_vec(vece, t2, t2, halfbits); + + tcg_gen_sub_vec(vece, t, t1, t2); +} + +static void gen_vsubwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t1, t2; + + t1 = tcg_temp_new_i32(); + t2 = tcg_temp_new_i32(); + tcg_gen_ext16s_i32(t1, a); + tcg_gen_ext16s_i32(t2, b); + tcg_gen_sub_i32(t, t1, t2); +} + +static void gen_vsubwev_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t1, t2; + + t1 = tcg_temp_new_i64(); + t2 = tcg_temp_new_i64(); + tcg_gen_ext32s_i64(t1, a); + tcg_gen_ext32s_i64(t2, b); + tcg_gen_sub_i64(t, t1, t2); +} + +static void do_vsubwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_sub_vec, 0 + }; + static const GVecGen3 op[4] = { + { + .fniv = gen_vsubwev_s, + .fno = gen_helper_vsubwev_h_b, + .opt_opc = vecop_list, + .vece = MO_16 + }, + { + .fni4 = gen_vsubwev_w_h, + .fniv = gen_vsubwev_s, + .fno = gen_helper_vsubwev_w_h, + .opt_opc = vecop_list, + .vece = MO_32 + }, + { + .fni8 = gen_vsubwev_d_w, + .fniv = gen_vsubwev_s, + .fno = gen_helper_vsubwev_d_w, + .opt_opc = vecop_list, + .vece = MO_64 + }, + { + .fno = gen_helper_vsubwev_q_d, + .vece = MO_128 + }, + }; + + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); +} + +TRANS(vsubwev_h_b, LSX, gvec_vvv, MO_8, do_vsubwev_s) +TRANS(vsubwev_w_h, LSX, gvec_vvv, MO_16, do_vsubwev_s) +TRANS(vsubwev_d_w, LSX, gvec_vvv, MO_32, do_vsubwev_s) +TRANS(vsubwev_q_d, LSX, gvec_vvv, MO_64, do_vsubwev_s) +TRANS(xvsubwev_h_b, LASX, gvec_xxx, MO_8, do_vsubwev_s) +TRANS(xvsubwev_w_h, LASX, gvec_xxx, MO_16, do_vsubwev_s) +TRANS(xvsubwev_d_w, LASX, gvec_xxx, MO_32, do_vsubwev_s) +TRANS(xvsubwev_q_d, LASX, gvec_xxx, MO_64, do_vsubwev_s) + +static void gen_vsubwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t1, t2; + + int halfbits = 4 << vece; + + t1 = tcg_temp_new_vec_matching(a); + t2 = tcg_temp_new_vec_matching(b); + + /* Sign-extend the odd elements for vector */ + tcg_gen_sari_vec(vece, t1, a, halfbits); + tcg_gen_sari_vec(vece, t2, b, halfbits); + + tcg_gen_sub_vec(vece, t, t1, t2); +} + +static void gen_vsubwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t1, t2; + + t1 = tcg_temp_new_i32(); + t2 = tcg_temp_new_i32(); + tcg_gen_sari_i32(t1, a, 16); + tcg_gen_sari_i32(t2, b, 16); + tcg_gen_sub_i32(t, t1, t2); +} + +static void gen_vsubwod_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t1, t2; + + t1 = tcg_temp_new_i64(); + t2 = tcg_temp_new_i64(); + tcg_gen_sari_i64(t1, a, 32); + tcg_gen_sari_i64(t2, b, 32); + tcg_gen_sub_i64(t, t1, t2); +} + +static void do_vsubwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_sari_vec, INDEX_op_sub_vec, 0 + }; + static const GVecGen3 op[4] = { + { + .fniv = gen_vsubwod_s, + .fno = gen_helper_vsubwod_h_b, + .opt_opc = vecop_list, + .vece = MO_16 + }, + { + .fni4 = gen_vsubwod_w_h, + .fniv = gen_vsubwod_s, + .fno = gen_helper_vsubwod_w_h, + .opt_opc = vecop_list, + .vece = MO_32 + }, + { + .fni8 = gen_vsubwod_d_w, + .fniv = gen_vsubwod_s, + .fno = gen_helper_vsubwod_d_w, + .opt_opc = vecop_list, + .vece = MO_64 + }, + { + .fno = gen_helper_vsubwod_q_d, + .vece = MO_128 + }, + }; + + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); +} + +TRANS(vsubwod_h_b, LSX, gvec_vvv, MO_8, do_vsubwod_s) +TRANS(vsubwod_w_h, LSX, gvec_vvv, MO_16, do_vsubwod_s) 
+TRANS(vsubwod_d_w, LSX, gvec_vvv, MO_32, do_vsubwod_s) +TRANS(vsubwod_q_d, LSX, gvec_vvv, MO_64, do_vsubwod_s) +TRANS(xvsubwod_h_b, LASX, gvec_xxx, MO_8, do_vsubwod_s) +TRANS(xvsubwod_w_h, LASX, gvec_xxx, MO_16, do_vsubwod_s) +TRANS(xvsubwod_d_w, LASX, gvec_xxx, MO_32, do_vsubwod_s) +TRANS(xvsubwod_q_d, LASX, gvec_xxx, MO_64, do_vsubwod_s) + +static void gen_vaddwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t1, t2, t3; + + t1 = tcg_temp_new_vec_matching(a); + t2 = tcg_temp_new_vec_matching(b); + t3 = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece)); + tcg_gen_and_vec(vece, t1, a, t3); + tcg_gen_and_vec(vece, t2, b, t3); + tcg_gen_add_vec(vece, t, t1, t2); +} + +static void gen_vaddwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t1, t2; + + t1 = tcg_temp_new_i32(); + t2 = tcg_temp_new_i32(); + tcg_gen_ext16u_i32(t1, a); + tcg_gen_ext16u_i32(t2, b); + tcg_gen_add_i32(t, t1, t2); +} + +static void gen_vaddwev_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t1, t2; + + t1 = tcg_temp_new_i64(); + t2 = tcg_temp_new_i64(); + tcg_gen_ext32u_i64(t1, a); + tcg_gen_ext32u_i64(t2, b); + tcg_gen_add_i64(t, t1, t2); +} + +static void do_vaddwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_add_vec, 0 + }; + static const GVecGen3 op[4] = { + { + .fniv = gen_vaddwev_u, + .fno = gen_helper_vaddwev_h_bu, + .opt_opc = vecop_list, + .vece = MO_16 + }, + { + .fni4 = gen_vaddwev_w_hu, + .fniv = gen_vaddwev_u, + .fno = gen_helper_vaddwev_w_hu, + .opt_opc = vecop_list, + .vece = MO_32 + }, + { + .fni8 = gen_vaddwev_d_wu, + .fniv = gen_vaddwev_u, + .fno = gen_helper_vaddwev_d_wu, + .opt_opc = vecop_list, + .vece = MO_64 + }, + { + .fno = gen_helper_vaddwev_q_du, + .vece = MO_128 + }, + }; + + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); +} + +TRANS(vaddwev_h_bu, LSX, gvec_vvv, MO_8, do_vaddwev_u) +TRANS(vaddwev_w_hu, LSX, gvec_vvv, MO_16, do_vaddwev_u) +TRANS(vaddwev_d_wu, LSX, gvec_vvv, MO_32, do_vaddwev_u) +TRANS(vaddwev_q_du, LSX, gvec_vvv, MO_64, do_vaddwev_u) +TRANS(xvaddwev_h_bu, LASX, gvec_xxx, MO_8, do_vaddwev_u) +TRANS(xvaddwev_w_hu, LASX, gvec_xxx, MO_16, do_vaddwev_u) +TRANS(xvaddwev_d_wu, LASX, gvec_xxx, MO_32, do_vaddwev_u) +TRANS(xvaddwev_q_du, LASX, gvec_xxx, MO_64, do_vaddwev_u) + +static void gen_vaddwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t1, t2; + + int halfbits = 4 << vece; + + t1 = tcg_temp_new_vec_matching(a); + t2 = tcg_temp_new_vec_matching(b); + + /* Zero-extend the odd elements for vector */ + tcg_gen_shri_vec(vece, t1, a, halfbits); + tcg_gen_shri_vec(vece, t2, b, halfbits); + + tcg_gen_add_vec(vece, t, t1, t2); +} + +static void gen_vaddwod_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t1, t2; + + t1 = tcg_temp_new_i32(); + t2 = tcg_temp_new_i32(); + tcg_gen_shri_i32(t1, a, 16); + tcg_gen_shri_i32(t2, b, 16); + tcg_gen_add_i32(t, t1, t2); +} + +static void gen_vaddwod_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t1, t2; + + t1 = tcg_temp_new_i64(); + t2 = tcg_temp_new_i64(); + tcg_gen_shri_i64(t1, a, 32); + tcg_gen_shri_i64(t2, b, 32); + tcg_gen_add_i64(t, t1, t2); +} + +static void do_vaddwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_shri_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen3 op[4] = { + { + .fniv = gen_vaddwod_u, + 
.fno = gen_helper_vaddwod_h_bu, + .opt_opc = vecop_list, + .vece = MO_16 + }, + { + .fni4 = gen_vaddwod_w_hu, + .fniv = gen_vaddwod_u, + .fno = gen_helper_vaddwod_w_hu, + .opt_opc = vecop_list, + .vece = MO_32 + }, + { + .fni8 = gen_vaddwod_d_wu, + .fniv = gen_vaddwod_u, + .fno = gen_helper_vaddwod_d_wu, + .opt_opc = vecop_list, + .vece = MO_64 + }, + { + .fno = gen_helper_vaddwod_q_du, + .vece = MO_128 + }, + }; + + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); +} + +TRANS(vaddwod_h_bu, LSX, gvec_vvv, MO_8, do_vaddwod_u) +TRANS(vaddwod_w_hu, LSX, gvec_vvv, MO_16, do_vaddwod_u) +TRANS(vaddwod_d_wu, LSX, gvec_vvv, MO_32, do_vaddwod_u) +TRANS(vaddwod_q_du, LSX, gvec_vvv, MO_64, do_vaddwod_u) +TRANS(xvaddwod_h_bu, LASX, gvec_xxx, MO_8, do_vaddwod_u) +TRANS(xvaddwod_w_hu, LASX, gvec_xxx, MO_16, do_vaddwod_u) +TRANS(xvaddwod_d_wu, LASX, gvec_xxx, MO_32, do_vaddwod_u) +TRANS(xvaddwod_q_du, LASX, gvec_xxx, MO_64, do_vaddwod_u) + +static void gen_vsubwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t1, t2, t3; + + t1 = tcg_temp_new_vec_matching(a); + t2 = tcg_temp_new_vec_matching(b); + t3 = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece)); + tcg_gen_and_vec(vece, t1, a, t3); + tcg_gen_and_vec(vece, t2, b, t3); + tcg_gen_sub_vec(vece, t, t1, t2); +} + +static void gen_vsubwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t1, t2; + + t1 = tcg_temp_new_i32(); + t2 = tcg_temp_new_i32(); + tcg_gen_ext16u_i32(t1, a); + tcg_gen_ext16u_i32(t2, b); + tcg_gen_sub_i32(t, t1, t2); +} + +static void gen_vsubwev_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t1, t2; + + t1 = tcg_temp_new_i64(); + t2 = tcg_temp_new_i64(); + tcg_gen_ext32u_i64(t1, a); + tcg_gen_ext32u_i64(t2, b); + tcg_gen_sub_i64(t, t1, t2); +} + +static void do_vsubwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_sub_vec, 0 + }; + static const GVecGen3 op[4] = { + { + .fniv = gen_vsubwev_u, + .fno = gen_helper_vsubwev_h_bu, + .opt_opc = vecop_list, + .vece = MO_16 + }, + { + .fni4 = gen_vsubwev_w_hu, + .fniv = gen_vsubwev_u, + .fno = gen_helper_vsubwev_w_hu, + .opt_opc = vecop_list, + .vece = MO_32 + }, + { + .fni8 = gen_vsubwev_d_wu, + .fniv = gen_vsubwev_u, + .fno = gen_helper_vsubwev_d_wu, + .opt_opc = vecop_list, + .vece = MO_64 + }, + { + .fno = gen_helper_vsubwev_q_du, + .vece = MO_128 + }, + }; + + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); +} + +TRANS(vsubwev_h_bu, LSX, gvec_vvv, MO_8, do_vsubwev_u) +TRANS(vsubwev_w_hu, LSX, gvec_vvv, MO_16, do_vsubwev_u) +TRANS(vsubwev_d_wu, LSX, gvec_vvv, MO_32, do_vsubwev_u) +TRANS(vsubwev_q_du, LSX, gvec_vvv, MO_64, do_vsubwev_u) +TRANS(xvsubwev_h_bu, LASX, gvec_xxx, MO_8, do_vsubwev_u) +TRANS(xvsubwev_w_hu, LASX, gvec_xxx, MO_16, do_vsubwev_u) +TRANS(xvsubwev_d_wu, LASX, gvec_xxx, MO_32, do_vsubwev_u) +TRANS(xvsubwev_q_du, LASX, gvec_xxx, MO_64, do_vsubwev_u) + +static void gen_vsubwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t1, t2; + + int halfbits = 4 << vece; + + t1 = tcg_temp_new_vec_matching(a); + t2 = tcg_temp_new_vec_matching(b); + + /* Zero-extend the odd elements for vector */ + tcg_gen_shri_vec(vece, t1, a, halfbits); + tcg_gen_shri_vec(vece, t2, b, halfbits); + + tcg_gen_sub_vec(vece, t, t1, t2); +} + +static void gen_vsubwod_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t1, t2; + + t1 = tcg_temp_new_i32(); + t2 = tcg_temp_new_i32(); + 
tcg_gen_shri_i32(t1, a, 16); + tcg_gen_shri_i32(t2, b, 16); + tcg_gen_sub_i32(t, t1, t2); +} + +static void gen_vsubwod_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t1, t2; + + t1 = tcg_temp_new_i64(); + t2 = tcg_temp_new_i64(); + tcg_gen_shri_i64(t1, a, 32); + tcg_gen_shri_i64(t2, b, 32); + tcg_gen_sub_i64(t, t1, t2); +} + +static void do_vsubwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_shri_vec, INDEX_op_sub_vec, 0 + }; + static const GVecGen3 op[4] = { + { + .fniv = gen_vsubwod_u, + .fno = gen_helper_vsubwod_h_bu, + .opt_opc = vecop_list, + .vece = MO_16 + }, + { + .fni4 = gen_vsubwod_w_hu, + .fniv = gen_vsubwod_u, + .fno = gen_helper_vsubwod_w_hu, + .opt_opc = vecop_list, + .vece = MO_32 + }, + { + .fni8 = gen_vsubwod_d_wu, + .fniv = gen_vsubwod_u, + .fno = gen_helper_vsubwod_d_wu, + .opt_opc = vecop_list, + .vece = MO_64 + }, + { + .fno = gen_helper_vsubwod_q_du, + .vece = MO_128 + }, + }; + + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); +} + +TRANS(vsubwod_h_bu, LSX, gvec_vvv, MO_8, do_vsubwod_u) +TRANS(vsubwod_w_hu, LSX, gvec_vvv, MO_16, do_vsubwod_u) +TRANS(vsubwod_d_wu, LSX, gvec_vvv, MO_32, do_vsubwod_u) +TRANS(vsubwod_q_du, LSX, gvec_vvv, MO_64, do_vsubwod_u) +TRANS(xvsubwod_h_bu, LASX, gvec_xxx, MO_8, do_vsubwod_u) +TRANS(xvsubwod_w_hu, LASX, gvec_xxx, MO_16, do_vsubwod_u) +TRANS(xvsubwod_d_wu, LASX, gvec_xxx, MO_32, do_vsubwod_u) +TRANS(xvsubwod_q_du, LASX, gvec_xxx, MO_64, do_vsubwod_u) + +static void gen_vaddwev_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t1, t2, t3; + + int halfbits = 4 << vece; + + t1 = tcg_temp_new_vec_matching(a); + t2 = tcg_temp_new_vec_matching(b); + t3 = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, halfbits)); + + /* Zero-extend the even elements from a */ + tcg_gen_and_vec(vece, t1, a, t3); + + /* Sign-extend the even elements from b */ + tcg_gen_shli_vec(vece, t2, b, halfbits); + tcg_gen_sari_vec(vece, t2, t2, halfbits); + + tcg_gen_add_vec(vece, t, t1, t2); +} + +static void gen_vaddwev_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t1, t2; + + t1 = tcg_temp_new_i32(); + t2 = tcg_temp_new_i32(); + tcg_gen_ext16u_i32(t1, a); + tcg_gen_ext16s_i32(t2, b); + tcg_gen_add_i32(t, t1, t2); +} + +static void gen_vaddwev_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t1, t2; + + t1 = tcg_temp_new_i64(); + t2 = tcg_temp_new_i64(); + tcg_gen_ext32u_i64(t1, a); + tcg_gen_ext32s_i64(t2, b); + tcg_gen_add_i64(t, t1, t2); +} + +static void do_vaddwev_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen3 op[4] = { + { + .fniv = gen_vaddwev_u_s, + .fno = gen_helper_vaddwev_h_bu_b, + .opt_opc = vecop_list, + .vece = MO_16 + }, + { + .fni4 = gen_vaddwev_w_hu_h, + .fniv = gen_vaddwev_u_s, + .fno = gen_helper_vaddwev_w_hu_h, + .opt_opc = vecop_list, + .vece = MO_32 + }, + { + .fni8 = gen_vaddwev_d_wu_w, + .fniv = gen_vaddwev_u_s, + .fno = gen_helper_vaddwev_d_wu_w, + .opt_opc = vecop_list, + .vece = MO_64 + }, + { + .fno = gen_helper_vaddwev_q_du_d, + .vece = MO_128 + }, + }; + + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); +} + +TRANS(vaddwev_h_bu_b, LSX, gvec_vvv, MO_8, do_vaddwev_u_s) +TRANS(vaddwev_w_hu_h, LSX, gvec_vvv, MO_16, do_vaddwev_u_s) +TRANS(vaddwev_d_wu_w, 
LSX, gvec_vvv, MO_32, do_vaddwev_u_s) +TRANS(vaddwev_q_du_d, LSX, gvec_vvv, MO_64, do_vaddwev_u_s) +TRANS(xvaddwev_h_bu_b, LASX, gvec_xxx, MO_8, do_vaddwev_u_s) +TRANS(xvaddwev_w_hu_h, LASX, gvec_xxx, MO_16, do_vaddwev_u_s) +TRANS(xvaddwev_d_wu_w, LASX, gvec_xxx, MO_32, do_vaddwev_u_s) +TRANS(xvaddwev_q_du_d, LASX, gvec_xxx, MO_64, do_vaddwev_u_s) + +static void gen_vaddwod_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t1, t2; + + int halfbits = 4 << vece; + + t1 = tcg_temp_new_vec_matching(a); + t2 = tcg_temp_new_vec_matching(b); + + /* Zero-extend the odd elements from a */ + tcg_gen_shri_vec(vece, t1, a, halfbits); + /* Sign-extend the odd elements from b */ + tcg_gen_sari_vec(vece, t2, b, halfbits); + + tcg_gen_add_vec(vece, t, t1, t2); +} + +static void gen_vaddwod_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t1, t2; + + t1 = tcg_temp_new_i32(); + t2 = tcg_temp_new_i32(); + tcg_gen_shri_i32(t1, a, 16); + tcg_gen_sari_i32(t2, b, 16); + tcg_gen_add_i32(t, t1, t2); +} + +static void gen_vaddwod_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t1, t2; + + t1 = tcg_temp_new_i64(); + t2 = tcg_temp_new_i64(); + tcg_gen_shri_i64(t1, a, 32); + tcg_gen_sari_i64(t2, b, 32); + tcg_gen_add_i64(t, t1, t2); +} + +static void do_vaddwod_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen3 op[4] = { + { + .fniv = gen_vaddwod_u_s, + .fno = gen_helper_vaddwod_h_bu_b, + .opt_opc = vecop_list, + .vece = MO_16 + }, + { + .fni4 = gen_vaddwod_w_hu_h, + .fniv = gen_vaddwod_u_s, + .fno = gen_helper_vaddwod_w_hu_h, + .opt_opc = vecop_list, + .vece = MO_32 + }, + { + .fni8 = gen_vaddwod_d_wu_w, + .fniv = gen_vaddwod_u_s, + .fno = gen_helper_vaddwod_d_wu_w, + .opt_opc = vecop_list, + .vece = MO_64 + }, + { + .fno = gen_helper_vaddwod_q_du_d, + .vece = MO_128 + }, + }; + + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); +} + +TRANS(vaddwod_h_bu_b, LSX, gvec_vvv, MO_8, do_vaddwod_u_s) +TRANS(vaddwod_w_hu_h, LSX, gvec_vvv, MO_16, do_vaddwod_u_s) +TRANS(vaddwod_d_wu_w, LSX, gvec_vvv, MO_32, do_vaddwod_u_s) +TRANS(vaddwod_q_du_d, LSX, gvec_vvv, MO_64, do_vaddwod_u_s) +TRANS(xvaddwod_h_bu_b, LSX, gvec_xxx, MO_8, do_vaddwod_u_s) +TRANS(xvaddwod_w_hu_h, LSX, gvec_xxx, MO_16, do_vaddwod_u_s) +TRANS(xvaddwod_d_wu_w, LSX, gvec_xxx, MO_32, do_vaddwod_u_s) +TRANS(xvaddwod_q_du_d, LSX, gvec_xxx, MO_64, do_vaddwod_u_s) + +static void do_vavg(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b, + void (*gen_shr_vec)(unsigned, TCGv_vec, + TCGv_vec, int64_t), + void (*gen_round_vec)(unsigned, TCGv_vec, + TCGv_vec, TCGv_vec)) +{ + TCGv_vec tmp = tcg_temp_new_vec_matching(t); + gen_round_vec(vece, tmp, a, b); + tcg_gen_and_vec(vece, tmp, tmp, tcg_constant_vec_matching(t, vece, 1)); + gen_shr_vec(vece, a, a, 1); + gen_shr_vec(vece, b, b, 1); + tcg_gen_add_vec(vece, t, a, b); + tcg_gen_add_vec(vece, t, t, tmp); +} + +static void gen_vavg_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) +{ + do_vavg(vece, t, a, b, tcg_gen_sari_vec, tcg_gen_and_vec); +} + +static void gen_vavg_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) +{ + do_vavg(vece, t, a, b, tcg_gen_shri_vec, tcg_gen_and_vec); +} + +static void gen_vavgr_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) +{ + do_vavg(vece, t, a, b, tcg_gen_sari_vec, tcg_gen_or_vec); +} + +static void gen_vavgr_u(unsigned vece, TCGv_vec t, 
TCGv_vec a, TCGv_vec b) +{ + do_vavg(vece, t, a, b, tcg_gen_shri_vec, tcg_gen_or_vec); +} + +static void do_vavg_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_sari_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen3 op[4] = { + { + .fniv = gen_vavg_s, + .fno = gen_helper_vavg_b, + .opt_opc = vecop_list, + .vece = MO_8 + }, + { + .fniv = gen_vavg_s, + .fno = gen_helper_vavg_h, + .opt_opc = vecop_list, + .vece = MO_16 + }, + { + .fniv = gen_vavg_s, + .fno = gen_helper_vavg_w, + .opt_opc = vecop_list, + .vece = MO_32 + }, + { + .fniv = gen_vavg_s, + .fno = gen_helper_vavg_d, + .opt_opc = vecop_list, + .vece = MO_64 + }, + }; + + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); +} + +static void do_vavg_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_shri_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen3 op[4] = { + { + .fniv = gen_vavg_u, + .fno = gen_helper_vavg_bu, + .opt_opc = vecop_list, + .vece = MO_8 + }, + { + .fniv = gen_vavg_u, + .fno = gen_helper_vavg_hu, + .opt_opc = vecop_list, + .vece = MO_16 + }, + { + .fniv = gen_vavg_u, + .fno = gen_helper_vavg_wu, + .opt_opc = vecop_list, + .vece = MO_32 + }, + { + .fniv = gen_vavg_u, + .fno = gen_helper_vavg_du, + .opt_opc = vecop_list, + .vece = MO_64 + }, + }; + + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); +} + +TRANS(vavg_b, LSX, gvec_vvv, MO_8, do_vavg_s) +TRANS(vavg_h, LSX, gvec_vvv, MO_16, do_vavg_s) +TRANS(vavg_w, LSX, gvec_vvv, MO_32, do_vavg_s) +TRANS(vavg_d, LSX, gvec_vvv, MO_64, do_vavg_s) +TRANS(vavg_bu, LSX, gvec_vvv, MO_8, do_vavg_u) +TRANS(vavg_hu, LSX, gvec_vvv, MO_16, do_vavg_u) +TRANS(vavg_wu, LSX, gvec_vvv, MO_32, do_vavg_u) +TRANS(vavg_du, LSX, gvec_vvv, MO_64, do_vavg_u) +TRANS(xvavg_b, LASX, gvec_xxx, MO_8, do_vavg_s) +TRANS(xvavg_h, LASX, gvec_xxx, MO_16, do_vavg_s) +TRANS(xvavg_w, LASX, gvec_xxx, MO_32, do_vavg_s) +TRANS(xvavg_d, LASX, gvec_xxx, MO_64, do_vavg_s) +TRANS(xvavg_bu, LASX, gvec_xxx, MO_8, do_vavg_u) +TRANS(xvavg_hu, LASX, gvec_xxx, MO_16, do_vavg_u) +TRANS(xvavg_wu, LASX, gvec_xxx, MO_32, do_vavg_u) +TRANS(xvavg_du, LASX, gvec_xxx, MO_64, do_vavg_u) + +static void do_vavgr_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_sari_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen3 op[4] = { + { + .fniv = gen_vavgr_s, + .fno = gen_helper_vavgr_b, + .opt_opc = vecop_list, + .vece = MO_8 + }, + { + .fniv = gen_vavgr_s, + .fno = gen_helper_vavgr_h, + .opt_opc = vecop_list, + .vece = MO_16 + }, + { + .fniv = gen_vavgr_s, + .fno = gen_helper_vavgr_w, + .opt_opc = vecop_list, + .vece = MO_32 + }, + { + .fniv = gen_vavgr_s, + .fno = gen_helper_vavgr_d, + .opt_opc = vecop_list, + .vece = MO_64 + }, + }; + + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); +} + +static void do_vavgr_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_shri_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen3 op[4] = { + { + .fniv = gen_vavgr_u, + .fno = gen_helper_vavgr_bu, + .opt_opc = vecop_list, + .vece = MO_8 + }, + { + .fniv = gen_vavgr_u, + .fno = gen_helper_vavgr_hu, + .opt_opc = vecop_list, + .vece = MO_16 + }, + { + .fniv = gen_vavgr_u, 
+ .fno = gen_helper_vavgr_wu, + .opt_opc = vecop_list, + .vece = MO_32 + }, + { + .fniv = gen_vavgr_u, + .fno = gen_helper_vavgr_du, + .opt_opc = vecop_list, + .vece = MO_64 + }, + }; + + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); +} + +TRANS(vavgr_b, LSX, gvec_vvv, MO_8, do_vavgr_s) +TRANS(vavgr_h, LSX, gvec_vvv, MO_16, do_vavgr_s) +TRANS(vavgr_w, LSX, gvec_vvv, MO_32, do_vavgr_s) +TRANS(vavgr_d, LSX, gvec_vvv, MO_64, do_vavgr_s) +TRANS(vavgr_bu, LSX, gvec_vvv, MO_8, do_vavgr_u) +TRANS(vavgr_hu, LSX, gvec_vvv, MO_16, do_vavgr_u) +TRANS(vavgr_wu, LSX, gvec_vvv, MO_32, do_vavgr_u) +TRANS(vavgr_du, LSX, gvec_vvv, MO_64, do_vavgr_u) +TRANS(xvavgr_b, LASX, gvec_xxx, MO_8, do_vavgr_s) +TRANS(xvavgr_h, LASX, gvec_xxx, MO_16, do_vavgr_s) +TRANS(xvavgr_w, LASX, gvec_xxx, MO_32, do_vavgr_s) +TRANS(xvavgr_d, LASX, gvec_xxx, MO_64, do_vavgr_s) +TRANS(xvavgr_bu, LASX, gvec_xxx, MO_8, do_vavgr_u) +TRANS(xvavgr_hu, LASX, gvec_xxx, MO_16, do_vavgr_u) +TRANS(xvavgr_wu, LASX, gvec_xxx, MO_32, do_vavgr_u) +TRANS(xvavgr_du, LASX, gvec_xxx, MO_64, do_vavgr_u) + +static void gen_vabsd_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) +{ + tcg_gen_smax_vec(vece, t, a, b); + tcg_gen_smin_vec(vece, a, a, b); + tcg_gen_sub_vec(vece, t, t, a); +} + +static void do_vabsd_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_smax_vec, INDEX_op_smin_vec, INDEX_op_sub_vec, 0 + }; + static const GVecGen3 op[4] = { + { + .fniv = gen_vabsd_s, + .fno = gen_helper_vabsd_b, + .opt_opc = vecop_list, + .vece = MO_8 + }, + { + .fniv = gen_vabsd_s, + .fno = gen_helper_vabsd_h, + .opt_opc = vecop_list, + .vece = MO_16 + }, + { + .fniv = gen_vabsd_s, + .fno = gen_helper_vabsd_w, + .opt_opc = vecop_list, + .vece = MO_32 + }, + { + .fniv = gen_vabsd_s, + .fno = gen_helper_vabsd_d, + .opt_opc = vecop_list, + .vece = MO_64 + }, + }; + + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); +} + +static void gen_vabsd_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) +{ + tcg_gen_umax_vec(vece, t, a, b); + tcg_gen_umin_vec(vece, a, a, b); + tcg_gen_sub_vec(vece, t, t, a); +} + +static void do_vabsd_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_umax_vec, INDEX_op_umin_vec, INDEX_op_sub_vec, 0 + }; + static const GVecGen3 op[4] = { + { + .fniv = gen_vabsd_u, + .fno = gen_helper_vabsd_bu, + .opt_opc = vecop_list, + .vece = MO_8 + }, + { + .fniv = gen_vabsd_u, + .fno = gen_helper_vabsd_hu, + .opt_opc = vecop_list, + .vece = MO_16 + }, + { + .fniv = gen_vabsd_u, + .fno = gen_helper_vabsd_wu, + .opt_opc = vecop_list, + .vece = MO_32 + }, + { + .fniv = gen_vabsd_u, + .fno = gen_helper_vabsd_du, + .opt_opc = vecop_list, + .vece = MO_64 + }, + }; + + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); +} + +TRANS(vabsd_b, LSX, gvec_vvv, MO_8, do_vabsd_s) +TRANS(vabsd_h, LSX, gvec_vvv, MO_16, do_vabsd_s) +TRANS(vabsd_w, LSX, gvec_vvv, MO_32, do_vabsd_s) +TRANS(vabsd_d, LSX, gvec_vvv, MO_64, do_vabsd_s) +TRANS(vabsd_bu, LSX, gvec_vvv, MO_8, do_vabsd_u) +TRANS(vabsd_hu, LSX, gvec_vvv, MO_16, do_vabsd_u) +TRANS(vabsd_wu, LSX, gvec_vvv, MO_32, do_vabsd_u) +TRANS(vabsd_du, LSX, gvec_vvv, MO_64, do_vabsd_u) +TRANS(xvabsd_b, LASX, gvec_xxx, MO_8, do_vabsd_s) +TRANS(xvabsd_h, LASX, gvec_xxx, MO_16, do_vabsd_s) +TRANS(xvabsd_w, LASX, gvec_xxx, MO_32, do_vabsd_s) +TRANS(xvabsd_d, LASX, 
gvec_xxx, MO_64, do_vabsd_s) +TRANS(xvabsd_bu, LASX, gvec_xxx, MO_8, do_vabsd_u) +TRANS(xvabsd_hu, LASX, gvec_xxx, MO_16, do_vabsd_u) +TRANS(xvabsd_wu, LASX, gvec_xxx, MO_32, do_vabsd_u) +TRANS(xvabsd_du, LASX, gvec_xxx, MO_64, do_vabsd_u) + +static void gen_vadda(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t1, t2; + + t1 = tcg_temp_new_vec_matching(a); + t2 = tcg_temp_new_vec_matching(b); + + tcg_gen_abs_vec(vece, t1, a); + tcg_gen_abs_vec(vece, t2, b); + tcg_gen_add_vec(vece, t, t1, t2); +} + +static void do_vadda(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_abs_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen3 op[4] = { + { + .fniv = gen_vadda, + .fno = gen_helper_vadda_b, + .opt_opc = vecop_list, + .vece = MO_8 + }, + { + .fniv = gen_vadda, + .fno = gen_helper_vadda_h, + .opt_opc = vecop_list, + .vece = MO_16 + }, + { + .fniv = gen_vadda, + .fno = gen_helper_vadda_w, + .opt_opc = vecop_list, + .vece = MO_32 + }, + { + .fniv = gen_vadda, + .fno = gen_helper_vadda_d, + .opt_opc = vecop_list, + .vece = MO_64 + }, + }; + + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); +} + +TRANS(vadda_b, LSX, gvec_vvv, MO_8, do_vadda) +TRANS(vadda_h, LSX, gvec_vvv, MO_16, do_vadda) +TRANS(vadda_w, LSX, gvec_vvv, MO_32, do_vadda) +TRANS(vadda_d, LSX, gvec_vvv, MO_64, do_vadda) +TRANS(xvadda_b, LASX, gvec_xxx, MO_8, do_vadda) +TRANS(xvadda_h, LASX, gvec_xxx, MO_16, do_vadda) +TRANS(xvadda_w, LASX, gvec_xxx, MO_32, do_vadda) +TRANS(xvadda_d, LASX, gvec_xxx, MO_64, do_vadda) + +TRANS(vmax_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_smax) +TRANS(vmax_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_smax) +TRANS(vmax_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_smax) +TRANS(vmax_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_smax) +TRANS(vmax_bu, LSX, gvec_vvv, MO_8, tcg_gen_gvec_umax) +TRANS(vmax_hu, LSX, gvec_vvv, MO_16, tcg_gen_gvec_umax) +TRANS(vmax_wu, LSX, gvec_vvv, MO_32, tcg_gen_gvec_umax) +TRANS(vmax_du, LSX, gvec_vvv, MO_64, tcg_gen_gvec_umax) +TRANS(xvmax_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_smax) +TRANS(xvmax_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_smax) +TRANS(xvmax_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_smax) +TRANS(xvmax_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_smax) +TRANS(xvmax_bu, LASX, gvec_xxx, MO_8, tcg_gen_gvec_umax) +TRANS(xvmax_hu, LASX, gvec_xxx, MO_16, tcg_gen_gvec_umax) +TRANS(xvmax_wu, LASX, gvec_xxx, MO_32, tcg_gen_gvec_umax) +TRANS(xvmax_du, LASX, gvec_xxx, MO_64, tcg_gen_gvec_umax) + +TRANS(vmin_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_smin) +TRANS(vmin_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_smin) +TRANS(vmin_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_smin) +TRANS(vmin_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_smin) +TRANS(vmin_bu, LSX, gvec_vvv, MO_8, tcg_gen_gvec_umin) +TRANS(vmin_hu, LSX, gvec_vvv, MO_16, tcg_gen_gvec_umin) +TRANS(vmin_wu, LSX, gvec_vvv, MO_32, tcg_gen_gvec_umin) +TRANS(vmin_du, LSX, gvec_vvv, MO_64, tcg_gen_gvec_umin) +TRANS(xvmin_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_smin) +TRANS(xvmin_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_smin) +TRANS(xvmin_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_smin) +TRANS(xvmin_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_smin) +TRANS(xvmin_bu, LASX, gvec_xxx, MO_8, tcg_gen_gvec_umin) +TRANS(xvmin_hu, LASX, gvec_xxx, MO_16, tcg_gen_gvec_umin) +TRANS(xvmin_wu, LASX, gvec_xxx, MO_32, tcg_gen_gvec_umin) +TRANS(xvmin_du, LASX, gvec_xxx, MO_64, tcg_gen_gvec_umin) + +static void gen_vmini_s(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm) +{ + 
tcg_gen_smin_vec(vece, t, a, tcg_constant_vec_matching(t, vece, imm)); +} + +static void gen_vmini_u(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm) +{ + tcg_gen_umin_vec(vece, t, a, tcg_constant_vec_matching(t, vece, imm)); +} + +static void gen_vmaxi_s(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm) +{ + tcg_gen_smax_vec(vece, t, a, tcg_constant_vec_matching(t, vece, imm)); +} + +static void gen_vmaxi_u(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm) +{ + tcg_gen_umax_vec(vece, t, a, tcg_constant_vec_matching(t, vece, imm)); +} + +static void do_vmini_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, + int64_t imm, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_smin_vec, 0 + }; + static const GVecGen2i op[4] = { + { + .fniv = gen_vmini_s, + .fnoi = gen_helper_vmini_b, + .opt_opc = vecop_list, + .vece = MO_8 + }, + { + .fniv = gen_vmini_s, + .fnoi = gen_helper_vmini_h, + .opt_opc = vecop_list, + .vece = MO_16 + }, + { + .fniv = gen_vmini_s, + .fnoi = gen_helper_vmini_w, + .opt_opc = vecop_list, + .vece = MO_32 + }, + { + .fniv = gen_vmini_s, + .fnoi = gen_helper_vmini_d, + .opt_opc = vecop_list, + .vece = MO_64 + }, + }; + + tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]); +} + +static void do_vmini_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, + int64_t imm, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_umin_vec, 0 + }; + static const GVecGen2i op[4] = { + { + .fniv = gen_vmini_u, + .fnoi = gen_helper_vmini_bu, + .opt_opc = vecop_list, + .vece = MO_8 + }, + { + .fniv = gen_vmini_u, + .fnoi = gen_helper_vmini_hu, + .opt_opc = vecop_list, + .vece = MO_16 + }, + { + .fniv = gen_vmini_u, + .fnoi = gen_helper_vmini_wu, + .opt_opc = vecop_list, + .vece = MO_32 + }, + { + .fniv = gen_vmini_u, + .fnoi = gen_helper_vmini_du, + .opt_opc = vecop_list, + .vece = MO_64 + }, + }; + + tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]); +} + +TRANS(vmini_b, LSX, gvec_vv_i, MO_8, do_vmini_s) +TRANS(vmini_h, LSX, gvec_vv_i, MO_16, do_vmini_s) +TRANS(vmini_w, LSX, gvec_vv_i, MO_32, do_vmini_s) +TRANS(vmini_d, LSX, gvec_vv_i, MO_64, do_vmini_s) +TRANS(vmini_bu, LSX, gvec_vv_i, MO_8, do_vmini_u) +TRANS(vmini_hu, LSX, gvec_vv_i, MO_16, do_vmini_u) +TRANS(vmini_wu, LSX, gvec_vv_i, MO_32, do_vmini_u) +TRANS(vmini_du, LSX, gvec_vv_i, MO_64, do_vmini_u) +TRANS(xvmini_b, LASX, gvec_xx_i, MO_8, do_vmini_s) +TRANS(xvmini_h, LASX, gvec_xx_i, MO_16, do_vmini_s) +TRANS(xvmini_w, LASX, gvec_xx_i, MO_32, do_vmini_s) +TRANS(xvmini_d, LASX, gvec_xx_i, MO_64, do_vmini_s) +TRANS(xvmini_bu, LASX, gvec_xx_i, MO_8, do_vmini_u) +TRANS(xvmini_hu, LASX, gvec_xx_i, MO_16, do_vmini_u) +TRANS(xvmini_wu, LASX, gvec_xx_i, MO_32, do_vmini_u) +TRANS(xvmini_du, LASX, gvec_xx_i, MO_64, do_vmini_u) + +static void do_vmaxi_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, + int64_t imm, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_smax_vec, 0 + }; + static const GVecGen2i op[4] = { + { + .fniv = gen_vmaxi_s, + .fnoi = gen_helper_vmaxi_b, + .opt_opc = vecop_list, + .vece = MO_8 + }, + { + .fniv = gen_vmaxi_s, + .fnoi = gen_helper_vmaxi_h, + .opt_opc = vecop_list, + .vece = MO_16 + }, + { + .fniv = gen_vmaxi_s, + .fnoi = gen_helper_vmaxi_w, + .opt_opc = vecop_list, + .vece = MO_32 + }, + { + .fniv = gen_vmaxi_s, + .fnoi = gen_helper_vmaxi_d, + .opt_opc = vecop_list, + .vece = MO_64 + }, + }; + + tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]); +} + +static void 
do_vmaxi_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, + int64_t imm, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_umax_vec, 0 + }; + static const GVecGen2i op[4] = { + { + .fniv = gen_vmaxi_u, + .fnoi = gen_helper_vmaxi_bu, + .opt_opc = vecop_list, + .vece = MO_8 + }, + { + .fniv = gen_vmaxi_u, + .fnoi = gen_helper_vmaxi_hu, + .opt_opc = vecop_list, + .vece = MO_16 + }, + { + .fniv = gen_vmaxi_u, + .fnoi = gen_helper_vmaxi_wu, + .opt_opc = vecop_list, + .vece = MO_32 + }, + { + .fniv = gen_vmaxi_u, + .fnoi = gen_helper_vmaxi_du, + .opt_opc = vecop_list, + .vece = MO_64 + }, + }; + + tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]); +} + +TRANS(vmaxi_b, LSX, gvec_vv_i, MO_8, do_vmaxi_s) +TRANS(vmaxi_h, LSX, gvec_vv_i, MO_16, do_vmaxi_s) +TRANS(vmaxi_w, LSX, gvec_vv_i, MO_32, do_vmaxi_s) +TRANS(vmaxi_d, LSX, gvec_vv_i, MO_64, do_vmaxi_s) +TRANS(vmaxi_bu, LSX, gvec_vv_i, MO_8, do_vmaxi_u) +TRANS(vmaxi_hu, LSX, gvec_vv_i, MO_16, do_vmaxi_u) +TRANS(vmaxi_wu, LSX, gvec_vv_i, MO_32, do_vmaxi_u) +TRANS(vmaxi_du, LSX, gvec_vv_i, MO_64, do_vmaxi_u) +TRANS(xvmaxi_b, LASX, gvec_xx_i, MO_8, do_vmaxi_s) +TRANS(xvmaxi_h, LASX, gvec_xx_i, MO_16, do_vmaxi_s) +TRANS(xvmaxi_w, LASX, gvec_xx_i, MO_32, do_vmaxi_s) +TRANS(xvmaxi_d, LASX, gvec_xx_i, MO_64, do_vmaxi_s) +TRANS(xvmaxi_bu, LASX, gvec_xx_i, MO_8, do_vmaxi_u) +TRANS(xvmaxi_hu, LASX, gvec_xx_i, MO_16, do_vmaxi_u) +TRANS(xvmaxi_wu, LASX, gvec_xx_i, MO_32, do_vmaxi_u) +TRANS(xvmaxi_du, LASX, gvec_xx_i, MO_64, do_vmaxi_u) + +TRANS(vmul_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_mul) +TRANS(vmul_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_mul) +TRANS(vmul_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_mul) +TRANS(vmul_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_mul) +TRANS(xvmul_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_mul) +TRANS(xvmul_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_mul) +TRANS(xvmul_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_mul) +TRANS(xvmul_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_mul) + +static void gen_vmuh_w(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 discard = tcg_temp_new_i32(); + tcg_gen_muls2_i32(discard, t, a, b); +} + +static void gen_vmuh_d(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 discard = tcg_temp_new_i64(); + tcg_gen_muls2_i64(discard, t, a, b); +} + +static void do_vmuh_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) +{ + static const GVecGen3 op[4] = { + { + .fno = gen_helper_vmuh_b, + .vece = MO_8 + }, + { + .fno = gen_helper_vmuh_h, + .vece = MO_16 + }, + { + .fni4 = gen_vmuh_w, + .fno = gen_helper_vmuh_w, + .vece = MO_32 + }, + { + .fni8 = gen_vmuh_d, + .fno = gen_helper_vmuh_d, + .vece = MO_64 + }, + }; + + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); +} + +TRANS(vmuh_b, LSX, gvec_vvv, MO_8, do_vmuh_s) +TRANS(vmuh_h, LSX, gvec_vvv, MO_16, do_vmuh_s) +TRANS(vmuh_w, LSX, gvec_vvv, MO_32, do_vmuh_s) +TRANS(vmuh_d, LSX, gvec_vvv, MO_64, do_vmuh_s) +TRANS(xvmuh_b, LASX, gvec_xxx, MO_8, do_vmuh_s) +TRANS(xvmuh_h, LASX, gvec_xxx, MO_16, do_vmuh_s) +TRANS(xvmuh_w, LASX, gvec_xxx, MO_32, do_vmuh_s) +TRANS(xvmuh_d, LASX, gvec_xxx, MO_64, do_vmuh_s) + +static void gen_vmuh_wu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 discard = tcg_temp_new_i32(); + tcg_gen_mulu2_i32(discard, t, a, b); +} + +static void gen_vmuh_du(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 discard = tcg_temp_new_i64(); + tcg_gen_mulu2_i64(discard, t, a, b); +} + +static void do_vmuh_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, + 
uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) +{ + static const GVecGen3 op[4] = { + { + .fno = gen_helper_vmuh_bu, + .vece = MO_8 + }, + { + .fno = gen_helper_vmuh_hu, + .vece = MO_16 + }, + { + .fni4 = gen_vmuh_wu, + .fno = gen_helper_vmuh_wu, + .vece = MO_32 + }, + { + .fni8 = gen_vmuh_du, + .fno = gen_helper_vmuh_du, + .vece = MO_64 + }, + }; + + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); +} + +TRANS(vmuh_bu, LSX, gvec_vvv, MO_8, do_vmuh_u) +TRANS(vmuh_hu, LSX, gvec_vvv, MO_16, do_vmuh_u) +TRANS(vmuh_wu, LSX, gvec_vvv, MO_32, do_vmuh_u) +TRANS(vmuh_du, LSX, gvec_vvv, MO_64, do_vmuh_u) +TRANS(xvmuh_bu, LASX, gvec_xxx, MO_8, do_vmuh_u) +TRANS(xvmuh_hu, LASX, gvec_xxx, MO_16, do_vmuh_u) +TRANS(xvmuh_wu, LASX, gvec_xxx, MO_32, do_vmuh_u) +TRANS(xvmuh_du, LASX, gvec_xxx, MO_64, do_vmuh_u) + +static void gen_vmulwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t1, t2; + int halfbits = 4 << vece; + + t1 = tcg_temp_new_vec_matching(a); + t2 = tcg_temp_new_vec_matching(b); + tcg_gen_shli_vec(vece, t1, a, halfbits); + tcg_gen_sari_vec(vece, t1, t1, halfbits); + tcg_gen_shli_vec(vece, t2, b, halfbits); + tcg_gen_sari_vec(vece, t2, t2, halfbits); + tcg_gen_mul_vec(vece, t, t1, t2); +} + +static void gen_vmulwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t1, t2; + + t1 = tcg_temp_new_i32(); + t2 = tcg_temp_new_i32(); + tcg_gen_ext16s_i32(t1, a); + tcg_gen_ext16s_i32(t2, b); + tcg_gen_mul_i32(t, t1, t2); +} + +static void gen_vmulwev_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t1, t2; + + t1 = tcg_temp_new_i64(); + t2 = tcg_temp_new_i64(); + tcg_gen_ext32s_i64(t1, a); + tcg_gen_ext32s_i64(t2, b); + tcg_gen_mul_i64(t, t1, t2); +} + +static void do_vmulwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_mul_vec, 0 + }; + static const GVecGen3 op[3] = { + { + .fniv = gen_vmulwev_s, + .fno = gen_helper_vmulwev_h_b, + .opt_opc = vecop_list, + .vece = MO_16 + }, + { + .fni4 = gen_vmulwev_w_h, + .fniv = gen_vmulwev_s, + .fno = gen_helper_vmulwev_w_h, + .opt_opc = vecop_list, + .vece = MO_32 + }, + { + .fni8 = gen_vmulwev_d_w, + .fniv = gen_vmulwev_s, + .fno = gen_helper_vmulwev_d_w, + .opt_opc = vecop_list, + .vece = MO_64 + }, + }; + + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); +} + +TRANS(vmulwev_h_b, LSX, gvec_vvv, MO_8, do_vmulwev_s) +TRANS(vmulwev_w_h, LSX, gvec_vvv, MO_16, do_vmulwev_s) +TRANS(vmulwev_d_w, LSX, gvec_vvv, MO_32, do_vmulwev_s) +TRANS(xvmulwev_h_b, LASX, gvec_xxx, MO_8, do_vmulwev_s) +TRANS(xvmulwev_w_h, LASX, gvec_xxx, MO_16, do_vmulwev_s) +TRANS(xvmulwev_d_w, LASX, gvec_xxx, MO_32, do_vmulwev_s) + +static void tcg_gen_mulus2_i64(TCGv_i64 rl, TCGv_i64 rh, + TCGv_i64 arg1, TCGv_i64 arg2) +{ + tcg_gen_mulsu2_i64(rl, rh, arg2, arg1); +} + +static bool gen_vmul_q_vl(DisasContext *ctx, + arg_vvv *a, uint32_t oprsz, int idx1, int idx2, + void (*func)(TCGv_i64, TCGv_i64, + TCGv_i64, TCGv_i64)) +{ + TCGv_i64 rh, rl, arg1, arg2; + int i; + + if (!check_vec(ctx, oprsz)) { + return true; + } + + rh = tcg_temp_new_i64(); + rl = tcg_temp_new_i64(); + arg1 = tcg_temp_new_i64(); + arg2 = tcg_temp_new_i64(); + + for (i = 0; i < oprsz / 16; i++) { + get_vreg64(arg1, a->vj, 2 * i + idx1); + get_vreg64(arg2, a->vk, 2 * i + idx2); + + func(rl, rh, arg1, arg2); + + set_vreg64(rh, a->vd, 2 * i + 1); + set_vreg64(rl, a->vd, 2 * i); + } + + return true; +} + +static bool 
gen_vmul_q(DisasContext *ctx, arg_vvv *a, int idx1, int idx2, + void (*func)(TCGv_i64, TCGv_i64, + TCGv_i64, TCGv_i64)) +{ + return gen_vmul_q_vl(ctx, a, 16, idx1, idx2, func); +} + +static bool gen_xvmul_q(DisasContext *ctx, arg_vvv *a, int idx1, int idx2, + void (*func)(TCGv_i64, TCGv_i64, + TCGv_i64, TCGv_i64)) +{ + return gen_vmul_q_vl(ctx, a, 32, idx1, idx2, func); +} + +TRANS(vmulwev_q_d, LSX, gen_vmul_q, 0, 0, tcg_gen_muls2_i64) +TRANS(vmulwod_q_d, LSX, gen_vmul_q, 1, 1, tcg_gen_muls2_i64) +TRANS(vmulwev_q_du, LSX, gen_vmul_q, 0, 0, tcg_gen_mulu2_i64) +TRANS(vmulwod_q_du, LSX, gen_vmul_q, 1, 1, tcg_gen_mulu2_i64) +TRANS(vmulwev_q_du_d, LSX, gen_vmul_q, 0, 0, tcg_gen_mulus2_i64) +TRANS(vmulwod_q_du_d, LSX, gen_vmul_q, 1, 1, tcg_gen_mulus2_i64) +TRANS(xvmulwev_q_d, LASX, gen_xvmul_q, 0, 0, tcg_gen_muls2_i64) +TRANS(xvmulwod_q_d, LASX, gen_xvmul_q, 1, 1, tcg_gen_muls2_i64) +TRANS(xvmulwev_q_du, LASX, gen_xvmul_q, 0, 0, tcg_gen_mulu2_i64) +TRANS(xvmulwod_q_du, LASX, gen_xvmul_q, 1, 1, tcg_gen_mulu2_i64) +TRANS(xvmulwev_q_du_d, LASX, gen_xvmul_q, 0, 0, tcg_gen_mulus2_i64) +TRANS(xvmulwod_q_du_d, LASX, gen_xvmul_q, 1, 1, tcg_gen_mulus2_i64) + +static void gen_vmulwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t1, t2; + int halfbits = 4 << vece; + + t1 = tcg_temp_new_vec_matching(a); + t2 = tcg_temp_new_vec_matching(b); + tcg_gen_sari_vec(vece, t1, a, halfbits); + tcg_gen_sari_vec(vece, t2, b, halfbits); + tcg_gen_mul_vec(vece, t, t1, t2); +} + +static void gen_vmulwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t1, t2; + + t1 = tcg_temp_new_i32(); + t2 = tcg_temp_new_i32(); + tcg_gen_sari_i32(t1, a, 16); + tcg_gen_sari_i32(t2, b, 16); + tcg_gen_mul_i32(t, t1, t2); +} + +static void gen_vmulwod_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t1, t2; + + t1 = tcg_temp_new_i64(); + t2 = tcg_temp_new_i64(); + tcg_gen_sari_i64(t1, a, 32); + tcg_gen_sari_i64(t2, b, 32); + tcg_gen_mul_i64(t, t1, t2); +} + +static void do_vmulwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_sari_vec, INDEX_op_mul_vec, 0 + }; + static const GVecGen3 op[3] = { + { + .fniv = gen_vmulwod_s, + .fno = gen_helper_vmulwod_h_b, + .opt_opc = vecop_list, + .vece = MO_16 + }, + { + .fni4 = gen_vmulwod_w_h, + .fniv = gen_vmulwod_s, + .fno = gen_helper_vmulwod_w_h, + .opt_opc = vecop_list, + .vece = MO_32 + }, + { + .fni8 = gen_vmulwod_d_w, + .fniv = gen_vmulwod_s, + .fno = gen_helper_vmulwod_d_w, + .opt_opc = vecop_list, + .vece = MO_64 + }, + }; + + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); +} + +TRANS(vmulwod_h_b, LSX, gvec_vvv, MO_8, do_vmulwod_s) +TRANS(vmulwod_w_h, LSX, gvec_vvv, MO_16, do_vmulwod_s) +TRANS(vmulwod_d_w, LSX, gvec_vvv, MO_32, do_vmulwod_s) +TRANS(xvmulwod_h_b, LASX, gvec_xxx, MO_8, do_vmulwod_s) +TRANS(xvmulwod_w_h, LASX, gvec_xxx, MO_16, do_vmulwod_s) +TRANS(xvmulwod_d_w, LASX, gvec_xxx, MO_32, do_vmulwod_s) + +static void gen_vmulwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t1, t2, mask; + + t1 = tcg_temp_new_vec_matching(a); + t2 = tcg_temp_new_vec_matching(b); + mask = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece)); + tcg_gen_and_vec(vece, t1, a, mask); + tcg_gen_and_vec(vece, t2, b, mask); + tcg_gen_mul_vec(vece, t, t1, t2); +} + +static void gen_vmulwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t1, t2; + + t1 = tcg_temp_new_i32(); + t2 = tcg_temp_new_i32(); + 
tcg_gen_ext16u_i32(t1, a); + tcg_gen_ext16u_i32(t2, b); + tcg_gen_mul_i32(t, t1, t2); +} + +static void gen_vmulwev_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t1, t2; + + t1 = tcg_temp_new_i64(); + t2 = tcg_temp_new_i64(); + tcg_gen_ext32u_i64(t1, a); + tcg_gen_ext32u_i64(t2, b); + tcg_gen_mul_i64(t, t1, t2); +} + +static void do_vmulwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_mul_vec, 0 + }; + static const GVecGen3 op[3] = { + { + .fniv = gen_vmulwev_u, + .fno = gen_helper_vmulwev_h_bu, + .opt_opc = vecop_list, + .vece = MO_16 + }, + { + .fni4 = gen_vmulwev_w_hu, + .fniv = gen_vmulwev_u, + .fno = gen_helper_vmulwev_w_hu, + .opt_opc = vecop_list, + .vece = MO_32 + }, + { + .fni8 = gen_vmulwev_d_wu, + .fniv = gen_vmulwev_u, + .fno = gen_helper_vmulwev_d_wu, + .opt_opc = vecop_list, + .vece = MO_64 + }, + }; + + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); +} + +TRANS(vmulwev_h_bu, LSX, gvec_vvv, MO_8, do_vmulwev_u) +TRANS(vmulwev_w_hu, LSX, gvec_vvv, MO_16, do_vmulwev_u) +TRANS(vmulwev_d_wu, LSX, gvec_vvv, MO_32, do_vmulwev_u) +TRANS(xvmulwev_h_bu, LASX, gvec_xxx, MO_8, do_vmulwev_u) +TRANS(xvmulwev_w_hu, LASX, gvec_xxx, MO_16, do_vmulwev_u) +TRANS(xvmulwev_d_wu, LASX, gvec_xxx, MO_32, do_vmulwev_u) + +static void gen_vmulwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t1, t2; + int halfbits = 4 << vece; + + t1 = tcg_temp_new_vec_matching(a); + t2 = tcg_temp_new_vec_matching(b); + tcg_gen_shri_vec(vece, t1, a, halfbits); + tcg_gen_shri_vec(vece, t2, b, halfbits); + tcg_gen_mul_vec(vece, t, t1, t2); +} + +static void gen_vmulwod_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t1, t2; + + t1 = tcg_temp_new_i32(); + t2 = tcg_temp_new_i32(); + tcg_gen_shri_i32(t1, a, 16); + tcg_gen_shri_i32(t2, b, 16); + tcg_gen_mul_i32(t, t1, t2); +} + +static void gen_vmulwod_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t1, t2; + + t1 = tcg_temp_new_i64(); + t2 = tcg_temp_new_i64(); + tcg_gen_shri_i64(t1, a, 32); + tcg_gen_shri_i64(t2, b, 32); + tcg_gen_mul_i64(t, t1, t2); +} + +static void do_vmulwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_shri_vec, INDEX_op_mul_vec, 0 + }; + static const GVecGen3 op[3] = { + { + .fniv = gen_vmulwod_u, + .fno = gen_helper_vmulwod_h_bu, + .opt_opc = vecop_list, + .vece = MO_16 + }, + { + .fni4 = gen_vmulwod_w_hu, + .fniv = gen_vmulwod_u, + .fno = gen_helper_vmulwod_w_hu, + .opt_opc = vecop_list, + .vece = MO_32 + }, + { + .fni8 = gen_vmulwod_d_wu, + .fniv = gen_vmulwod_u, + .fno = gen_helper_vmulwod_d_wu, + .opt_opc = vecop_list, + .vece = MO_64 + }, + }; + + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); +} + +TRANS(vmulwod_h_bu, LSX, gvec_vvv, MO_8, do_vmulwod_u) +TRANS(vmulwod_w_hu, LSX, gvec_vvv, MO_16, do_vmulwod_u) +TRANS(vmulwod_d_wu, LSX, gvec_vvv, MO_32, do_vmulwod_u) +TRANS(xvmulwod_h_bu, LASX, gvec_xxx, MO_8, do_vmulwod_u) +TRANS(xvmulwod_w_hu, LASX, gvec_xxx, MO_16, do_vmulwod_u) +TRANS(xvmulwod_d_wu, LASX, gvec_xxx, MO_32, do_vmulwod_u) + +static void gen_vmulwev_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t1, t2, mask; + int halfbits = 4 << vece; + + t1 = tcg_temp_new_vec_matching(a); + t2 = tcg_temp_new_vec_matching(b); + mask = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece)); + 
tcg_gen_and_vec(vece, t1, a, mask); + tcg_gen_shli_vec(vece, t2, b, halfbits); + tcg_gen_sari_vec(vece, t2, t2, halfbits); + tcg_gen_mul_vec(vece, t, t1, t2); +} + +static void gen_vmulwev_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t1, t2; + + t1 = tcg_temp_new_i32(); + t2 = tcg_temp_new_i32(); + tcg_gen_ext16u_i32(t1, a); + tcg_gen_ext16s_i32(t2, b); + tcg_gen_mul_i32(t, t1, t2); +} + +static void gen_vmulwev_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t1, t2; + + t1 = tcg_temp_new_i64(); + t2 = tcg_temp_new_i64(); + tcg_gen_ext32u_i64(t1, a); + tcg_gen_ext32s_i64(t2, b); + tcg_gen_mul_i64(t, t1, t2); +} + +static void do_vmulwev_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_mul_vec, 0 + }; + static const GVecGen3 op[3] = { + { + .fniv = gen_vmulwev_u_s, + .fno = gen_helper_vmulwev_h_bu_b, + .opt_opc = vecop_list, + .vece = MO_16 + }, + { + .fni4 = gen_vmulwev_w_hu_h, + .fniv = gen_vmulwev_u_s, + .fno = gen_helper_vmulwev_w_hu_h, + .opt_opc = vecop_list, + .vece = MO_32 + }, + { + .fni8 = gen_vmulwev_d_wu_w, + .fniv = gen_vmulwev_u_s, + .fno = gen_helper_vmulwev_d_wu_w, + .opt_opc = vecop_list, + .vece = MO_64 + }, + }; + + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); +} + +TRANS(vmulwev_h_bu_b, LSX, gvec_vvv, MO_8, do_vmulwev_u_s) +TRANS(vmulwev_w_hu_h, LSX, gvec_vvv, MO_16, do_vmulwev_u_s) +TRANS(vmulwev_d_wu_w, LSX, gvec_vvv, MO_32, do_vmulwev_u_s) +TRANS(xvmulwev_h_bu_b, LASX, gvec_xxx, MO_8, do_vmulwev_u_s) +TRANS(xvmulwev_w_hu_h, LASX, gvec_xxx, MO_16, do_vmulwev_u_s) +TRANS(xvmulwev_d_wu_w, LASX, gvec_xxx, MO_32, do_vmulwev_u_s) + +static void gen_vmulwod_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t1, t2; + int halfbits = 4 << vece; + + t1 = tcg_temp_new_vec_matching(a); + t2 = tcg_temp_new_vec_matching(b); + tcg_gen_shri_vec(vece, t1, a, halfbits); + tcg_gen_sari_vec(vece, t2, b, halfbits); + tcg_gen_mul_vec(vece, t, t1, t2); +} + +static void gen_vmulwod_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t1, t2; + + t1 = tcg_temp_new_i32(); + t2 = tcg_temp_new_i32(); + tcg_gen_shri_i32(t1, a, 16); + tcg_gen_sari_i32(t2, b, 16); + tcg_gen_mul_i32(t, t1, t2); +} +static void gen_vmulwod_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t1, t2; + + t1 = tcg_temp_new_i64(); + t2 = tcg_temp_new_i64(); + tcg_gen_shri_i64(t1, a, 32); + tcg_gen_sari_i64(t2, b, 32); + tcg_gen_mul_i64(t, t1, t2); +} + +static void do_vmulwod_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_mul_vec, 0 + }; + static const GVecGen3 op[3] = { + { + .fniv = gen_vmulwod_u_s, + .fno = gen_helper_vmulwod_h_bu_b, + .opt_opc = vecop_list, + .vece = MO_16 + }, + { + .fni4 = gen_vmulwod_w_hu_h, + .fniv = gen_vmulwod_u_s, + .fno = gen_helper_vmulwod_w_hu_h, + .opt_opc = vecop_list, + .vece = MO_32 + }, + { + .fni8 = gen_vmulwod_d_wu_w, + .fniv = gen_vmulwod_u_s, + .fno = gen_helper_vmulwod_d_wu_w, + .opt_opc = vecop_list, + .vece = MO_64 + }, + }; + + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); +} + +TRANS(vmulwod_h_bu_b, LSX, gvec_vvv, MO_8, do_vmulwod_u_s) +TRANS(vmulwod_w_hu_h, LSX, gvec_vvv, MO_16, do_vmulwod_u_s) +TRANS(vmulwod_d_wu_w, LSX, gvec_vvv, MO_32, do_vmulwod_u_s) +TRANS(xvmulwod_h_bu_b, LASX, gvec_xxx, 
MO_8, do_vmulwod_u_s) +TRANS(xvmulwod_w_hu_h, LASX, gvec_xxx, MO_16, do_vmulwod_u_s) +TRANS(xvmulwod_d_wu_w, LASX, gvec_xxx, MO_32, do_vmulwod_u_s) + +static void gen_vmadd(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t1; + + t1 = tcg_temp_new_vec_matching(t); + tcg_gen_mul_vec(vece, t1, a, b); + tcg_gen_add_vec(vece, t, t, t1); +} + +static void gen_vmadd_w(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t1; + + t1 = tcg_temp_new_i32(); + tcg_gen_mul_i32(t1, a, b); + tcg_gen_add_i32(t, t, t1); +} + +static void gen_vmadd_d(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t1; + + t1 = tcg_temp_new_i64(); + tcg_gen_mul_i64(t1, a, b); + tcg_gen_add_i64(t, t, t1); +} + +static void do_vmadd(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_mul_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen3 op[4] = { + { + .fniv = gen_vmadd, + .fno = gen_helper_vmadd_b, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_8 + }, + { + .fniv = gen_vmadd, + .fno = gen_helper_vmadd_h, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_16 + }, + { + .fni4 = gen_vmadd_w, + .fniv = gen_vmadd, + .fno = gen_helper_vmadd_w, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_32 + }, + { + .fni8 = gen_vmadd_d, + .fniv = gen_vmadd, + .fno = gen_helper_vmadd_d, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_64 + }, + }; + + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); +} + +TRANS(vmadd_b, LSX, gvec_vvv, MO_8, do_vmadd) +TRANS(vmadd_h, LSX, gvec_vvv, MO_16, do_vmadd) +TRANS(vmadd_w, LSX, gvec_vvv, MO_32, do_vmadd) +TRANS(vmadd_d, LSX, gvec_vvv, MO_64, do_vmadd) +TRANS(xvmadd_b, LASX, gvec_xxx, MO_8, do_vmadd) +TRANS(xvmadd_h, LASX, gvec_xxx, MO_16, do_vmadd) +TRANS(xvmadd_w, LASX, gvec_xxx, MO_32, do_vmadd) +TRANS(xvmadd_d, LASX, gvec_xxx, MO_64, do_vmadd) + +static void gen_vmsub(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t1; + + t1 = tcg_temp_new_vec_matching(t); + tcg_gen_mul_vec(vece, t1, a, b); + tcg_gen_sub_vec(vece, t, t, t1); +} + +static void gen_vmsub_w(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t1; + + t1 = tcg_temp_new_i32(); + tcg_gen_mul_i32(t1, a, b); + tcg_gen_sub_i32(t, t, t1); +} + +static void gen_vmsub_d(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t1; + + t1 = tcg_temp_new_i64(); + tcg_gen_mul_i64(t1, a, b); + tcg_gen_sub_i64(t, t, t1); +} + +static void do_vmsub(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_mul_vec, INDEX_op_sub_vec, 0 + }; + static const GVecGen3 op[4] = { + { + .fniv = gen_vmsub, + .fno = gen_helper_vmsub_b, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_8 + }, + { + .fniv = gen_vmsub, + .fno = gen_helper_vmsub_h, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_16 + }, + { + .fni4 = gen_vmsub_w, + .fniv = gen_vmsub, + .fno = gen_helper_vmsub_w, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_32 + }, + { + .fni8 = gen_vmsub_d, + .fniv = gen_vmsub, + .fno = gen_helper_vmsub_d, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_64 + }, + }; + + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); +} + +TRANS(vmsub_b, LSX, gvec_vvv, MO_8, do_vmsub) +TRANS(vmsub_h, LSX, gvec_vvv, MO_16, do_vmsub) +TRANS(vmsub_w, LSX, gvec_vvv, MO_32, do_vmsub) +TRANS(vmsub_d, LSX, gvec_vvv, MO_64, do_vmsub) +TRANS(xvmsub_b, 
LASX, gvec_xxx, MO_8, do_vmsub) +TRANS(xvmsub_h, LASX, gvec_xxx, MO_16, do_vmsub) +TRANS(xvmsub_w, LASX, gvec_xxx, MO_32, do_vmsub) +TRANS(xvmsub_d, LASX, gvec_xxx, MO_64, do_vmsub) + +static void gen_vmaddwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t1, t2, t3; + int halfbits = 4 << vece; + + t1 = tcg_temp_new_vec_matching(a); + t2 = tcg_temp_new_vec_matching(b); + t3 = tcg_temp_new_vec_matching(t); + tcg_gen_shli_vec(vece, t1, a, halfbits); + tcg_gen_sari_vec(vece, t1, t1, halfbits); + tcg_gen_shli_vec(vece, t2, b, halfbits); + tcg_gen_sari_vec(vece, t2, t2, halfbits); + tcg_gen_mul_vec(vece, t3, t1, t2); + tcg_gen_add_vec(vece, t, t, t3); +} + +static void gen_vmaddwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t1; + + t1 = tcg_temp_new_i32(); + gen_vmulwev_w_h(t1, a, b); + tcg_gen_add_i32(t, t, t1); +} + +static void gen_vmaddwev_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t1; + + t1 = tcg_temp_new_i64(); + gen_vmulwev_d_w(t1, a, b); + tcg_gen_add_i64(t, t, t1); +} + +static void do_vmaddwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_shli_vec, INDEX_op_sari_vec, + INDEX_op_mul_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen3 op[3] = { + { + .fniv = gen_vmaddwev_s, + .fno = gen_helper_vmaddwev_h_b, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_16 + }, + { + .fni4 = gen_vmaddwev_w_h, + .fniv = gen_vmaddwev_s, + .fno = gen_helper_vmaddwev_w_h, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_32 + }, + { + .fni8 = gen_vmaddwev_d_w, + .fniv = gen_vmaddwev_s, + .fno = gen_helper_vmaddwev_d_w, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_64 + }, + }; + + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); +} + +TRANS(vmaddwev_h_b, LSX, gvec_vvv, MO_8, do_vmaddwev_s) +TRANS(vmaddwev_w_h, LSX, gvec_vvv, MO_16, do_vmaddwev_s) +TRANS(vmaddwev_d_w, LSX, gvec_vvv, MO_32, do_vmaddwev_s) +TRANS(xvmaddwev_h_b, LASX, gvec_xxx, MO_8, do_vmaddwev_s) +TRANS(xvmaddwev_w_h, LASX, gvec_xxx, MO_16, do_vmaddwev_s) +TRANS(xvmaddwev_d_w, LASX, gvec_xxx, MO_32, do_vmaddwev_s) + +static bool gen_vmadd_q_vl(DisasContext * ctx, + arg_vvv *a, uint32_t oprsz, int idx1, int idx2, + void (*func)(TCGv_i64, TCGv_i64, + TCGv_i64, TCGv_i64)) +{ + TCGv_i64 rh, rl, arg1, arg2, th, tl; + int i; + + if (!check_vec(ctx, oprsz)) { + return true; + } + + rh = tcg_temp_new_i64(); + rl = tcg_temp_new_i64(); + arg1 = tcg_temp_new_i64(); + arg2 = tcg_temp_new_i64(); + th = tcg_temp_new_i64(); + tl = tcg_temp_new_i64(); + + for (i = 0; i < oprsz / 16; i++) { + get_vreg64(arg1, a->vj, 2 * i + idx1); + get_vreg64(arg2, a->vk, 2 * i + idx2); + get_vreg64(rh, a->vd, 2 * i + 1); + get_vreg64(rl, a->vd, 2 * i); + + func(tl, th, arg1, arg2); + tcg_gen_add2_i64(rl, rh, rl, rh, tl, th); + + set_vreg64(rh, a->vd, 2 * i + 1); + set_vreg64(rl, a->vd, 2 * i); + } + + return true; +} + +static bool gen_vmadd_q(DisasContext *ctx, arg_vvv *a, int idx1, int idx2, + void (*func)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64)) +{ + return gen_vmadd_q_vl(ctx, a, 16, idx1, idx2, func); +} + +static bool gen_xvmadd_q(DisasContext *ctx, arg_vvv *a, int idx1, int idx2, + void (*func)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64)) +{ + return gen_vmadd_q_vl(ctx, a, 32, idx1, idx2, func); +} + +TRANS(vmaddwev_q_d, LSX, gen_vmadd_q, 0, 0, tcg_gen_muls2_i64) +TRANS(vmaddwod_q_d, LSX, gen_vmadd_q, 1, 1, tcg_gen_muls2_i64) +TRANS(vmaddwev_q_du, LSX, gen_vmadd_q, 0, 
0, tcg_gen_mulu2_i64) +TRANS(vmaddwod_q_du, LSX, gen_vmadd_q, 1, 1, tcg_gen_mulu2_i64) +TRANS(vmaddwev_q_du_d, LSX, gen_vmadd_q, 0, 0, tcg_gen_mulus2_i64) +TRANS(vmaddwod_q_du_d, LSX, gen_vmadd_q, 1, 1, tcg_gen_mulus2_i64) +TRANS(xvmaddwev_q_d, LASX, gen_xvmadd_q, 0, 0, tcg_gen_muls2_i64) +TRANS(xvmaddwod_q_d, LASX, gen_xvmadd_q, 1, 1, tcg_gen_muls2_i64) +TRANS(xvmaddwev_q_du, LASX, gen_xvmadd_q, 0, 0, tcg_gen_mulu2_i64) +TRANS(xvmaddwod_q_du, LASX, gen_xvmadd_q, 1, 1, tcg_gen_mulu2_i64) +TRANS(xvmaddwev_q_du_d, LASX, gen_xvmadd_q, 0, 0, tcg_gen_mulus2_i64) +TRANS(xvmaddwod_q_du_d, LASX, gen_xvmadd_q, 1, 1, tcg_gen_mulus2_i64) + +static void gen_vmaddwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t1, t2, t3; + int halfbits = 4 << vece; + + t1 = tcg_temp_new_vec_matching(a); + t2 = tcg_temp_new_vec_matching(b); + t3 = tcg_temp_new_vec_matching(t); + tcg_gen_sari_vec(vece, t1, a, halfbits); + tcg_gen_sari_vec(vece, t2, b, halfbits); + tcg_gen_mul_vec(vece, t3, t1, t2); + tcg_gen_add_vec(vece, t, t, t3); +} + +static void gen_vmaddwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t1; + + t1 = tcg_temp_new_i32(); + gen_vmulwod_w_h(t1, a, b); + tcg_gen_add_i32(t, t, t1); +} + +static void gen_vmaddwod_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t1; + + t1 = tcg_temp_new_i64(); + gen_vmulwod_d_w(t1, a, b); + tcg_gen_add_i64(t, t, t1); +} + +static void do_vmaddwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_sari_vec, INDEX_op_mul_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen3 op[3] = { + { + .fniv = gen_vmaddwod_s, + .fno = gen_helper_vmaddwod_h_b, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_16 + }, + { + .fni4 = gen_vmaddwod_w_h, + .fniv = gen_vmaddwod_s, + .fno = gen_helper_vmaddwod_w_h, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_32 + }, + { + .fni8 = gen_vmaddwod_d_w, + .fniv = gen_vmaddwod_s, + .fno = gen_helper_vmaddwod_d_w, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_64 + }, + }; + + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); +} + +TRANS(vmaddwod_h_b, LSX, gvec_vvv, MO_8, do_vmaddwod_s) +TRANS(vmaddwod_w_h, LSX, gvec_vvv, MO_16, do_vmaddwod_s) +TRANS(vmaddwod_d_w, LSX, gvec_vvv, MO_32, do_vmaddwod_s) +TRANS(xvmaddwod_h_b, LASX, gvec_xxx, MO_8, do_vmaddwod_s) +TRANS(xvmaddwod_w_h, LASX, gvec_xxx, MO_16, do_vmaddwod_s) +TRANS(xvmaddwod_d_w, LASX, gvec_xxx, MO_32, do_vmaddwod_s) + +static void gen_vmaddwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t1, t2, mask; + + t1 = tcg_temp_new_vec_matching(t); + t2 = tcg_temp_new_vec_matching(b); + mask = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece)); + tcg_gen_and_vec(vece, t1, a, mask); + tcg_gen_and_vec(vece, t2, b, mask); + tcg_gen_mul_vec(vece, t1, t1, t2); + tcg_gen_add_vec(vece, t, t, t1); +} + +static void gen_vmaddwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t1; + + t1 = tcg_temp_new_i32(); + gen_vmulwev_w_hu(t1, a, b); + tcg_gen_add_i32(t, t, t1); +} + +static void gen_vmaddwev_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t1; + + t1 = tcg_temp_new_i64(); + gen_vmulwev_d_wu(t1, a, b); + tcg_gen_add_i64(t, t, t1); +} + +static void do_vmaddwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_mul_vec, INDEX_op_add_vec, 0 + }; + static const 
GVecGen3 op[3] = { + { + .fniv = gen_vmaddwev_u, + .fno = gen_helper_vmaddwev_h_bu, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_16 + }, + { + .fni4 = gen_vmaddwev_w_hu, + .fniv = gen_vmaddwev_u, + .fno = gen_helper_vmaddwev_w_hu, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_32 + }, + { + .fni8 = gen_vmaddwev_d_wu, + .fniv = gen_vmaddwev_u, + .fno = gen_helper_vmaddwev_d_wu, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_64 + }, + }; + + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); +} + +TRANS(vmaddwev_h_bu, LSX, gvec_vvv, MO_8, do_vmaddwev_u) +TRANS(vmaddwev_w_hu, LSX, gvec_vvv, MO_16, do_vmaddwev_u) +TRANS(vmaddwev_d_wu, LSX, gvec_vvv, MO_32, do_vmaddwev_u) +TRANS(xvmaddwev_h_bu, LASX, gvec_xxx, MO_8, do_vmaddwev_u) +TRANS(xvmaddwev_w_hu, LASX, gvec_xxx, MO_16, do_vmaddwev_u) +TRANS(xvmaddwev_d_wu, LASX, gvec_xxx, MO_32, do_vmaddwev_u) + +static void gen_vmaddwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t1, t2, t3; + int halfbits = 4 << vece; + + t1 = tcg_temp_new_vec_matching(a); + t2 = tcg_temp_new_vec_matching(b); + t3 = tcg_temp_new_vec_matching(t); + tcg_gen_shri_vec(vece, t1, a, halfbits); + tcg_gen_shri_vec(vece, t2, b, halfbits); + tcg_gen_mul_vec(vece, t3, t1, t2); + tcg_gen_add_vec(vece, t, t, t3); +} + +static void gen_vmaddwod_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t1; + + t1 = tcg_temp_new_i32(); + gen_vmulwod_w_hu(t1, a, b); + tcg_gen_add_i32(t, t, t1); +} + +static void gen_vmaddwod_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t1; + + t1 = tcg_temp_new_i64(); + gen_vmulwod_d_wu(t1, a, b); + tcg_gen_add_i64(t, t, t1); +} + +static void do_vmaddwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_shri_vec, INDEX_op_mul_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen3 op[3] = { + { + .fniv = gen_vmaddwod_u, + .fno = gen_helper_vmaddwod_h_bu, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_16 + }, + { + .fni4 = gen_vmaddwod_w_hu, + .fniv = gen_vmaddwod_u, + .fno = gen_helper_vmaddwod_w_hu, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_32 + }, + { + .fni8 = gen_vmaddwod_d_wu, + .fniv = gen_vmaddwod_u, + .fno = gen_helper_vmaddwod_d_wu, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_64 + }, + }; + + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); +} + +TRANS(vmaddwod_h_bu, LSX, gvec_vvv, MO_8, do_vmaddwod_u) +TRANS(vmaddwod_w_hu, LSX, gvec_vvv, MO_16, do_vmaddwod_u) +TRANS(vmaddwod_d_wu, LSX, gvec_vvv, MO_32, do_vmaddwod_u) +TRANS(xvmaddwod_h_bu, LASX, gvec_xxx, MO_8, do_vmaddwod_u) +TRANS(xvmaddwod_w_hu, LASX, gvec_xxx, MO_16, do_vmaddwod_u) +TRANS(xvmaddwod_d_wu, LASX, gvec_xxx, MO_32, do_vmaddwod_u) + +static void gen_vmaddwev_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t1, t2, mask; + int halfbits = 4 << vece; + + t1 = tcg_temp_new_vec_matching(a); + t2 = tcg_temp_new_vec_matching(b); + mask = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece)); + tcg_gen_and_vec(vece, t1, a, mask); + tcg_gen_shli_vec(vece, t2, b, halfbits); + tcg_gen_sari_vec(vece, t2, t2, halfbits); + tcg_gen_mul_vec(vece, t1, t1, t2); + tcg_gen_add_vec(vece, t, t, t1); +} + +static void gen_vmaddwev_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t1; + + t1 = tcg_temp_new_i32(); + gen_vmulwev_w_hu_h(t1, a, b); + tcg_gen_add_i32(t, t, t1); +} + +static void gen_vmaddwev_d_wu_w(TCGv_i64 
t, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t1; + + t1 = tcg_temp_new_i64(); + gen_vmulwev_d_wu_w(t1, a, b); + tcg_gen_add_i64(t, t, t1); +} + +static void do_vmaddwev_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_shli_vec, INDEX_op_sari_vec, + INDEX_op_mul_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen3 op[3] = { + { + .fniv = gen_vmaddwev_u_s, + .fno = gen_helper_vmaddwev_h_bu_b, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_16 + }, + { + .fni4 = gen_vmaddwev_w_hu_h, + .fniv = gen_vmaddwev_u_s, + .fno = gen_helper_vmaddwev_w_hu_h, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_32 + }, + { + .fni8 = gen_vmaddwev_d_wu_w, + .fniv = gen_vmaddwev_u_s, + .fno = gen_helper_vmaddwev_d_wu_w, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_64 + }, + }; + + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); +} + +TRANS(vmaddwev_h_bu_b, LSX, gvec_vvv, MO_8, do_vmaddwev_u_s) +TRANS(vmaddwev_w_hu_h, LSX, gvec_vvv, MO_16, do_vmaddwev_u_s) +TRANS(vmaddwev_d_wu_w, LSX, gvec_vvv, MO_32, do_vmaddwev_u_s) +TRANS(xvmaddwev_h_bu_b, LASX, gvec_xxx, MO_8, do_vmaddwev_u_s) +TRANS(xvmaddwev_w_hu_h, LASX, gvec_xxx, MO_16, do_vmaddwev_u_s) +TRANS(xvmaddwev_d_wu_w, LASX, gvec_xxx, MO_32, do_vmaddwev_u_s) + +static void gen_vmaddwod_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t1, t2, t3; + int halfbits = 4 << vece; + + t1 = tcg_temp_new_vec_matching(a); + t2 = tcg_temp_new_vec_matching(b); + t3 = tcg_temp_new_vec_matching(t); + tcg_gen_shri_vec(vece, t1, a, halfbits); + tcg_gen_sari_vec(vece, t2, b, halfbits); + tcg_gen_mul_vec(vece, t3, t1, t2); + tcg_gen_add_vec(vece, t, t, t3); +} + +static void gen_vmaddwod_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t1; + + t1 = tcg_temp_new_i32(); + gen_vmulwod_w_hu_h(t1, a, b); + tcg_gen_add_i32(t, t, t1); +} + +static void gen_vmaddwod_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t1; + + t1 = tcg_temp_new_i64(); + gen_vmulwod_d_wu_w(t1, a, b); + tcg_gen_add_i64(t, t, t1); +} + +static void do_vmaddwod_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_shri_vec, INDEX_op_sari_vec, + INDEX_op_mul_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen3 op[3] = { + { + .fniv = gen_vmaddwod_u_s, + .fno = gen_helper_vmaddwod_h_bu_b, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_16 + }, + { + .fni4 = gen_vmaddwod_w_hu_h, + .fniv = gen_vmaddwod_u_s, + .fno = gen_helper_vmaddwod_w_hu_h, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_32 + }, + { + .fni8 = gen_vmaddwod_d_wu_w, + .fniv = gen_vmaddwod_u_s, + .fno = gen_helper_vmaddwod_d_wu_w, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_64 + }, + }; + + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); +} + +TRANS(vmaddwod_h_bu_b, LSX, gvec_vvv, MO_8, do_vmaddwod_u_s) +TRANS(vmaddwod_w_hu_h, LSX, gvec_vvv, MO_16, do_vmaddwod_u_s) +TRANS(vmaddwod_d_wu_w, LSX, gvec_vvv, MO_32, do_vmaddwod_u_s) +TRANS(xvmaddwod_h_bu_b, LASX, gvec_xxx, MO_8, do_vmaddwod_u_s) +TRANS(xvmaddwod_w_hu_h, LASX, gvec_xxx, MO_16, do_vmaddwod_u_s) +TRANS(xvmaddwod_d_wu_w, LASX, gvec_xxx, MO_32, do_vmaddwod_u_s) + +TRANS(vdiv_b, LSX, gen_vvv, gen_helper_vdiv_b) +TRANS(vdiv_h, LSX, gen_vvv, gen_helper_vdiv_h) +TRANS(vdiv_w, LSX, gen_vvv, gen_helper_vdiv_w) +TRANS(vdiv_d, LSX, gen_vvv, 
gen_helper_vdiv_d) +TRANS(vdiv_bu, LSX, gen_vvv, gen_helper_vdiv_bu) +TRANS(vdiv_hu, LSX, gen_vvv, gen_helper_vdiv_hu) +TRANS(vdiv_wu, LSX, gen_vvv, gen_helper_vdiv_wu) +TRANS(vdiv_du, LSX, gen_vvv, gen_helper_vdiv_du) +TRANS(vmod_b, LSX, gen_vvv, gen_helper_vmod_b) +TRANS(vmod_h, LSX, gen_vvv, gen_helper_vmod_h) +TRANS(vmod_w, LSX, gen_vvv, gen_helper_vmod_w) +TRANS(vmod_d, LSX, gen_vvv, gen_helper_vmod_d) +TRANS(vmod_bu, LSX, gen_vvv, gen_helper_vmod_bu) +TRANS(vmod_hu, LSX, gen_vvv, gen_helper_vmod_hu) +TRANS(vmod_wu, LSX, gen_vvv, gen_helper_vmod_wu) +TRANS(vmod_du, LSX, gen_vvv, gen_helper_vmod_du) +TRANS(xvdiv_b, LASX, gen_xxx, gen_helper_vdiv_b) +TRANS(xvdiv_h, LASX, gen_xxx, gen_helper_vdiv_h) +TRANS(xvdiv_w, LASX, gen_xxx, gen_helper_vdiv_w) +TRANS(xvdiv_d, LASX, gen_xxx, gen_helper_vdiv_d) +TRANS(xvdiv_bu, LASX, gen_xxx, gen_helper_vdiv_bu) +TRANS(xvdiv_hu, LASX, gen_xxx, gen_helper_vdiv_hu) +TRANS(xvdiv_wu, LASX, gen_xxx, gen_helper_vdiv_wu) +TRANS(xvdiv_du, LASX, gen_xxx, gen_helper_vdiv_du) +TRANS(xvmod_b, LASX, gen_xxx, gen_helper_vmod_b) +TRANS(xvmod_h, LASX, gen_xxx, gen_helper_vmod_h) +TRANS(xvmod_w, LASX, gen_xxx, gen_helper_vmod_w) +TRANS(xvmod_d, LASX, gen_xxx, gen_helper_vmod_d) +TRANS(xvmod_bu, LASX, gen_xxx, gen_helper_vmod_bu) +TRANS(xvmod_hu, LASX, gen_xxx, gen_helper_vmod_hu) +TRANS(xvmod_wu, LASX, gen_xxx, gen_helper_vmod_wu) +TRANS(xvmod_du, LASX, gen_xxx, gen_helper_vmod_du) + +static void gen_vsat_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec max) +{ + TCGv_vec min; + + min = tcg_temp_new_vec_matching(t); + tcg_gen_not_vec(vece, min, max); + tcg_gen_smax_vec(vece, t, a, min); + tcg_gen_smin_vec(vece, t, t, max); +} + +static void do_vsat_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, + int64_t imm, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_smax_vec, INDEX_op_smin_vec, 0 + }; + static const GVecGen2s op[4] = { + { + .fniv = gen_vsat_s, + .fno = gen_helper_vsat_b, + .opt_opc = vecop_list, + .vece = MO_8 + }, + { + .fniv = gen_vsat_s, + .fno = gen_helper_vsat_h, + .opt_opc = vecop_list, + .vece = MO_16 + }, + { + .fniv = gen_vsat_s, + .fno = gen_helper_vsat_w, + .opt_opc = vecop_list, + .vece = MO_32 + }, + { + .fniv = gen_vsat_s, + .fno = gen_helper_vsat_d, + .opt_opc = vecop_list, + .vece = MO_64 + }, + }; + + tcg_gen_gvec_2s(vd_ofs, vj_ofs, oprsz, maxsz, + tcg_constant_i64((1ll<< imm) -1), &op[vece]); +} + +TRANS(vsat_b, LSX, gvec_vv_i, MO_8, do_vsat_s) +TRANS(vsat_h, LSX, gvec_vv_i, MO_16, do_vsat_s) +TRANS(vsat_w, LSX, gvec_vv_i, MO_32, do_vsat_s) +TRANS(vsat_d, LSX, gvec_vv_i, MO_64, do_vsat_s) +TRANS(xvsat_b, LASX, gvec_xx_i, MO_8, do_vsat_s) +TRANS(xvsat_h, LASX, gvec_xx_i, MO_16, do_vsat_s) +TRANS(xvsat_w, LASX, gvec_xx_i, MO_32, do_vsat_s) +TRANS(xvsat_d, LASX, gvec_xx_i, MO_64, do_vsat_s) + +static void gen_vsat_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec max) +{ + tcg_gen_umin_vec(vece, t, a, max); +} + +static void do_vsat_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, + int64_t imm, uint32_t oprsz, uint32_t maxsz) +{ + uint64_t max; + static const TCGOpcode vecop_list[] = { + INDEX_op_umin_vec, 0 + }; + static const GVecGen2s op[4] = { + { + .fniv = gen_vsat_u, + .fno = gen_helper_vsat_bu, + .opt_opc = vecop_list, + .vece = MO_8 + }, + { + .fniv = gen_vsat_u, + .fno = gen_helper_vsat_hu, + .opt_opc = vecop_list, + .vece = MO_16 + }, + { + .fniv = gen_vsat_u, + .fno = gen_helper_vsat_wu, + .opt_opc = vecop_list, + .vece = MO_32 + }, + { + .fniv = gen_vsat_u, + .fno = gen_helper_vsat_du, 
+ .opt_opc = vecop_list, + .vece = MO_64 + }, + }; + + max = (imm == 0x3f) ? UINT64_MAX : (1ull << (imm + 1)) - 1; + tcg_gen_gvec_2s(vd_ofs, vj_ofs, oprsz, maxsz, + tcg_constant_i64(max), &op[vece]); +} + +TRANS(vsat_bu, LSX, gvec_vv_i, MO_8, do_vsat_u) +TRANS(vsat_hu, LSX, gvec_vv_i, MO_16, do_vsat_u) +TRANS(vsat_wu, LSX, gvec_vv_i, MO_32, do_vsat_u) +TRANS(vsat_du, LSX, gvec_vv_i, MO_64, do_vsat_u) +TRANS(xvsat_bu, LASX, gvec_xx_i, MO_8, do_vsat_u) +TRANS(xvsat_hu, LASX, gvec_xx_i, MO_16, do_vsat_u) +TRANS(xvsat_wu, LASX, gvec_xx_i, MO_32, do_vsat_u) +TRANS(xvsat_du, LASX, gvec_xx_i, MO_64, do_vsat_u) + +TRANS(vexth_h_b, LSX, gen_vv, gen_helper_vexth_h_b) +TRANS(vexth_w_h, LSX, gen_vv, gen_helper_vexth_w_h) +TRANS(vexth_d_w, LSX, gen_vv, gen_helper_vexth_d_w) +TRANS(vexth_q_d, LSX, gen_vv, gen_helper_vexth_q_d) +TRANS(vexth_hu_bu, LSX, gen_vv, gen_helper_vexth_hu_bu) +TRANS(vexth_wu_hu, LSX, gen_vv, gen_helper_vexth_wu_hu) +TRANS(vexth_du_wu, LSX, gen_vv, gen_helper_vexth_du_wu) +TRANS(vexth_qu_du, LSX, gen_vv, gen_helper_vexth_qu_du) +TRANS(xvexth_h_b, LASX, gen_xx, gen_helper_vexth_h_b) +TRANS(xvexth_w_h, LASX, gen_xx, gen_helper_vexth_w_h) +TRANS(xvexth_d_w, LASX, gen_xx, gen_helper_vexth_d_w) +TRANS(xvexth_q_d, LASX, gen_xx, gen_helper_vexth_q_d) +TRANS(xvexth_hu_bu, LASX, gen_xx, gen_helper_vexth_hu_bu) +TRANS(xvexth_wu_hu, LASX, gen_xx, gen_helper_vexth_wu_hu) +TRANS(xvexth_du_wu, LASX, gen_xx, gen_helper_vexth_du_wu) +TRANS(xvexth_qu_du, LASX, gen_xx, gen_helper_vexth_qu_du) + +TRANS(vext2xv_h_b, LASX, gen_xx, gen_helper_vext2xv_h_b) +TRANS(vext2xv_w_b, LASX, gen_xx, gen_helper_vext2xv_w_b) +TRANS(vext2xv_d_b, LASX, gen_xx, gen_helper_vext2xv_d_b) +TRANS(vext2xv_w_h, LASX, gen_xx, gen_helper_vext2xv_w_h) +TRANS(vext2xv_d_h, LASX, gen_xx, gen_helper_vext2xv_d_h) +TRANS(vext2xv_d_w, LASX, gen_xx, gen_helper_vext2xv_d_w) +TRANS(vext2xv_hu_bu, LASX, gen_xx, gen_helper_vext2xv_hu_bu) +TRANS(vext2xv_wu_bu, LASX, gen_xx, gen_helper_vext2xv_wu_bu) +TRANS(vext2xv_du_bu, LASX, gen_xx, gen_helper_vext2xv_du_bu) +TRANS(vext2xv_wu_hu, LASX, gen_xx, gen_helper_vext2xv_wu_hu) +TRANS(vext2xv_du_hu, LASX, gen_xx, gen_helper_vext2xv_du_hu) +TRANS(vext2xv_du_wu, LASX, gen_xx, gen_helper_vext2xv_du_wu) + +static void gen_vsigncov(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t1, zero; + + t1 = tcg_temp_new_vec_matching(t); + zero = tcg_constant_vec_matching(t, vece, 0); + + tcg_gen_neg_vec(vece, t1, b); + tcg_gen_cmpsel_vec(TCG_COND_LT, vece, t, a, zero, t1, b); + tcg_gen_cmpsel_vec(TCG_COND_EQ, vece, t, a, zero, zero, t); +} + +static void do_vsigncov(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_neg_vec, INDEX_op_cmpsel_vec, 0 + }; + static const GVecGen3 op[4] = { + { + .fniv = gen_vsigncov, + .fno = gen_helper_vsigncov_b, + .opt_opc = vecop_list, + .vece = MO_8 + }, + { + .fniv = gen_vsigncov, + .fno = gen_helper_vsigncov_h, + .opt_opc = vecop_list, + .vece = MO_16 + }, + { + .fniv = gen_vsigncov, + .fno = gen_helper_vsigncov_w, + .opt_opc = vecop_list, + .vece = MO_32 + }, + { + .fniv = gen_vsigncov, + .fno = gen_helper_vsigncov_d, + .opt_opc = vecop_list, + .vece = MO_64 + }, + }; + + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); +} + +TRANS(vsigncov_b, LSX, gvec_vvv, MO_8, do_vsigncov) +TRANS(vsigncov_h, LSX, gvec_vvv, MO_16, do_vsigncov) +TRANS(vsigncov_w, LSX, gvec_vvv, MO_32, do_vsigncov) +TRANS(vsigncov_d, LSX, gvec_vvv, MO_64, do_vsigncov) 
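The gen_vsigncov() expansion just above encodes VSIGNCOV as a vector negate followed by two cmpsel operations. A minimal standalone sketch of the per-element behaviour that sequence produces (plain C, illustrative only; vsigncov_ref and the sample values are made up for this note, not part of the patch):

    #include <stdint.h>
    #include <stdio.h>

    /*
     * One VSIGNCOV lane (byte element shown), matching the
     * neg_vec + cmpsel_vec(LT) + cmpsel_vec(EQ) sequence in gen_vsigncov():
     *   vj < 0  -> -vk,    vj == 0 -> 0,    vj > 0 -> vk
     */
    static int8_t vsigncov_ref(int8_t vj, int8_t vk)
    {
        return vj < 0 ? (int8_t)-vk : (vj == 0 ? 0 : vk);
    }

    int main(void)
    {
        printf("%d %d %d\n",
               vsigncov_ref(-3, 7),  /* -7 */
               vsigncov_ref(0, 7),   /*  0 */
               vsigncov_ref(3, 7));  /*  7 */
        return 0;
    }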
+TRANS(xvsigncov_b, LASX, gvec_xxx, MO_8, do_vsigncov) +TRANS(xvsigncov_h, LASX, gvec_xxx, MO_16, do_vsigncov) +TRANS(xvsigncov_w, LASX, gvec_xxx, MO_32, do_vsigncov) +TRANS(xvsigncov_d, LASX, gvec_xxx, MO_64, do_vsigncov) + +TRANS(vmskltz_b, LSX, gen_vv, gen_helper_vmskltz_b) +TRANS(vmskltz_h, LSX, gen_vv, gen_helper_vmskltz_h) +TRANS(vmskltz_w, LSX, gen_vv, gen_helper_vmskltz_w) +TRANS(vmskltz_d, LSX, gen_vv, gen_helper_vmskltz_d) +TRANS(vmskgez_b, LSX, gen_vv, gen_helper_vmskgez_b) +TRANS(vmsknz_b, LSX, gen_vv, gen_helper_vmsknz_b) +TRANS(xvmskltz_b, LASX, gen_xx, gen_helper_vmskltz_b) +TRANS(xvmskltz_h, LASX, gen_xx, gen_helper_vmskltz_h) +TRANS(xvmskltz_w, LASX, gen_xx, gen_helper_vmskltz_w) +TRANS(xvmskltz_d, LASX, gen_xx, gen_helper_vmskltz_d) +TRANS(xvmskgez_b, LASX, gen_xx, gen_helper_vmskgez_b) +TRANS(xvmsknz_b, LASX, gen_xx, gen_helper_vmsknz_b) + +#define EXPAND_BYTE(bit) ((uint64_t)(bit ? 0xff : 0)) + +static uint64_t vldi_get_value(DisasContext *ctx, uint32_t imm) +{ + int mode; + uint64_t data, t; + + /* + * imm bit [11:8] is mode, mode value is 0-12. + * other values are invalid. + */ + mode = (imm >> 8) & 0xf; + t = imm & 0xff; + switch (mode) { + case 0: + /* data: {2{24'0, imm[7:0]}} */ + data = (t << 32) | t ; + break; + case 1: + /* data: {2{16'0, imm[7:0], 8'0}} */ + data = (t << 24) | (t << 8); + break; + case 2: + /* data: {2{8'0, imm[7:0], 16'0}} */ + data = (t << 48) | (t << 16); + break; + case 3: + /* data: {2{imm[7:0], 24'0}} */ + data = (t << 56) | (t << 24); + break; + case 4: + /* data: {4{8'0, imm[7:0]}} */ + data = (t << 48) | (t << 32) | (t << 16) | t; + break; + case 5: + /* data: {4{imm[7:0], 8'0}} */ + data = (t << 56) |(t << 40) | (t << 24) | (t << 8); + break; + case 6: + /* data: {2{16'0, imm[7:0], 8'1}} */ + data = (t << 40) | ((uint64_t)0xff << 32) | (t << 8) | 0xff; + break; + case 7: + /* data: {2{8'0, imm[7:0], 16'1}} */ + data = (t << 48) | ((uint64_t)0xffff << 32) | (t << 16) | 0xffff; + break; + case 8: + /* data: {8{imm[7:0]}} */ + data =(t << 56) | (t << 48) | (t << 40) | (t << 32) | + (t << 24) | (t << 16) | (t << 8) | t; + break; + case 9: + /* data: {{8{imm[7]}, ..., 8{imm[0]}}} */ + { + uint64_t b0,b1,b2,b3,b4,b5,b6,b7; + b0 = t& 0x1; + b1 = (t & 0x2) >> 1; + b2 = (t & 0x4) >> 2; + b3 = (t & 0x8) >> 3; + b4 = (t & 0x10) >> 4; + b5 = (t & 0x20) >> 5; + b6 = (t & 0x40) >> 6; + b7 = (t & 0x80) >> 7; + data = (EXPAND_BYTE(b7) << 56) | + (EXPAND_BYTE(b6) << 48) | + (EXPAND_BYTE(b5) << 40) | + (EXPAND_BYTE(b4) << 32) | + (EXPAND_BYTE(b3) << 24) | + (EXPAND_BYTE(b2) << 16) | + (EXPAND_BYTE(b1) << 8) | + EXPAND_BYTE(b0); + } + break; + case 10: + /* data: {2{imm[7], ~imm[6], {5{imm[6]}}, imm[5:0], 19'0}} */ + { + uint64_t b6, b7; + uint64_t t0, t1; + b6 = (imm & 0x40) >> 6; + b7 = (imm & 0x80) >> 7; + t0 = (imm & 0x3f); + t1 = (b7 << 6) | ((1-b6) << 5) | (uint64_t)(b6 ? 0x1f : 0); + data = (t1 << 57) | (t0 << 51) | (t1 << 25) | (t0 << 19); + } + break; + case 11: + /* data: {32'0, imm[7], ~{imm[6]}, 5{imm[6]}, imm[5:0], 19'0} */ + { + uint64_t b6,b7; + uint64_t t0, t1; + b6 = (imm & 0x40) >> 6; + b7 = (imm & 0x80) >> 7; + t0 = (imm & 0x3f); + t1 = (b7 << 6) | ((1-b6) << 5) | (b6 ? 0x1f : 0); + data = (t1 << 25) | (t0 << 19); + } + break; + case 12: + /* data: {imm[7], ~imm[6], 8{imm[6]}, imm[5:0], 48'0} */ + { + uint64_t b6,b7; + uint64_t t0, t1; + b6 = (imm & 0x40) >> 6; + b7 = (imm & 0x80) >> 7; + t0 = (imm & 0x3f); + t1 = (b7 << 9) | ((1-b6) << 8) | (b6 ? 
0xff : 0); + data = (t1 << 54) | (t0 << 48); + } + break; + default: + generate_exception(ctx, EXCCODE_INE); + g_assert_not_reached(); + } + return data; +} + +static bool gen_vldi(DisasContext *ctx, arg_vldi *a, uint32_t oprsz) +{ + int sel, vece; + uint64_t value; + + if (!check_vec(ctx, oprsz)) { + return true; + } + + sel = (a->imm >> 12) & 0x1; + + if (sel) { + value = vldi_get_value(ctx, a->imm); + vece = MO_64; + } else { + value = ((int32_t)(a->imm << 22)) >> 22; + vece = (a->imm >> 10) & 0x3; + } + + tcg_gen_gvec_dup_i64(vece, vec_full_offset(a->vd), oprsz, ctx->vl/8, + tcg_constant_i64(value)); + return true; +} + +TRANS(vldi, LSX, gen_vldi, 16) +TRANS(xvldi, LASX, gen_vldi, 32) + +static bool gen_vandn_v(DisasContext *ctx, arg_vvv *a, uint32_t oprsz) +{ + uint32_t vd_ofs, vj_ofs, vk_ofs; + + if (!check_vec(ctx, oprsz)) { + return true; + } + + vd_ofs = vec_full_offset(a->vd); + vj_ofs = vec_full_offset(a->vj); + vk_ofs = vec_full_offset(a->vk); + + tcg_gen_gvec_andc(MO_64, vd_ofs, vk_ofs, vj_ofs, oprsz, ctx->vl / 8); + return true; +} + +static void gen_vnori(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm) +{ + TCGv_vec t1; + + t1 = tcg_constant_vec_matching(t, vece, imm); + tcg_gen_nor_vec(vece, t, a, t1); +} + +static void gen_vnori_b(TCGv_i64 t, TCGv_i64 a, int64_t imm) +{ + tcg_gen_movi_i64(t, dup_const(MO_8, imm)); + tcg_gen_nor_i64(t, a, t); +} + +static void do_vnori_b(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, + int64_t imm, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_nor_vec, 0 + }; + static const GVecGen2i op = { + .fni8 = gen_vnori_b, + .fniv = gen_vnori, + .fnoi = gen_helper_vnori_b, + .opt_opc = vecop_list, + .vece = MO_8 + }; + + tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op); +} + +TRANS(vand_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_and) +TRANS(vor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_or) +TRANS(vxor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_xor) +TRANS(vnor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_nor) +TRANS(vandn_v, LSX, gen_vandn_v, 16) +TRANS(vorn_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_orc) +TRANS(vandi_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_andi) +TRANS(vori_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_ori) +TRANS(vxori_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_xori) +TRANS(vnori_b, LSX, gvec_vv_i, MO_8, do_vnori_b) +TRANS(xvand_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_and) +TRANS(xvor_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_or) +TRANS(xvxor_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_xor) +TRANS(xvnor_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_nor) +TRANS(xvandn_v, LASX, gen_vandn_v, 32) +TRANS(xvorn_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_orc) +TRANS(xvandi_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_andi) +TRANS(xvori_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_ori) +TRANS(xvxori_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_xori) +TRANS(xvnori_b, LASX, gvec_xx_i, MO_8, do_vnori_b) + +TRANS(vsll_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_shlv) +TRANS(vsll_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_shlv) +TRANS(vsll_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_shlv) +TRANS(vsll_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_shlv) +TRANS(vslli_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_shli) +TRANS(vslli_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_shli) +TRANS(vslli_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_shli) +TRANS(vslli_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_shli) +TRANS(xvsll_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_shlv) +TRANS(xvsll_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_shlv) +TRANS(xvsll_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_shlv) +TRANS(xvsll_d, LASX, gvec_xxx, MO_64, 
tcg_gen_gvec_shlv) +TRANS(xvslli_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_shli) +TRANS(xvslli_h, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_shli) +TRANS(xvslli_w, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_shli) +TRANS(xvslli_d, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_shli) + +TRANS(vsrl_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_shrv) +TRANS(vsrl_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_shrv) +TRANS(vsrl_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_shrv) +TRANS(vsrl_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_shrv) +TRANS(vsrli_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_shri) +TRANS(vsrli_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_shri) +TRANS(vsrli_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_shri) +TRANS(vsrli_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_shri) +TRANS(xvsrl_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_shrv) +TRANS(xvsrl_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_shrv) +TRANS(xvsrl_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_shrv) +TRANS(xvsrl_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_shrv) +TRANS(xvsrli_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_shri) +TRANS(xvsrli_h, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_shri) +TRANS(xvsrli_w, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_shri) +TRANS(xvsrli_d, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_shri) + +TRANS(vsra_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_sarv) +TRANS(vsra_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_sarv) +TRANS(vsra_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_sarv) +TRANS(vsra_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_sarv) +TRANS(vsrai_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_sari) +TRANS(vsrai_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_sari) +TRANS(vsrai_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_sari) +TRANS(vsrai_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_sari) +TRANS(xvsra_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_sarv) +TRANS(xvsra_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_sarv) +TRANS(xvsra_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_sarv) +TRANS(xvsra_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_sarv) +TRANS(xvsrai_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_sari) +TRANS(xvsrai_h, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_sari) +TRANS(xvsrai_w, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_sari) +TRANS(xvsrai_d, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_sari) + +TRANS(vrotr_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_rotrv) +TRANS(vrotr_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_rotrv) +TRANS(vrotr_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_rotrv) +TRANS(vrotr_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_rotrv) +TRANS(vrotri_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_rotri) +TRANS(vrotri_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_rotri) +TRANS(vrotri_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_rotri) +TRANS(vrotri_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_rotri) +TRANS(xvrotr_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_rotrv) +TRANS(xvrotr_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_rotrv) +TRANS(xvrotr_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_rotrv) +TRANS(xvrotr_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_rotrv) +TRANS(xvrotri_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_rotri) +TRANS(xvrotri_h, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_rotri) +TRANS(xvrotri_w, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_rotri) +TRANS(xvrotri_d, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_rotri) + +TRANS(vsllwil_h_b, LSX, gen_vv_i, gen_helper_vsllwil_h_b) +TRANS(vsllwil_w_h, LSX, gen_vv_i, gen_helper_vsllwil_w_h) +TRANS(vsllwil_d_w, LSX, gen_vv_i, gen_helper_vsllwil_d_w) +TRANS(vextl_q_d, LSX, gen_vv, gen_helper_vextl_q_d) +TRANS(vsllwil_hu_bu, LSX, gen_vv_i, gen_helper_vsllwil_hu_bu) +TRANS(vsllwil_wu_hu, LSX, gen_vv_i, gen_helper_vsllwil_wu_hu) +TRANS(vsllwil_du_wu, LSX, gen_vv_i, gen_helper_vsllwil_du_wu) +TRANS(vextl_qu_du, LSX, gen_vv, gen_helper_vextl_qu_du) 
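Two of the logical-op expansions a little earlier in this file rely on operand details that are easy to miss in the gvec calls: gen_vandn_v() passes vk before vj to tcg_gen_gvec_andc(), so VANDN.V computes ~vj & vk, and gen_vnori_b() replicates the 8-bit immediate across the 64-bit lane (dup_const) before the NOR. A small standalone sketch of that per-lane arithmetic (plain C, illustrative only; dup8, vandn_ref and vnori_ref are made-up names, not from the patch):

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Replicate an 8-bit immediate across a 64-bit lane, like dup_const(MO_8, imm). */
    static uint64_t dup8(uint8_t imm)
    {
        return UINT64_C(0x0101010101010101) * imm;
    }

    /* VANDN.V: vd = ~vj & vk (note the swapped operand order in gen_vandn_v()). */
    static uint64_t vandn_ref(uint64_t vj, uint64_t vk)
    {
        return ~vj & vk;
    }

    /* VNORI.B: vd = ~(vj | dup8(imm)), as in gen_vnori_b(). */
    static uint64_t vnori_ref(uint64_t vj, uint8_t imm)
    {
        return ~(vj | dup8(imm));
    }

    int main(void)
    {
        uint64_t vj = UINT64_C(0x00ff00ff00ff00ff);
        uint64_t vk = UINT64_C(0x123456789abcdef0);

        printf("%016" PRIx64 "\n", vandn_ref(vj, vk));   /* 120056009a00de00 */
        printf("%016" PRIx64 "\n", vnori_ref(vj, 0x0f)); /* f000f000f000f000 */
        return 0;
    }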
+TRANS(xvsllwil_h_b, LASX, gen_xx_i, gen_helper_vsllwil_h_b) +TRANS(xvsllwil_w_h, LASX, gen_xx_i, gen_helper_vsllwil_w_h) +TRANS(xvsllwil_d_w, LASX, gen_xx_i, gen_helper_vsllwil_d_w) +TRANS(xvextl_q_d, LASX, gen_xx, gen_helper_vextl_q_d) +TRANS(xvsllwil_hu_bu, LASX, gen_xx_i, gen_helper_vsllwil_hu_bu) +TRANS(xvsllwil_wu_hu, LASX, gen_xx_i, gen_helper_vsllwil_wu_hu) +TRANS(xvsllwil_du_wu, LASX, gen_xx_i, gen_helper_vsllwil_du_wu) +TRANS(xvextl_qu_du, LASX, gen_xx, gen_helper_vextl_qu_du) + +TRANS(vsrlr_b, LSX, gen_vvv, gen_helper_vsrlr_b) +TRANS(vsrlr_h, LSX, gen_vvv, gen_helper_vsrlr_h) +TRANS(vsrlr_w, LSX, gen_vvv, gen_helper_vsrlr_w) +TRANS(vsrlr_d, LSX, gen_vvv, gen_helper_vsrlr_d) +TRANS(vsrlri_b, LSX, gen_vv_i, gen_helper_vsrlri_b) +TRANS(vsrlri_h, LSX, gen_vv_i, gen_helper_vsrlri_h) +TRANS(vsrlri_w, LSX, gen_vv_i, gen_helper_vsrlri_w) +TRANS(vsrlri_d, LSX, gen_vv_i, gen_helper_vsrlri_d) +TRANS(xvsrlr_b, LASX, gen_xxx, gen_helper_vsrlr_b) +TRANS(xvsrlr_h, LASX, gen_xxx, gen_helper_vsrlr_h) +TRANS(xvsrlr_w, LASX, gen_xxx, gen_helper_vsrlr_w) +TRANS(xvsrlr_d, LASX, gen_xxx, gen_helper_vsrlr_d) +TRANS(xvsrlri_b, LASX, gen_xx_i, gen_helper_vsrlri_b) +TRANS(xvsrlri_h, LASX, gen_xx_i, gen_helper_vsrlri_h) +TRANS(xvsrlri_w, LASX, gen_xx_i, gen_helper_vsrlri_w) +TRANS(xvsrlri_d, LASX, gen_xx_i, gen_helper_vsrlri_d) + +TRANS(vsrar_b, LSX, gen_vvv, gen_helper_vsrar_b) +TRANS(vsrar_h, LSX, gen_vvv, gen_helper_vsrar_h) +TRANS(vsrar_w, LSX, gen_vvv, gen_helper_vsrar_w) +TRANS(vsrar_d, LSX, gen_vvv, gen_helper_vsrar_d) +TRANS(vsrari_b, LSX, gen_vv_i, gen_helper_vsrari_b) +TRANS(vsrari_h, LSX, gen_vv_i, gen_helper_vsrari_h) +TRANS(vsrari_w, LSX, gen_vv_i, gen_helper_vsrari_w) +TRANS(vsrari_d, LSX, gen_vv_i, gen_helper_vsrari_d) +TRANS(xvsrar_b, LASX, gen_xxx, gen_helper_vsrar_b) +TRANS(xvsrar_h, LASX, gen_xxx, gen_helper_vsrar_h) +TRANS(xvsrar_w, LASX, gen_xxx, gen_helper_vsrar_w) +TRANS(xvsrar_d, LASX, gen_xxx, gen_helper_vsrar_d) +TRANS(xvsrari_b, LASX, gen_xx_i, gen_helper_vsrari_b) +TRANS(xvsrari_h, LASX, gen_xx_i, gen_helper_vsrari_h) +TRANS(xvsrari_w, LASX, gen_xx_i, gen_helper_vsrari_w) +TRANS(xvsrari_d, LASX, gen_xx_i, gen_helper_vsrari_d) + +TRANS(vsrln_b_h, LSX, gen_vvv, gen_helper_vsrln_b_h) +TRANS(vsrln_h_w, LSX, gen_vvv, gen_helper_vsrln_h_w) +TRANS(vsrln_w_d, LSX, gen_vvv, gen_helper_vsrln_w_d) +TRANS(vsran_b_h, LSX, gen_vvv, gen_helper_vsran_b_h) +TRANS(vsran_h_w, LSX, gen_vvv, gen_helper_vsran_h_w) +TRANS(vsran_w_d, LSX, gen_vvv, gen_helper_vsran_w_d) +TRANS(xvsrln_b_h, LASX, gen_xxx, gen_helper_vsrln_b_h) +TRANS(xvsrln_h_w, LASX, gen_xxx, gen_helper_vsrln_h_w) +TRANS(xvsrln_w_d, LASX, gen_xxx, gen_helper_vsrln_w_d) +TRANS(xvsran_b_h, LASX, gen_xxx, gen_helper_vsran_b_h) +TRANS(xvsran_h_w, LASX, gen_xxx, gen_helper_vsran_h_w) +TRANS(xvsran_w_d, LASX, gen_xxx, gen_helper_vsran_w_d) + +TRANS(vsrlni_b_h, LSX, gen_vv_i, gen_helper_vsrlni_b_h) +TRANS(vsrlni_h_w, LSX, gen_vv_i, gen_helper_vsrlni_h_w) +TRANS(vsrlni_w_d, LSX, gen_vv_i, gen_helper_vsrlni_w_d) +TRANS(vsrlni_d_q, LSX, gen_vv_i, gen_helper_vsrlni_d_q) +TRANS(vsrani_b_h, LSX, gen_vv_i, gen_helper_vsrani_b_h) +TRANS(vsrani_h_w, LSX, gen_vv_i, gen_helper_vsrani_h_w) +TRANS(vsrani_w_d, LSX, gen_vv_i, gen_helper_vsrani_w_d) +TRANS(vsrani_d_q, LSX, gen_vv_i, gen_helper_vsrani_d_q) +TRANS(xvsrlni_b_h, LASX, gen_xx_i, gen_helper_vsrlni_b_h) +TRANS(xvsrlni_h_w, LASX, gen_xx_i, gen_helper_vsrlni_h_w) +TRANS(xvsrlni_w_d, LASX, gen_xx_i, gen_helper_vsrlni_w_d) +TRANS(xvsrlni_d_q, LASX, gen_xx_i, gen_helper_vsrlni_d_q) +TRANS(xvsrani_b_h, 
LASX, gen_xx_i, gen_helper_vsrani_b_h) +TRANS(xvsrani_h_w, LASX, gen_xx_i, gen_helper_vsrani_h_w) +TRANS(xvsrani_w_d, LASX, gen_xx_i, gen_helper_vsrani_w_d) +TRANS(xvsrani_d_q, LASX, gen_xx_i, gen_helper_vsrani_d_q) + +TRANS(vsrlrn_b_h, LSX, gen_vvv, gen_helper_vsrlrn_b_h) +TRANS(vsrlrn_h_w, LSX, gen_vvv, gen_helper_vsrlrn_h_w) +TRANS(vsrlrn_w_d, LSX, gen_vvv, gen_helper_vsrlrn_w_d) +TRANS(vsrarn_b_h, LSX, gen_vvv, gen_helper_vsrarn_b_h) +TRANS(vsrarn_h_w, LSX, gen_vvv, gen_helper_vsrarn_h_w) +TRANS(vsrarn_w_d, LSX, gen_vvv, gen_helper_vsrarn_w_d) +TRANS(xvsrlrn_b_h, LASX, gen_xxx, gen_helper_vsrlrn_b_h) +TRANS(xvsrlrn_h_w, LASX, gen_xxx, gen_helper_vsrlrn_h_w) +TRANS(xvsrlrn_w_d, LASX, gen_xxx, gen_helper_vsrlrn_w_d) +TRANS(xvsrarn_b_h, LASX, gen_xxx, gen_helper_vsrarn_b_h) +TRANS(xvsrarn_h_w, LASX, gen_xxx, gen_helper_vsrarn_h_w) +TRANS(xvsrarn_w_d, LASX, gen_xxx, gen_helper_vsrarn_w_d) + +TRANS(vsrlrni_b_h, LSX, gen_vv_i, gen_helper_vsrlrni_b_h) +TRANS(vsrlrni_h_w, LSX, gen_vv_i, gen_helper_vsrlrni_h_w) +TRANS(vsrlrni_w_d, LSX, gen_vv_i, gen_helper_vsrlrni_w_d) +TRANS(vsrlrni_d_q, LSX, gen_vv_i, gen_helper_vsrlrni_d_q) +TRANS(vsrarni_b_h, LSX, gen_vv_i, gen_helper_vsrarni_b_h) +TRANS(vsrarni_h_w, LSX, gen_vv_i, gen_helper_vsrarni_h_w) +TRANS(vsrarni_w_d, LSX, gen_vv_i, gen_helper_vsrarni_w_d) +TRANS(vsrarni_d_q, LSX, gen_vv_i, gen_helper_vsrarni_d_q) +TRANS(xvsrlrni_b_h, LASX, gen_xx_i, gen_helper_vsrlrni_b_h) +TRANS(xvsrlrni_h_w, LASX, gen_xx_i, gen_helper_vsrlrni_h_w) +TRANS(xvsrlrni_w_d, LASX, gen_xx_i, gen_helper_vsrlrni_w_d) +TRANS(xvsrlrni_d_q, LASX, gen_xx_i, gen_helper_vsrlrni_d_q) +TRANS(xvsrarni_b_h, LASX, gen_xx_i, gen_helper_vsrarni_b_h) +TRANS(xvsrarni_h_w, LASX, gen_xx_i, gen_helper_vsrarni_h_w) +TRANS(xvsrarni_w_d, LASX, gen_xx_i, gen_helper_vsrarni_w_d) +TRANS(xvsrarni_d_q, LASX, gen_xx_i, gen_helper_vsrarni_d_q) + +TRANS(vssrln_b_h, LSX, gen_vvv, gen_helper_vssrln_b_h) +TRANS(vssrln_h_w, LSX, gen_vvv, gen_helper_vssrln_h_w) +TRANS(vssrln_w_d, LSX, gen_vvv, gen_helper_vssrln_w_d) +TRANS(vssran_b_h, LSX, gen_vvv, gen_helper_vssran_b_h) +TRANS(vssran_h_w, LSX, gen_vvv, gen_helper_vssran_h_w) +TRANS(vssran_w_d, LSX, gen_vvv, gen_helper_vssran_w_d) +TRANS(vssrln_bu_h, LSX, gen_vvv, gen_helper_vssrln_bu_h) +TRANS(vssrln_hu_w, LSX, gen_vvv, gen_helper_vssrln_hu_w) +TRANS(vssrln_wu_d, LSX, gen_vvv, gen_helper_vssrln_wu_d) +TRANS(vssran_bu_h, LSX, gen_vvv, gen_helper_vssran_bu_h) +TRANS(vssran_hu_w, LSX, gen_vvv, gen_helper_vssran_hu_w) +TRANS(vssran_wu_d, LSX, gen_vvv, gen_helper_vssran_wu_d) +TRANS(xvssrln_b_h, LASX, gen_xxx, gen_helper_vssrln_b_h) +TRANS(xvssrln_h_w, LASX, gen_xxx, gen_helper_vssrln_h_w) +TRANS(xvssrln_w_d, LASX, gen_xxx, gen_helper_vssrln_w_d) +TRANS(xvssran_b_h, LASX, gen_xxx, gen_helper_vssran_b_h) +TRANS(xvssran_h_w, LASX, gen_xxx, gen_helper_vssran_h_w) +TRANS(xvssran_w_d, LASX, gen_xxx, gen_helper_vssran_w_d) +TRANS(xvssrln_bu_h, LASX, gen_xxx, gen_helper_vssrln_bu_h) +TRANS(xvssrln_hu_w, LASX, gen_xxx, gen_helper_vssrln_hu_w) +TRANS(xvssrln_wu_d, LASX, gen_xxx, gen_helper_vssrln_wu_d) +TRANS(xvssran_bu_h, LASX, gen_xxx, gen_helper_vssran_bu_h) +TRANS(xvssran_hu_w, LASX, gen_xxx, gen_helper_vssran_hu_w) +TRANS(xvssran_wu_d, LASX, gen_xxx, gen_helper_vssran_wu_d) + +TRANS(vssrlni_b_h, LSX, gen_vv_i, gen_helper_vssrlni_b_h) +TRANS(vssrlni_h_w, LSX, gen_vv_i, gen_helper_vssrlni_h_w) +TRANS(vssrlni_w_d, LSX, gen_vv_i, gen_helper_vssrlni_w_d) +TRANS(vssrlni_d_q, LSX, gen_vv_i, gen_helper_vssrlni_d_q) +TRANS(vssrani_b_h, LSX, gen_vv_i, gen_helper_vssrani_b_h) 
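The saturating narrowing shifts in this block are left to out-of-line helpers, but the plain saturation case was expanded inline further up: do_vsat_s() clamps signed elements to [-(1 << imm), (1 << imm) - 1] (smax against ~max, then smin against max), and do_vsat_u() clamps unsigned elements to (1 << (imm + 1)) - 1, or UINT64_MAX when imm is 63. A per-element reference sketch of those bounds, shown for 64-bit elements (plain C, illustrative only; vsat_s_ref and vsat_u_ref are made-up names, and imm is assumed small enough that the shifts do not overflow):

    #include <stdint.h>
    #include <stdio.h>

    /* Signed clamp used by VSAT.*: [-(1 << imm), (1 << imm) - 1]. */
    static int64_t vsat_s_ref(int64_t x, unsigned imm)
    {
        int64_t max = (INT64_C(1) << imm) - 1;
        int64_t min = ~max;              /* == -(1 << imm) */

        return x < min ? min : (x > max ? max : x);
    }

    /* Unsigned clamp used by VSAT.*U: [0, (1 << (imm + 1)) - 1]. */
    static uint64_t vsat_u_ref(uint64_t x, unsigned imm)
    {
        uint64_t max = (imm == 63) ? UINT64_MAX
                                   : (UINT64_C(1) << (imm + 1)) - 1;

        return x < max ? x : max;
    }

    int main(void)
    {
        printf("%lld %lld\n",
               (long long)vsat_s_ref(1000, 7),    /*  127 */
               (long long)vsat_s_ref(-1000, 7));  /* -128 */
        printf("%llu\n",
               (unsigned long long)vsat_u_ref(1000, 7)); /* 255 */
        return 0;
    }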
+TRANS(vssrani_h_w, LSX, gen_vv_i, gen_helper_vssrani_h_w) +TRANS(vssrani_w_d, LSX, gen_vv_i, gen_helper_vssrani_w_d) +TRANS(vssrani_d_q, LSX, gen_vv_i, gen_helper_vssrani_d_q) +TRANS(vssrlni_bu_h, LSX, gen_vv_i, gen_helper_vssrlni_bu_h) +TRANS(vssrlni_hu_w, LSX, gen_vv_i, gen_helper_vssrlni_hu_w) +TRANS(vssrlni_wu_d, LSX, gen_vv_i, gen_helper_vssrlni_wu_d) +TRANS(vssrlni_du_q, LSX, gen_vv_i, gen_helper_vssrlni_du_q) +TRANS(vssrani_bu_h, LSX, gen_vv_i, gen_helper_vssrani_bu_h) +TRANS(vssrani_hu_w, LSX, gen_vv_i, gen_helper_vssrani_hu_w) +TRANS(vssrani_wu_d, LSX, gen_vv_i, gen_helper_vssrani_wu_d) +TRANS(vssrani_du_q, LSX, gen_vv_i, gen_helper_vssrani_du_q) +TRANS(xvssrlni_b_h, LASX, gen_xx_i, gen_helper_vssrlni_b_h) +TRANS(xvssrlni_h_w, LASX, gen_xx_i, gen_helper_vssrlni_h_w) +TRANS(xvssrlni_w_d, LASX, gen_xx_i, gen_helper_vssrlni_w_d) +TRANS(xvssrlni_d_q, LASX, gen_xx_i, gen_helper_vssrlni_d_q) +TRANS(xvssrani_b_h, LASX, gen_xx_i, gen_helper_vssrani_b_h) +TRANS(xvssrani_h_w, LASX, gen_xx_i, gen_helper_vssrani_h_w) +TRANS(xvssrani_w_d, LASX, gen_xx_i, gen_helper_vssrani_w_d) +TRANS(xvssrani_d_q, LASX, gen_xx_i, gen_helper_vssrani_d_q) +TRANS(xvssrlni_bu_h, LASX, gen_xx_i, gen_helper_vssrlni_bu_h) +TRANS(xvssrlni_hu_w, LASX, gen_xx_i, gen_helper_vssrlni_hu_w) +TRANS(xvssrlni_wu_d, LASX, gen_xx_i, gen_helper_vssrlni_wu_d) +TRANS(xvssrlni_du_q, LASX, gen_xx_i, gen_helper_vssrlni_du_q) +TRANS(xvssrani_bu_h, LASX, gen_xx_i, gen_helper_vssrani_bu_h) +TRANS(xvssrani_hu_w, LASX, gen_xx_i, gen_helper_vssrani_hu_w) +TRANS(xvssrani_wu_d, LASX, gen_xx_i, gen_helper_vssrani_wu_d) +TRANS(xvssrani_du_q, LASX, gen_xx_i, gen_helper_vssrani_du_q) + +TRANS(vssrlrn_b_h, LSX, gen_vvv, gen_helper_vssrlrn_b_h) +TRANS(vssrlrn_h_w, LSX, gen_vvv, gen_helper_vssrlrn_h_w) +TRANS(vssrlrn_w_d, LSX, gen_vvv, gen_helper_vssrlrn_w_d) +TRANS(vssrarn_b_h, LSX, gen_vvv, gen_helper_vssrarn_b_h) +TRANS(vssrarn_h_w, LSX, gen_vvv, gen_helper_vssrarn_h_w) +TRANS(vssrarn_w_d, LSX, gen_vvv, gen_helper_vssrarn_w_d) +TRANS(vssrlrn_bu_h, LSX, gen_vvv, gen_helper_vssrlrn_bu_h) +TRANS(vssrlrn_hu_w, LSX, gen_vvv, gen_helper_vssrlrn_hu_w) +TRANS(vssrlrn_wu_d, LSX, gen_vvv, gen_helper_vssrlrn_wu_d) +TRANS(vssrarn_bu_h, LSX, gen_vvv, gen_helper_vssrarn_bu_h) +TRANS(vssrarn_hu_w, LSX, gen_vvv, gen_helper_vssrarn_hu_w) +TRANS(vssrarn_wu_d, LSX, gen_vvv, gen_helper_vssrarn_wu_d) +TRANS(xvssrlrn_b_h, LASX, gen_xxx, gen_helper_vssrlrn_b_h) +TRANS(xvssrlrn_h_w, LASX, gen_xxx, gen_helper_vssrlrn_h_w) +TRANS(xvssrlrn_w_d, LASX, gen_xxx, gen_helper_vssrlrn_w_d) +TRANS(xvssrarn_b_h, LASX, gen_xxx, gen_helper_vssrarn_b_h) +TRANS(xvssrarn_h_w, LASX, gen_xxx, gen_helper_vssrarn_h_w) +TRANS(xvssrarn_w_d, LASX, gen_xxx, gen_helper_vssrarn_w_d) +TRANS(xvssrlrn_bu_h, LASX, gen_xxx, gen_helper_vssrlrn_bu_h) +TRANS(xvssrlrn_hu_w, LASX, gen_xxx, gen_helper_vssrlrn_hu_w) +TRANS(xvssrlrn_wu_d, LASX, gen_xxx, gen_helper_vssrlrn_wu_d) +TRANS(xvssrarn_bu_h, LASX, gen_xxx, gen_helper_vssrarn_bu_h) +TRANS(xvssrarn_hu_w, LASX, gen_xxx, gen_helper_vssrarn_hu_w) +TRANS(xvssrarn_wu_d, LASX, gen_xxx, gen_helper_vssrarn_wu_d) + +TRANS(vssrlrni_b_h, LSX, gen_vv_i, gen_helper_vssrlrni_b_h) +TRANS(vssrlrni_h_w, LSX, gen_vv_i, gen_helper_vssrlrni_h_w) +TRANS(vssrlrni_w_d, LSX, gen_vv_i, gen_helper_vssrlrni_w_d) +TRANS(vssrlrni_d_q, LSX, gen_vv_i, gen_helper_vssrlrni_d_q) +TRANS(vssrarni_b_h, LSX, gen_vv_i, gen_helper_vssrarni_b_h) +TRANS(vssrarni_h_w, LSX, gen_vv_i, gen_helper_vssrarni_h_w) +TRANS(vssrarni_w_d, LSX, gen_vv_i, gen_helper_vssrarni_w_d) +TRANS(vssrarni_d_q, LSX, 
gen_vv_i, gen_helper_vssrarni_d_q) +TRANS(vssrlrni_bu_h, LSX, gen_vv_i, gen_helper_vssrlrni_bu_h) +TRANS(vssrlrni_hu_w, LSX, gen_vv_i, gen_helper_vssrlrni_hu_w) +TRANS(vssrlrni_wu_d, LSX, gen_vv_i, gen_helper_vssrlrni_wu_d) +TRANS(vssrlrni_du_q, LSX, gen_vv_i, gen_helper_vssrlrni_du_q) +TRANS(vssrarni_bu_h, LSX, gen_vv_i, gen_helper_vssrarni_bu_h) +TRANS(vssrarni_hu_w, LSX, gen_vv_i, gen_helper_vssrarni_hu_w) +TRANS(vssrarni_wu_d, LSX, gen_vv_i, gen_helper_vssrarni_wu_d) +TRANS(vssrarni_du_q, LSX, gen_vv_i, gen_helper_vssrarni_du_q) +TRANS(xvssrlrni_b_h, LASX, gen_xx_i, gen_helper_vssrlrni_b_h) +TRANS(xvssrlrni_h_w, LASX, gen_xx_i, gen_helper_vssrlrni_h_w) +TRANS(xvssrlrni_w_d, LASX, gen_xx_i, gen_helper_vssrlrni_w_d) +TRANS(xvssrlrni_d_q, LASX, gen_xx_i, gen_helper_vssrlrni_d_q) +TRANS(xvssrarni_b_h, LASX, gen_xx_i, gen_helper_vssrarni_b_h) +TRANS(xvssrarni_h_w, LASX, gen_xx_i, gen_helper_vssrarni_h_w) +TRANS(xvssrarni_w_d, LASX, gen_xx_i, gen_helper_vssrarni_w_d) +TRANS(xvssrarni_d_q, LASX, gen_xx_i, gen_helper_vssrarni_d_q) +TRANS(xvssrlrni_bu_h, LASX, gen_xx_i, gen_helper_vssrlrni_bu_h) +TRANS(xvssrlrni_hu_w, LASX, gen_xx_i, gen_helper_vssrlrni_hu_w) +TRANS(xvssrlrni_wu_d, LASX, gen_xx_i, gen_helper_vssrlrni_wu_d) +TRANS(xvssrlrni_du_q, LASX, gen_xx_i, gen_helper_vssrlrni_du_q) +TRANS(xvssrarni_bu_h, LASX, gen_xx_i, gen_helper_vssrarni_bu_h) +TRANS(xvssrarni_hu_w, LASX, gen_xx_i, gen_helper_vssrarni_hu_w) +TRANS(xvssrarni_wu_d, LASX, gen_xx_i, gen_helper_vssrarni_wu_d) +TRANS(xvssrarni_du_q, LASX, gen_xx_i, gen_helper_vssrarni_du_q) + +TRANS(vclo_b, LSX, gen_vv, gen_helper_vclo_b) +TRANS(vclo_h, LSX, gen_vv, gen_helper_vclo_h) +TRANS(vclo_w, LSX, gen_vv, gen_helper_vclo_w) +TRANS(vclo_d, LSX, gen_vv, gen_helper_vclo_d) +TRANS(vclz_b, LSX, gen_vv, gen_helper_vclz_b) +TRANS(vclz_h, LSX, gen_vv, gen_helper_vclz_h) +TRANS(vclz_w, LSX, gen_vv, gen_helper_vclz_w) +TRANS(vclz_d, LSX, gen_vv, gen_helper_vclz_d) +TRANS(xvclo_b, LASX, gen_xx, gen_helper_vclo_b) +TRANS(xvclo_h, LASX, gen_xx, gen_helper_vclo_h) +TRANS(xvclo_w, LASX, gen_xx, gen_helper_vclo_w) +TRANS(xvclo_d, LASX, gen_xx, gen_helper_vclo_d) +TRANS(xvclz_b, LASX, gen_xx, gen_helper_vclz_b) +TRANS(xvclz_h, LASX, gen_xx, gen_helper_vclz_h) +TRANS(xvclz_w, LASX, gen_xx, gen_helper_vclz_w) +TRANS(xvclz_d, LASX, gen_xx, gen_helper_vclz_d) + +TRANS(vpcnt_b, LSX, gen_vv, gen_helper_vpcnt_b) +TRANS(vpcnt_h, LSX, gen_vv, gen_helper_vpcnt_h) +TRANS(vpcnt_w, LSX, gen_vv, gen_helper_vpcnt_w) +TRANS(vpcnt_d, LSX, gen_vv, gen_helper_vpcnt_d) +TRANS(xvpcnt_b, LASX, gen_xx, gen_helper_vpcnt_b) +TRANS(xvpcnt_h, LASX, gen_xx, gen_helper_vpcnt_h) +TRANS(xvpcnt_w, LASX, gen_xx, gen_helper_vpcnt_w) +TRANS(xvpcnt_d, LASX, gen_xx, gen_helper_vpcnt_d) + +static void do_vbit(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b, + void (*func)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec)) +{ + TCGv_vec mask, lsh, t1, one; + + lsh = tcg_temp_new_vec_matching(t); + t1 = tcg_temp_new_vec_matching(t); + mask = tcg_constant_vec_matching(t, vece, (8 << vece) - 1); + one = tcg_constant_vec_matching(t, vece, 1); + + tcg_gen_and_vec(vece, lsh, b, mask); + tcg_gen_shlv_vec(vece, t1, one, lsh); + func(vece, t, a, t1); +} + +static void gen_vbitclr(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) +{ + do_vbit(vece, t, a, b, tcg_gen_andc_vec); +} + +static void gen_vbitset(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) +{ + do_vbit(vece, t, a, b, tcg_gen_or_vec); +} + +static void gen_vbitrev(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b) +{ + do_vbit(vece, t, a, b, 
tcg_gen_xor_vec); +} + +static void do_vbitclr(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_shlv_vec, INDEX_op_andc_vec, 0 + }; + static const GVecGen3 op[4] = { + { + .fniv = gen_vbitclr, + .fno = gen_helper_vbitclr_b, + .opt_opc = vecop_list, + .vece = MO_8 + }, + { + .fniv = gen_vbitclr, + .fno = gen_helper_vbitclr_h, + .opt_opc = vecop_list, + .vece = MO_16 + }, + { + .fniv = gen_vbitclr, + .fno = gen_helper_vbitclr_w, + .opt_opc = vecop_list, + .vece = MO_32 + }, + { + .fniv = gen_vbitclr, + .fno = gen_helper_vbitclr_d, + .opt_opc = vecop_list, + .vece = MO_64 + }, + }; + + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); +} + +TRANS(vbitclr_b, LSX, gvec_vvv, MO_8, do_vbitclr) +TRANS(vbitclr_h, LSX, gvec_vvv, MO_16, do_vbitclr) +TRANS(vbitclr_w, LSX, gvec_vvv, MO_32, do_vbitclr) +TRANS(vbitclr_d, LSX, gvec_vvv, MO_64, do_vbitclr) +TRANS(xvbitclr_b, LASX, gvec_xxx, MO_8, do_vbitclr) +TRANS(xvbitclr_h, LASX, gvec_xxx, MO_16, do_vbitclr) +TRANS(xvbitclr_w, LASX, gvec_xxx, MO_32, do_vbitclr) +TRANS(xvbitclr_d, LASX, gvec_xxx, MO_64, do_vbitclr) + +static void do_vbiti(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm, + void (*func)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec)) +{ + int lsh; + TCGv_vec t1, one; + + lsh = imm & ((8 << vece) -1); + t1 = tcg_temp_new_vec_matching(t); + one = tcg_constant_vec_matching(t, vece, 1); + + tcg_gen_shli_vec(vece, t1, one, lsh); + func(vece, t, a, t1); +} + +static void gen_vbitclri(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm) +{ + do_vbiti(vece, t, a, imm, tcg_gen_andc_vec); +} + +static void gen_vbitseti(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm) +{ + do_vbiti(vece, t, a, imm, tcg_gen_or_vec); +} + +static void gen_vbitrevi(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm) +{ + do_vbiti(vece, t, a, imm, tcg_gen_xor_vec); +} + +static void do_vbitclri(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, + int64_t imm, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_shli_vec, INDEX_op_andc_vec, 0 + }; + static const GVecGen2i op[4] = { + { + .fniv = gen_vbitclri, + .fnoi = gen_helper_vbitclri_b, + .opt_opc = vecop_list, + .vece = MO_8 + }, + { + .fniv = gen_vbitclri, + .fnoi = gen_helper_vbitclri_h, + .opt_opc = vecop_list, + .vece = MO_16 + }, + { + .fniv = gen_vbitclri, + .fnoi = gen_helper_vbitclri_w, + .opt_opc = vecop_list, + .vece = MO_32 + }, + { + .fniv = gen_vbitclri, + .fnoi = gen_helper_vbitclri_d, + .opt_opc = vecop_list, + .vece = MO_64 + }, + }; + + tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]); +} + +TRANS(vbitclri_b, LSX, gvec_vv_i, MO_8, do_vbitclri) +TRANS(vbitclri_h, LSX, gvec_vv_i, MO_16, do_vbitclri) +TRANS(vbitclri_w, LSX, gvec_vv_i, MO_32, do_vbitclri) +TRANS(vbitclri_d, LSX, gvec_vv_i, MO_64, do_vbitclri) +TRANS(xvbitclri_b, LASX, gvec_xx_i, MO_8, do_vbitclri) +TRANS(xvbitclri_h, LASX, gvec_xx_i, MO_16, do_vbitclri) +TRANS(xvbitclri_w, LASX, gvec_xx_i, MO_32, do_vbitclri) +TRANS(xvbitclri_d, LASX, gvec_xx_i, MO_64, do_vbitclri) + +static void do_vbitset(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_shlv_vec, 0 + }; + static const GVecGen3 op[4] = { + { + .fniv = gen_vbitset, + .fno = gen_helper_vbitset_b, + .opt_opc = vecop_list, + .vece = MO_8 + }, + { + .fniv = gen_vbitset, + .fno = gen_helper_vbitset_h, + .opt_opc = 
vecop_list, + .vece = MO_16 + }, + { + .fniv = gen_vbitset, + .fno = gen_helper_vbitset_w, + .opt_opc = vecop_list, + .vece = MO_32 + }, + { + .fniv = gen_vbitset, + .fno = gen_helper_vbitset_d, + .opt_opc = vecop_list, + .vece = MO_64 + }, + }; + + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); +} + +TRANS(vbitset_b, LSX, gvec_vvv, MO_8, do_vbitset) +TRANS(vbitset_h, LSX, gvec_vvv, MO_16, do_vbitset) +TRANS(vbitset_w, LSX, gvec_vvv, MO_32, do_vbitset) +TRANS(vbitset_d, LSX, gvec_vvv, MO_64, do_vbitset) +TRANS(xvbitset_b, LASX, gvec_xxx, MO_8, do_vbitset) +TRANS(xvbitset_h, LASX, gvec_xxx, MO_16, do_vbitset) +TRANS(xvbitset_w, LASX, gvec_xxx, MO_32, do_vbitset) +TRANS(xvbitset_d, LASX, gvec_xxx, MO_64, do_vbitset) + +static void do_vbitseti(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, + int64_t imm, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_shli_vec, 0 + }; + static const GVecGen2i op[4] = { + { + .fniv = gen_vbitseti, + .fnoi = gen_helper_vbitseti_b, + .opt_opc = vecop_list, + .vece = MO_8 + }, + { + .fniv = gen_vbitseti, + .fnoi = gen_helper_vbitseti_h, + .opt_opc = vecop_list, + .vece = MO_16 + }, + { + .fniv = gen_vbitseti, + .fnoi = gen_helper_vbitseti_w, + .opt_opc = vecop_list, + .vece = MO_32 + }, + { + .fniv = gen_vbitseti, + .fnoi = gen_helper_vbitseti_d, + .opt_opc = vecop_list, + .vece = MO_64 + }, + }; + + tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]); +} + +TRANS(vbitseti_b, LSX, gvec_vv_i, MO_8, do_vbitseti) +TRANS(vbitseti_h, LSX, gvec_vv_i, MO_16, do_vbitseti) +TRANS(vbitseti_w, LSX, gvec_vv_i, MO_32, do_vbitseti) +TRANS(vbitseti_d, LSX, gvec_vv_i, MO_64, do_vbitseti) +TRANS(xvbitseti_b, LASX, gvec_xx_i, MO_8, do_vbitseti) +TRANS(xvbitseti_h, LASX, gvec_xx_i, MO_16, do_vbitseti) +TRANS(xvbitseti_w, LASX, gvec_xx_i, MO_32, do_vbitseti) +TRANS(xvbitseti_d, LASX, gvec_xx_i, MO_64, do_vbitseti) + +static void do_vbitrev(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, + uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_shlv_vec, 0 + }; + static const GVecGen3 op[4] = { + { + .fniv = gen_vbitrev, + .fno = gen_helper_vbitrev_b, + .opt_opc = vecop_list, + .vece = MO_8 + }, + { + .fniv = gen_vbitrev, + .fno = gen_helper_vbitrev_h, + .opt_opc = vecop_list, + .vece = MO_16 + }, + { + .fniv = gen_vbitrev, + .fno = gen_helper_vbitrev_w, + .opt_opc = vecop_list, + .vece = MO_32 + }, + { + .fniv = gen_vbitrev, + .fno = gen_helper_vbitrev_d, + .opt_opc = vecop_list, + .vece = MO_64 + }, + }; + + tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]); +} + +TRANS(vbitrev_b, LSX, gvec_vvv, MO_8, do_vbitrev) +TRANS(vbitrev_h, LSX, gvec_vvv, MO_16, do_vbitrev) +TRANS(vbitrev_w, LSX, gvec_vvv, MO_32, do_vbitrev) +TRANS(vbitrev_d, LSX, gvec_vvv, MO_64, do_vbitrev) +TRANS(xvbitrev_b, LASX, gvec_xxx, MO_8, do_vbitrev) +TRANS(xvbitrev_h, LASX, gvec_xxx, MO_16, do_vbitrev) +TRANS(xvbitrev_w, LASX, gvec_xxx, MO_32, do_vbitrev) +TRANS(xvbitrev_d, LASX, gvec_xxx, MO_64, do_vbitrev) + +static void do_vbitrevi(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs, + int64_t imm, uint32_t oprsz, uint32_t maxsz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_shli_vec, 0 + }; + static const GVecGen2i op[4] = { + { + .fniv = gen_vbitrevi, + .fnoi = gen_helper_vbitrevi_b, + .opt_opc = vecop_list, + .vece = MO_8 + }, + { + .fniv = gen_vbitrevi, + .fnoi = gen_helper_vbitrevi_h, + .opt_opc = vecop_list, + .vece = MO_16 + }, + { + .fniv = gen_vbitrevi, + .fnoi = 
gen_helper_vbitrevi_w, + .opt_opc = vecop_list, + .vece = MO_32 + }, + { + .fniv = gen_vbitrevi, + .fnoi = gen_helper_vbitrevi_d, + .opt_opc = vecop_list, + .vece = MO_64 + }, + }; + + tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]); +} + +TRANS(vbitrevi_b, LSX, gvec_vv_i, MO_8, do_vbitrevi) +TRANS(vbitrevi_h, LSX, gvec_vv_i, MO_16, do_vbitrevi) +TRANS(vbitrevi_w, LSX, gvec_vv_i, MO_32, do_vbitrevi) +TRANS(vbitrevi_d, LSX, gvec_vv_i, MO_64, do_vbitrevi) +TRANS(xvbitrevi_b, LASX, gvec_xx_i, MO_8, do_vbitrevi) +TRANS(xvbitrevi_h, LASX, gvec_xx_i, MO_16, do_vbitrevi) +TRANS(xvbitrevi_w, LASX, gvec_xx_i, MO_32, do_vbitrevi) +TRANS(xvbitrevi_d, LASX, gvec_xx_i, MO_64, do_vbitrevi) + +TRANS(vfrstp_b, LSX, gen_vvv, gen_helper_vfrstp_b) +TRANS(vfrstp_h, LSX, gen_vvv, gen_helper_vfrstp_h) +TRANS(vfrstpi_b, LSX, gen_vv_i, gen_helper_vfrstpi_b) +TRANS(vfrstpi_h, LSX, gen_vv_i, gen_helper_vfrstpi_h) +TRANS(xvfrstp_b, LASX, gen_xxx, gen_helper_vfrstp_b) +TRANS(xvfrstp_h, LASX, gen_xxx, gen_helper_vfrstp_h) +TRANS(xvfrstpi_b, LASX, gen_xx_i, gen_helper_vfrstpi_b) +TRANS(xvfrstpi_h, LASX, gen_xx_i, gen_helper_vfrstpi_h) + +TRANS(vfadd_s, LSX, gen_vvv_ptr, gen_helper_vfadd_s) +TRANS(vfadd_d, LSX, gen_vvv_ptr, gen_helper_vfadd_d) +TRANS(vfsub_s, LSX, gen_vvv_ptr, gen_helper_vfsub_s) +TRANS(vfsub_d, LSX, gen_vvv_ptr, gen_helper_vfsub_d) +TRANS(vfmul_s, LSX, gen_vvv_ptr, gen_helper_vfmul_s) +TRANS(vfmul_d, LSX, gen_vvv_ptr, gen_helper_vfmul_d) +TRANS(vfdiv_s, LSX, gen_vvv_ptr, gen_helper_vfdiv_s) +TRANS(vfdiv_d, LSX, gen_vvv_ptr, gen_helper_vfdiv_d) +TRANS(xvfadd_s, LASX, gen_xxx_ptr, gen_helper_vfadd_s) +TRANS(xvfadd_d, LASX, gen_xxx_ptr, gen_helper_vfadd_d) +TRANS(xvfsub_s, LASX, gen_xxx_ptr, gen_helper_vfsub_s) +TRANS(xvfsub_d, LASX, gen_xxx_ptr, gen_helper_vfsub_d) +TRANS(xvfmul_s, LASX, gen_xxx_ptr, gen_helper_vfmul_s) +TRANS(xvfmul_d, LASX, gen_xxx_ptr, gen_helper_vfmul_d) +TRANS(xvfdiv_s, LASX, gen_xxx_ptr, gen_helper_vfdiv_s) +TRANS(xvfdiv_d, LASX, gen_xxx_ptr, gen_helper_vfdiv_d) + +TRANS(vfmadd_s, LSX, gen_vvvv_ptr, gen_helper_vfmadd_s) +TRANS(vfmadd_d, LSX, gen_vvvv_ptr, gen_helper_vfmadd_d) +TRANS(vfmsub_s, LSX, gen_vvvv_ptr, gen_helper_vfmsub_s) +TRANS(vfmsub_d, LSX, gen_vvvv_ptr, gen_helper_vfmsub_d) +TRANS(vfnmadd_s, LSX, gen_vvvv_ptr, gen_helper_vfnmadd_s) +TRANS(vfnmadd_d, LSX, gen_vvvv_ptr, gen_helper_vfnmadd_d) +TRANS(vfnmsub_s, LSX, gen_vvvv_ptr, gen_helper_vfnmsub_s) +TRANS(vfnmsub_d, LSX, gen_vvvv_ptr, gen_helper_vfnmsub_d) +TRANS(xvfmadd_s, LASX, gen_xxxx_ptr, gen_helper_vfmadd_s) +TRANS(xvfmadd_d, LASX, gen_xxxx_ptr, gen_helper_vfmadd_d) +TRANS(xvfmsub_s, LASX, gen_xxxx_ptr, gen_helper_vfmsub_s) +TRANS(xvfmsub_d, LASX, gen_xxxx_ptr, gen_helper_vfmsub_d) +TRANS(xvfnmadd_s, LASX, gen_xxxx_ptr, gen_helper_vfnmadd_s) +TRANS(xvfnmadd_d, LASX, gen_xxxx_ptr, gen_helper_vfnmadd_d) +TRANS(xvfnmsub_s, LASX, gen_xxxx_ptr, gen_helper_vfnmsub_s) +TRANS(xvfnmsub_d, LASX, gen_xxxx_ptr, gen_helper_vfnmsub_d) + +TRANS(vfmax_s, LSX, gen_vvv_ptr, gen_helper_vfmax_s) +TRANS(vfmax_d, LSX, gen_vvv_ptr, gen_helper_vfmax_d) +TRANS(vfmin_s, LSX, gen_vvv_ptr, gen_helper_vfmin_s) +TRANS(vfmin_d, LSX, gen_vvv_ptr, gen_helper_vfmin_d) +TRANS(xvfmax_s, LASX, gen_xxx_ptr, gen_helper_vfmax_s) +TRANS(xvfmax_d, LASX, gen_xxx_ptr, gen_helper_vfmax_d) +TRANS(xvfmin_s, LASX, gen_xxx_ptr, gen_helper_vfmin_s) +TRANS(xvfmin_d, LASX, gen_xxx_ptr, gen_helper_vfmin_d) + +TRANS(vfmaxa_s, LSX, gen_vvv_ptr, gen_helper_vfmaxa_s) +TRANS(vfmaxa_d, LSX, gen_vvv_ptr, gen_helper_vfmaxa_d) +TRANS(vfmina_s, LSX, 
gen_vvv_ptr, gen_helper_vfmina_s) +TRANS(vfmina_d, LSX, gen_vvv_ptr, gen_helper_vfmina_d) +TRANS(xvfmaxa_s, LASX, gen_xxx_ptr, gen_helper_vfmaxa_s) +TRANS(xvfmaxa_d, LASX, gen_xxx_ptr, gen_helper_vfmaxa_d) +TRANS(xvfmina_s, LASX, gen_xxx_ptr, gen_helper_vfmina_s) +TRANS(xvfmina_d, LASX, gen_xxx_ptr, gen_helper_vfmina_d) + +TRANS(vflogb_s, LSX, gen_vv_ptr, gen_helper_vflogb_s) +TRANS(vflogb_d, LSX, gen_vv_ptr, gen_helper_vflogb_d) +TRANS(xvflogb_s, LASX, gen_xx_ptr, gen_helper_vflogb_s) +TRANS(xvflogb_d, LASX, gen_xx_ptr, gen_helper_vflogb_d) + +TRANS(vfclass_s, LSX, gen_vv_ptr, gen_helper_vfclass_s) +TRANS(vfclass_d, LSX, gen_vv_ptr, gen_helper_vfclass_d) +TRANS(xvfclass_s, LASX, gen_xx_ptr, gen_helper_vfclass_s) +TRANS(xvfclass_d, LASX, gen_xx_ptr, gen_helper_vfclass_d) + +TRANS(vfsqrt_s, LSX, gen_vv_ptr, gen_helper_vfsqrt_s) +TRANS(vfsqrt_d, LSX, gen_vv_ptr, gen_helper_vfsqrt_d) +TRANS(vfrecip_s, LSX, gen_vv_ptr, gen_helper_vfrecip_s) +TRANS(vfrecip_d, LSX, gen_vv_ptr, gen_helper_vfrecip_d) +TRANS(vfrsqrt_s, LSX, gen_vv_ptr, gen_helper_vfrsqrt_s) +TRANS(vfrsqrt_d, LSX, gen_vv_ptr, gen_helper_vfrsqrt_d) +TRANS(xvfsqrt_s, LASX, gen_xx_ptr, gen_helper_vfsqrt_s) +TRANS(xvfsqrt_d, LASX, gen_xx_ptr, gen_helper_vfsqrt_d) +TRANS(xvfrecip_s, LASX, gen_xx_ptr, gen_helper_vfrecip_s) +TRANS(xvfrecip_d, LASX, gen_xx_ptr, gen_helper_vfrecip_d) +TRANS(xvfrsqrt_s, LASX, gen_xx_ptr, gen_helper_vfrsqrt_s) +TRANS(xvfrsqrt_d, LASX, gen_xx_ptr, gen_helper_vfrsqrt_d) + +TRANS(vfcvtl_s_h, LSX, gen_vv_ptr, gen_helper_vfcvtl_s_h) +TRANS(vfcvth_s_h, LSX, gen_vv_ptr, gen_helper_vfcvth_s_h) +TRANS(vfcvtl_d_s, LSX, gen_vv_ptr, gen_helper_vfcvtl_d_s) +TRANS(vfcvth_d_s, LSX, gen_vv_ptr, gen_helper_vfcvth_d_s) +TRANS(vfcvt_h_s, LSX, gen_vvv_ptr, gen_helper_vfcvt_h_s) +TRANS(vfcvt_s_d, LSX, gen_vvv_ptr, gen_helper_vfcvt_s_d) +TRANS(xvfcvtl_s_h, LASX, gen_xx_ptr, gen_helper_vfcvtl_s_h) +TRANS(xvfcvth_s_h, LASX, gen_xx_ptr, gen_helper_vfcvth_s_h) +TRANS(xvfcvtl_d_s, LASX, gen_xx_ptr, gen_helper_vfcvtl_d_s) +TRANS(xvfcvth_d_s, LASX, gen_xx_ptr, gen_helper_vfcvth_d_s) +TRANS(xvfcvt_h_s, LASX, gen_xxx_ptr, gen_helper_vfcvt_h_s) +TRANS(xvfcvt_s_d, LASX, gen_xxx_ptr, gen_helper_vfcvt_s_d) + +TRANS(vfrintrne_s, LSX, gen_vv_ptr, gen_helper_vfrintrne_s) +TRANS(vfrintrne_d, LSX, gen_vv_ptr, gen_helper_vfrintrne_d) +TRANS(vfrintrz_s, LSX, gen_vv_ptr, gen_helper_vfrintrz_s) +TRANS(vfrintrz_d, LSX, gen_vv_ptr, gen_helper_vfrintrz_d) +TRANS(vfrintrp_s, LSX, gen_vv_ptr, gen_helper_vfrintrp_s) +TRANS(vfrintrp_d, LSX, gen_vv_ptr, gen_helper_vfrintrp_d) +TRANS(vfrintrm_s, LSX, gen_vv_ptr, gen_helper_vfrintrm_s) +TRANS(vfrintrm_d, LSX, gen_vv_ptr, gen_helper_vfrintrm_d) +TRANS(vfrint_s, LSX, gen_vv_ptr, gen_helper_vfrint_s) +TRANS(vfrint_d, LSX, gen_vv_ptr, gen_helper_vfrint_d) +TRANS(xvfrintrne_s, LASX, gen_xx_ptr, gen_helper_vfrintrne_s) +TRANS(xvfrintrne_d, LASX, gen_xx_ptr, gen_helper_vfrintrne_d) +TRANS(xvfrintrz_s, LASX, gen_xx_ptr, gen_helper_vfrintrz_s) +TRANS(xvfrintrz_d, LASX, gen_xx_ptr, gen_helper_vfrintrz_d) +TRANS(xvfrintrp_s, LASX, gen_xx_ptr, gen_helper_vfrintrp_s) +TRANS(xvfrintrp_d, LASX, gen_xx_ptr, gen_helper_vfrintrp_d) +TRANS(xvfrintrm_s, LASX, gen_xx_ptr, gen_helper_vfrintrm_s) +TRANS(xvfrintrm_d, LASX, gen_xx_ptr, gen_helper_vfrintrm_d) +TRANS(xvfrint_s, LASX, gen_xx_ptr, gen_helper_vfrint_s) +TRANS(xvfrint_d, LASX, gen_xx_ptr, gen_helper_vfrint_d) + +TRANS(vftintrne_w_s, LSX, gen_vv_ptr, gen_helper_vftintrne_w_s) +TRANS(vftintrne_l_d, LSX, gen_vv_ptr, gen_helper_vftintrne_l_d) +TRANS(vftintrz_w_s, LSX, gen_vv_ptr, 
gen_helper_vftintrz_w_s) +TRANS(vftintrz_l_d, LSX, gen_vv_ptr, gen_helper_vftintrz_l_d) +TRANS(vftintrp_w_s, LSX, gen_vv_ptr, gen_helper_vftintrp_w_s) +TRANS(vftintrp_l_d, LSX, gen_vv_ptr, gen_helper_vftintrp_l_d) +TRANS(vftintrm_w_s, LSX, gen_vv_ptr, gen_helper_vftintrm_w_s) +TRANS(vftintrm_l_d, LSX, gen_vv_ptr, gen_helper_vftintrm_l_d) +TRANS(vftint_w_s, LSX, gen_vv_ptr, gen_helper_vftint_w_s) +TRANS(vftint_l_d, LSX, gen_vv_ptr, gen_helper_vftint_l_d) +TRANS(vftintrz_wu_s, LSX, gen_vv_ptr, gen_helper_vftintrz_wu_s) +TRANS(vftintrz_lu_d, LSX, gen_vv_ptr, gen_helper_vftintrz_lu_d) +TRANS(vftint_wu_s, LSX, gen_vv_ptr, gen_helper_vftint_wu_s) +TRANS(vftint_lu_d, LSX, gen_vv_ptr, gen_helper_vftint_lu_d) +TRANS(vftintrne_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrne_w_d) +TRANS(vftintrz_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrz_w_d) +TRANS(vftintrp_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrp_w_d) +TRANS(vftintrm_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrm_w_d) +TRANS(vftint_w_d, LSX, gen_vvv_ptr, gen_helper_vftint_w_d) +TRANS(vftintrnel_l_s, LSX, gen_vv_ptr, gen_helper_vftintrnel_l_s) +TRANS(vftintrneh_l_s, LSX, gen_vv_ptr, gen_helper_vftintrneh_l_s) +TRANS(vftintrzl_l_s, LSX, gen_vv_ptr, gen_helper_vftintrzl_l_s) +TRANS(vftintrzh_l_s, LSX, gen_vv_ptr, gen_helper_vftintrzh_l_s) +TRANS(vftintrpl_l_s, LSX, gen_vv_ptr, gen_helper_vftintrpl_l_s) +TRANS(vftintrph_l_s, LSX, gen_vv_ptr, gen_helper_vftintrph_l_s) +TRANS(vftintrml_l_s, LSX, gen_vv_ptr, gen_helper_vftintrml_l_s) +TRANS(vftintrmh_l_s, LSX, gen_vv_ptr, gen_helper_vftintrmh_l_s) +TRANS(vftintl_l_s, LSX, gen_vv_ptr, gen_helper_vftintl_l_s) +TRANS(vftinth_l_s, LSX, gen_vv_ptr, gen_helper_vftinth_l_s) +TRANS(xvftintrne_w_s, LASX, gen_xx_ptr, gen_helper_vftintrne_w_s) +TRANS(xvftintrne_l_d, LASX, gen_xx_ptr, gen_helper_vftintrne_l_d) +TRANS(xvftintrz_w_s, LASX, gen_xx_ptr, gen_helper_vftintrz_w_s) +TRANS(xvftintrz_l_d, LASX, gen_xx_ptr, gen_helper_vftintrz_l_d) +TRANS(xvftintrp_w_s, LASX, gen_xx_ptr, gen_helper_vftintrp_w_s) +TRANS(xvftintrp_l_d, LASX, gen_xx_ptr, gen_helper_vftintrp_l_d) +TRANS(xvftintrm_w_s, LASX, gen_xx_ptr, gen_helper_vftintrm_w_s) +TRANS(xvftintrm_l_d, LASX, gen_xx_ptr, gen_helper_vftintrm_l_d) +TRANS(xvftint_w_s, LASX, gen_xx_ptr, gen_helper_vftint_w_s) +TRANS(xvftint_l_d, LASX, gen_xx_ptr, gen_helper_vftint_l_d) +TRANS(xvftintrz_wu_s, LASX, gen_xx_ptr, gen_helper_vftintrz_wu_s) +TRANS(xvftintrz_lu_d, LASX, gen_xx_ptr, gen_helper_vftintrz_lu_d) +TRANS(xvftint_wu_s, LASX, gen_xx_ptr, gen_helper_vftint_wu_s) +TRANS(xvftint_lu_d, LASX, gen_xx_ptr, gen_helper_vftint_lu_d) +TRANS(xvftintrne_w_d, LASX, gen_xxx_ptr, gen_helper_vftintrne_w_d) +TRANS(xvftintrz_w_d, LASX, gen_xxx_ptr, gen_helper_vftintrz_w_d) +TRANS(xvftintrp_w_d, LASX, gen_xxx_ptr, gen_helper_vftintrp_w_d) +TRANS(xvftintrm_w_d, LASX, gen_xxx_ptr, gen_helper_vftintrm_w_d) +TRANS(xvftint_w_d, LASX, gen_xxx_ptr, gen_helper_vftint_w_d) +TRANS(xvftintrnel_l_s, LASX, gen_xx_ptr, gen_helper_vftintrnel_l_s) +TRANS(xvftintrneh_l_s, LASX, gen_xx_ptr, gen_helper_vftintrneh_l_s) +TRANS(xvftintrzl_l_s, LASX, gen_xx_ptr, gen_helper_vftintrzl_l_s) +TRANS(xvftintrzh_l_s, LASX, gen_xx_ptr, gen_helper_vftintrzh_l_s) +TRANS(xvftintrpl_l_s, LASX, gen_xx_ptr, gen_helper_vftintrpl_l_s) +TRANS(xvftintrph_l_s, LASX, gen_xx_ptr, gen_helper_vftintrph_l_s) +TRANS(xvftintrml_l_s, LASX, gen_xx_ptr, gen_helper_vftintrml_l_s) +TRANS(xvftintrmh_l_s, LASX, gen_xx_ptr, gen_helper_vftintrmh_l_s) +TRANS(xvftintl_l_s, LASX, gen_xx_ptr, gen_helper_vftintl_l_s) +TRANS(xvftinth_l_s, LASX, gen_xx_ptr, 
gen_helper_vftinth_l_s) + +TRANS(vffint_s_w, LSX, gen_vv_ptr, gen_helper_vffint_s_w) +TRANS(vffint_d_l, LSX, gen_vv_ptr, gen_helper_vffint_d_l) +TRANS(vffint_s_wu, LSX, gen_vv_ptr, gen_helper_vffint_s_wu) +TRANS(vffint_d_lu, LSX, gen_vv_ptr, gen_helper_vffint_d_lu) +TRANS(vffintl_d_w, LSX, gen_vv_ptr, gen_helper_vffintl_d_w) +TRANS(vffinth_d_w, LSX, gen_vv_ptr, gen_helper_vffinth_d_w) +TRANS(vffint_s_l, LSX, gen_vvv_ptr, gen_helper_vffint_s_l) +TRANS(xvffint_s_w, LASX, gen_xx_ptr, gen_helper_vffint_s_w) +TRANS(xvffint_d_l, LASX, gen_xx_ptr, gen_helper_vffint_d_l) +TRANS(xvffint_s_wu, LASX, gen_xx_ptr, gen_helper_vffint_s_wu) +TRANS(xvffint_d_lu, LASX, gen_xx_ptr, gen_helper_vffint_d_lu) +TRANS(xvffintl_d_w, LASX, gen_xx_ptr, gen_helper_vffintl_d_w) +TRANS(xvffinth_d_w, LASX, gen_xx_ptr, gen_helper_vffinth_d_w) +TRANS(xvffint_s_l, LASX, gen_xxx_ptr, gen_helper_vffint_s_l) + +static bool do_cmp_vl(DisasContext *ctx, arg_vvv *a, + uint32_t oprsz, MemOp mop, TCGCond cond) +{ + uint32_t vd_ofs, vj_ofs, vk_ofs; + + if (!check_vec(ctx, oprsz)) { + return true; + } + + vd_ofs = vec_full_offset(a->vd); + vj_ofs = vec_full_offset(a->vj); + vk_ofs = vec_full_offset(a->vk); + + tcg_gen_gvec_cmp(cond, mop, vd_ofs, vj_ofs, vk_ofs, oprsz, ctx->vl / 8); + return true; +} + +static bool do_cmp(DisasContext *ctx, arg_vvv *a, + MemOp mop, TCGCond cond) +{ + return do_cmp_vl(ctx, a, 16, mop, cond); +} + +static bool do_xcmp(DisasContext *ctx, arg_vvv *a, + MemOp mop, TCGCond cond) +{ + return do_cmp_vl(ctx, a, 32, mop, cond); +} + +static bool do_cmpi_vl(DisasContext *ctx, arg_vv_i *a, + uint32_t oprsz, MemOp mop, TCGCond cond) +{ + uint32_t vd_ofs, vj_ofs; + + if (!check_vec(ctx, oprsz)) { + return true; + } + + vd_ofs = vec_full_offset(a->vd); + vj_ofs = vec_full_offset(a->vj); + + tcg_gen_gvec_cmpi(cond, mop, vd_ofs, vj_ofs, a->imm, oprsz, ctx->vl / 8); + return true; +} + +static bool do_cmpi(DisasContext *ctx, arg_vv_i *a, + MemOp mop, TCGCond cond) +{ + return do_cmpi_vl(ctx, a, 16, mop, cond); +} + +static bool do_xcmpi(DisasContext *ctx, arg_vv_i *a, + MemOp mop, TCGCond cond) +{ + return do_cmpi_vl(ctx, a, 32, mop, cond); +} + +TRANS(vseq_b, LSX, do_cmp, MO_8, TCG_COND_EQ) +TRANS(vseq_h, LSX, do_cmp, MO_16, TCG_COND_EQ) +TRANS(vseq_w, LSX, do_cmp, MO_32, TCG_COND_EQ) +TRANS(vseq_d, LSX, do_cmp, MO_64, TCG_COND_EQ) +TRANS(vseqi_b, LSX, do_cmpi, MO_8, TCG_COND_EQ) +TRANS(vseqi_h, LSX, do_cmpi, MO_16, TCG_COND_EQ) +TRANS(vseqi_w, LSX, do_cmpi, MO_32, TCG_COND_EQ) +TRANS(vseqi_d, LSX, do_cmpi, MO_64, TCG_COND_EQ) +TRANS(xvseq_b, LASX, do_xcmp, MO_8, TCG_COND_EQ) +TRANS(xvseq_h, LASX, do_xcmp, MO_16, TCG_COND_EQ) +TRANS(xvseq_w, LASX, do_xcmp, MO_32, TCG_COND_EQ) +TRANS(xvseq_d, LASX, do_xcmp, MO_64, TCG_COND_EQ) +TRANS(xvseqi_b, LASX, do_xcmpi, MO_8, TCG_COND_EQ) +TRANS(xvseqi_h, LASX, do_xcmpi, MO_16, TCG_COND_EQ) +TRANS(xvseqi_w, LASX, do_xcmpi, MO_32, TCG_COND_EQ) +TRANS(xvseqi_d, LASX, do_xcmpi, MO_64, TCG_COND_EQ) + +TRANS(vsle_b, LSX, do_cmp, MO_8, TCG_COND_LE) +TRANS(vsle_h, LSX, do_cmp, MO_16, TCG_COND_LE) +TRANS(vsle_w, LSX, do_cmp, MO_32, TCG_COND_LE) +TRANS(vsle_d, LSX, do_cmp, MO_64, TCG_COND_LE) +TRANS(vslei_b, LSX, do_cmpi, MO_8, TCG_COND_LE) +TRANS(vslei_h, LSX, do_cmpi, MO_16, TCG_COND_LE) +TRANS(vslei_w, LSX, do_cmpi, MO_32, TCG_COND_LE) +TRANS(vslei_d, LSX, do_cmpi, MO_64, TCG_COND_LE) +TRANS(vsle_bu, LSX, do_cmp, MO_8, TCG_COND_LEU) +TRANS(vsle_hu, LSX, do_cmp, MO_16, TCG_COND_LEU) +TRANS(vsle_wu, LSX, do_cmp, MO_32, TCG_COND_LEU) +TRANS(vsle_du, LSX, do_cmp, MO_64, TCG_COND_LEU) 
+TRANS(vslei_bu, LSX, do_cmpi, MO_8, TCG_COND_LEU) +TRANS(vslei_hu, LSX, do_cmpi, MO_16, TCG_COND_LEU) +TRANS(vslei_wu, LSX, do_cmpi, MO_32, TCG_COND_LEU) +TRANS(vslei_du, LSX, do_cmpi, MO_64, TCG_COND_LEU) +TRANS(xvsle_b, LASX, do_xcmp, MO_8, TCG_COND_LE) +TRANS(xvsle_h, LASX, do_xcmp, MO_16, TCG_COND_LE) +TRANS(xvsle_w, LASX, do_xcmp, MO_32, TCG_COND_LE) +TRANS(xvsle_d, LASX, do_xcmp, MO_64, TCG_COND_LE) +TRANS(xvslei_b, LASX, do_xcmpi, MO_8, TCG_COND_LE) +TRANS(xvslei_h, LASX, do_xcmpi, MO_16, TCG_COND_LE) +TRANS(xvslei_w, LASX, do_xcmpi, MO_32, TCG_COND_LE) +TRANS(xvslei_d, LASX, do_xcmpi, MO_64, TCG_COND_LE) +TRANS(xvsle_bu, LASX, do_xcmp, MO_8, TCG_COND_LEU) +TRANS(xvsle_hu, LASX, do_xcmp, MO_16, TCG_COND_LEU) +TRANS(xvsle_wu, LASX, do_xcmp, MO_32, TCG_COND_LEU) +TRANS(xvsle_du, LASX, do_xcmp, MO_64, TCG_COND_LEU) +TRANS(xvslei_bu, LASX, do_xcmpi, MO_8, TCG_COND_LEU) +TRANS(xvslei_hu, LASX, do_xcmpi, MO_16, TCG_COND_LEU) +TRANS(xvslei_wu, LASX, do_xcmpi, MO_32, TCG_COND_LEU) +TRANS(xvslei_du, LASX, do_xcmpi, MO_64, TCG_COND_LEU) + +TRANS(vslt_b, LSX, do_cmp, MO_8, TCG_COND_LT) +TRANS(vslt_h, LSX, do_cmp, MO_16, TCG_COND_LT) +TRANS(vslt_w, LSX, do_cmp, MO_32, TCG_COND_LT) +TRANS(vslt_d, LSX, do_cmp, MO_64, TCG_COND_LT) +TRANS(vslti_b, LSX, do_cmpi, MO_8, TCG_COND_LT) +TRANS(vslti_h, LSX, do_cmpi, MO_16, TCG_COND_LT) +TRANS(vslti_w, LSX, do_cmpi, MO_32, TCG_COND_LT) +TRANS(vslti_d, LSX, do_cmpi, MO_64, TCG_COND_LT) +TRANS(vslt_bu, LSX, do_cmp, MO_8, TCG_COND_LTU) +TRANS(vslt_hu, LSX, do_cmp, MO_16, TCG_COND_LTU) +TRANS(vslt_wu, LSX, do_cmp, MO_32, TCG_COND_LTU) +TRANS(vslt_du, LSX, do_cmp, MO_64, TCG_COND_LTU) +TRANS(vslti_bu, LSX, do_cmpi, MO_8, TCG_COND_LTU) +TRANS(vslti_hu, LSX, do_cmpi, MO_16, TCG_COND_LTU) +TRANS(vslti_wu, LSX, do_cmpi, MO_32, TCG_COND_LTU) +TRANS(vslti_du, LSX, do_cmpi, MO_64, TCG_COND_LTU) +TRANS(xvslt_b, LASX, do_xcmp, MO_8, TCG_COND_LT) +TRANS(xvslt_h, LASX, do_xcmp, MO_16, TCG_COND_LT) +TRANS(xvslt_w, LASX, do_xcmp, MO_32, TCG_COND_LT) +TRANS(xvslt_d, LASX, do_xcmp, MO_64, TCG_COND_LT) +TRANS(xvslti_b, LASX, do_xcmpi, MO_8, TCG_COND_LT) +TRANS(xvslti_h, LASX, do_xcmpi, MO_16, TCG_COND_LT) +TRANS(xvslti_w, LASX, do_xcmpi, MO_32, TCG_COND_LT) +TRANS(xvslti_d, LASX, do_xcmpi, MO_64, TCG_COND_LT) +TRANS(xvslt_bu, LASX, do_xcmp, MO_8, TCG_COND_LTU) +TRANS(xvslt_hu, LASX, do_xcmp, MO_16, TCG_COND_LTU) +TRANS(xvslt_wu, LASX, do_xcmp, MO_32, TCG_COND_LTU) +TRANS(xvslt_du, LASX, do_xcmp, MO_64, TCG_COND_LTU) +TRANS(xvslti_bu, LASX, do_xcmpi, MO_8, TCG_COND_LTU) +TRANS(xvslti_hu, LASX, do_xcmpi, MO_16, TCG_COND_LTU) +TRANS(xvslti_wu, LASX, do_xcmpi, MO_32, TCG_COND_LTU) +TRANS(xvslti_du, LASX, do_xcmpi, MO_64, TCG_COND_LTU) + +static bool do_vfcmp_cond_s(DisasContext *ctx, arg_vvv_fcond *a, uint32_t sz) +{ + uint32_t flags; + void (*fn)(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32); + TCGv_i32 vd = tcg_constant_i32(a->vd); + TCGv_i32 vj = tcg_constant_i32(a->vj); + TCGv_i32 vk = tcg_constant_i32(a->vk); + TCGv_i32 oprsz = tcg_constant_i32(sz); + + if (!check_vec(ctx, sz)) { + return true; + } + + fn = (a->fcond & 1 ? 
gen_helper_vfcmp_s_s : gen_helper_vfcmp_c_s); + flags = get_fcmp_flags(a->fcond >> 1); + fn(tcg_env, oprsz, vd, vj, vk, tcg_constant_i32(flags)); + + return true; +} + +static bool do_vfcmp_cond_d(DisasContext *ctx, arg_vvv_fcond *a, uint32_t sz) +{ + uint32_t flags; + void (*fn)(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32); + TCGv_i32 vd = tcg_constant_i32(a->vd); + TCGv_i32 vj = tcg_constant_i32(a->vj); + TCGv_i32 vk = tcg_constant_i32(a->vk); + TCGv_i32 oprsz = tcg_constant_i32(sz); + + if (!check_vec(ctx, sz)) { + return true; + } + + fn = (a->fcond & 1 ? gen_helper_vfcmp_s_d : gen_helper_vfcmp_c_d); + flags = get_fcmp_flags(a->fcond >> 1); + fn(tcg_env, oprsz, vd, vj, vk, tcg_constant_i32(flags)); + + return true; +} + +TRANS(vfcmp_cond_s, LSX, do_vfcmp_cond_s, 16) +TRANS(vfcmp_cond_d, LSX, do_vfcmp_cond_d, 16) +TRANS(xvfcmp_cond_s, LASX, do_vfcmp_cond_s, 32) +TRANS(xvfcmp_cond_d, LASX, do_vfcmp_cond_d, 32) + +static bool do_vbitsel_v(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz) +{ + if (!check_vec(ctx, oprsz)) { + return true; + } + + tcg_gen_gvec_bitsel(MO_64, vec_full_offset(a->vd), vec_full_offset(a->va), + vec_full_offset(a->vk), vec_full_offset(a->vj), + oprsz, ctx->vl / 8); + return true; +} + +TRANS(vbitsel_v, LSX, do_vbitsel_v, 16) +TRANS(xvbitsel_v, LASX, do_vbitsel_v, 32) + +static void gen_vbitseli(unsigned vece, TCGv_vec a, TCGv_vec b, int64_t imm) +{ + tcg_gen_bitsel_vec(vece, a, a, tcg_constant_vec_matching(a, vece, imm), b); +} + +static bool do_vbitseli_b(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz) +{ + static const GVecGen2i op = { + .fniv = gen_vbitseli, + .fnoi = gen_helper_vbitseli_b, + .vece = MO_8, + .load_dest = true + }; + + if (!check_vec(ctx, oprsz)) { + return true; + } + + tcg_gen_gvec_2i(vec_full_offset(a->vd), vec_full_offset(a->vj), + oprsz, ctx->vl / 8, a->imm , &op); + return true; +} + +TRANS(vbitseli_b, LSX, do_vbitseli_b, 16) +TRANS(xvbitseli_b, LASX, do_vbitseli_b, 32) + +#define VSET(NAME, COND) \ +static bool trans_## NAME (DisasContext *ctx, arg_cv *a) \ +{ \ + TCGv_i64 t1, al, ah; \ + \ + al = tcg_temp_new_i64(); \ + ah = tcg_temp_new_i64(); \ + t1 = tcg_temp_new_i64(); \ + \ + get_vreg64(ah, a->vj, 1); \ + get_vreg64(al, a->vj, 0); \ + \ + if (!avail_LSX(ctx)) { \ + return false; \ + } \ + \ + if (!check_vec(ctx, 16)) { \ + return true; \ + } \ + \ + tcg_gen_or_i64(t1, al, ah); \ + tcg_gen_setcondi_i64(COND, t1, t1, 0); \ + tcg_gen_st8_tl(t1, tcg_env, offsetof(CPULoongArchState, cf[a->cd & 0x7])); \ + \ + return true; \ +} + +VSET(vseteqz_v, TCG_COND_EQ) +VSET(vsetnez_v, TCG_COND_NE) + +TRANS(vsetanyeqz_b, LSX, gen_cv, gen_helper_vsetanyeqz_b) +TRANS(vsetanyeqz_h, LSX, gen_cv, gen_helper_vsetanyeqz_h) +TRANS(vsetanyeqz_w, LSX, gen_cv, gen_helper_vsetanyeqz_w) +TRANS(vsetanyeqz_d, LSX, gen_cv, gen_helper_vsetanyeqz_d) +TRANS(vsetallnez_b, LSX, gen_cv, gen_helper_vsetallnez_b) +TRANS(vsetallnez_h, LSX, gen_cv, gen_helper_vsetallnez_h) +TRANS(vsetallnez_w, LSX, gen_cv, gen_helper_vsetallnez_w) +TRANS(vsetallnez_d, LSX, gen_cv, gen_helper_vsetallnez_d) + +#define XVSET(NAME, COND) \ +static bool trans_## NAME(DisasContext *ctx, arg_cv * a) \ +{ \ + TCGv_i64 t1, t2, d[4]; \ + \ + d[0] = tcg_temp_new_i64(); \ + d[1] = tcg_temp_new_i64(); \ + d[2] = tcg_temp_new_i64(); \ + d[3] = tcg_temp_new_i64(); \ + t1 = tcg_temp_new_i64(); \ + t2 = tcg_temp_new_i64(); \ + \ + get_vreg64(d[0], a->vj, 0); \ + get_vreg64(d[1], a->vj, 1); \ + get_vreg64(d[2], a->vj, 2); \ + get_vreg64(d[3], a->vj, 3); \ + \ + if (!avail_LASX(ctx)) { \ + return 
false; \ + } \ + \ + if (!check_vec(ctx, 32)) { \ + return true; \ + } \ + \ + tcg_gen_or_i64(t1, d[0], d[1]); \ + tcg_gen_or_i64(t2, d[2], d[3]); \ + tcg_gen_or_i64(t1, t2, t1); \ + tcg_gen_setcondi_i64(COND, t1, t1, 0); \ + tcg_gen_st8_tl(t1, tcg_env, offsetof(CPULoongArchState, cf[a->cd & 0x7])); \ + \ + return true; \ +} + +XVSET(xvseteqz_v, TCG_COND_EQ) +XVSET(xvsetnez_v, TCG_COND_NE) + +TRANS(xvsetanyeqz_b, LASX, gen_cx, gen_helper_vsetanyeqz_b) +TRANS(xvsetanyeqz_h, LASX, gen_cx, gen_helper_vsetanyeqz_h) +TRANS(xvsetanyeqz_w, LASX, gen_cx, gen_helper_vsetanyeqz_w) +TRANS(xvsetanyeqz_d, LASX, gen_cx, gen_helper_vsetanyeqz_d) +TRANS(xvsetallnez_b, LASX, gen_cx, gen_helper_vsetallnez_b) +TRANS(xvsetallnez_h, LASX, gen_cx, gen_helper_vsetallnez_h) +TRANS(xvsetallnez_w, LASX, gen_cx, gen_helper_vsetallnez_w) +TRANS(xvsetallnez_d, LASX, gen_cx, gen_helper_vsetallnez_d) + +static bool gen_g2v_vl(DisasContext *ctx, arg_vr_i *a, uint32_t oprsz, MemOp mop, + void (*func)(TCGv, TCGv_ptr, tcg_target_long)) +{ + TCGv src = gpr_src(ctx, a->rj, EXT_NONE); + + if (!check_vec(ctx, oprsz)) { + return true; + } + + func(src, tcg_env, vec_reg_offset(a->vd, a->imm, mop)); + + return true; +} + +static bool gen_g2v(DisasContext *ctx, arg_vr_i *a, MemOp mop, + void (*func)(TCGv, TCGv_ptr, tcg_target_long)) +{ + return gen_g2v_vl(ctx, a, 16, mop, func); +} + +static bool gen_g2x(DisasContext *ctx, arg_vr_i *a, MemOp mop, + void (*func)(TCGv, TCGv_ptr, tcg_target_long)) +{ + return gen_g2v_vl(ctx, a, 32, mop, func); +} + +TRANS(vinsgr2vr_b, LSX, gen_g2v, MO_8, tcg_gen_st8_i64) +TRANS(vinsgr2vr_h, LSX, gen_g2v, MO_16, tcg_gen_st16_i64) +TRANS(vinsgr2vr_w, LSX, gen_g2v, MO_32, tcg_gen_st32_i64) +TRANS(vinsgr2vr_d, LSX, gen_g2v, MO_64, tcg_gen_st_i64) +TRANS(xvinsgr2vr_w, LASX, gen_g2x, MO_32, tcg_gen_st32_i64) +TRANS(xvinsgr2vr_d, LASX, gen_g2x, MO_64, tcg_gen_st_i64) + +static bool gen_v2g_vl(DisasContext *ctx, arg_rv_i *a, uint32_t oprsz, MemOp mop, + void (*func)(TCGv, TCGv_ptr, tcg_target_long)) +{ + TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE); + + if (!check_vec(ctx, oprsz)) { + return true; + } + + func(dst, tcg_env, vec_reg_offset(a->vj, a->imm, mop)); + + return true; +} + +static bool gen_v2g(DisasContext *ctx, arg_rv_i *a, MemOp mop, + void (*func)(TCGv, TCGv_ptr, tcg_target_long)) +{ + return gen_v2g_vl(ctx, a, 16, mop, func); +} + +static bool gen_x2g(DisasContext *ctx, arg_rv_i *a, MemOp mop, + void (*func)(TCGv, TCGv_ptr, tcg_target_long)) +{ + return gen_v2g_vl(ctx, a, 32, mop, func); +} + +TRANS(vpickve2gr_b, LSX, gen_v2g, MO_8, tcg_gen_ld8s_i64) +TRANS(vpickve2gr_h, LSX, gen_v2g, MO_16, tcg_gen_ld16s_i64) +TRANS(vpickve2gr_w, LSX, gen_v2g, MO_32, tcg_gen_ld32s_i64) +TRANS(vpickve2gr_d, LSX, gen_v2g, MO_64, tcg_gen_ld_i64) +TRANS(vpickve2gr_bu, LSX, gen_v2g, MO_8, tcg_gen_ld8u_i64) +TRANS(vpickve2gr_hu, LSX, gen_v2g, MO_16, tcg_gen_ld16u_i64) +TRANS(vpickve2gr_wu, LSX, gen_v2g, MO_32, tcg_gen_ld32u_i64) +TRANS(vpickve2gr_du, LSX, gen_v2g, MO_64, tcg_gen_ld_i64) +TRANS(xvpickve2gr_w, LASX, gen_x2g, MO_32, tcg_gen_ld32s_i64) +TRANS(xvpickve2gr_d, LASX, gen_x2g, MO_64, tcg_gen_ld_i64) +TRANS(xvpickve2gr_wu, LASX, gen_x2g, MO_32, tcg_gen_ld32u_i64) +TRANS(xvpickve2gr_du, LASX, gen_x2g, MO_64, tcg_gen_ld_i64) + +static bool gvec_dup_vl(DisasContext *ctx, arg_vr *a, + uint32_t oprsz, MemOp mop) +{ + TCGv src = gpr_src(ctx, a->rj, EXT_NONE); + + if (!check_vec(ctx, oprsz)) { + return true; + } + + tcg_gen_gvec_dup_i64(mop, vec_full_offset(a->vd), + oprsz, ctx->vl/8, src); + return true; +} + +static 
bool gvec_dup(DisasContext *ctx, arg_vr *a, MemOp mop) +{ + return gvec_dup_vl(ctx, a, 16, mop); +} + +static bool gvec_dupx(DisasContext *ctx, arg_vr *a, MemOp mop) +{ + return gvec_dup_vl(ctx, a, 32, mop); +} + +TRANS(vreplgr2vr_b, LSX, gvec_dup, MO_8) +TRANS(vreplgr2vr_h, LSX, gvec_dup, MO_16) +TRANS(vreplgr2vr_w, LSX, gvec_dup, MO_32) +TRANS(vreplgr2vr_d, LSX, gvec_dup, MO_64) +TRANS(xvreplgr2vr_b, LASX, gvec_dupx, MO_8) +TRANS(xvreplgr2vr_h, LASX, gvec_dupx, MO_16) +TRANS(xvreplgr2vr_w, LASX, gvec_dupx, MO_32) +TRANS(xvreplgr2vr_d, LASX, gvec_dupx, MO_64) + +static bool trans_vreplvei_b(DisasContext *ctx, arg_vv_i *a) +{ + if (!avail_LSX(ctx)) { + return false; + } + + if (!check_vec(ctx, 16)) { + return true; + } + + tcg_gen_gvec_dup_mem(MO_8,vec_full_offset(a->vd), + offsetof(CPULoongArchState, + fpr[a->vj].vreg.B((a->imm))), + 16, ctx->vl/8); + return true; +} + +static bool trans_vreplvei_h(DisasContext *ctx, arg_vv_i *a) +{ + if (!avail_LSX(ctx)) { + return false; + } + + if (!check_vec(ctx, 16)) { + return true; + } + + tcg_gen_gvec_dup_mem(MO_16, vec_full_offset(a->vd), + offsetof(CPULoongArchState, + fpr[a->vj].vreg.H((a->imm))), + 16, ctx->vl/8); + return true; +} +static bool trans_vreplvei_w(DisasContext *ctx, arg_vv_i *a) +{ + if (!avail_LSX(ctx)) { + return false; + } + + if (!check_vec(ctx, 16)) { + return true; + } + + tcg_gen_gvec_dup_mem(MO_32, vec_full_offset(a->vd), + offsetof(CPULoongArchState, + fpr[a->vj].vreg.W((a->imm))), + 16, ctx->vl/8); + return true; +} +static bool trans_vreplvei_d(DisasContext *ctx, arg_vv_i *a) +{ + if (!avail_LSX(ctx)) { + return false; + } + + if (!check_vec(ctx, 16)) { + return true; + } + + tcg_gen_gvec_dup_mem(MO_64, vec_full_offset(a->vd), + offsetof(CPULoongArchState, + fpr[a->vj].vreg.D((a->imm))), + 16, ctx->vl/8); + return true; +} + +static bool gen_vreplve_vl(DisasContext *ctx, arg_vvr *a, + uint32_t oprsz, int vece, int bit, + void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long)) +{ + int i; + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_ptr t1 = tcg_temp_new_ptr(); + TCGv_i64 t2 = tcg_temp_new_i64(); + + if (!check_vec(ctx, oprsz)) { + return true; + } + + tcg_gen_andi_i64(t0, gpr_src(ctx, a->rk, EXT_NONE), (LSX_LEN / bit) - 1); + tcg_gen_shli_i64(t0, t0, vece); + if (HOST_BIG_ENDIAN) { + tcg_gen_xori_i64(t0, t0, vece << ((LSX_LEN / bit) - 1)); + } + + tcg_gen_trunc_i64_ptr(t1, t0); + tcg_gen_add_ptr(t1, t1, tcg_env); + + for (i = 0; i < oprsz; i += 16) { + func(t2, t1, vec_full_offset(a->vj) + i); + tcg_gen_gvec_dup_i64(vece, vec_full_offset(a->vd) + i, 16, 16, t2); + } + + return true; +} + +static bool gen_vreplve(DisasContext *ctx, arg_vvr *a, int vece, int bit, + void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long)) +{ + return gen_vreplve_vl(ctx, a, 16, vece, bit, func); +} + +static bool gen_xvreplve(DisasContext *ctx, arg_vvr *a, int vece, int bit, + void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long)) +{ + return gen_vreplve_vl(ctx, a, 32, vece, bit, func); +} + +TRANS(vreplve_b, LSX, gen_vreplve, MO_8, 8, tcg_gen_ld8u_i64) +TRANS(vreplve_h, LSX, gen_vreplve, MO_16, 16, tcg_gen_ld16u_i64) +TRANS(vreplve_w, LSX, gen_vreplve, MO_32, 32, tcg_gen_ld32u_i64) +TRANS(vreplve_d, LSX, gen_vreplve, MO_64, 64, tcg_gen_ld_i64) +TRANS(xvreplve_b, LASX, gen_xvreplve, MO_8, 8, tcg_gen_ld8u_i64) +TRANS(xvreplve_h, LASX, gen_xvreplve, MO_16, 16, tcg_gen_ld16u_i64) +TRANS(xvreplve_w, LASX, gen_xvreplve, MO_32, 32, tcg_gen_ld32u_i64) +TRANS(xvreplve_d, LASX, gen_xvreplve, MO_64, 64, tcg_gen_ld_i64) + +static bool gen_xvrepl128(DisasContext *ctx, 
arg_vv_i *a, MemOp mop) +{ + int i; + + if (!check_vec(ctx, 32)) { + return true; + } + + for (i = 0; i < 32; i += 16) { + tcg_gen_gvec_dup_mem(mop, vec_full_offset(a->vd) + i, + vec_reg_offset(a->vj, a->imm, mop) + i, 16, 16); + + } + return true; +} + +TRANS(xvrepl128vei_b, LASX, gen_xvrepl128, MO_8) +TRANS(xvrepl128vei_h, LASX, gen_xvrepl128, MO_16) +TRANS(xvrepl128vei_w, LASX, gen_xvrepl128, MO_32) +TRANS(xvrepl128vei_d, LASX, gen_xvrepl128, MO_64) + +static bool gen_xvreplve0(DisasContext *ctx, arg_vv *a, MemOp mop) +{ + if (!check_vec(ctx, 32)) { + return true; + } + + tcg_gen_gvec_dup_mem(mop, vec_full_offset(a->vd), + vec_full_offset(a->vj), 32, 32); + return true; +} + +TRANS(xvreplve0_b, LASX, gen_xvreplve0, MO_8) +TRANS(xvreplve0_h, LASX, gen_xvreplve0, MO_16) +TRANS(xvreplve0_w, LASX, gen_xvreplve0, MO_32) +TRANS(xvreplve0_d, LASX, gen_xvreplve0, MO_64) +TRANS(xvreplve0_q, LASX, gen_xvreplve0, MO_128) + +TRANS(xvinsve0_w, LASX, gen_xx_i, gen_helper_xvinsve0_w) +TRANS(xvinsve0_d, LASX, gen_xx_i, gen_helper_xvinsve0_d) + +TRANS(xvpickve_w, LASX, gen_xx_i, gen_helper_xvpickve_w) +TRANS(xvpickve_d, LASX, gen_xx_i, gen_helper_xvpickve_d) + +static bool do_vbsll_v(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz) +{ + int i, ofs; + + if (!check_vec(ctx, oprsz)) { + return true; + } + + for (i = 0; i < oprsz / 16; i++) { + TCGv desthigh = tcg_temp_new_i64(); + TCGv destlow = tcg_temp_new_i64(); + TCGv high = tcg_temp_new_i64(); + TCGv low = tcg_temp_new_i64(); + + get_vreg64(low, a->vj, 2 * i); + + ofs = ((a->imm) & 0xf) * 8; + if (ofs < 64) { + get_vreg64(high, a->vj, 2 * i + 1); + tcg_gen_extract2_i64(desthigh, low, high, 64 - ofs); + tcg_gen_shli_i64(destlow, low, ofs); + } else { + tcg_gen_shli_i64(desthigh, low, ofs - 64); + destlow = tcg_constant_i64(0); + } + set_vreg64(desthigh, a->vd, 2 * i + 1); + set_vreg64(destlow, a->vd, 2 * i); + } + + return true; +} + +static bool do_vbsrl_v(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz) +{ + int i, ofs; + + if (!check_vec(ctx, 32)) { + return true; + } + + for (i = 0; i < oprsz / 16; i++) { + TCGv desthigh = tcg_temp_new_i64(); + TCGv destlow = tcg_temp_new_i64(); + TCGv high = tcg_temp_new_i64(); + TCGv low = tcg_temp_new_i64(); + get_vreg64(high, a->vj, 2 * i + 1); + + ofs = ((a->imm) & 0xf) * 8; + if (ofs < 64) { + get_vreg64(low, a->vj, 2 * i); + tcg_gen_extract2_i64(destlow, low, high, ofs); + tcg_gen_shri_i64(desthigh, high, ofs); + } else { + tcg_gen_shri_i64(destlow, high, ofs - 64); + desthigh = tcg_constant_i64(0); + } + set_vreg64(desthigh, a->vd, 2 * i + 1); + set_vreg64(destlow, a->vd, 2 * i); + } + + return true; +} + +TRANS(vbsll_v, LSX, do_vbsll_v, 16) +TRANS(vbsrl_v, LSX, do_vbsrl_v, 16) +TRANS(xvbsll_v, LASX, do_vbsll_v, 32) +TRANS(xvbsrl_v, LASX, do_vbsrl_v, 32) + +TRANS(vpackev_b, LSX, gen_vvv, gen_helper_vpackev_b) +TRANS(vpackev_h, LSX, gen_vvv, gen_helper_vpackev_h) +TRANS(vpackev_w, LSX, gen_vvv, gen_helper_vpackev_w) +TRANS(vpackev_d, LSX, gen_vvv, gen_helper_vpackev_d) +TRANS(vpackod_b, LSX, gen_vvv, gen_helper_vpackod_b) +TRANS(vpackod_h, LSX, gen_vvv, gen_helper_vpackod_h) +TRANS(vpackod_w, LSX, gen_vvv, gen_helper_vpackod_w) +TRANS(vpackod_d, LSX, gen_vvv, gen_helper_vpackod_d) +TRANS(xvpackev_b, LASX, gen_xxx, gen_helper_vpackev_b) +TRANS(xvpackev_h, LASX, gen_xxx, gen_helper_vpackev_h) +TRANS(xvpackev_w, LASX, gen_xxx, gen_helper_vpackev_w) +TRANS(xvpackev_d, LASX, gen_xxx, gen_helper_vpackev_d) +TRANS(xvpackod_b, LASX, gen_xxx, gen_helper_vpackod_b) +TRANS(xvpackod_h, LASX, gen_xxx, gen_helper_vpackod_h) 
+TRANS(xvpackod_w, LASX, gen_xxx, gen_helper_vpackod_w) +TRANS(xvpackod_d, LASX, gen_xxx, gen_helper_vpackod_d) + +TRANS(vpickev_b, LSX, gen_vvv, gen_helper_vpickev_b) +TRANS(vpickev_h, LSX, gen_vvv, gen_helper_vpickev_h) +TRANS(vpickev_w, LSX, gen_vvv, gen_helper_vpickev_w) +TRANS(vpickev_d, LSX, gen_vvv, gen_helper_vpickev_d) +TRANS(vpickod_b, LSX, gen_vvv, gen_helper_vpickod_b) +TRANS(vpickod_h, LSX, gen_vvv, gen_helper_vpickod_h) +TRANS(vpickod_w, LSX, gen_vvv, gen_helper_vpickod_w) +TRANS(vpickod_d, LSX, gen_vvv, gen_helper_vpickod_d) +TRANS(xvpickev_b, LASX, gen_xxx, gen_helper_vpickev_b) +TRANS(xvpickev_h, LASX, gen_xxx, gen_helper_vpickev_h) +TRANS(xvpickev_w, LASX, gen_xxx, gen_helper_vpickev_w) +TRANS(xvpickev_d, LASX, gen_xxx, gen_helper_vpickev_d) +TRANS(xvpickod_b, LASX, gen_xxx, gen_helper_vpickod_b) +TRANS(xvpickod_h, LASX, gen_xxx, gen_helper_vpickod_h) +TRANS(xvpickod_w, LASX, gen_xxx, gen_helper_vpickod_w) +TRANS(xvpickod_d, LASX, gen_xxx, gen_helper_vpickod_d) + +TRANS(vilvl_b, LSX, gen_vvv, gen_helper_vilvl_b) +TRANS(vilvl_h, LSX, gen_vvv, gen_helper_vilvl_h) +TRANS(vilvl_w, LSX, gen_vvv, gen_helper_vilvl_w) +TRANS(vilvl_d, LSX, gen_vvv, gen_helper_vilvl_d) +TRANS(vilvh_b, LSX, gen_vvv, gen_helper_vilvh_b) +TRANS(vilvh_h, LSX, gen_vvv, gen_helper_vilvh_h) +TRANS(vilvh_w, LSX, gen_vvv, gen_helper_vilvh_w) +TRANS(vilvh_d, LSX, gen_vvv, gen_helper_vilvh_d) +TRANS(xvilvl_b, LASX, gen_xxx, gen_helper_vilvl_b) +TRANS(xvilvl_h, LASX, gen_xxx, gen_helper_vilvl_h) +TRANS(xvilvl_w, LASX, gen_xxx, gen_helper_vilvl_w) +TRANS(xvilvl_d, LASX, gen_xxx, gen_helper_vilvl_d) +TRANS(xvilvh_b, LASX, gen_xxx, gen_helper_vilvh_b) +TRANS(xvilvh_h, LASX, gen_xxx, gen_helper_vilvh_h) +TRANS(xvilvh_w, LASX, gen_xxx, gen_helper_vilvh_w) +TRANS(xvilvh_d, LASX, gen_xxx, gen_helper_vilvh_d) + +TRANS(vshuf_b, LSX, gen_vvvv, gen_helper_vshuf_b) +TRANS(vshuf_h, LSX, gen_vvv, gen_helper_vshuf_h) +TRANS(vshuf_w, LSX, gen_vvv, gen_helper_vshuf_w) +TRANS(vshuf_d, LSX, gen_vvv, gen_helper_vshuf_d) +TRANS(xvshuf_b, LASX, gen_xxxx, gen_helper_vshuf_b) +TRANS(xvshuf_h, LASX, gen_xxx, gen_helper_vshuf_h) +TRANS(xvshuf_w, LASX, gen_xxx, gen_helper_vshuf_w) +TRANS(xvshuf_d, LASX, gen_xxx, gen_helper_vshuf_d) +TRANS(vshuf4i_b, LSX, gen_vv_i, gen_helper_vshuf4i_b) +TRANS(vshuf4i_h, LSX, gen_vv_i, gen_helper_vshuf4i_h) +TRANS(vshuf4i_w, LSX, gen_vv_i, gen_helper_vshuf4i_w) +TRANS(vshuf4i_d, LSX, gen_vv_i, gen_helper_vshuf4i_d) +TRANS(xvshuf4i_b, LASX, gen_xx_i, gen_helper_vshuf4i_b) +TRANS(xvshuf4i_h, LASX, gen_xx_i, gen_helper_vshuf4i_h) +TRANS(xvshuf4i_w, LASX, gen_xx_i, gen_helper_vshuf4i_w) +TRANS(xvshuf4i_d, LASX, gen_xx_i, gen_helper_vshuf4i_d) + +TRANS(xvperm_w, LASX, gen_xxx, gen_helper_vperm_w) +TRANS(vpermi_w, LSX, gen_vv_i, gen_helper_vpermi_w) +TRANS(xvpermi_w, LASX, gen_xx_i, gen_helper_vpermi_w) +TRANS(xvpermi_d, LASX, gen_xx_i, gen_helper_vpermi_d) +TRANS(xvpermi_q, LASX, gen_xx_i, gen_helper_vpermi_q) + +TRANS(vextrins_b, LSX, gen_vv_i, gen_helper_vextrins_b) +TRANS(vextrins_h, LSX, gen_vv_i, gen_helper_vextrins_h) +TRANS(vextrins_w, LSX, gen_vv_i, gen_helper_vextrins_w) +TRANS(vextrins_d, LSX, gen_vv_i, gen_helper_vextrins_d) +TRANS(xvextrins_b, LASX, gen_xx_i, gen_helper_vextrins_b) +TRANS(xvextrins_h, LASX, gen_xx_i, gen_helper_vextrins_h) +TRANS(xvextrins_w, LASX, gen_xx_i, gen_helper_vextrins_w) +TRANS(xvextrins_d, LASX, gen_xx_i, gen_helper_vextrins_d) + +static bool trans_vld(DisasContext *ctx, arg_vr_i *a) +{ + TCGv addr; + TCGv_i64 rl, rh; + TCGv_i128 val; + + if (!avail_LSX(ctx)) { + return 
false; + } + + if (!check_vec(ctx, 16)) { + return true; + } + + addr = gpr_src(ctx, a->rj, EXT_NONE); + val = tcg_temp_new_i128(); + rl = tcg_temp_new_i64(); + rh = tcg_temp_new_i64(); + + addr = make_address_i(ctx, addr, a->imm); + + tcg_gen_qemu_ld_i128(val, addr, ctx->mem_idx, MO_128 | MO_TE); + tcg_gen_extr_i128_i64(rl, rh, val); + set_vreg64(rh, a->vd, 1); + set_vreg64(rl, a->vd, 0); + + return true; +} + +static bool trans_vst(DisasContext *ctx, arg_vr_i *a) +{ + TCGv addr; + TCGv_i128 val; + TCGv_i64 ah, al; + + if (!avail_LSX(ctx)) { + return false; + } + + if (!check_vec(ctx, 16)) { + return true; + } + + addr = gpr_src(ctx, a->rj, EXT_NONE); + val = tcg_temp_new_i128(); + ah = tcg_temp_new_i64(); + al = tcg_temp_new_i64(); + + addr = make_address_i(ctx, addr, a->imm); + + get_vreg64(ah, a->vd, 1); + get_vreg64(al, a->vd, 0); + tcg_gen_concat_i64_i128(val, al, ah); + tcg_gen_qemu_st_i128(val, addr, ctx->mem_idx, MO_128 | MO_TE); + + return true; +} + +static bool trans_vldx(DisasContext *ctx, arg_vrr *a) +{ + TCGv addr, src1, src2; + TCGv_i64 rl, rh; + TCGv_i128 val; + + if (!avail_LSX(ctx)) { + return false; + } + + if (!check_vec(ctx, 16)) { + return true; + } + + src1 = gpr_src(ctx, a->rj, EXT_NONE); + src2 = gpr_src(ctx, a->rk, EXT_NONE); + val = tcg_temp_new_i128(); + rl = tcg_temp_new_i64(); + rh = tcg_temp_new_i64(); + + addr = make_address_x(ctx, src1, src2); + tcg_gen_qemu_ld_i128(val, addr, ctx->mem_idx, MO_128 | MO_TE); + tcg_gen_extr_i128_i64(rl, rh, val); + set_vreg64(rh, a->vd, 1); + set_vreg64(rl, a->vd, 0); + + return true; +} + +static bool trans_vstx(DisasContext *ctx, arg_vrr *a) +{ + TCGv addr, src1, src2; + TCGv_i64 ah, al; + TCGv_i128 val; + + if (!avail_LSX(ctx)) { + return false; + } + + if (!check_vec(ctx, 16)) { + return true; + } + + src1 = gpr_src(ctx, a->rj, EXT_NONE); + src2 = gpr_src(ctx, a->rk, EXT_NONE); + val = tcg_temp_new_i128(); + ah = tcg_temp_new_i64(); + al = tcg_temp_new_i64(); + + addr = make_address_x(ctx, src1, src2); + get_vreg64(ah, a->vd, 1); + get_vreg64(al, a->vd, 0); + tcg_gen_concat_i64_i128(val, al, ah); + tcg_gen_qemu_st_i128(val, addr, ctx->mem_idx, MO_128 | MO_TE); + + return true; +} + +static bool do_vldrepl_vl(DisasContext *ctx, arg_vr_i *a, + uint32_t oprsz, MemOp mop) +{ + TCGv addr; + TCGv_i64 val; + + if (!check_vec(ctx, oprsz)) { + return true; + } + + addr = gpr_src(ctx, a->rj, EXT_NONE); + val = tcg_temp_new_i64(); + + addr = make_address_i(ctx, addr, a->imm); + + tcg_gen_qemu_ld_i64(val, addr, ctx->mem_idx, mop); + tcg_gen_gvec_dup_i64(mop, vec_full_offset(a->vd), oprsz, ctx->vl / 8, val); + + return true; +} + +static bool do_vldrepl(DisasContext *ctx, arg_vr_i *a, MemOp mop) +{ + return do_vldrepl_vl(ctx, a, 16, mop); +} + +static bool do_xvldrepl(DisasContext *ctx, arg_vr_i *a, MemOp mop) +{ + return do_vldrepl_vl(ctx, a, 32, mop); +} + +TRANS(vldrepl_b, LSX, do_vldrepl, MO_8) +TRANS(vldrepl_h, LSX, do_vldrepl, MO_16) +TRANS(vldrepl_w, LSX, do_vldrepl, MO_32) +TRANS(vldrepl_d, LSX, do_vldrepl, MO_64) +TRANS(xvldrepl_b, LASX, do_xvldrepl, MO_8) +TRANS(xvldrepl_h, LASX, do_xvldrepl, MO_16) +TRANS(xvldrepl_w, LASX, do_xvldrepl, MO_32) +TRANS(xvldrepl_d, LASX, do_xvldrepl, MO_64) + +static bool do_vstelm_vl(DisasContext *ctx, + arg_vr_ii *a, uint32_t oprsz, MemOp mop) +{ + TCGv addr; + TCGv_i64 val; + + if (!check_vec(ctx, oprsz)) { + return true; + } + + addr = gpr_src(ctx, a->rj, EXT_NONE); + val = tcg_temp_new_i64(); + + addr = make_address_i(ctx, addr, a->imm); + tcg_gen_ld_i64(val, tcg_env, 
vec_reg_offset(a->vd, a->imm2, mop)); + tcg_gen_qemu_st_i64(val, addr, ctx->mem_idx, mop); + return true; +} + +static bool do_vstelm(DisasContext *ctx, arg_vr_ii *a, MemOp mop) +{ + return do_vstelm_vl(ctx, a, 16, mop); +} + +static bool do_xvstelm(DisasContext *ctx, arg_vr_ii *a, MemOp mop) +{ + return do_vstelm_vl(ctx, a, 32, mop); +} + +TRANS(vstelm_b, LSX, do_vstelm, MO_8) +TRANS(vstelm_h, LSX, do_vstelm, MO_16) +TRANS(vstelm_w, LSX, do_vstelm, MO_32) +TRANS(vstelm_d, LSX, do_vstelm, MO_64) +TRANS(xvstelm_b, LASX, do_xvstelm, MO_8) +TRANS(xvstelm_h, LASX, do_xvstelm, MO_16) +TRANS(xvstelm_w, LASX, do_xvstelm, MO_32) +TRANS(xvstelm_d, LASX, do_xvstelm, MO_64) + +static bool gen_lasx_memory(DisasContext *ctx, arg_vr_i *a, + void (*func)(DisasContext *, int, TCGv)) +{ + TCGv addr = gpr_src(ctx, a->rj, EXT_NONE); + TCGv temp = NULL; + + if (!check_vec(ctx, 32)) { + return true; + } + + if (a->imm) { + temp = tcg_temp_new(); + tcg_gen_addi_tl(temp, addr, a->imm); + addr = temp; + } + + func(ctx, a->vd, addr); + return true; +} + +static void gen_xvld(DisasContext *ctx, int vreg, TCGv addr) +{ + int i; + TCGv temp = tcg_temp_new(); + TCGv dest = tcg_temp_new(); + + tcg_gen_qemu_ld_i64(dest, addr, ctx->mem_idx, MO_TEUQ); + set_vreg64(dest, vreg, 0); + + for (i = 1; i < 4; i++) { + tcg_gen_addi_tl(temp, addr, 8 * i); + tcg_gen_qemu_ld_i64(dest, temp, ctx->mem_idx, MO_TEUQ); + set_vreg64(dest, vreg, i); + } +} + +static void gen_xvst(DisasContext * ctx, int vreg, TCGv addr) +{ + int i; + TCGv temp = tcg_temp_new(); + TCGv dest = tcg_temp_new(); + + get_vreg64(dest, vreg, 0); + tcg_gen_qemu_st_i64(dest, addr, ctx->mem_idx, MO_TEUQ); + + for (i = 1; i < 4; i++) { + tcg_gen_addi_tl(temp, addr, 8 * i); + get_vreg64(dest, vreg, i); + tcg_gen_qemu_st_i64(dest, temp, ctx->mem_idx, MO_TEUQ); + } +} + +TRANS(xvld, LASX, gen_lasx_memory, gen_xvld) +TRANS(xvst, LASX, gen_lasx_memory, gen_xvst) + +static bool gen_lasx_memoryx(DisasContext *ctx, arg_vrr *a, + void (*func)(DisasContext*, int, TCGv)) +{ + TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); + TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE); + TCGv addr = tcg_temp_new(); + + if (!check_vec(ctx, 32)) { + return true; + } + + tcg_gen_add_tl(addr, src1, src2); + func(ctx, a->vd, addr); + + return true; +} + +TRANS(xvldx, LASX, gen_lasx_memoryx, gen_xvld) +TRANS(xvstx, LASX, gen_lasx_memoryx, gen_xvst) diff --git a/target/loongarch/tcg/iocsr_helper.c b/target/loongarch/tcg/iocsr_helper.c new file mode 100644 index 0000000000..6cd01d5f09 --- /dev/null +++ b/target/loongarch/tcg/iocsr_helper.c @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2021 Loongson Technology Corporation Limited + * + * Helpers for IOCSR reads/writes + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "qemu/host-utils.h" +#include "exec/helper-proto.h" +#include "exec/exec-all.h" +#include "exec/cpu_ldst.h" + +#define GET_MEMTXATTRS(cas) \ + ((MemTxAttrs){.requester_id = env_cpu(cas)->cpu_index}) + +uint64_t helper_iocsrrd_b(CPULoongArchState *env, target_ulong r_addr) +{ + return address_space_ldub(&env->address_space_iocsr, r_addr, + GET_MEMTXATTRS(env), NULL); +} + +uint64_t helper_iocsrrd_h(CPULoongArchState *env, target_ulong r_addr) +{ + return address_space_lduw(&env->address_space_iocsr, r_addr, + GET_MEMTXATTRS(env), NULL); +} + +uint64_t helper_iocsrrd_w(CPULoongArchState *env, target_ulong r_addr) +{ + return address_space_ldl(&env->address_space_iocsr, r_addr, + GET_MEMTXATTRS(env), NULL); +} + +uint64_t 
helper_iocsrrd_d(CPULoongArchState *env, target_ulong r_addr) +{ + return address_space_ldq(&env->address_space_iocsr, r_addr, + GET_MEMTXATTRS(env), NULL); +} + +void helper_iocsrwr_b(CPULoongArchState *env, target_ulong w_addr, + target_ulong val) +{ + address_space_stb(&env->address_space_iocsr, w_addr, + val, GET_MEMTXATTRS(env), NULL); +} + +void helper_iocsrwr_h(CPULoongArchState *env, target_ulong w_addr, + target_ulong val) +{ + address_space_stw(&env->address_space_iocsr, w_addr, + val, GET_MEMTXATTRS(env), NULL); +} + +void helper_iocsrwr_w(CPULoongArchState *env, target_ulong w_addr, + target_ulong val) +{ + address_space_stl(&env->address_space_iocsr, w_addr, + val, GET_MEMTXATTRS(env), NULL); +} + +void helper_iocsrwr_d(CPULoongArchState *env, target_ulong w_addr, + target_ulong val) +{ + address_space_stq(&env->address_space_iocsr, w_addr, + val, GET_MEMTXATTRS(env), NULL); +} diff --git a/target/loongarch/tcg/meson.build b/target/loongarch/tcg/meson.build new file mode 100644 index 0000000000..bdf34f9673 --- /dev/null +++ b/target/loongarch/tcg/meson.build @@ -0,0 +1,19 @@ +if 'CONFIG_TCG' not in config_all_accel + subdir_done() +endif + +loongarch_ss.add([zlib, gen]) + +loongarch_ss.add(files( + 'fpu_helper.c', + 'op_helper.c', + 'translate.c', + 'vec_helper.c', +)) + +loongarch_system_ss.add(files( + 'constant_timer.c', + 'csr_helper.c', + 'iocsr_helper.c', + 'tlb_helper.c', +)) diff --git a/target/loongarch/tcg/op_helper.c b/target/loongarch/tcg/op_helper.c new file mode 100644 index 0000000000..fe79c62fa4 --- /dev/null +++ b/target/loongarch/tcg/op_helper.c @@ -0,0 +1,140 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * LoongArch emulation helpers for QEMU. + * + * Copyright (c) 2021 Loongson Technology Corporation Limited + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "cpu.h" +#include "qemu/host-utils.h" +#include "exec/helper-proto.h" +#include "exec/exec-all.h" +#include "exec/cpu_ldst.h" +#include "internals.h" +#include "qemu/crc32c.h" +#include <zlib.h> +#include "cpu-csr.h" + +/* Exceptions helpers */ +void helper_raise_exception(CPULoongArchState *env, uint32_t exception) +{ + do_raise_exception(env, exception, GETPC()); +} + +target_ulong helper_bitrev_w(target_ulong rj) +{ + return (int32_t)revbit32(rj); +} + +target_ulong helper_bitrev_d(target_ulong rj) +{ + return revbit64(rj); +} + +target_ulong helper_bitswap(target_ulong v) +{ + v = ((v >> 1) & (target_ulong)0x5555555555555555ULL) | + ((v & (target_ulong)0x5555555555555555ULL) << 1); + v = ((v >> 2) & (target_ulong)0x3333333333333333ULL) | + ((v & (target_ulong)0x3333333333333333ULL) << 2); + v = ((v >> 4) & (target_ulong)0x0F0F0F0F0F0F0F0FULL) | + ((v & (target_ulong)0x0F0F0F0F0F0F0F0FULL) << 4); + return v; +} + +/* loongarch assert op */ +void helper_asrtle_d(CPULoongArchState *env, target_ulong rj, target_ulong rk) +{ + if (rj > rk) { + env->CSR_BADV = rj; + do_raise_exception(env, EXCCODE_BCE, GETPC()); + } +} + +void helper_asrtgt_d(CPULoongArchState *env, target_ulong rj, target_ulong rk) +{ + if (rj <= rk) { + env->CSR_BADV = rj; + do_raise_exception(env, EXCCODE_BCE, GETPC()); + } +} + +target_ulong helper_crc32(target_ulong val, target_ulong m, uint64_t sz) +{ + uint8_t buf[8]; + target_ulong mask = ((sz * 8) == 64) ? 
-1ULL : ((1ULL << (sz * 8)) - 1); + + m &= mask; + stq_le_p(buf, m); + return (int32_t) (crc32(val ^ 0xffffffff, buf, sz) ^ 0xffffffff); +} + +target_ulong helper_crc32c(target_ulong val, target_ulong m, uint64_t sz) +{ + uint8_t buf[8]; + target_ulong mask = ((sz * 8) == 64) ? -1ULL : ((1ULL << (sz * 8)) - 1); + m &= mask; + stq_le_p(buf, m); + return (int32_t) (crc32c(val, buf, sz) ^ 0xffffffff); +} + +target_ulong helper_cpucfg(CPULoongArchState *env, target_ulong rj) +{ + return rj >= ARRAY_SIZE(env->cpucfg) ? 0 : env->cpucfg[rj]; +} + +uint64_t helper_rdtime_d(CPULoongArchState *env) +{ +#ifdef CONFIG_USER_ONLY + return cpu_get_host_ticks(); +#else + uint64_t plv; + LoongArchCPU *cpu = env_archcpu(env); + + plv = FIELD_EX64(env->CSR_CRMD, CSR_CRMD, PLV); + if (extract64(env->CSR_MISC, R_CSR_MISC_DRDTL_SHIFT + plv, 1)) { + do_raise_exception(env, EXCCODE_IPE, GETPC()); + } + + return cpu_loongarch_get_constant_timer_counter(cpu); +#endif +} + +#ifndef CONFIG_USER_ONLY +void helper_ertn(CPULoongArchState *env) +{ + uint64_t csr_pplv, csr_pie; + if (FIELD_EX64(env->CSR_TLBRERA, CSR_TLBRERA, ISTLBR)) { + csr_pplv = FIELD_EX64(env->CSR_TLBRPRMD, CSR_TLBRPRMD, PPLV); + csr_pie = FIELD_EX64(env->CSR_TLBRPRMD, CSR_TLBRPRMD, PIE); + + env->CSR_TLBRERA = FIELD_DP64(env->CSR_TLBRERA, CSR_TLBRERA, ISTLBR, 0); + env->CSR_CRMD = FIELD_DP64(env->CSR_CRMD, CSR_CRMD, DA, 0); + env->CSR_CRMD = FIELD_DP64(env->CSR_CRMD, CSR_CRMD, PG, 1); + set_pc(env, env->CSR_TLBRERA); + qemu_log_mask(CPU_LOG_INT, "%s: TLBRERA " TARGET_FMT_lx "\n", + __func__, env->CSR_TLBRERA); + } else { + csr_pplv = FIELD_EX64(env->CSR_PRMD, CSR_PRMD, PPLV); + csr_pie = FIELD_EX64(env->CSR_PRMD, CSR_PRMD, PIE); + + set_pc(env, env->CSR_ERA); + qemu_log_mask(CPU_LOG_INT, "%s: ERA " TARGET_FMT_lx "\n", + __func__, env->CSR_ERA); + } + env->CSR_CRMD = FIELD_DP64(env->CSR_CRMD, CSR_CRMD, PLV, csr_pplv); + env->CSR_CRMD = FIELD_DP64(env->CSR_CRMD, CSR_CRMD, IE, csr_pie); + + env->lladdr = 1; +} + +void helper_idle(CPULoongArchState *env) +{ + CPUState *cs = env_cpu(env); + + cs->halted = 1; + do_raise_exception(env, EXCP_HLT, 0); +} +#endif diff --git a/target/loongarch/tcg/tlb_helper.c b/target/loongarch/tcg/tlb_helper.c new file mode 100644 index 0000000000..449043c68b --- /dev/null +++ b/target/loongarch/tcg/tlb_helper.c @@ -0,0 +1,803 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * QEMU LoongArch TLB helpers + * + * Copyright (c) 2021 Loongson Technology Corporation Limited + * + */ + +#include "qemu/osdep.h" +#include "qemu/guest-random.h" + +#include "cpu.h" +#include "internals.h" +#include "exec/helper-proto.h" +#include "exec/exec-all.h" +#include "exec/cpu_ldst.h" +#include "exec/log.h" +#include "cpu-csr.h" + +enum { + TLBRET_MATCH = 0, + TLBRET_BADADDR = 1, + TLBRET_NOMATCH = 2, + TLBRET_INVALID = 3, + TLBRET_DIRTY = 4, + TLBRET_RI = 5, + TLBRET_XI = 6, + TLBRET_PE = 7, +}; + +static int loongarch_map_tlb_entry(CPULoongArchState *env, hwaddr *physical, + int *prot, target_ulong address, + int access_type, int index, int mmu_idx) +{ + LoongArchTLB *tlb = &env->tlb[index]; + uint64_t plv = mmu_idx; + uint64_t tlb_entry, tlb_ppn; + uint8_t tlb_ps, n, tlb_v, tlb_d, tlb_plv, tlb_nx, tlb_nr, tlb_rplv; + + if (index >= LOONGARCH_STLB) { + tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS); + } else { + tlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS); + } + n = (address >> tlb_ps) & 0x1;/* Odd or even */ + + tlb_entry = n ? 
tlb->tlb_entry1 : tlb->tlb_entry0; + tlb_v = FIELD_EX64(tlb_entry, TLBENTRY, V); + tlb_d = FIELD_EX64(tlb_entry, TLBENTRY, D); + tlb_plv = FIELD_EX64(tlb_entry, TLBENTRY, PLV); + if (is_la64(env)) { + tlb_ppn = FIELD_EX64(tlb_entry, TLBENTRY_64, PPN); + tlb_nx = FIELD_EX64(tlb_entry, TLBENTRY_64, NX); + tlb_nr = FIELD_EX64(tlb_entry, TLBENTRY_64, NR); + tlb_rplv = FIELD_EX64(tlb_entry, TLBENTRY_64, RPLV); + } else { + tlb_ppn = FIELD_EX64(tlb_entry, TLBENTRY_32, PPN); + tlb_nx = 0; + tlb_nr = 0; + tlb_rplv = 0; + } + + /* Remove sw bit between bit12 -- bit PS*/ + tlb_ppn = tlb_ppn & ~(((0x1UL << (tlb_ps - 12)) -1)); + + /* Check access rights */ + if (!tlb_v) { + return TLBRET_INVALID; + } + + if (access_type == MMU_INST_FETCH && tlb_nx) { + return TLBRET_XI; + } + + if (access_type == MMU_DATA_LOAD && tlb_nr) { + return TLBRET_RI; + } + + if (((tlb_rplv == 0) && (plv > tlb_plv)) || + ((tlb_rplv == 1) && (plv != tlb_plv))) { + return TLBRET_PE; + } + + if ((access_type == MMU_DATA_STORE) && !tlb_d) { + return TLBRET_DIRTY; + } + + *physical = (tlb_ppn << R_TLBENTRY_64_PPN_SHIFT) | + (address & MAKE_64BIT_MASK(0, tlb_ps)); + *prot = PAGE_READ; + if (tlb_d) { + *prot |= PAGE_WRITE; + } + if (!tlb_nx) { + *prot |= PAGE_EXEC; + } + return TLBRET_MATCH; +} + +/* + * One tlb entry holds an adjacent odd/even pair, the vpn is the + * content of the virtual page number divided by 2. So the + * compare vpn is bit[47:15] for 16KiB page. while the vppn + * field in tlb entry contains bit[47:13], so need adjust. + * virt_vpn = vaddr[47:13] + */ +static bool loongarch_tlb_search(CPULoongArchState *env, target_ulong vaddr, + int *index) +{ + LoongArchTLB *tlb; + uint16_t csr_asid, tlb_asid, stlb_idx; + uint8_t tlb_e, tlb_ps, tlb_g, stlb_ps; + int i, compare_shift; + uint64_t vpn, tlb_vppn; + + csr_asid = FIELD_EX64(env->CSR_ASID, CSR_ASID, ASID); + stlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS); + vpn = (vaddr & TARGET_VIRT_MASK) >> (stlb_ps + 1); + stlb_idx = vpn & 0xff; /* VA[25:15] <==> TLBIDX.index for 16KiB Page */ + compare_shift = stlb_ps + 1 - R_TLB_MISC_VPPN_SHIFT; + + /* Search STLB */ + for (i = 0; i < 8; ++i) { + tlb = &env->tlb[i * 256 + stlb_idx]; + tlb_e = FIELD_EX64(tlb->tlb_misc, TLB_MISC, E); + if (tlb_e) { + tlb_vppn = FIELD_EX64(tlb->tlb_misc, TLB_MISC, VPPN); + tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID); + tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G); + + if ((tlb_g == 1 || tlb_asid == csr_asid) && + (vpn == (tlb_vppn >> compare_shift))) { + *index = i * 256 + stlb_idx; + return true; + } + } + } + + /* Search MTLB */ + for (i = LOONGARCH_STLB; i < LOONGARCH_TLB_MAX; ++i) { + tlb = &env->tlb[i]; + tlb_e = FIELD_EX64(tlb->tlb_misc, TLB_MISC, E); + if (tlb_e) { + tlb_vppn = FIELD_EX64(tlb->tlb_misc, TLB_MISC, VPPN); + tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS); + tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID); + tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G); + compare_shift = tlb_ps + 1 - R_TLB_MISC_VPPN_SHIFT; + vpn = (vaddr & TARGET_VIRT_MASK) >> (tlb_ps + 1); + if ((tlb_g == 1 || tlb_asid == csr_asid) && + (vpn == (tlb_vppn >> compare_shift))) { + *index = i; + return true; + } + } + } + return false; +} + +static int loongarch_map_address(CPULoongArchState *env, hwaddr *physical, + int *prot, target_ulong address, + MMUAccessType access_type, int mmu_idx) +{ + int index, match; + + match = loongarch_tlb_search(env, address, &index); + if (match) { + return loongarch_map_tlb_entry(env, physical, prot, + address, access_type, index, mmu_idx); + } + 
+ return TLBRET_NOMATCH; +} + +static hwaddr dmw_va2pa(CPULoongArchState *env, target_ulong va, + target_ulong dmw) +{ + if (is_la64(env)) { + return va & TARGET_VIRT_MASK; + } else { + uint32_t pseg = FIELD_EX32(dmw, CSR_DMW_32, PSEG); + return (va & MAKE_64BIT_MASK(0, R_CSR_DMW_32_VSEG_SHIFT)) | \ + (pseg << R_CSR_DMW_32_VSEG_SHIFT); + } +} + +static int get_physical_address(CPULoongArchState *env, hwaddr *physical, + int *prot, target_ulong address, + MMUAccessType access_type, int mmu_idx) +{ + int user_mode = mmu_idx == MMU_IDX_USER; + int kernel_mode = mmu_idx == MMU_IDX_KERNEL; + uint32_t plv, base_c, base_v; + int64_t addr_high; + uint8_t da = FIELD_EX64(env->CSR_CRMD, CSR_CRMD, DA); + uint8_t pg = FIELD_EX64(env->CSR_CRMD, CSR_CRMD, PG); + + /* Check PG and DA */ + if (da & !pg) { + *physical = address & TARGET_PHYS_MASK; + *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; + return TLBRET_MATCH; + } + + plv = kernel_mode | (user_mode << R_CSR_DMW_PLV3_SHIFT); + if (is_la64(env)) { + base_v = address >> R_CSR_DMW_64_VSEG_SHIFT; + } else { + base_v = address >> R_CSR_DMW_32_VSEG_SHIFT; + } + /* Check direct map window */ + for (int i = 0; i < 4; i++) { + if (is_la64(env)) { + base_c = FIELD_EX64(env->CSR_DMW[i], CSR_DMW_64, VSEG); + } else { + base_c = FIELD_EX64(env->CSR_DMW[i], CSR_DMW_32, VSEG); + } + if ((plv & env->CSR_DMW[i]) && (base_c == base_v)) { + *physical = dmw_va2pa(env, address, env->CSR_DMW[i]); + *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; + return TLBRET_MATCH; + } + } + + /* Check valid extension */ + addr_high = sextract64(address, TARGET_VIRT_ADDR_SPACE_BITS, 16); + if (!(addr_high == 0 || addr_high == -1)) { + return TLBRET_BADADDR; + } + + /* Mapped address */ + return loongarch_map_address(env, physical, prot, address, + access_type, mmu_idx); +} + +hwaddr loongarch_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) +{ + LoongArchCPU *cpu = LOONGARCH_CPU(cs); + CPULoongArchState *env = &cpu->env; + hwaddr phys_addr; + int prot; + + if (get_physical_address(env, &phys_addr, &prot, addr, MMU_DATA_LOAD, + cpu_mmu_index(env, false)) != 0) { + return -1; + } + return phys_addr; +} + +static void raise_mmu_exception(CPULoongArchState *env, target_ulong address, + MMUAccessType access_type, int tlb_error) +{ + CPUState *cs = env_cpu(env); + + switch (tlb_error) { + default: + case TLBRET_BADADDR: + cs->exception_index = access_type == MMU_INST_FETCH + ? 
EXCCODE_ADEF : EXCCODE_ADEM; + break; + case TLBRET_NOMATCH: + /* No TLB match for a mapped address */ + if (access_type == MMU_DATA_LOAD) { + cs->exception_index = EXCCODE_PIL; + } else if (access_type == MMU_DATA_STORE) { + cs->exception_index = EXCCODE_PIS; + } else if (access_type == MMU_INST_FETCH) { + cs->exception_index = EXCCODE_PIF; + } + env->CSR_TLBRERA = FIELD_DP64(env->CSR_TLBRERA, CSR_TLBRERA, ISTLBR, 1); + break; + case TLBRET_INVALID: + /* TLB match with no valid bit */ + if (access_type == MMU_DATA_LOAD) { + cs->exception_index = EXCCODE_PIL; + } else if (access_type == MMU_DATA_STORE) { + cs->exception_index = EXCCODE_PIS; + } else if (access_type == MMU_INST_FETCH) { + cs->exception_index = EXCCODE_PIF; + } + break; + case TLBRET_DIRTY: + /* TLB match but 'D' bit is cleared */ + cs->exception_index = EXCCODE_PME; + break; + case TLBRET_XI: + /* Execute-Inhibit Exception */ + cs->exception_index = EXCCODE_PNX; + break; + case TLBRET_RI: + /* Read-Inhibit Exception */ + cs->exception_index = EXCCODE_PNR; + break; + case TLBRET_PE: + /* Privileged Exception */ + cs->exception_index = EXCCODE_PPI; + break; + } + + if (tlb_error == TLBRET_NOMATCH) { + env->CSR_TLBRBADV = address; + if (is_la64(env)) { + env->CSR_TLBREHI = FIELD_DP64(env->CSR_TLBREHI, CSR_TLBREHI_64, + VPPN, extract64(address, 13, 35)); + } else { + env->CSR_TLBREHI = FIELD_DP64(env->CSR_TLBREHI, CSR_TLBREHI_32, + VPPN, extract64(address, 13, 19)); + } + } else { + if (!FIELD_EX64(env->CSR_DBG, CSR_DBG, DST)) { + env->CSR_BADV = address; + } + env->CSR_TLBEHI = address & (TARGET_PAGE_MASK << 1); + } +} + +static void invalidate_tlb_entry(CPULoongArchState *env, int index) +{ + target_ulong addr, mask, pagesize; + uint8_t tlb_ps; + LoongArchTLB *tlb = &env->tlb[index]; + + int mmu_idx = cpu_mmu_index(env, false); + uint8_t tlb_v0 = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, V); + uint8_t tlb_v1 = FIELD_EX64(tlb->tlb_entry1, TLBENTRY, V); + uint64_t tlb_vppn = FIELD_EX64(tlb->tlb_misc, TLB_MISC, VPPN); + + if (index >= LOONGARCH_STLB) { + tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS); + } else { + tlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS); + } + pagesize = MAKE_64BIT_MASK(tlb_ps, 1); + mask = MAKE_64BIT_MASK(0, tlb_ps + 1); + + if (tlb_v0) { + addr = (tlb_vppn << R_TLB_MISC_VPPN_SHIFT) & ~mask; /* even */ + tlb_flush_range_by_mmuidx(env_cpu(env), addr, pagesize, + mmu_idx, TARGET_LONG_BITS); + } + + if (tlb_v1) { + addr = (tlb_vppn << R_TLB_MISC_VPPN_SHIFT) & pagesize; /* odd */ + tlb_flush_range_by_mmuidx(env_cpu(env), addr, pagesize, + mmu_idx, TARGET_LONG_BITS); + } +} + +static void invalidate_tlb(CPULoongArchState *env, int index) +{ + LoongArchTLB *tlb; + uint16_t csr_asid, tlb_asid, tlb_g; + + csr_asid = FIELD_EX64(env->CSR_ASID, CSR_ASID, ASID); + tlb = &env->tlb[index]; + tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID); + tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G); + if (tlb_g == 0 && tlb_asid != csr_asid) { + return; + } + invalidate_tlb_entry(env, index); +} + +static void fill_tlb_entry(CPULoongArchState *env, int index) +{ + LoongArchTLB *tlb = &env->tlb[index]; + uint64_t lo0, lo1, csr_vppn; + uint16_t csr_asid; + uint8_t csr_ps; + + if (FIELD_EX64(env->CSR_TLBRERA, CSR_TLBRERA, ISTLBR)) { + csr_ps = FIELD_EX64(env->CSR_TLBREHI, CSR_TLBREHI, PS); + if (is_la64(env)) { + csr_vppn = FIELD_EX64(env->CSR_TLBREHI, CSR_TLBREHI_64, VPPN); + } else { + csr_vppn = FIELD_EX64(env->CSR_TLBREHI, CSR_TLBREHI_32, VPPN); + } + lo0 = env->CSR_TLBRELO0; + lo1 = env->CSR_TLBRELO1; + } else { + 
csr_ps = FIELD_EX64(env->CSR_TLBIDX, CSR_TLBIDX, PS); + if (is_la64(env)) { + csr_vppn = FIELD_EX64(env->CSR_TLBEHI, CSR_TLBEHI_64, VPPN); + } else { + csr_vppn = FIELD_EX64(env->CSR_TLBEHI, CSR_TLBEHI_32, VPPN); + } + lo0 = env->CSR_TLBELO0; + lo1 = env->CSR_TLBELO1; + } + + if (csr_ps == 0) { + qemu_log_mask(CPU_LOG_MMU, "page size is 0\n"); + } + + /* Only MTLB has the ps fields */ + if (index >= LOONGARCH_STLB) { + tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, PS, csr_ps); + } + + tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, VPPN, csr_vppn); + tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, E, 1); + csr_asid = FIELD_EX64(env->CSR_ASID, CSR_ASID, ASID); + tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, ASID, csr_asid); + + tlb->tlb_entry0 = lo0; + tlb->tlb_entry1 = lo1; +} + +/* Return an random value between low and high */ +static uint32_t get_random_tlb(uint32_t low, uint32_t high) +{ + uint32_t val; + + qemu_guest_getrandom_nofail(&val, sizeof(val)); + return val % (high - low + 1) + low; +} + +void helper_tlbsrch(CPULoongArchState *env) +{ + int index, match; + + if (FIELD_EX64(env->CSR_TLBRERA, CSR_TLBRERA, ISTLBR)) { + match = loongarch_tlb_search(env, env->CSR_TLBREHI, &index); + } else { + match = loongarch_tlb_search(env, env->CSR_TLBEHI, &index); + } + + if (match) { + env->CSR_TLBIDX = FIELD_DP64(env->CSR_TLBIDX, CSR_TLBIDX, INDEX, index); + env->CSR_TLBIDX = FIELD_DP64(env->CSR_TLBIDX, CSR_TLBIDX, NE, 0); + return; + } + + env->CSR_TLBIDX = FIELD_DP64(env->CSR_TLBIDX, CSR_TLBIDX, NE, 1); +} + +void helper_tlbrd(CPULoongArchState *env) +{ + LoongArchTLB *tlb; + int index; + uint8_t tlb_ps, tlb_e; + + index = FIELD_EX64(env->CSR_TLBIDX, CSR_TLBIDX, INDEX); + tlb = &env->tlb[index]; + + if (index >= LOONGARCH_STLB) { + tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS); + } else { + tlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS); + } + tlb_e = FIELD_EX64(tlb->tlb_misc, TLB_MISC, E); + + if (!tlb_e) { + /* Invalid TLB entry */ + env->CSR_TLBIDX = FIELD_DP64(env->CSR_TLBIDX, CSR_TLBIDX, NE, 1); + env->CSR_ASID = FIELD_DP64(env->CSR_ASID, CSR_ASID, ASID, 0); + env->CSR_TLBEHI = 0; + env->CSR_TLBELO0 = 0; + env->CSR_TLBELO1 = 0; + env->CSR_TLBIDX = FIELD_DP64(env->CSR_TLBIDX, CSR_TLBIDX, PS, 0); + } else { + /* Valid TLB entry */ + env->CSR_TLBIDX = FIELD_DP64(env->CSR_TLBIDX, CSR_TLBIDX, NE, 0); + env->CSR_TLBIDX = FIELD_DP64(env->CSR_TLBIDX, CSR_TLBIDX, + PS, (tlb_ps & 0x3f)); + env->CSR_TLBEHI = FIELD_EX64(tlb->tlb_misc, TLB_MISC, VPPN) << + R_TLB_MISC_VPPN_SHIFT; + env->CSR_TLBELO0 = tlb->tlb_entry0; + env->CSR_TLBELO1 = tlb->tlb_entry1; + } +} + +void helper_tlbwr(CPULoongArchState *env) +{ + int index = FIELD_EX64(env->CSR_TLBIDX, CSR_TLBIDX, INDEX); + + invalidate_tlb(env, index); + + if (FIELD_EX64(env->CSR_TLBIDX, CSR_TLBIDX, NE)) { + env->tlb[index].tlb_misc = FIELD_DP64(env->tlb[index].tlb_misc, + TLB_MISC, E, 0); + return; + } + + fill_tlb_entry(env, index); +} + +void helper_tlbfill(CPULoongArchState *env) +{ + uint64_t address, entryhi; + int index, set, stlb_idx; + uint16_t pagesize, stlb_ps; + + if (FIELD_EX64(env->CSR_TLBRERA, CSR_TLBRERA, ISTLBR)) { + entryhi = env->CSR_TLBREHI; + pagesize = FIELD_EX64(env->CSR_TLBREHI, CSR_TLBREHI, PS); + } else { + entryhi = env->CSR_TLBEHI; + pagesize = FIELD_EX64(env->CSR_TLBIDX, CSR_TLBIDX, PS); + } + + stlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS); + + if (pagesize == stlb_ps) { + /* Only write into STLB bits [47:13] */ + address = entryhi & ~MAKE_64BIT_MASK(0, R_CSR_TLBEHI_64_VPPN_SHIFT); + 
+ /* Choose one set ramdomly */ + set = get_random_tlb(0, 7); + + /* Index in one set */ + stlb_idx = (address >> (stlb_ps + 1)) & 0xff; /* [0,255] */ + + index = set * 256 + stlb_idx; + } else { + /* Only write into MTLB */ + index = get_random_tlb(LOONGARCH_STLB, LOONGARCH_TLB_MAX - 1); + } + + invalidate_tlb(env, index); + fill_tlb_entry(env, index); +} + +void helper_tlbclr(CPULoongArchState *env) +{ + LoongArchTLB *tlb; + int i, index; + uint16_t csr_asid, tlb_asid, tlb_g; + + csr_asid = FIELD_EX64(env->CSR_ASID, CSR_ASID, ASID); + index = FIELD_EX64(env->CSR_TLBIDX, CSR_TLBIDX, INDEX); + + if (index < LOONGARCH_STLB) { + /* STLB. One line per operation */ + for (i = 0; i < 8; i++) { + tlb = &env->tlb[i * 256 + (index % 256)]; + tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID); + tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G); + if (!tlb_g && tlb_asid == csr_asid) { + tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, E, 0); + } + } + } else if (index < LOONGARCH_TLB_MAX) { + /* All MTLB entries */ + for (i = LOONGARCH_STLB; i < LOONGARCH_TLB_MAX; i++) { + tlb = &env->tlb[i]; + tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID); + tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G); + if (!tlb_g && tlb_asid == csr_asid) { + tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, E, 0); + } + } + } + + tlb_flush(env_cpu(env)); +} + +void helper_tlbflush(CPULoongArchState *env) +{ + int i, index; + + index = FIELD_EX64(env->CSR_TLBIDX, CSR_TLBIDX, INDEX); + + if (index < LOONGARCH_STLB) { + /* STLB. One line per operation */ + for (i = 0; i < 8; i++) { + int s_idx = i * 256 + (index % 256); + env->tlb[s_idx].tlb_misc = FIELD_DP64(env->tlb[s_idx].tlb_misc, + TLB_MISC, E, 0); + } + } else if (index < LOONGARCH_TLB_MAX) { + /* All MTLB entries */ + for (i = LOONGARCH_STLB; i < LOONGARCH_TLB_MAX; i++) { + env->tlb[i].tlb_misc = FIELD_DP64(env->tlb[i].tlb_misc, + TLB_MISC, E, 0); + } + } + + tlb_flush(env_cpu(env)); +} + +void helper_invtlb_all(CPULoongArchState *env) +{ + for (int i = 0; i < LOONGARCH_TLB_MAX; i++) { + env->tlb[i].tlb_misc = FIELD_DP64(env->tlb[i].tlb_misc, + TLB_MISC, E, 0); + } + tlb_flush(env_cpu(env)); +} + +void helper_invtlb_all_g(CPULoongArchState *env, uint32_t g) +{ + for (int i = 0; i < LOONGARCH_TLB_MAX; i++) { + LoongArchTLB *tlb = &env->tlb[i]; + uint8_t tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G); + + if (tlb_g == g) { + tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, E, 0); + } + } + tlb_flush(env_cpu(env)); +} + +void helper_invtlb_all_asid(CPULoongArchState *env, target_ulong info) +{ + uint16_t asid = info & R_CSR_ASID_ASID_MASK; + + for (int i = 0; i < LOONGARCH_TLB_MAX; i++) { + LoongArchTLB *tlb = &env->tlb[i]; + uint8_t tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G); + uint16_t tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID); + + if (!tlb_g && (tlb_asid == asid)) { + tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, E, 0); + } + } + tlb_flush(env_cpu(env)); +} + +void helper_invtlb_page_asid(CPULoongArchState *env, target_ulong info, + target_ulong addr) +{ + uint16_t asid = info & 0x3ff; + + for (int i = 0; i < LOONGARCH_TLB_MAX; i++) { + LoongArchTLB *tlb = &env->tlb[i]; + uint8_t tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G); + uint16_t tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID); + uint64_t vpn, tlb_vppn; + uint8_t tlb_ps, compare_shift; + + if (i >= LOONGARCH_STLB) { + tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS); + } else { + tlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS); + } + tlb_vppn = 
FIELD_EX64(tlb->tlb_misc, TLB_MISC, VPPN); + vpn = (addr & TARGET_VIRT_MASK) >> (tlb_ps + 1); + compare_shift = tlb_ps + 1 - R_TLB_MISC_VPPN_SHIFT; + + if (!tlb_g && (tlb_asid == asid) && + (vpn == (tlb_vppn >> compare_shift))) { + tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, E, 0); + } + } + tlb_flush(env_cpu(env)); +} + +void helper_invtlb_page_asid_or_g(CPULoongArchState *env, + target_ulong info, target_ulong addr) +{ + uint16_t asid = info & 0x3ff; + + for (int i = 0; i < LOONGARCH_TLB_MAX; i++) { + LoongArchTLB *tlb = &env->tlb[i]; + uint8_t tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G); + uint16_t tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID); + uint64_t vpn, tlb_vppn; + uint8_t tlb_ps, compare_shift; + + if (i >= LOONGARCH_STLB) { + tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS); + } else { + tlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS); + } + tlb_vppn = FIELD_EX64(tlb->tlb_misc, TLB_MISC, VPPN); + vpn = (addr & TARGET_VIRT_MASK) >> (tlb_ps + 1); + compare_shift = tlb_ps + 1 - R_TLB_MISC_VPPN_SHIFT; + + if ((tlb_g || (tlb_asid == asid)) && + (vpn == (tlb_vppn >> compare_shift))) { + tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, E, 0); + } + } + tlb_flush(env_cpu(env)); +} + +bool loongarch_cpu_tlb_fill(CPUState *cs, vaddr address, int size, + MMUAccessType access_type, int mmu_idx, + bool probe, uintptr_t retaddr) +{ + LoongArchCPU *cpu = LOONGARCH_CPU(cs); + CPULoongArchState *env = &cpu->env; + hwaddr physical; + int prot; + int ret; + + /* Data access */ + ret = get_physical_address(env, &physical, &prot, address, + access_type, mmu_idx); + + if (ret == TLBRET_MATCH) { + tlb_set_page(cs, address & TARGET_PAGE_MASK, + physical & TARGET_PAGE_MASK, prot, + mmu_idx, TARGET_PAGE_SIZE); + qemu_log_mask(CPU_LOG_MMU, + "%s address=%" VADDR_PRIx " physical " HWADDR_FMT_plx + " prot %d\n", __func__, address, physical, prot); + return true; + } else { + qemu_log_mask(CPU_LOG_MMU, + "%s address=%" VADDR_PRIx " ret %d\n", __func__, address, + ret); + } + if (probe) { + return false; + } + raise_mmu_exception(env, address, access_type, ret); + cpu_loop_exit_restore(cs, retaddr); +} + +target_ulong helper_lddir(CPULoongArchState *env, target_ulong base, + target_ulong level, uint32_t mem_idx) +{ + CPUState *cs = env_cpu(env); + target_ulong badvaddr, index, phys, ret; + int shift; + uint64_t dir_base, dir_width; + bool huge = (base >> LOONGARCH_PAGE_HUGE_SHIFT) & 0x1; + + badvaddr = env->CSR_TLBRBADV; + base = base & TARGET_PHYS_MASK; + + /* 0:64bit, 1:128bit, 2:192bit, 3:256bit */ + shift = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, PTEWIDTH); + shift = (shift + 1) * 3; + + if (huge) { + return base; + } + switch (level) { + case 1: + dir_base = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, DIR1_BASE); + dir_width = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, DIR1_WIDTH); + break; + case 2: + dir_base = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, DIR2_BASE); + dir_width = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, DIR2_WIDTH); + break; + case 3: + dir_base = FIELD_EX64(env->CSR_PWCH, CSR_PWCH, DIR3_BASE); + dir_width = FIELD_EX64(env->CSR_PWCH, CSR_PWCH, DIR3_WIDTH); + break; + case 4: + dir_base = FIELD_EX64(env->CSR_PWCH, CSR_PWCH, DIR4_BASE); + dir_width = FIELD_EX64(env->CSR_PWCH, CSR_PWCH, DIR4_WIDTH); + break; + default: + do_raise_exception(env, EXCCODE_INE, GETPC()); + return 0; + } + index = (badvaddr >> dir_base) & ((1 << dir_width) - 1); + phys = base | index << shift; + ret = ldq_phys(cs->as, phys) & TARGET_PHYS_MASK; + return ret; +} + +void helper_ldpte(CPULoongArchState *env, 
target_ulong base, target_ulong odd, + uint32_t mem_idx) +{ + CPUState *cs = env_cpu(env); + target_ulong phys, tmp0, ptindex, ptoffset0, ptoffset1, ps, badv; + int shift; + bool huge = (base >> LOONGARCH_PAGE_HUGE_SHIFT) & 0x1; + uint64_t ptbase = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, PTBASE); + uint64_t ptwidth = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, PTWIDTH); + + base = base & TARGET_PHYS_MASK; + + if (huge) { + /* Huge Page. base is paddr */ + tmp0 = base ^ (1 << LOONGARCH_PAGE_HUGE_SHIFT); + /* Move Global bit */ + tmp0 = ((tmp0 & (1 << LOONGARCH_HGLOBAL_SHIFT)) >> + LOONGARCH_HGLOBAL_SHIFT) << R_TLBENTRY_G_SHIFT | + (tmp0 & (~(1 << LOONGARCH_HGLOBAL_SHIFT))); + ps = ptbase + ptwidth - 1; + if (odd) { + tmp0 += MAKE_64BIT_MASK(ps, 1); + } + } else { + /* 0:64bit, 1:128bit, 2:192bit, 3:256bit */ + shift = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, PTEWIDTH); + shift = (shift + 1) * 3; + badv = env->CSR_TLBRBADV; + + ptindex = (badv >> ptbase) & ((1 << ptwidth) - 1); + ptindex = ptindex & ~0x1; /* clear bit 0 */ + ptoffset0 = ptindex << shift; + ptoffset1 = (ptindex + 1) << shift; + + phys = base | (odd ? ptoffset1 : ptoffset0); + tmp0 = ldq_phys(cs->as, phys) & TARGET_PHYS_MASK; + ps = ptbase; + } + + if (odd) { + env->CSR_TLBRELO1 = tmp0; + } else { + env->CSR_TLBRELO0 = tmp0; + } + env->CSR_TLBREHI = FIELD_DP64(env->CSR_TLBREHI, CSR_TLBREHI, PS, ps); +} diff --git a/target/loongarch/tcg/translate.c b/target/loongarch/tcg/translate.c new file mode 100644 index 0000000000..21f4db6fbd --- /dev/null +++ b/target/loongarch/tcg/translate.c @@ -0,0 +1,370 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * LoongArch emulation for QEMU - main translation routines. + * + * Copyright (c) 2021 Loongson Technology Corporation Limited + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "tcg/tcg-op.h" +#include "tcg/tcg-op-gvec.h" +#include "exec/translation-block.h" +#include "exec/translator.h" +#include "exec/helper-proto.h" +#include "exec/helper-gen.h" +#include "exec/log.h" +#include "qemu/qemu-print.h" +#include "fpu/softfloat.h" +#include "translate.h" +#include "internals.h" +#include "vec.h" + +/* Global register indices */ +TCGv cpu_gpr[32], cpu_pc; +static TCGv cpu_lladdr, cpu_llval; + +#define HELPER_H "helper.h" +#include "exec/helper-info.c.inc" +#undef HELPER_H + +#define DISAS_STOP DISAS_TARGET_0 +#define DISAS_EXIT DISAS_TARGET_1 +#define DISAS_EXIT_UPDATE DISAS_TARGET_2 + +static inline int vec_full_offset(int regno) +{ + return offsetof(CPULoongArchState, fpr[regno]); +} + +static inline int vec_reg_offset(int regno, int index, MemOp mop) +{ + const uint8_t size = 1 << mop; + int offs = index * size; + + if (HOST_BIG_ENDIAN && size < 8 ) { + offs ^= (8 - size); + } + + return offs + vec_full_offset(regno); +} + +static inline void get_vreg64(TCGv_i64 dest, int regno, int index) +{ + tcg_gen_ld_i64(dest, tcg_env, + offsetof(CPULoongArchState, fpr[regno].vreg.D(index))); +} + +static inline void set_vreg64(TCGv_i64 src, int regno, int index) +{ + tcg_gen_st_i64(src, tcg_env, + offsetof(CPULoongArchState, fpr[regno].vreg.D(index))); +} + +static inline int plus_1(DisasContext *ctx, int x) +{ + return x + 1; +} + +static inline int shl_1(DisasContext *ctx, int x) +{ + return x << 1; +} + +static inline int shl_2(DisasContext *ctx, int x) +{ + return x << 2; +} + +static inline int shl_3(DisasContext *ctx, int x) +{ + return x << 3; +} + +/* + * LoongArch the upper 32 bits are undefined ("can be any value"). 
+ * QEMU chooses to nanbox, because it is most likely to show guest bugs early. + */ +static void gen_nanbox_s(TCGv_i64 out, TCGv_i64 in) +{ + tcg_gen_ori_i64(out, in, MAKE_64BIT_MASK(32, 32)); +} + +void generate_exception(DisasContext *ctx, int excp) +{ + tcg_gen_movi_tl(cpu_pc, ctx->base.pc_next); + gen_helper_raise_exception(tcg_env, tcg_constant_i32(excp)); + ctx->base.is_jmp = DISAS_NORETURN; +} + +static inline void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest) +{ + if (ctx->va32) { + dest = (uint32_t) dest; + } + + if (translator_use_goto_tb(&ctx->base, dest)) { + tcg_gen_goto_tb(n); + tcg_gen_movi_tl(cpu_pc, dest); + tcg_gen_exit_tb(ctx->base.tb, n); + } else { + tcg_gen_movi_tl(cpu_pc, dest); + tcg_gen_lookup_and_goto_ptr(); + } +} + +static void loongarch_tr_init_disas_context(DisasContextBase *dcbase, + CPUState *cs) +{ + int64_t bound; + CPULoongArchState *env = cpu_env(cs); + DisasContext *ctx = container_of(dcbase, DisasContext, base); + + ctx->page_start = ctx->base.pc_first & TARGET_PAGE_MASK; + ctx->plv = ctx->base.tb->flags & HW_FLAGS_PLV_MASK; + if (ctx->base.tb->flags & HW_FLAGS_CRMD_PG) { + ctx->mem_idx = ctx->plv; + } else { + ctx->mem_idx = MMU_IDX_DA; + } + + /* Bound the number of insns to execute to those left on the page. */ + bound = -(ctx->base.pc_first | TARGET_PAGE_MASK) / 4; + ctx->base.max_insns = MIN(ctx->base.max_insns, bound); + + if (FIELD_EX64(env->cpucfg[2], CPUCFG2, LSX)) { + ctx->vl = LSX_LEN; + } + + if (FIELD_EX64(env->cpucfg[2], CPUCFG2, LASX)) { + ctx->vl = LASX_LEN; + } + + ctx->la64 = is_la64(env); + ctx->va32 = (ctx->base.tb->flags & HW_FLAGS_VA32) != 0; + + ctx->zero = tcg_constant_tl(0); + + ctx->cpucfg1 = env->cpucfg[1]; + ctx->cpucfg2 = env->cpucfg[2]; +} + +static void loongarch_tr_tb_start(DisasContextBase *dcbase, CPUState *cs) +{ +} + +static void loongarch_tr_insn_start(DisasContextBase *dcbase, CPUState *cs) +{ + DisasContext *ctx = container_of(dcbase, DisasContext, base); + + tcg_gen_insn_start(ctx->base.pc_next); +} + +/* + * Wrappers for getting reg values. + * + * The $zero register does not have cpu_gpr[0] allocated -- we supply the + * constant zero as a source, and an uninitialized sink as destination. + * + * Further, we may provide an extension for word operations. 
+ */ +static TCGv gpr_src(DisasContext *ctx, int reg_num, DisasExtend src_ext) +{ + TCGv t; + + if (reg_num == 0) { + return ctx->zero; + } + + switch (src_ext) { + case EXT_NONE: + return cpu_gpr[reg_num]; + case EXT_SIGN: + t = tcg_temp_new(); + tcg_gen_ext32s_tl(t, cpu_gpr[reg_num]); + return t; + case EXT_ZERO: + t = tcg_temp_new(); + tcg_gen_ext32u_tl(t, cpu_gpr[reg_num]); + return t; + } + g_assert_not_reached(); +} + +static TCGv gpr_dst(DisasContext *ctx, int reg_num, DisasExtend dst_ext) +{ + if (reg_num == 0 || dst_ext) { + return tcg_temp_new(); + } + return cpu_gpr[reg_num]; +} + +static void gen_set_gpr(int reg_num, TCGv t, DisasExtend dst_ext) +{ + if (reg_num != 0) { + switch (dst_ext) { + case EXT_NONE: + tcg_gen_mov_tl(cpu_gpr[reg_num], t); + break; + case EXT_SIGN: + tcg_gen_ext32s_tl(cpu_gpr[reg_num], t); + break; + case EXT_ZERO: + tcg_gen_ext32u_tl(cpu_gpr[reg_num], t); + break; + default: + g_assert_not_reached(); + } + } +} + +static TCGv get_fpr(DisasContext *ctx, int reg_num) +{ + TCGv t = tcg_temp_new(); + tcg_gen_ld_i64(t, tcg_env, + offsetof(CPULoongArchState, fpr[reg_num].vreg.D(0))); + return t; +} + +static void set_fpr(int reg_num, TCGv val) +{ + tcg_gen_st_i64(val, tcg_env, + offsetof(CPULoongArchState, fpr[reg_num].vreg.D(0))); +} + +static TCGv make_address_x(DisasContext *ctx, TCGv base, TCGv addend) +{ + TCGv temp = NULL; + + if (addend || ctx->va32) { + temp = tcg_temp_new(); + } + if (addend) { + tcg_gen_add_tl(temp, base, addend); + base = temp; + } + if (ctx->va32) { + tcg_gen_ext32u_tl(temp, base); + base = temp; + } + return base; +} + +static TCGv make_address_i(DisasContext *ctx, TCGv base, target_long ofs) +{ + TCGv addend = ofs ? tcg_constant_tl(ofs) : NULL; + return make_address_x(ctx, base, addend); +} + +static uint64_t make_address_pc(DisasContext *ctx, uint64_t addr) +{ + if (ctx->va32) { + addr = (int32_t)addr; + } + return addr; +} + +#include "decode-insns.c.inc" +#include "insn_trans/trans_arith.c.inc" +#include "insn_trans/trans_shift.c.inc" +#include "insn_trans/trans_bit.c.inc" +#include "insn_trans/trans_memory.c.inc" +#include "insn_trans/trans_atomic.c.inc" +#include "insn_trans/trans_extra.c.inc" +#include "insn_trans/trans_farith.c.inc" +#include "insn_trans/trans_fcmp.c.inc" +#include "insn_trans/trans_fcnv.c.inc" +#include "insn_trans/trans_fmov.c.inc" +#include "insn_trans/trans_fmemory.c.inc" +#include "insn_trans/trans_branch.c.inc" +#include "insn_trans/trans_privileged.c.inc" +#include "insn_trans/trans_vec.c.inc" + +static void loongarch_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs) +{ + CPULoongArchState *env = cpu_env(cs); + DisasContext *ctx = container_of(dcbase, DisasContext, base); + + ctx->opcode = translator_ldl(env, &ctx->base, ctx->base.pc_next); + + if (!decode(ctx, ctx->opcode)) { + qemu_log_mask(LOG_UNIMP, "Error: unknown opcode. 
" + TARGET_FMT_lx ": 0x%x\n", + ctx->base.pc_next, ctx->opcode); + generate_exception(ctx, EXCCODE_INE); + } + + ctx->base.pc_next += 4; + + if (ctx->va32) { + ctx->base.pc_next = (uint32_t)ctx->base.pc_next; + } +} + +static void loongarch_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs) +{ + DisasContext *ctx = container_of(dcbase, DisasContext, base); + + switch (ctx->base.is_jmp) { + case DISAS_STOP: + tcg_gen_movi_tl(cpu_pc, ctx->base.pc_next); + tcg_gen_lookup_and_goto_ptr(); + break; + case DISAS_TOO_MANY: + gen_goto_tb(ctx, 0, ctx->base.pc_next); + break; + case DISAS_NORETURN: + break; + case DISAS_EXIT_UPDATE: + tcg_gen_movi_tl(cpu_pc, ctx->base.pc_next); + QEMU_FALLTHROUGH; + case DISAS_EXIT: + tcg_gen_exit_tb(NULL, 0); + break; + default: + g_assert_not_reached(); + } +} + +static void loongarch_tr_disas_log(const DisasContextBase *dcbase, + CPUState *cpu, FILE *logfile) +{ + qemu_log("IN: %s\n", lookup_symbol(dcbase->pc_first)); + target_disas(logfile, cpu, dcbase->pc_first, dcbase->tb->size); +} + +static const TranslatorOps loongarch_tr_ops = { + .init_disas_context = loongarch_tr_init_disas_context, + .tb_start = loongarch_tr_tb_start, + .insn_start = loongarch_tr_insn_start, + .translate_insn = loongarch_tr_translate_insn, + .tb_stop = loongarch_tr_tb_stop, + .disas_log = loongarch_tr_disas_log, +}; + +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns, + target_ulong pc, void *host_pc) +{ + DisasContext ctx; + + translator_loop(cs, tb, max_insns, pc, host_pc, + &loongarch_tr_ops, &ctx.base); +} + +void loongarch_translate_init(void) +{ + int i; + + cpu_gpr[0] = NULL; + for (i = 1; i < 32; i++) { + cpu_gpr[i] = tcg_global_mem_new(tcg_env, + offsetof(CPULoongArchState, gpr[i]), + regnames[i]); + } + + cpu_pc = tcg_global_mem_new(tcg_env, offsetof(CPULoongArchState, pc), "pc"); + cpu_lladdr = tcg_global_mem_new(tcg_env, + offsetof(CPULoongArchState, lladdr), "lladdr"); + cpu_llval = tcg_global_mem_new(tcg_env, + offsetof(CPULoongArchState, llval), "llval"); +} diff --git a/target/loongarch/tcg/vec_helper.c b/target/loongarch/tcg/vec_helper.c new file mode 100644 index 0000000000..3faf52cbc4 --- /dev/null +++ b/target/loongarch/tcg/vec_helper.c @@ -0,0 +1,3494 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * QEMU LoongArch vector helper functions. 
+ * + * Copyright (c) 2022-2023 Loongson Technology Corporation Limited + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "exec/exec-all.h" +#include "exec/helper-proto.h" +#include "fpu/softfloat.h" +#include "internals.h" +#include "tcg/tcg.h" +#include "vec.h" +#include "tcg/tcg-gvec-desc.h" + +#define DO_ODD_EVEN(NAME, BIT, E1, E2, DO_OP) \ +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ +{ \ + int i; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + VReg *Vk = (VReg *)vk; \ + typedef __typeof(Vd->E1(0)) TD; \ + int oprsz = simd_oprsz(desc); \ + \ + for (i = 0; i < oprsz / (BIT / 8); i++) { \ + Vd->E1(i) = DO_OP((TD)Vj->E2(2 * i + 1), (TD)Vk->E2(2 * i)); \ + } \ +} + +DO_ODD_EVEN(vhaddw_h_b, 16, H, B, DO_ADD) +DO_ODD_EVEN(vhaddw_w_h, 32, W, H, DO_ADD) +DO_ODD_EVEN(vhaddw_d_w, 64, D, W, DO_ADD) + +void HELPER(vhaddw_q_d)(void *vd, void *vj, void *vk, uint32_t desc) +{ + int i; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + VReg *Vk = (VReg *)vk; + int oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz / 16 ; i++) { + Vd->Q(i) = int128_add(int128_makes64(Vj->D(2 * i + 1)), + int128_makes64(Vk->D(2 * i))); + } +} + +DO_ODD_EVEN(vhsubw_h_b, 16, H, B, DO_SUB) +DO_ODD_EVEN(vhsubw_w_h, 32, W, H, DO_SUB) +DO_ODD_EVEN(vhsubw_d_w, 64, D, W, DO_SUB) + +void HELPER(vhsubw_q_d)(void *vd, void *vj, void *vk, uint32_t desc) +{ + int i; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + VReg *Vk = (VReg *)vk; + int oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz / 16; i++) { + Vd->Q(i) = int128_sub(int128_makes64(Vj->D(2 * i + 1)), + int128_makes64(Vk->D(2 * i))); + } +} + +DO_ODD_EVEN(vhaddw_hu_bu, 16, UH, UB, DO_ADD) +DO_ODD_EVEN(vhaddw_wu_hu, 32, UW, UH, DO_ADD) +DO_ODD_EVEN(vhaddw_du_wu, 64, UD, UW, DO_ADD) + +void HELPER(vhaddw_qu_du)(void *vd, void *vj, void *vk, uint32_t desc) +{ + int i; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + VReg *Vk = (VReg *)vk; + int oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz / 16; i ++) { + Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i + 1)), + int128_make64(Vk->UD(2 * i))); + } +} + +DO_ODD_EVEN(vhsubw_hu_bu, 16, UH, UB, DO_SUB) +DO_ODD_EVEN(vhsubw_wu_hu, 32, UW, UH, DO_SUB) +DO_ODD_EVEN(vhsubw_du_wu, 64, UD, UW, DO_SUB) + +void HELPER(vhsubw_qu_du)(void *vd, void *vj, void *vk, uint32_t desc) +{ + int i; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + VReg *Vk = (VReg *)vk; + int oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz / 16; i++) { + Vd->Q(i) = int128_sub(int128_make64(Vj->UD(2 * i + 1)), + int128_make64(Vk->UD(2 * i))); + } +} + +#define DO_EVEN(NAME, BIT, E1, E2, DO_OP) \ +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ +{ \ + int i; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + VReg *Vk = (VReg *)vk; \ + typedef __typeof(Vd->E1(0)) TD; \ + int oprsz = simd_oprsz(desc); \ + \ + for (i = 0; i < oprsz / (BIT / 8); i++) { \ + Vd->E1(i) = DO_OP((TD)Vj->E2(2 * i) ,(TD)Vk->E2(2 * i)); \ + } \ +} + +#define DO_ODD(NAME, BIT, E1, E2, DO_OP) \ +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ +{ \ + int i; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + VReg *Vk = (VReg *)vk; \ + typedef __typeof(Vd->E1(0)) TD; \ + int oprsz = simd_oprsz(desc); \ + \ + for (i = 0; i < oprsz / (BIT / 8); i++) { \ + Vd->E1(i) = DO_OP((TD)Vj->E2(2 * i + 1), (TD)Vk->E2(2 * i + 1)); \ + } \ +} + +void HELPER(vaddwev_q_d)(void *vd, void *vj, void *vk, uint32_t desc) +{ + int i; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + VReg *Vk = (VReg *)vk; + int 
oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz / 16; i++) { + Vd->Q(i) = int128_add(int128_makes64(Vj->D(2 * i)), + int128_makes64(Vk->D(2 * i))); + } +} + +DO_EVEN(vaddwev_h_b, 16, H, B, DO_ADD) +DO_EVEN(vaddwev_w_h, 32, W, H, DO_ADD) +DO_EVEN(vaddwev_d_w, 64, D, W, DO_ADD) + +void HELPER(vaddwod_q_d)(void *vd, void *vj, void *vk, uint32_t desc) +{ + int i; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + VReg *Vk = (VReg *)vk; + int oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz / 16; i++) { + Vd->Q(i) = int128_add(int128_makes64(Vj->D(2 * i +1)), + int128_makes64(Vk->D(2 * i +1))); + } +} + +DO_ODD(vaddwod_h_b, 16, H, B, DO_ADD) +DO_ODD(vaddwod_w_h, 32, W, H, DO_ADD) +DO_ODD(vaddwod_d_w, 64, D, W, DO_ADD) + +void HELPER(vsubwev_q_d)(void *vd, void *vj, void *vk, uint32_t desc) +{ + int i; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + VReg *Vk = (VReg *)vk; + int oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz / 16; i++) { + Vd->Q(i) = int128_sub(int128_makes64(Vj->D(2 * i)), + int128_makes64(Vk->D(2 * i))); + } +} + +DO_EVEN(vsubwev_h_b, 16, H, B, DO_SUB) +DO_EVEN(vsubwev_w_h, 32, W, H, DO_SUB) +DO_EVEN(vsubwev_d_w, 64, D, W, DO_SUB) + +void HELPER(vsubwod_q_d)(void *vd, void *vj, void *vk, uint32_t desc) +{ + int i; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + VReg *Vk = (VReg *)vk; + int oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz / 16; i++) { + Vd->Q(i) = int128_sub(int128_makes64(Vj->D(2 * i + 1)), + int128_makes64(Vk->D(2 * i + 1))); + } +} + +DO_ODD(vsubwod_h_b, 16, H, B, DO_SUB) +DO_ODD(vsubwod_w_h, 32, W, H, DO_SUB) +DO_ODD(vsubwod_d_w, 64, D, W, DO_SUB) + +void HELPER(vaddwev_q_du)(void *vd, void *vj, void *vk, uint32_t desc) +{ + int i; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + VReg *Vk = (VReg *)vk; + int oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz / 16; i++) { + Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i)), + int128_make64(Vk->UD(2 * i))); + } +} + +DO_EVEN(vaddwev_h_bu, 16, UH, UB, DO_ADD) +DO_EVEN(vaddwev_w_hu, 32, UW, UH, DO_ADD) +DO_EVEN(vaddwev_d_wu, 64, UD, UW, DO_ADD) + +void HELPER(vaddwod_q_du)(void *vd, void *vj, void *vk, uint32_t desc) +{ + int i; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + VReg *Vk = (VReg *)vk; + int oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz / 16; i++) { + Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i + 1)), + int128_make64(Vk->UD(2 * i + 1))); + } +} + +DO_ODD(vaddwod_h_bu, 16, UH, UB, DO_ADD) +DO_ODD(vaddwod_w_hu, 32, UW, UH, DO_ADD) +DO_ODD(vaddwod_d_wu, 64, UD, UW, DO_ADD) + +void HELPER(vsubwev_q_du)(void *vd, void *vj, void *vk, uint32_t desc) +{ + int i; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + VReg *Vk = (VReg *)vk; + int oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz / 16; i++) { + Vd->Q(i) = int128_sub(int128_make64(Vj->UD(2 * i)), + int128_make64(Vk->UD(2 * i))); + } +} + +DO_EVEN(vsubwev_h_bu, 16, UH, UB, DO_SUB) +DO_EVEN(vsubwev_w_hu, 32, UW, UH, DO_SUB) +DO_EVEN(vsubwev_d_wu, 64, UD, UW, DO_SUB) + +void HELPER(vsubwod_q_du)(void *vd, void *vj, void *vk, uint32_t desc) +{ + int i; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + VReg *Vk = (VReg *)vk; + int oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz / 16; i++) { + Vd->Q(i) = int128_sub(int128_make64(Vj->UD(2 * i + 1)), + int128_make64(Vk->UD(2 * i + 1))); + } +} + +DO_ODD(vsubwod_h_bu, 16, UH, UB, DO_SUB) +DO_ODD(vsubwod_w_hu, 32, UW, UH, DO_SUB) +DO_ODD(vsubwod_d_wu, 64, UD, UW, DO_SUB) + +#define DO_EVEN_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \ +void HELPER(NAME)(void 
*vd, void *vj, void *vk, uint32_t desc) \ +{ \ + int i; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + VReg *Vk = (VReg *)vk; \ + typedef __typeof(Vd->ES1(0)) TDS; \ + typedef __typeof(Vd->EU1(0)) TDU; \ + int oprsz = simd_oprsz(desc); \ + \ + for (i = 0; i < oprsz / (BIT / 8); i++) { \ + Vd->ES1(i) = DO_OP((TDU)Vj->EU2(2 * i) ,(TDS)Vk->ES2(2 * i)); \ + } \ +} + +#define DO_ODD_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \ +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ +{ \ + int i; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + VReg *Vk = (VReg *)vk; \ + typedef __typeof(Vd->ES1(0)) TDS; \ + typedef __typeof(Vd->EU1(0)) TDU; \ + int oprsz = simd_oprsz(desc); \ + \ + for (i = 0; i < oprsz / (BIT / 8); i++) { \ + Vd->ES1(i) = DO_OP((TDU)Vj->EU2(2 * i + 1), (TDS)Vk->ES2(2 * i + 1)); \ + } \ +} + +void HELPER(vaddwev_q_du_d)(void *vd, void *vj, void *vk, uint32_t desc) +{ + int i; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + VReg *Vk = (VReg *)vk; + int oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz / 16; i++) { + Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i)), + int128_makes64(Vk->D(2 * i))); + } +} + +DO_EVEN_U_S(vaddwev_h_bu_b, 16, H, UH, B, UB, DO_ADD) +DO_EVEN_U_S(vaddwev_w_hu_h, 32, W, UW, H, UH, DO_ADD) +DO_EVEN_U_S(vaddwev_d_wu_w, 64, D, UD, W, UW, DO_ADD) + +void HELPER(vaddwod_q_du_d)(void *vd, void *vj, void *vk, uint32_t desc) +{ + int i; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + VReg *Vk = (VReg *)vk; + int oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz / 16; i++) { + Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i + 1)), + int128_makes64(Vk->D(2 * i + 1))); + } +} + +DO_ODD_U_S(vaddwod_h_bu_b, 16, H, UH, B, UB, DO_ADD) +DO_ODD_U_S(vaddwod_w_hu_h, 32, W, UW, H, UH, DO_ADD) +DO_ODD_U_S(vaddwod_d_wu_w, 64, D, UD, W, UW, DO_ADD) + +#define DO_3OP(NAME, BIT, E, DO_OP) \ +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ +{ \ + int i; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + VReg *Vk = (VReg *)vk; \ + int oprsz = simd_oprsz(desc); \ + \ + for (i = 0; i < oprsz / (BIT / 8); i++) { \ + Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)); \ + } \ +} + +DO_3OP(vavg_b, 8, B, DO_VAVG) +DO_3OP(vavg_h, 16, H, DO_VAVG) +DO_3OP(vavg_w, 32, W, DO_VAVG) +DO_3OP(vavg_d, 64, D, DO_VAVG) +DO_3OP(vavgr_b, 8, B, DO_VAVGR) +DO_3OP(vavgr_h, 16, H, DO_VAVGR) +DO_3OP(vavgr_w, 32, W, DO_VAVGR) +DO_3OP(vavgr_d, 64, D, DO_VAVGR) +DO_3OP(vavg_bu, 8, UB, DO_VAVG) +DO_3OP(vavg_hu, 16, UH, DO_VAVG) +DO_3OP(vavg_wu, 32, UW, DO_VAVG) +DO_3OP(vavg_du, 64, UD, DO_VAVG) +DO_3OP(vavgr_bu, 8, UB, DO_VAVGR) +DO_3OP(vavgr_hu, 16, UH, DO_VAVGR) +DO_3OP(vavgr_wu, 32, UW, DO_VAVGR) +DO_3OP(vavgr_du, 64, UD, DO_VAVGR) + +DO_3OP(vabsd_b, 8, B, DO_VABSD) +DO_3OP(vabsd_h, 16, H, DO_VABSD) +DO_3OP(vabsd_w, 32, W, DO_VABSD) +DO_3OP(vabsd_d, 64, D, DO_VABSD) +DO_3OP(vabsd_bu, 8, UB, DO_VABSD) +DO_3OP(vabsd_hu, 16, UH, DO_VABSD) +DO_3OP(vabsd_wu, 32, UW, DO_VABSD) +DO_3OP(vabsd_du, 64, UD, DO_VABSD) + +#define DO_VADDA(NAME, BIT, E) \ +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ +{ \ + int i; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + VReg *Vk = (VReg *)vk; \ + int oprsz = simd_oprsz(desc); \ + \ + for (i = 0; i < oprsz / (BIT / 8); i++) { \ + Vd->E(i) = DO_VABS(Vj->E(i)) + DO_VABS(Vk->E(i)); \ + } \ +} + +DO_VADDA(vadda_b, 8, B) +DO_VADDA(vadda_h, 16, H) +DO_VADDA(vadda_w, 32, W) +DO_VADDA(vadda_d, 64, D) + +#define VMINMAXI(NAME, BIT, E, DO_OP) \ +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t 
desc) \ +{ \ + int i; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + typedef __typeof(Vd->E(0)) TD; \ + int oprsz = simd_oprsz(desc); \ + \ + for (i = 0; i < oprsz / (BIT / 8); i++) { \ + Vd->E(i) = DO_OP(Vj->E(i), (TD)imm); \ + } \ +} + +VMINMAXI(vmini_b, 8, B, DO_MIN) +VMINMAXI(vmini_h, 16, H, DO_MIN) +VMINMAXI(vmini_w, 32, W, DO_MIN) +VMINMAXI(vmini_d, 64, D, DO_MIN) +VMINMAXI(vmaxi_b, 8, B, DO_MAX) +VMINMAXI(vmaxi_h, 16, H, DO_MAX) +VMINMAXI(vmaxi_w, 32, W, DO_MAX) +VMINMAXI(vmaxi_d, 64, D, DO_MAX) +VMINMAXI(vmini_bu, 8, UB, DO_MIN) +VMINMAXI(vmini_hu, 16, UH, DO_MIN) +VMINMAXI(vmini_wu, 32, UW, DO_MIN) +VMINMAXI(vmini_du, 64, UD, DO_MIN) +VMINMAXI(vmaxi_bu, 8, UB, DO_MAX) +VMINMAXI(vmaxi_hu, 16, UH, DO_MAX) +VMINMAXI(vmaxi_wu, 32, UW, DO_MAX) +VMINMAXI(vmaxi_du, 64, UD, DO_MAX) + +#define DO_VMUH(NAME, BIT, E1, E2, DO_OP) \ +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ +{ \ + int i; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + VReg *Vk = (VReg *)vk; \ + typedef __typeof(Vd->E1(0)) T; \ + int oprsz = simd_oprsz(desc); \ + \ + for (i = 0; i < oprsz / (BIT / 8); i++) { \ + Vd->E2(i) = ((T)Vj->E2(i)) * ((T)Vk->E2(i)) >> BIT; \ + } \ +} + +void HELPER(vmuh_d)(void *vd, void *vj, void *vk, uint32_t desc) +{ + int i; + uint64_t l, h; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + VReg *Vk = (VReg *)vk; + int oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz / 8; i++) { + muls64(&l, &h, Vj->D(i), Vk->D(i)); + Vd->D(i) = h; + } +} + +DO_VMUH(vmuh_b, 8, H, B, DO_MUH) +DO_VMUH(vmuh_h, 16, W, H, DO_MUH) +DO_VMUH(vmuh_w, 32, D, W, DO_MUH) + +void HELPER(vmuh_du)(void *vd, void *vj, void *vk, uint32_t desc) +{ + int i; + uint64_t l, h; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + VReg *Vk = (VReg *)vk; + int oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz / 8; i++) { + mulu64(&l, &h, Vj->D(i), Vk->D(i)); + Vd->D(i) = h; + } +} + +DO_VMUH(vmuh_bu, 8, UH, UB, DO_MUH) +DO_VMUH(vmuh_hu, 16, UW, UH, DO_MUH) +DO_VMUH(vmuh_wu, 32, UD, UW, DO_MUH) + +DO_EVEN(vmulwev_h_b, 16, H, B, DO_MUL) +DO_EVEN(vmulwev_w_h, 32, W, H, DO_MUL) +DO_EVEN(vmulwev_d_w, 64, D, W, DO_MUL) + +DO_ODD(vmulwod_h_b, 16, H, B, DO_MUL) +DO_ODD(vmulwod_w_h, 32, W, H, DO_MUL) +DO_ODD(vmulwod_d_w, 64, D, W, DO_MUL) + +DO_EVEN(vmulwev_h_bu, 16, UH, UB, DO_MUL) +DO_EVEN(vmulwev_w_hu, 32, UW, UH, DO_MUL) +DO_EVEN(vmulwev_d_wu, 64, UD, UW, DO_MUL) + +DO_ODD(vmulwod_h_bu, 16, UH, UB, DO_MUL) +DO_ODD(vmulwod_w_hu, 32, UW, UH, DO_MUL) +DO_ODD(vmulwod_d_wu, 64, UD, UW, DO_MUL) + +DO_EVEN_U_S(vmulwev_h_bu_b, 16, H, UH, B, UB, DO_MUL) +DO_EVEN_U_S(vmulwev_w_hu_h, 32, W, UW, H, UH, DO_MUL) +DO_EVEN_U_S(vmulwev_d_wu_w, 64, D, UD, W, UW, DO_MUL) + +DO_ODD_U_S(vmulwod_h_bu_b, 16, H, UH, B, UB, DO_MUL) +DO_ODD_U_S(vmulwod_w_hu_h, 32, W, UW, H, UH, DO_MUL) +DO_ODD_U_S(vmulwod_d_wu_w, 64, D, UD, W, UW, DO_MUL) + +#define VMADDSUB(NAME, BIT, E, DO_OP) \ +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ +{ \ + int i; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + VReg *Vk = (VReg *)vk; \ + int oprsz = simd_oprsz(desc); \ + \ + for (i = 0; i < oprsz / (BIT / 8); i++) { \ + Vd->E(i) = DO_OP(Vd->E(i), Vj->E(i) ,Vk->E(i)); \ + } \ +} + +VMADDSUB(vmadd_b, 8, B, DO_MADD) +VMADDSUB(vmadd_h, 16, H, DO_MADD) +VMADDSUB(vmadd_w, 32, W, DO_MADD) +VMADDSUB(vmadd_d, 64, D, DO_MADD) +VMADDSUB(vmsub_b, 8, B, DO_MSUB) +VMADDSUB(vmsub_h, 16, H, DO_MSUB) +VMADDSUB(vmsub_w, 32, W, DO_MSUB) +VMADDSUB(vmsub_d, 64, D, DO_MSUB) + +#define VMADDWEV(NAME, BIT, E1, E2, DO_OP) \ +void 
HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ +{ \ + int i; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + VReg *Vk = (VReg *)vk; \ + typedef __typeof(Vd->E1(0)) TD; \ + int oprsz = simd_oprsz(desc); \ + \ + for (i = 0; i < oprsz / (BIT / 8); i++) { \ + Vd->E1(i) += DO_OP((TD)Vj->E2(2 * i), (TD)Vk->E2(2 * i)); \ + } \ +} + +VMADDWEV(vmaddwev_h_b, 16, H, B, DO_MUL) +VMADDWEV(vmaddwev_w_h, 32, W, H, DO_MUL) +VMADDWEV(vmaddwev_d_w, 64, D, W, DO_MUL) +VMADDWEV(vmaddwev_h_bu, 16, UH, UB, DO_MUL) +VMADDWEV(vmaddwev_w_hu, 32, UW, UH, DO_MUL) +VMADDWEV(vmaddwev_d_wu, 64, UD, UW, DO_MUL) + +#define VMADDWOD(NAME, BIT, E1, E2, DO_OP) \ +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ +{ \ + int i; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + VReg *Vk = (VReg *)vk; \ + typedef __typeof(Vd->E1(0)) TD; \ + int oprsz = simd_oprsz(desc); \ + \ + for (i = 0; i < oprsz / (BIT / 8); i++) { \ + Vd->E1(i) += DO_OP((TD)Vj->E2(2 * i + 1), \ + (TD)Vk->E2(2 * i + 1)); \ + } \ +} + +VMADDWOD(vmaddwod_h_b, 16, H, B, DO_MUL) +VMADDWOD(vmaddwod_w_h, 32, W, H, DO_MUL) +VMADDWOD(vmaddwod_d_w, 64, D, W, DO_MUL) +VMADDWOD(vmaddwod_h_bu, 16, UH, UB, DO_MUL) +VMADDWOD(vmaddwod_w_hu, 32, UW, UH, DO_MUL) +VMADDWOD(vmaddwod_d_wu, 64, UD, UW, DO_MUL) + +#define VMADDWEV_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \ +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ +{ \ + int i; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + VReg *Vk = (VReg *)vk; \ + typedef __typeof(Vd->ES1(0)) TS1; \ + typedef __typeof(Vd->EU1(0)) TU1; \ + int oprsz = simd_oprsz(desc); \ + \ + for (i = 0; i < oprsz / (BIT / 8); i++) { \ + Vd->ES1(i) += DO_OP((TU1)Vj->EU2(2 * i), \ + (TS1)Vk->ES2(2 * i)); \ + } \ +} + +VMADDWEV_U_S(vmaddwev_h_bu_b, 16, H, UH, B, UB, DO_MUL) +VMADDWEV_U_S(vmaddwev_w_hu_h, 32, W, UW, H, UH, DO_MUL) +VMADDWEV_U_S(vmaddwev_d_wu_w, 64, D, UD, W, UW, DO_MUL) + +#define VMADDWOD_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \ +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ +{ \ + int i; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + VReg *Vk = (VReg *)vk; \ + typedef __typeof(Vd->ES1(0)) TS1; \ + typedef __typeof(Vd->EU1(0)) TU1; \ + int oprsz = simd_oprsz(desc); \ + \ + for (i = 0; i < oprsz / (BIT / 8); i++) { \ + Vd->ES1(i) += DO_OP((TU1)Vj->EU2(2 * i + 1), \ + (TS1)Vk->ES2(2 * i + 1)); \ + } \ +} + +VMADDWOD_U_S(vmaddwod_h_bu_b, 16, H, UH, B, UB, DO_MUL) +VMADDWOD_U_S(vmaddwod_w_hu_h, 32, W, UW, H, UH, DO_MUL) +VMADDWOD_U_S(vmaddwod_d_wu_w, 64, D, UD, W, UW, DO_MUL) + +#define VDIV(NAME, BIT, E, DO_OP) \ +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ +{ \ + int i; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + VReg *Vk = (VReg *)vk; \ + int oprsz = simd_oprsz(desc); \ + \ + for (i = 0; i < oprsz / (BIT / 8); i++) { \ + Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)); \ + } \ +} + +VDIV(vdiv_b, 8, B, DO_DIV) +VDIV(vdiv_h, 16, H, DO_DIV) +VDIV(vdiv_w, 32, W, DO_DIV) +VDIV(vdiv_d, 64, D, DO_DIV) +VDIV(vdiv_bu, 8, UB, DO_DIVU) +VDIV(vdiv_hu, 16, UH, DO_DIVU) +VDIV(vdiv_wu, 32, UW, DO_DIVU) +VDIV(vdiv_du, 64, UD, DO_DIVU) +VDIV(vmod_b, 8, B, DO_REM) +VDIV(vmod_h, 16, H, DO_REM) +VDIV(vmod_w, 32, W, DO_REM) +VDIV(vmod_d, 64, D, DO_REM) +VDIV(vmod_bu, 8, UB, DO_REMU) +VDIV(vmod_hu, 16, UH, DO_REMU) +VDIV(vmod_wu, 32, UW, DO_REMU) +VDIV(vmod_du, 64, UD, DO_REMU) + +#define VSAT_S(NAME, BIT, E) \ +void HELPER(NAME)(void *vd, void *vj, uint64_t max, uint32_t desc) \ +{ \ + int i; \ + VReg *Vd = (VReg *)vd; \ + VReg 
*Vj = (VReg *)vj; \ + typedef __typeof(Vd->E(0)) TD; \ + int oprsz = simd_oprsz(desc); \ + \ + for (i = 0; i < oprsz / (BIT / 8); i++) { \ + Vd->E(i) = Vj->E(i) > (TD)max ? (TD)max : \ + Vj->E(i) < (TD)~max ? (TD)~max: Vj->E(i); \ + } \ +} + +VSAT_S(vsat_b, 8, B) +VSAT_S(vsat_h, 16, H) +VSAT_S(vsat_w, 32, W) +VSAT_S(vsat_d, 64, D) + +#define VSAT_U(NAME, BIT, E) \ +void HELPER(NAME)(void *vd, void *vj, uint64_t max, uint32_t desc) \ +{ \ + int i; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + typedef __typeof(Vd->E(0)) TD; \ + int oprsz = simd_oprsz(desc); \ + \ + for (i = 0; i < oprsz / (BIT / 8); i++) { \ + Vd->E(i) = Vj->E(i) > (TD)max ? (TD)max : Vj->E(i); \ + } \ +} + +VSAT_U(vsat_bu, 8, UB) +VSAT_U(vsat_hu, 16, UH) +VSAT_U(vsat_wu, 32, UW) +VSAT_U(vsat_du, 64, UD) + +#define VEXTH(NAME, BIT, E1, E2) \ +void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \ +{ \ + int i, j, ofs; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + int oprsz = simd_oprsz(desc); \ + \ + ofs = LSX_LEN / BIT; \ + for (i = 0; i < oprsz / 16; i++) { \ + for (j = 0; j < ofs; j++) { \ + Vd->E1(j + i * ofs) = Vj->E2(j + ofs + ofs * 2 * i); \ + } \ + } \ +} + +void HELPER(vexth_q_d)(void *vd, void *vj, uint32_t desc) +{ + int i; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + int oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz / 16; i++) { + Vd->Q(i) = int128_makes64(Vj->D(2 * i + 1)); + } +} + +void HELPER(vexth_qu_du)(void *vd, void *vj, uint32_t desc) +{ + int i; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + int oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz / 16; i++) { + Vd->Q(i) = int128_make64(Vj->UD(2 * i + 1)); + } +} + +VEXTH(vexth_h_b, 16, H, B) +VEXTH(vexth_w_h, 32, W, H) +VEXTH(vexth_d_w, 64, D, W) +VEXTH(vexth_hu_bu, 16, UH, UB) +VEXTH(vexth_wu_hu, 32, UW, UH) +VEXTH(vexth_du_wu, 64, UD, UW) + +#define VEXT2XV(NAME, BIT, E1, E2) \ +void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \ +{ \ + int i; \ + VReg temp = {}; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + int oprsz = simd_oprsz(desc); \ + \ + for (i = 0; i < oprsz / (BIT / 8); i++) { \ + temp.E1(i) = Vj->E2(i); \ + } \ + *Vd = temp; \ +} + +VEXT2XV(vext2xv_h_b, 16, H, B) +VEXT2XV(vext2xv_w_b, 32, W, B) +VEXT2XV(vext2xv_d_b, 64, D, B) +VEXT2XV(vext2xv_w_h, 32, W, H) +VEXT2XV(vext2xv_d_h, 64, D, H) +VEXT2XV(vext2xv_d_w, 64, D, W) +VEXT2XV(vext2xv_hu_bu, 16, UH, UB) +VEXT2XV(vext2xv_wu_bu, 32, UW, UB) +VEXT2XV(vext2xv_du_bu, 64, UD, UB) +VEXT2XV(vext2xv_wu_hu, 32, UW, UH) +VEXT2XV(vext2xv_du_hu, 64, UD, UH) +VEXT2XV(vext2xv_du_wu, 64, UD, UW) + +DO_3OP(vsigncov_b, 8, B, DO_SIGNCOV) +DO_3OP(vsigncov_h, 16, H, DO_SIGNCOV) +DO_3OP(vsigncov_w, 32, W, DO_SIGNCOV) +DO_3OP(vsigncov_d, 64, D, DO_SIGNCOV) + +static uint64_t do_vmskltz_b(int64_t val) +{ + uint64_t m = 0x8080808080808080ULL; + uint64_t c = val & m; + c |= c << 7; + c |= c << 14; + c |= c << 28; + return c >> 56; +} + +void HELPER(vmskltz_b)(void *vd, void *vj, uint32_t desc) +{ + int i; + uint16_t temp = 0; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + int oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz / 16; i++) { + temp = 0; + temp = do_vmskltz_b(Vj->D(2 * i)); + temp |= (do_vmskltz_b(Vj->D(2 * i + 1)) << 8); + Vd->D(2 * i) = temp; + Vd->D(2 * i + 1) = 0; + } +} + +static uint64_t do_vmskltz_h(int64_t val) +{ + uint64_t m = 0x8000800080008000ULL; + uint64_t c = val & m; + c |= c << 15; + c |= c << 30; + return c >> 60; +} + +void HELPER(vmskltz_h)(void *vd, void *vj, uint32_t desc) +{ + int i; + uint16_t temp = 0; + VReg *Vd = 
(VReg *)vd; + VReg *Vj = (VReg *)vj; + int oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz / 16; i++) { + temp = 0; + temp = do_vmskltz_h(Vj->D(2 * i)); + temp |= (do_vmskltz_h(Vj->D(2 * i + 1)) << 4); + Vd->D(2 * i) = temp; + Vd->D(2 * i + 1) = 0; + } +} + +static uint64_t do_vmskltz_w(int64_t val) +{ + uint64_t m = 0x8000000080000000ULL; + uint64_t c = val & m; + c |= c << 31; + return c >> 62; +} + +void HELPER(vmskltz_w)(void *vd, void *vj, uint32_t desc) +{ + int i; + uint16_t temp = 0; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + int oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz / 16; i++) { + temp = 0; + temp = do_vmskltz_w(Vj->D(2 * i)); + temp |= (do_vmskltz_w(Vj->D(2 * i + 1)) << 2); + Vd->D(2 * i) = temp; + Vd->D(2 * i + 1) = 0; + } +} + +static uint64_t do_vmskltz_d(int64_t val) +{ + return (uint64_t)val >> 63; +} +void HELPER(vmskltz_d)(void *vd, void *vj, uint32_t desc) +{ + int i; + uint16_t temp = 0; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + int oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz / 16; i++) { + temp = 0; + temp = do_vmskltz_d(Vj->D(2 * i)); + temp |= (do_vmskltz_d(Vj->D(2 * i + 1)) << 1); + Vd->D(2 * i) = temp; + Vd->D(2 * i + 1) = 0; + } +} + +void HELPER(vmskgez_b)(void *vd, void *vj, uint32_t desc) +{ + int i; + uint16_t temp = 0; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + int oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz / 16; i++) { + temp = 0; + temp = do_vmskltz_b(Vj->D(2 * i)); + temp |= (do_vmskltz_b(Vj->D(2 * i + 1)) << 8); + Vd->D(2 * i) = (uint16_t)(~temp); + Vd->D(2 * i + 1) = 0; + } +} + +static uint64_t do_vmskez_b(uint64_t a) +{ + uint64_t m = 0x7f7f7f7f7f7f7f7fULL; + uint64_t c = ~(((a & m) + m) | a | m); + c |= c << 7; + c |= c << 14; + c |= c << 28; + return c >> 56; +} + +void HELPER(vmsknz_b)(void *vd, void *vj, uint32_t desc) +{ + int i; + uint16_t temp = 0; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + int oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz / 16; i++) { + temp = 0; + temp = do_vmskez_b(Vj->D(2 * i)); + temp |= (do_vmskez_b(Vj->D(2 * i + 1)) << 8); + Vd->D(2 * i) = (uint16_t)(~temp); + Vd->D(2 * i + 1) = 0; + } +} + +void HELPER(vnori_b)(void *vd, void *vj, uint64_t imm, uint32_t desc) +{ + int i; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + + for (i = 0; i < simd_oprsz(desc); i++) { + Vd->B(i) = ~(Vj->B(i) | (uint8_t)imm); + } +} + +#define VSLLWIL(NAME, BIT, E1, E2) \ +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ +{ \ + int i, j, ofs; \ + VReg temp = {}; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + int oprsz = simd_oprsz(desc); \ + typedef __typeof(temp.E1(0)) TD; \ + \ + ofs = LSX_LEN / BIT; \ + for (i = 0; i < oprsz / 16; i++) { \ + for (j = 0; j < ofs; j++) { \ + temp.E1(j + ofs * i) = (TD)Vj->E2(j + ofs * 2 * i) << (imm % BIT); \ + } \ + } \ + *Vd = temp; \ +} + + +void HELPER(vextl_q_d)(void *vd, void *vj, uint32_t desc) +{ + int i; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + int oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz / 16; i++) { + Vd->Q(i) = int128_makes64(Vj->D(2 * i)); + } +} + +void HELPER(vextl_qu_du)(void *vd, void *vj, uint32_t desc) +{ + int i; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + int oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz / 16; i++) { + Vd->Q(i) = int128_make64(Vj->UD(2 * i)); + } +} + +VSLLWIL(vsllwil_h_b, 16, H, B) +VSLLWIL(vsllwil_w_h, 32, W, H) +VSLLWIL(vsllwil_d_w, 64, D, W) +VSLLWIL(vsllwil_hu_bu, 16, UH, UB) +VSLLWIL(vsllwil_wu_hu, 32, UW, UH) 
+VSLLWIL(vsllwil_du_wu, 64, UD, UW) + +#define do_vsrlr(E, T) \ +static T do_vsrlr_ ##E(T s1, int sh) \ +{ \ + if (sh == 0) { \ + return s1; \ + } else { \ + return (s1 >> sh) + ((s1 >> (sh - 1)) & 0x1); \ + } \ +} + +do_vsrlr(B, uint8_t) +do_vsrlr(H, uint16_t) +do_vsrlr(W, uint32_t) +do_vsrlr(D, uint64_t) + +#define VSRLR(NAME, BIT, T, E) \ +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ +{ \ + int i; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + VReg *Vk = (VReg *)vk; \ + int oprsz = simd_oprsz(desc); \ + \ + for (i = 0; i < oprsz / (BIT / 8); i++) { \ + Vd->E(i) = do_vsrlr_ ## E(Vj->E(i), ((T)Vk->E(i))%BIT); \ + } \ +} + +VSRLR(vsrlr_b, 8, uint8_t, B) +VSRLR(vsrlr_h, 16, uint16_t, H) +VSRLR(vsrlr_w, 32, uint32_t, W) +VSRLR(vsrlr_d, 64, uint64_t, D) + +#define VSRLRI(NAME, BIT, E) \ +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ +{ \ + int i; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + int oprsz = simd_oprsz(desc); \ + \ + for (i = 0; i < oprsz / (BIT / 8); i++) { \ + Vd->E(i) = do_vsrlr_ ## E(Vj->E(i), imm); \ + } \ +} + +VSRLRI(vsrlri_b, 8, B) +VSRLRI(vsrlri_h, 16, H) +VSRLRI(vsrlri_w, 32, W) +VSRLRI(vsrlri_d, 64, D) + +#define do_vsrar(E, T) \ +static T do_vsrar_ ##E(T s1, int sh) \ +{ \ + if (sh == 0) { \ + return s1; \ + } else { \ + return (s1 >> sh) + ((s1 >> (sh - 1)) & 0x1); \ + } \ +} + +do_vsrar(B, int8_t) +do_vsrar(H, int16_t) +do_vsrar(W, int32_t) +do_vsrar(D, int64_t) + +#define VSRAR(NAME, BIT, T, E) \ +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ +{ \ + int i; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + VReg *Vk = (VReg *)vk; \ + int oprsz = simd_oprsz(desc); \ + \ + for (i = 0; i < oprsz / (BIT / 8); i++) { \ + Vd->E(i) = do_vsrar_ ## E(Vj->E(i), ((T)Vk->E(i))%BIT); \ + } \ +} + +VSRAR(vsrar_b, 8, uint8_t, B) +VSRAR(vsrar_h, 16, uint16_t, H) +VSRAR(vsrar_w, 32, uint32_t, W) +VSRAR(vsrar_d, 64, uint64_t, D) + +#define VSRARI(NAME, BIT, E) \ +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ +{ \ + int i; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + int oprsz = simd_oprsz(desc); \ + \ + for (i = 0; i < oprsz / (BIT / 8); i++) { \ + Vd->E(i) = do_vsrar_ ## E(Vj->E(i), imm); \ + } \ +} + +VSRARI(vsrari_b, 8, B) +VSRARI(vsrari_h, 16, H) +VSRARI(vsrari_w, 32, W) +VSRARI(vsrari_d, 64, D) + +#define VSRLN(NAME, BIT, E1, E2) \ +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ +{ \ + int i, j, ofs; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + VReg *Vk = (VReg *)vk; \ + int oprsz = simd_oprsz(desc); \ + \ + ofs = LSX_LEN / BIT; \ + for (i = 0; i < oprsz / 16; i++) { \ + for (j = 0; j < ofs; j++) { \ + Vd->E1(j + ofs * 2 * i) = R_SHIFT(Vj->E2(j + ofs * i), \ + Vk->E2(j + ofs * i) % BIT); \ + } \ + Vd->D(2 * i + 1) = 0; \ + } \ +} + +VSRLN(vsrln_b_h, 16, B, UH) +VSRLN(vsrln_h_w, 32, H, UW) +VSRLN(vsrln_w_d, 64, W, UD) + +#define VSRAN(NAME, BIT, E1, E2, E3) \ +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ +{ \ + int i, j, ofs; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + VReg *Vk = (VReg *)vk; \ + int oprsz = simd_oprsz(desc); \ + \ + ofs = LSX_LEN / BIT; \ + for (i = 0; i < oprsz / 16; i++) { \ + for (j = 0; j < ofs; j++) { \ + Vd->E1(j + ofs * 2 * i) = R_SHIFT(Vj->E2(j + ofs * i), \ + Vk->E3(j + ofs * i) % BIT); \ + } \ + Vd->D(2 * i + 1) = 0; \ + } \ +} + +VSRAN(vsran_b_h, 16, B, H, UH) +VSRAN(vsran_h_w, 32, H, W, UW) +VSRAN(vsran_w_d, 64, W, D, UD) + +#define VSRLNI(NAME, BIT, E1, E2) \ 
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ +{ \ + int i, j, ofs; \ + VReg temp = {}; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + int oprsz = simd_oprsz(desc); \ + \ + ofs = LSX_LEN / BIT; \ + for (i = 0; i < oprsz / 16; i++) { \ + for (j = 0; j < ofs; j++) { \ + temp.E1(j + ofs * 2 * i) = R_SHIFT(Vj->E2(j + ofs * i), imm); \ + temp.E1(j + ofs * (2 * i + 1)) = R_SHIFT(Vd->E2(j + ofs * i), \ + imm); \ + } \ + } \ + *Vd = temp; \ +} + +void HELPER(vsrlni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) +{ + int i; + VReg temp = {}; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + + for (i = 0; i < 2; i++) { + temp.D(2 * i) = int128_getlo(int128_urshift(Vj->Q(i), imm % 128)); + temp.D(2 * i +1) = int128_getlo(int128_urshift(Vd->Q(i), imm % 128)); + } + *Vd = temp; +} + +VSRLNI(vsrlni_b_h, 16, B, UH) +VSRLNI(vsrlni_h_w, 32, H, UW) +VSRLNI(vsrlni_w_d, 64, W, UD) + +#define VSRANI(NAME, BIT, E1, E2) \ +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ +{ \ + int i, j, ofs; \ + VReg temp = {}; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + int oprsz = simd_oprsz(desc); \ + \ + ofs = LSX_LEN / BIT; \ + for (i = 0; i < oprsz / 16; i++) { \ + for (j = 0; j < ofs; j++) { \ + temp.E1(j + ofs * 2 * i) = R_SHIFT(Vj->E2(j + ofs * i), imm); \ + temp.E1(j + ofs * (2 * i + 1)) = R_SHIFT(Vd->E2(j + ofs * i), \ + imm); \ + } \ + } \ + *Vd = temp; \ +} + +void HELPER(vsrani_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) +{ + int i; + VReg temp = {}; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + + for (i = 0; i < 2; i++) { + temp.D(2 * i) = int128_getlo(int128_rshift(Vj->Q(i), imm % 128)); + temp.D(2 * i + 1) = int128_getlo(int128_rshift(Vd->Q(i), imm % 128)); + } + *Vd = temp; +} + +VSRANI(vsrani_b_h, 16, B, H) +VSRANI(vsrani_h_w, 32, H, W) +VSRANI(vsrani_w_d, 64, W, D) + +#define VSRLRN(NAME, BIT, E1, E2, E3) \ +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ +{ \ + int i, j, ofs; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + VReg *Vk = (VReg *)vk; \ + int oprsz = simd_oprsz(desc); \ + \ + ofs = LSX_LEN / BIT; \ + for (i = 0; i < oprsz / 16; i++) { \ + for (j = 0; j < ofs; j++) { \ + Vd->E1(j + ofs * 2 * i) = do_vsrlr_ ##E2(Vj->E2(j + ofs * i), \ + Vk->E3(j + ofs * i) % BIT); \ + } \ + Vd->D(2 * i + 1) = 0; \ + } \ +} + +VSRLRN(vsrlrn_b_h, 16, B, H, UH) +VSRLRN(vsrlrn_h_w, 32, H, W, UW) +VSRLRN(vsrlrn_w_d, 64, W, D, UD) + +#define VSRARN(NAME, BIT, E1, E2, E3) \ +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ +{ \ + int i, j, ofs; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + VReg *Vk = (VReg *)vk; \ + int oprsz = simd_oprsz(desc); \ + \ + ofs = LSX_LEN / BIT; \ + for (i = 0; i < oprsz / 16; i++) { \ + for (j = 0; j < ofs; j++) { \ + Vd->E1(j + ofs * 2 * i) = do_vsrar_ ## E2(Vj->E2(j + ofs * i), \ + Vk->E3(j + ofs * i) % BIT); \ + } \ + Vd->D(2 * i + 1) = 0; \ + } \ +} + +VSRARN(vsrarn_b_h, 16, B, H, UH) +VSRARN(vsrarn_h_w, 32, H, W, UW) +VSRARN(vsrarn_w_d, 64, W, D, UD) + +#define VSRLRNI(NAME, BIT, E1, E2) \ +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ +{ \ + int i, j, ofs; \ + VReg temp = {}; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + int oprsz = simd_oprsz(desc); \ + \ + ofs = LSX_LEN / BIT; \ + for (i = 0; i < oprsz / 16; i++) { \ + for (j = 0; j < ofs; j++) { \ + temp.E1(j + ofs * 2 * i) = do_vsrlr_ ## E2(Vj->E2(j + ofs * i), imm); \ + temp.E1(j + ofs * (2 * i + 1)) = do_vsrlr_ ## E2(Vd->E2(j + ofs * i), \ + imm); \ 
+ } \ + } \ + *Vd = temp; \ +} + +void HELPER(vsrlrni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) +{ + int i; + VReg temp = {}; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + Int128 r[4]; + int oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz / 16; i++) { + if (imm == 0) { + temp.D(2 * i) = int128_getlo(Vj->Q(i)); + temp.D(2 * i + 1) = int128_getlo(Vd->Q(i)); + } else { + r[2 * i] = int128_and(int128_urshift(Vj->Q(i), (imm - 1)), + int128_one()); + r[2 * i + 1] = int128_and(int128_urshift(Vd->Q(i), (imm - 1)), + int128_one()); + temp.D(2 * i) = int128_getlo(int128_add(int128_urshift(Vj->Q(i), + imm), r[2 * i])); + temp.D(2 * i + 1) = int128_getlo(int128_add(int128_urshift(Vd->Q(i), + imm), r[ 2 * i + 1])); + } + } + *Vd = temp; +} + +VSRLRNI(vsrlrni_b_h, 16, B, H) +VSRLRNI(vsrlrni_h_w, 32, H, W) +VSRLRNI(vsrlrni_w_d, 64, W, D) + +#define VSRARNI(NAME, BIT, E1, E2) \ +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ +{ \ + int i, j, ofs; \ + VReg temp = {}; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + int oprsz = simd_oprsz(desc); \ + \ + ofs = LSX_LEN / BIT; \ + for (i = 0; i < oprsz / 16; i++) { \ + for (j = 0; j < ofs; j++) { \ + temp.E1(j + ofs * 2 * i) = do_vsrar_ ## E2(Vj->E2(j + ofs * i), imm); \ + temp.E1(j + ofs * (2 * i + 1)) = do_vsrar_ ## E2(Vd->E2(j + ofs * i), \ + imm); \ + } \ + } \ + *Vd = temp; \ +} + +void HELPER(vsrarni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) +{ + int i; + VReg temp = {}; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + Int128 r[4]; + int oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz / 16; i++) { + if (imm == 0) { + temp.D(2 * i) = int128_getlo(Vj->Q(i)); + temp.D(2 * i + 1) = int128_getlo(Vd->Q(i)); + } else { + r[2 * i] = int128_and(int128_rshift(Vj->Q(i), (imm - 1)), + int128_one()); + r[2 * i + 1] = int128_and(int128_rshift(Vd->Q(i), (imm - 1)), + int128_one()); + temp.D(2 * i) = int128_getlo(int128_add(int128_rshift(Vj->Q(i), + imm), r[2 * i])); + temp.D(2 * i + 1) = int128_getlo(int128_add(int128_rshift(Vd->Q(i), + imm), r[2 * i + 1])); + } + } + *Vd = temp; +} + +VSRARNI(vsrarni_b_h, 16, B, H) +VSRARNI(vsrarni_h_w, 32, H, W) +VSRARNI(vsrarni_w_d, 64, W, D) + +#define SSRLNS(NAME, T1, T2, T3) \ +static T1 do_ssrlns_ ## NAME(T2 e2, int sa, int sh) \ +{ \ + T1 shft_res; \ + if (sa == 0) { \ + shft_res = e2; \ + } else { \ + shft_res = (((T1)e2) >> sa); \ + } \ + T3 mask; \ + mask = (1ull << sh) -1; \ + if (shft_res > mask) { \ + return mask; \ + } else { \ + return shft_res; \ + } \ +} + +SSRLNS(B, uint16_t, int16_t, uint8_t) +SSRLNS(H, uint32_t, int32_t, uint16_t) +SSRLNS(W, uint64_t, int64_t, uint32_t) + +#define VSSRLN(NAME, BIT, E1, E2, E3) \ +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ +{ \ + int i, j, ofs; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + VReg *Vk = (VReg *)vk; \ + int oprsz = simd_oprsz(desc); \ + \ + ofs = LSX_LEN / BIT; \ + for (i = 0; i < oprsz / 16; i++) { \ + for (j = 0; j < ofs; j++) { \ + Vd->E1(j + ofs * 2 * i) = do_ssrlns_ ## E1(Vj->E2(j + ofs * i), \ + Vk->E3(j + ofs * i) % BIT, \ + BIT / 2 - 1); \ + } \ + Vd->D(2 * i + 1) = 0; \ + } \ +} + +VSSRLN(vssrln_b_h, 16, B, H, UH) +VSSRLN(vssrln_h_w, 32, H, W, UW) +VSSRLN(vssrln_w_d, 64, W, D, UD) + +#define SSRANS(E, T1, T2) \ +static T1 do_ssrans_ ## E(T1 e2, int sa, int sh) \ +{ \ + T1 shft_res; \ + if (sa == 0) { \ + shft_res = e2; \ + } else { \ + shft_res = e2 >> sa; \ + } \ + T2 mask; \ + mask = (1ll << sh) - 1; \ + if (shft_res > mask) { \ + return mask; \ + } else 
if (shft_res < -(mask + 1)) { \ + return ~mask; \ + } else { \ + return shft_res; \ + } \ +} + +SSRANS(B, int16_t, int8_t) +SSRANS(H, int32_t, int16_t) +SSRANS(W, int64_t, int32_t) + +#define VSSRAN(NAME, BIT, E1, E2, E3) \ +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ +{ \ + int i, j, ofs; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + VReg *Vk = (VReg *)vk; \ + int oprsz = simd_oprsz(desc); \ + \ + ofs = LSX_LEN / BIT; \ + for (i = 0; i < oprsz / 16; i++) { \ + for (j = 0; j < ofs; j++) { \ + Vd->E1(j + ofs * 2 * i) = do_ssrans_ ## E1(Vj->E2(j + ofs * i), \ + Vk->E3(j + ofs * i) % BIT, \ + BIT / 2 - 1); \ + } \ + Vd->D(2 * i + 1) = 0; \ + } \ +} + +VSSRAN(vssran_b_h, 16, B, H, UH) +VSSRAN(vssran_h_w, 32, H, W, UW) +VSSRAN(vssran_w_d, 64, W, D, UD) + +#define SSRLNU(E, T1, T2, T3) \ +static T1 do_ssrlnu_ ## E(T3 e2, int sa, int sh) \ +{ \ + T1 shft_res; \ + if (sa == 0) { \ + shft_res = e2; \ + } else { \ + shft_res = (((T1)e2) >> sa); \ + } \ + T2 mask; \ + mask = (1ull << sh) - 1; \ + if (shft_res > mask) { \ + return mask; \ + } else { \ + return shft_res; \ + } \ +} + +SSRLNU(B, uint16_t, uint8_t, int16_t) +SSRLNU(H, uint32_t, uint16_t, int32_t) +SSRLNU(W, uint64_t, uint32_t, int64_t) + +#define VSSRLNU(NAME, BIT, E1, E2, E3) \ +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ +{ \ + int i, j, ofs; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + VReg *Vk = (VReg *)vk; \ + int oprsz = simd_oprsz(desc); \ + \ + ofs = LSX_LEN / BIT; \ + for (i = 0; i < oprsz / 16; i++) { \ + for (j = 0; j < ofs; j++) { \ + Vd->E1(j + ofs * 2 * i) = do_ssrlnu_ ## E1(Vj->E2(j + ofs * i), \ + Vk->E3(j + ofs * i) % BIT, \ + BIT / 2); \ + } \ + Vd->D(2 * i + 1) = 0; \ + } \ +} + +VSSRLNU(vssrln_bu_h, 16, B, H, UH) +VSSRLNU(vssrln_hu_w, 32, H, W, UW) +VSSRLNU(vssrln_wu_d, 64, W, D, UD) + +#define SSRANU(E, T1, T2, T3) \ +static T1 do_ssranu_ ## E(T3 e2, int sa, int sh) \ +{ \ + T1 shft_res; \ + if (sa == 0) { \ + shft_res = e2; \ + } else { \ + shft_res = e2 >> sa; \ + } \ + if (e2 < 0) { \ + shft_res = 0; \ + } \ + T2 mask; \ + mask = (1ull << sh) - 1; \ + if (shft_res > mask) { \ + return mask; \ + } else { \ + return shft_res; \ + } \ +} + +SSRANU(B, uint16_t, uint8_t, int16_t) +SSRANU(H, uint32_t, uint16_t, int32_t) +SSRANU(W, uint64_t, uint32_t, int64_t) + +#define VSSRANU(NAME, BIT, E1, E2, E3) \ +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ +{ \ + int i, j, ofs; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + VReg *Vk = (VReg *)vk; \ + int oprsz = simd_oprsz(desc); \ + \ + ofs = LSX_LEN / BIT; \ + for (i = 0; i < oprsz / 16; i++) { \ + for (j = 0; j < ofs; j++) { \ + Vd->E1(j + ofs * 2 * i) = do_ssranu_ ## E1(Vj->E2(j + ofs * i), \ + Vk->E3(j + ofs * i) % BIT, \ + BIT / 2); \ + } \ + Vd->D(2 * i + 1) = 0; \ + } \ +} + +VSSRANU(vssran_bu_h, 16, B, H, UH) +VSSRANU(vssran_hu_w, 32, H, W, UW) +VSSRANU(vssran_wu_d, 64, W, D, UD) + +#define VSSRLNI(NAME, BIT, E1, E2) \ +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ +{ \ + int i, j, ofs; \ + VReg temp = {}; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + int oprsz = simd_oprsz(desc); \ + \ + ofs = LSX_LEN / BIT; \ + for (i = 0; i < oprsz / 16; i++) { \ + for (j = 0; j < ofs; j++) { \ + temp.E1(j + ofs * 2 * i) = do_ssrlns_ ## E1(Vj->E2(j + ofs * i), \ + imm, BIT / 2 - 1); \ + temp.E1(j + ofs * (2 * i + 1)) = do_ssrlns_ ## E1(Vd->E2(j + ofs * i), \ + imm, BIT / 2 - 1); \ + } \ + } \ + *Vd = temp; \ +} + +static void do_vssrlni_q(VReg *Vd, VReg 
*Vj, + uint64_t imm, int idx, Int128 mask) +{ + Int128 shft_res1, shft_res2; + + if (imm == 0) { + shft_res1 = Vj->Q(idx); + shft_res2 = Vd->Q(idx); + } else { + shft_res1 = int128_urshift(Vj->Q(idx), imm); + shft_res2 = int128_urshift(Vd->Q(idx), imm); + } + + if (int128_ult(mask, shft_res1)) { + Vd->D(idx * 2) = int128_getlo(mask); + }else { + Vd->D(idx * 2) = int128_getlo(shft_res1); + } + + if (int128_ult(mask, shft_res2)) { + Vd->D(idx * 2 + 1) = int128_getlo(mask); + }else { + Vd->D(idx * 2 + 1) = int128_getlo(shft_res2); + } +} + +void HELPER(vssrlni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) +{ + int i; + Int128 mask; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + int oprsz = simd_oprsz(desc); + + mask = int128_sub(int128_lshift(int128_one(), 63), int128_one()); + + for (i = 0; i < oprsz / 16; i++) { + do_vssrlni_q(Vd, Vj, imm, i, mask); + } +} + +VSSRLNI(vssrlni_b_h, 16, B, H) +VSSRLNI(vssrlni_h_w, 32, H, W) +VSSRLNI(vssrlni_w_d, 64, W, D) + +#define VSSRANI(NAME, BIT, E1, E2) \ +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ +{ \ + int i, j, ofs; \ + VReg temp = {}; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + int oprsz = simd_oprsz(desc); \ + \ + ofs = LSX_LEN / BIT; \ + for (i = 0; i < oprsz / 16; i++) { \ + for (j = 0; j < ofs; j++) { \ + temp.E1(j + ofs * 2 * i) = do_ssrans_ ## E1(Vj->E2(j + ofs * i), \ + imm, BIT / 2 - 1); \ + temp.E1(j + ofs * (2 * i + 1)) = do_ssrans_ ## E1(Vd->E2(j + ofs * i), \ + imm, BIT / 2 - 1); \ + } \ + } \ + *Vd = temp; \ +} + +static void do_vssrani_d_q(VReg *Vd, VReg *Vj, + uint64_t imm, int idx, Int128 mask, Int128 min) +{ + Int128 shft_res1, shft_res2; + + if (imm == 0) { + shft_res1 = Vj->Q(idx); + shft_res2 = Vd->Q(idx); + } else { + shft_res1 = int128_rshift(Vj->Q(idx), imm); + shft_res2 = int128_rshift(Vd->Q(idx), imm); + } + + if (int128_gt(shft_res1, mask)) { + Vd->D(idx * 2) = int128_getlo(mask); + } else if (int128_lt(shft_res1, int128_neg(min))) { + Vd->D(idx * 2) = int128_getlo(min); + } else { + Vd->D(idx * 2) = int128_getlo(shft_res1); + } + + if (int128_gt(shft_res2, mask)) { + Vd->D(idx * 2 + 1) = int128_getlo(mask); + } else if (int128_lt(shft_res2, int128_neg(min))) { + Vd->D(idx * 2 + 1) = int128_getlo(min); + } else { + Vd->D(idx * 2 + 1) = int128_getlo(shft_res2); + } +} + +void HELPER(vssrani_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) +{ + int i; + Int128 mask, min; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + int oprsz = simd_oprsz(desc); + + mask = int128_sub(int128_lshift(int128_one(), 63), int128_one()); + min = int128_lshift(int128_one(), 63); + + for (i = 0; i < oprsz / 16; i++) { + do_vssrani_d_q(Vd, Vj, imm, i, mask, min); + } +} + + +VSSRANI(vssrani_b_h, 16, B, H) +VSSRANI(vssrani_h_w, 32, H, W) +VSSRANI(vssrani_w_d, 64, W, D) + +#define VSSRLNUI(NAME, BIT, E1, E2) \ +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ +{ \ + int i, j, ofs; \ + VReg temp = {}; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + int oprsz = simd_oprsz(desc); \ + \ + ofs = LSX_LEN / BIT; \ + for (i = 0; i < oprsz / 16; i++) { \ + for (j = 0; j < ofs; j++) { \ + temp.E1(j + ofs * 2 * i) = do_ssrlnu_ ## E1(Vj->E2(j + ofs * i), \ + imm, BIT / 2); \ + temp.E1(j + ofs * (2 * i + 1)) = do_ssrlnu_ ## E1(Vd->E2(j + ofs * i), \ + imm, BIT / 2); \ + } \ + } \ + *Vd = temp; \ +} + +void HELPER(vssrlni_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) +{ + int i; + Int128 mask; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + int oprsz = 
simd_oprsz(desc); + + mask = int128_sub(int128_lshift(int128_one(), 64), int128_one()); + + for (i = 0; i < oprsz / 16; i++) { + do_vssrlni_q(Vd, Vj, imm, i, mask); + } +} + +VSSRLNUI(vssrlni_bu_h, 16, B, H) +VSSRLNUI(vssrlni_hu_w, 32, H, W) +VSSRLNUI(vssrlni_wu_d, 64, W, D) + +#define VSSRANUI(NAME, BIT, E1, E2) \ +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ +{ \ + int i, j, ofs; \ + VReg temp = {}; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + int oprsz = simd_oprsz(desc); \ + \ + ofs = LSX_LEN / BIT; \ + for (i = 0; i < oprsz / 16; i++) { \ + for (j = 0; j < ofs; j++) { \ + temp.E1(j + ofs * 2 * i) = do_ssranu_ ## E1(Vj->E2(j + ofs * i), \ + imm, BIT / 2); \ + temp.E1(j + ofs * (2 * i + 1)) = do_ssranu_ ## E1(Vd->E2(j + ofs * i), \ + imm, BIT / 2); \ + } \ + } \ + *Vd = temp; \ +} + +static void do_vssrani_du_q(VReg *Vd, VReg *Vj, + uint64_t imm, int idx, Int128 mask) +{ + Int128 shft_res1, shft_res2; + + if (imm == 0) { + shft_res1 = Vj->Q(idx); + shft_res2 = Vd->Q(idx); + } else { + shft_res1 = int128_rshift(Vj->Q(idx), imm); + shft_res2 = int128_rshift(Vd->Q(idx), imm); + } + + if (int128_lt(Vj->Q(idx), int128_zero())) { + shft_res1 = int128_zero(); + } + + if (int128_lt(Vd->Q(idx), int128_zero())) { + shft_res2 = int128_zero(); + } + if (int128_ult(mask, shft_res1)) { + Vd->D(idx * 2) = int128_getlo(mask); + }else { + Vd->D(idx * 2) = int128_getlo(shft_res1); + } + + if (int128_ult(mask, shft_res2)) { + Vd->D(idx * 2 + 1) = int128_getlo(mask); + }else { + Vd->D(idx * 2 + 1) = int128_getlo(shft_res2); + } + +} + +void HELPER(vssrani_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) +{ + int i; + Int128 mask; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + int oprsz = simd_oprsz(desc); + + mask = int128_sub(int128_lshift(int128_one(), 64), int128_one()); + + for (i = 0; i < oprsz / 16; i++) { + do_vssrani_du_q(Vd, Vj, imm, i, mask); + } +} + +VSSRANUI(vssrani_bu_h, 16, B, H) +VSSRANUI(vssrani_hu_w, 32, H, W) +VSSRANUI(vssrani_wu_d, 64, W, D) + +#define SSRLRNS(E1, E2, T1, T2, T3) \ +static T1 do_ssrlrns_ ## E1(T2 e2, int sa, int sh) \ +{ \ + T1 shft_res; \ + \ + shft_res = do_vsrlr_ ## E2(e2, sa); \ + T1 mask; \ + mask = (1ull << sh) - 1; \ + if (shft_res > mask) { \ + return mask; \ + } else { \ + return shft_res; \ + } \ +} + +SSRLRNS(B, H, uint16_t, int16_t, uint8_t) +SSRLRNS(H, W, uint32_t, int32_t, uint16_t) +SSRLRNS(W, D, uint64_t, int64_t, uint32_t) + +#define VSSRLRN(NAME, BIT, E1, E2, E3) \ +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ +{ \ + int i, j, ofs; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + VReg *Vk = (VReg *)vk; \ + int oprsz = simd_oprsz(desc); \ + \ + ofs = LSX_LEN / BIT; \ + for (i = 0; i < oprsz / 16; i++) { \ + for (j = 0; j < ofs; j++) { \ + Vd->E1(j + ofs * 2 * i) = do_ssrlrns_ ## E1(Vj->E2(j + ofs * i), \ + Vk->E3(j + ofs * i) % BIT, \ + BIT / 2 - 1); \ + } \ + Vd->D(2 * i + 1) = 0; \ + } \ +} + +VSSRLRN(vssrlrn_b_h, 16, B, H, UH) +VSSRLRN(vssrlrn_h_w, 32, H, W, UW) +VSSRLRN(vssrlrn_w_d, 64, W, D, UD) + +#define SSRARNS(E1, E2, T1, T2) \ +static T1 do_ssrarns_ ## E1(T1 e2, int sa, int sh) \ +{ \ + T1 shft_res; \ + \ + shft_res = do_vsrar_ ## E2(e2, sa); \ + T2 mask; \ + mask = (1ll << sh) - 1; \ + if (shft_res > mask) { \ + return mask; \ + } else if (shft_res < -(mask +1)) { \ + return ~mask; \ + } else { \ + return shft_res; \ + } \ +} + +SSRARNS(B, H, int16_t, int8_t) +SSRARNS(H, W, int32_t, int16_t) +SSRARNS(W, D, int64_t, int32_t) + +#define VSSRARN(NAME, BIT, E1, E2, E3) \ 
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ +{ \ + int i, j, ofs; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + VReg *Vk = (VReg *)vk; \ + int oprsz = simd_oprsz(desc); \ + \ + ofs = LSX_LEN / BIT; \ + for (i = 0; i < oprsz / 16; i++) { \ + for (j = 0; j < ofs; j++) { \ + Vd->E1(j + ofs * 2 * i) = do_ssrarns_ ## E1(Vj->E2(j + ofs * i), \ + Vk->E3(j + ofs * i) % BIT, \ + BIT/ 2 - 1); \ + } \ + Vd->D(2 * i + 1) = 0; \ + } \ +} + +VSSRARN(vssrarn_b_h, 16, B, H, UH) +VSSRARN(vssrarn_h_w, 32, H, W, UW) +VSSRARN(vssrarn_w_d, 64, W, D, UD) + +#define SSRLRNU(E1, E2, T1, T2, T3) \ +static T1 do_ssrlrnu_ ## E1(T3 e2, int sa, int sh) \ +{ \ + T1 shft_res; \ + \ + shft_res = do_vsrlr_ ## E2(e2, sa); \ + \ + T2 mask; \ + mask = (1ull << sh) - 1; \ + if (shft_res > mask) { \ + return mask; \ + } else { \ + return shft_res; \ + } \ +} + +SSRLRNU(B, H, uint16_t, uint8_t, int16_t) +SSRLRNU(H, W, uint32_t, uint16_t, int32_t) +SSRLRNU(W, D, uint64_t, uint32_t, int64_t) + +#define VSSRLRNU(NAME, BIT, E1, E2, E3) \ +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ +{ \ + int i, j, ofs; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + VReg *Vk = (VReg *)vk; \ + int oprsz = simd_oprsz(desc); \ + \ + ofs = LSX_LEN / BIT; \ + for (i = 0; i < oprsz / 16; i++) { \ + for (j = 0; j < ofs; j++) { \ + Vd->E1(j + ofs * 2 * i) = do_ssrlrnu_ ## E1(Vj->E2(j + ofs * i), \ + Vk->E3(j + ofs * i) % BIT, \ + BIT / 2); \ + } \ + Vd->D(2 * i + 1) = 0; \ + } \ +} + +VSSRLRNU(vssrlrn_bu_h, 16, B, H, UH) +VSSRLRNU(vssrlrn_hu_w, 32, H, W, UW) +VSSRLRNU(vssrlrn_wu_d, 64, W, D, UD) + +#define SSRARNU(E1, E2, T1, T2, T3) \ +static T1 do_ssrarnu_ ## E1(T3 e2, int sa, int sh) \ +{ \ + T1 shft_res; \ + \ + if (e2 < 0) { \ + shft_res = 0; \ + } else { \ + shft_res = do_vsrar_ ## E2(e2, sa); \ + } \ + T2 mask; \ + mask = (1ull << sh) - 1; \ + if (shft_res > mask) { \ + return mask; \ + } else { \ + return shft_res; \ + } \ +} + +SSRARNU(B, H, uint16_t, uint8_t, int16_t) +SSRARNU(H, W, uint32_t, uint16_t, int32_t) +SSRARNU(W, D, uint64_t, uint32_t, int64_t) + +#define VSSRARNU(NAME, BIT, E1, E2, E3) \ +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ +{ \ + int i, j, ofs; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + VReg *Vk = (VReg *)vk; \ + int oprsz = simd_oprsz(desc); \ + \ + ofs = LSX_LEN / BIT; \ + for (i = 0; i < oprsz / 16; i++) { \ + for (j = 0; j < ofs; j++) { \ + Vd->E1(j + ofs * 2 * i) = do_ssrarnu_ ## E1(Vj->E2(j + ofs * i), \ + Vk->E3(j + ofs * i) % BIT, \ + BIT / 2); \ + } \ + Vd->D(2 * i + 1) = 0; \ + } \ +} + +VSSRARNU(vssrarn_bu_h, 16, B, H, UH) +VSSRARNU(vssrarn_hu_w, 32, H, W, UW) +VSSRARNU(vssrarn_wu_d, 64, W, D, UD) + +#define VSSRLRNI(NAME, BIT, E1, E2) \ +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ +{ \ + int i, j, ofs; \ + VReg temp = {}; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + int oprsz = simd_oprsz(desc); \ + \ + ofs = LSX_LEN / BIT; \ + for (i = 0; i < oprsz / 16; i++) { \ + for (j = 0; j < ofs; j++) { \ + temp.E1(j + ofs * 2 * i) = do_ssrlrns_ ## E1(Vj->E2(j + ofs * i), \ + imm, BIT / 2 - 1); \ + temp.E1(j + ofs * (2 * i + 1)) = do_ssrlrns_ ## E1(Vd->E2(j + ofs * i), \ + imm, BIT / 2 - 1); \ + } \ + } \ + *Vd = temp; \ +} + +static void do_vssrlrni_q(VReg *Vd, VReg * Vj, + uint64_t imm, int idx, Int128 mask) +{ + Int128 shft_res1, shft_res2, r1, r2; + if (imm == 0) { + shft_res1 = Vj->Q(idx); + shft_res2 = Vd->Q(idx); + } else { + r1 = int128_and(int128_urshift(Vj->Q(idx), (imm - 1)), 
int128_one()); + r2 = int128_and(int128_urshift(Vd->Q(idx), (imm - 1)), int128_one()); + shft_res1 = (int128_add(int128_urshift(Vj->Q(idx), imm), r1)); + shft_res2 = (int128_add(int128_urshift(Vd->Q(idx), imm), r2)); + } + + if (int128_ult(mask, shft_res1)) { + Vd->D(idx * 2) = int128_getlo(mask); + }else { + Vd->D(idx * 2) = int128_getlo(shft_res1); + } + + if (int128_ult(mask, shft_res2)) { + Vd->D(idx * 2 + 1) = int128_getlo(mask); + }else { + Vd->D(idx * 2 + 1) = int128_getlo(shft_res2); + } +} + +void HELPER(vssrlrni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) +{ + int i; + Int128 mask; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + int oprsz = simd_oprsz(desc); + + mask = int128_sub(int128_lshift(int128_one(), 63), int128_one()); + + for (i = 0; i < oprsz / 16; i++) { + do_vssrlrni_q(Vd, Vj, imm, i, mask); + } +} + +VSSRLRNI(vssrlrni_b_h, 16, B, H) +VSSRLRNI(vssrlrni_h_w, 32, H, W) +VSSRLRNI(vssrlrni_w_d, 64, W, D) + +#define VSSRARNI(NAME, BIT, E1, E2) \ +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ +{ \ + int i, j, ofs; \ + VReg temp = {}; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + int oprsz = simd_oprsz(desc); \ + \ + ofs = LSX_LEN / BIT; \ + for (i = 0; i < oprsz / 16; i++) { \ + for (j = 0; j < ofs; j++) { \ + temp.E1(j + ofs * 2 * i) = do_ssrarns_ ## E1(Vj->E2(j + ofs * i), \ + imm, BIT / 2 - 1); \ + temp.E1(j + ofs * (2 * i + 1)) = do_ssrarns_ ## E1(Vd->E2(j + ofs * i), \ + imm, BIT / 2 - 1); \ + } \ + } \ + *Vd = temp; \ +} + +static void do_vssrarni_d_q(VReg *Vd, VReg *Vj, + uint64_t imm, int idx, Int128 mask1, Int128 mask2) +{ + Int128 shft_res1, shft_res2, r1, r2; + + if (imm == 0) { + shft_res1 = Vj->Q(idx); + shft_res2 = Vd->Q(idx); + } else { + r1 = int128_and(int128_rshift(Vj->Q(idx), (imm - 1)), int128_one()); + r2 = int128_and(int128_rshift(Vd->Q(idx), (imm - 1)), int128_one()); + shft_res1 = int128_add(int128_rshift(Vj->Q(idx), imm), r1); + shft_res2 = int128_add(int128_rshift(Vd->Q(idx), imm), r2); + } + if (int128_gt(shft_res1, mask1)) { + Vd->D(idx * 2) = int128_getlo(mask1); + } else if (int128_lt(shft_res1, int128_neg(mask2))) { + Vd->D(idx * 2) = int128_getlo(mask2); + } else { + Vd->D(idx * 2) = int128_getlo(shft_res1); + } + + if (int128_gt(shft_res2, mask1)) { + Vd->D(idx * 2 + 1) = int128_getlo(mask1); + } else if (int128_lt(shft_res2, int128_neg(mask2))) { + Vd->D(idx * 2 + 1) = int128_getlo(mask2); + } else { + Vd->D(idx * 2 + 1) = int128_getlo(shft_res2); + } +} + +void HELPER(vssrarni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) +{ + int i; + Int128 mask1, mask2; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + int oprsz = simd_oprsz(desc); + + mask1 = int128_sub(int128_lshift(int128_one(), 63), int128_one()); + mask2 = int128_lshift(int128_one(), 63); + + for (i = 0; i < oprsz / 16; i++) { + do_vssrarni_d_q(Vd, Vj, imm, i, mask1, mask2); + } +} + +VSSRARNI(vssrarni_b_h, 16, B, H) +VSSRARNI(vssrarni_h_w, 32, H, W) +VSSRARNI(vssrarni_w_d, 64, W, D) + +#define VSSRLRNUI(NAME, BIT, E1, E2) \ +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ +{ \ + int i, j, ofs; \ + VReg temp = {}; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + int oprsz = simd_oprsz(desc); \ + \ + ofs = LSX_LEN / BIT; \ + for (i = 0; i < oprsz / 16; i++) { \ + for (j = 0; j < ofs; j++) { \ + temp.E1(j + ofs * 2 * i) = do_ssrlrnu_ ## E1(Vj->E2(j + ofs * i), \ + imm, BIT / 2); \ + temp.E1(j + ofs * (2 * i + 1)) = do_ssrlrnu_ ## E1(Vd->E2(j + ofs * i), \ + imm, BIT / 2); \ + } \ + } \ + *Vd = 
temp; \ +} + +void HELPER(vssrlrni_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) +{ + int i; + Int128 mask; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + int oprsz = simd_oprsz(desc); + + mask = int128_sub(int128_lshift(int128_one(), 64), int128_one()); + + for (i = 0; i < oprsz / 16; i++) { + do_vssrlrni_q(Vd, Vj, imm, i, mask); + } +} + +VSSRLRNUI(vssrlrni_bu_h, 16, B, H) +VSSRLRNUI(vssrlrni_hu_w, 32, H, W) +VSSRLRNUI(vssrlrni_wu_d, 64, W, D) + +#define VSSRARNUI(NAME, BIT, E1, E2) \ +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ +{ \ + int i, j, ofs; \ + VReg temp = {}; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + int oprsz = simd_oprsz(desc); \ + \ + ofs = LSX_LEN / BIT; \ + for (i = 0; i < oprsz / 16; i++) { \ + for (j = 0; j < ofs; j++) { \ + temp.E1(j + ofs * 2 * i) = do_ssrarnu_ ## E1(Vj->E2(j + ofs * i), \ + imm, BIT / 2); \ + temp.E1(j + ofs * (2 * i + 1)) = do_ssrarnu_ ## E1(Vd->E2(j + ofs * i), \ + imm, BIT / 2); \ + } \ + } \ + *Vd = temp; \ +} + +static void do_vssrarni_du_q(VReg *Vd, VReg *Vj, + uint64_t imm, int idx, Int128 mask1, Int128 mask2) +{ + Int128 shft_res1, shft_res2, r1, r2; + + if (imm == 0) { + shft_res1 = Vj->Q(idx); + shft_res2 = Vd->Q(idx); + } else { + r1 = int128_and(int128_rshift(Vj->Q(idx), (imm - 1)), int128_one()); + r2 = int128_and(int128_rshift(Vd->Q(idx), (imm - 1)), int128_one()); + shft_res1 = int128_add(int128_rshift(Vj->Q(idx), imm), r1); + shft_res2 = int128_add(int128_rshift(Vd->Q(idx), imm), r2); + } + + if (int128_lt(Vj->Q(idx), int128_zero())) { + shft_res1 = int128_zero(); + } + if (int128_lt(Vd->Q(idx), int128_zero())) { + shft_res2 = int128_zero(); + } + + if (int128_gt(shft_res1, mask1)) { + Vd->D(idx * 2) = int128_getlo(mask1); + } else if (int128_lt(shft_res1, int128_neg(mask2))) { + Vd->D(idx * 2) = int128_getlo(mask2); + } else { + Vd->D(idx * 2) = int128_getlo(shft_res1); + } + + if (int128_gt(shft_res2, mask1)) { + Vd->D(idx * 2 + 1) = int128_getlo(mask1); + } else if (int128_lt(shft_res2, int128_neg(mask2))) { + Vd->D(idx * 2 + 1) = int128_getlo(mask2); + } else { + Vd->D(idx * 2 + 1) = int128_getlo(shft_res2); + } +} + +void HELPER(vssrarni_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) +{ + int i; + Int128 mask1, mask2; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + int oprsz = simd_oprsz(desc); + + mask1 = int128_sub(int128_lshift(int128_one(), 64), int128_one()); + mask2 = int128_lshift(int128_one(), 64); + + for (i = 0; i < oprsz / 16; i++) { + do_vssrarni_du_q(Vd, Vj, imm, i, mask1, mask2); + } +} + +VSSRARNUI(vssrarni_bu_h, 16, B, H) +VSSRARNUI(vssrarni_hu_w, 32, H, W) +VSSRARNUI(vssrarni_wu_d, 64, W, D) + +#define DO_2OP(NAME, BIT, E, DO_OP) \ +void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \ +{ \ + int i; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + int oprsz = simd_oprsz(desc); \ + \ + for (i = 0; i < oprsz / (BIT / 8); i++) \ + { \ + Vd->E(i) = DO_OP(Vj->E(i)); \ + } \ +} + +DO_2OP(vclo_b, 8, UB, DO_CLO_B) +DO_2OP(vclo_h, 16, UH, DO_CLO_H) +DO_2OP(vclo_w, 32, UW, DO_CLO_W) +DO_2OP(vclo_d, 64, UD, DO_CLO_D) +DO_2OP(vclz_b, 8, UB, DO_CLZ_B) +DO_2OP(vclz_h, 16, UH, DO_CLZ_H) +DO_2OP(vclz_w, 32, UW, DO_CLZ_W) +DO_2OP(vclz_d, 64, UD, DO_CLZ_D) + +#define VPCNT(NAME, BIT, E, FN) \ +void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \ +{ \ + int i; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + int oprsz = simd_oprsz(desc); \ + \ + for (i = 0; i < oprsz / (BIT / 8); i++) \ + { \ + Vd->E(i) = FN(Vj->E(i)); \ + } \ +} + 
+VPCNT(vpcnt_b, 8, UB, ctpop8) +VPCNT(vpcnt_h, 16, UH, ctpop16) +VPCNT(vpcnt_w, 32, UW, ctpop32) +VPCNT(vpcnt_d, 64, UD, ctpop64) + +#define DO_BIT(NAME, BIT, E, DO_OP) \ +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ +{ \ + int i; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + VReg *Vk = (VReg *)vk; \ + int oprsz = simd_oprsz(desc); \ + \ + for (i = 0; i < oprsz / (BIT / 8); i++) { \ + Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)%BIT); \ + } \ +} + +DO_BIT(vbitclr_b, 8, UB, DO_BITCLR) +DO_BIT(vbitclr_h, 16, UH, DO_BITCLR) +DO_BIT(vbitclr_w, 32, UW, DO_BITCLR) +DO_BIT(vbitclr_d, 64, UD, DO_BITCLR) +DO_BIT(vbitset_b, 8, UB, DO_BITSET) +DO_BIT(vbitset_h, 16, UH, DO_BITSET) +DO_BIT(vbitset_w, 32, UW, DO_BITSET) +DO_BIT(vbitset_d, 64, UD, DO_BITSET) +DO_BIT(vbitrev_b, 8, UB, DO_BITREV) +DO_BIT(vbitrev_h, 16, UH, DO_BITREV) +DO_BIT(vbitrev_w, 32, UW, DO_BITREV) +DO_BIT(vbitrev_d, 64, UD, DO_BITREV) + +#define DO_BITI(NAME, BIT, E, DO_OP) \ +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ +{ \ + int i; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + int oprsz = simd_oprsz(desc); \ + \ + for (i = 0; i < oprsz / (BIT / 8); i++) { \ + Vd->E(i) = DO_OP(Vj->E(i), imm); \ + } \ +} + +DO_BITI(vbitclri_b, 8, UB, DO_BITCLR) +DO_BITI(vbitclri_h, 16, UH, DO_BITCLR) +DO_BITI(vbitclri_w, 32, UW, DO_BITCLR) +DO_BITI(vbitclri_d, 64, UD, DO_BITCLR) +DO_BITI(vbitseti_b, 8, UB, DO_BITSET) +DO_BITI(vbitseti_h, 16, UH, DO_BITSET) +DO_BITI(vbitseti_w, 32, UW, DO_BITSET) +DO_BITI(vbitseti_d, 64, UD, DO_BITSET) +DO_BITI(vbitrevi_b, 8, UB, DO_BITREV) +DO_BITI(vbitrevi_h, 16, UH, DO_BITREV) +DO_BITI(vbitrevi_w, 32, UW, DO_BITREV) +DO_BITI(vbitrevi_d, 64, UD, DO_BITREV) + +#define VFRSTP(NAME, BIT, MASK, E) \ +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ +{ \ + int i, j, m, ofs; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + VReg *Vk = (VReg *)vk; \ + int oprsz = simd_oprsz(desc); \ + \ + ofs = LSX_LEN / BIT; \ + for (i = 0; i < oprsz / 16; i++) { \ + m = Vk->E(i * ofs) & MASK; \ + for (j = 0; j < ofs; j++) { \ + if (Vj->E(j + ofs * i) < 0) { \ + break; \ + } \ + } \ + Vd->E(m + i * ofs) = j; \ + } \ +} + +VFRSTP(vfrstp_b, 8, 0xf, B) +VFRSTP(vfrstp_h, 16, 0x7, H) + +#define VFRSTPI(NAME, BIT, E) \ +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ +{ \ + int i, j, m, ofs; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + int oprsz = simd_oprsz(desc); \ + \ + ofs = LSX_LEN / BIT; \ + m = imm % ofs; \ + for (i = 0; i < oprsz / 16; i++) { \ + for (j = 0; j < ofs; j++) { \ + if (Vj->E(j + ofs * i) < 0) { \ + break; \ + } \ + } \ + Vd->E(m + i * ofs) = j; \ + } \ +} + +VFRSTPI(vfrstpi_b, 8, B) +VFRSTPI(vfrstpi_h, 16, H) + +static void vec_update_fcsr0_mask(CPULoongArchState *env, + uintptr_t pc, int mask) +{ + int flags = get_float_exception_flags(&env->fp_status); + + set_float_exception_flags(0, &env->fp_status); + + flags &= ~mask; + + if (flags) { + flags = ieee_ex_to_loongarch(flags); + UPDATE_FP_CAUSE(env->fcsr0, flags); + } + + if (GET_FP_ENABLES(env->fcsr0) & flags) { + do_raise_exception(env, EXCCODE_FPE, pc); + } else { + UPDATE_FP_FLAGS(env->fcsr0, flags); + } +} + +static void vec_update_fcsr0(CPULoongArchState *env, uintptr_t pc) +{ + vec_update_fcsr0_mask(env, pc, 0); +} + +static inline void vec_clear_cause(CPULoongArchState *env) +{ + SET_FP_CAUSE(env->fcsr0, 0); +} + +#define DO_3OP_F(NAME, BIT, E, FN) \ +void HELPER(NAME)(void *vd, void *vj, void *vk, \ + CPULoongArchState *env, uint32_t desc) \ +{ \ 
+ int i; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + VReg *Vk = (VReg *)vk; \ + int oprsz = simd_oprsz(desc); \ + \ + vec_clear_cause(env); \ + for (i = 0; i < oprsz / (BIT / 8); i++) { \ + Vd->E(i) = FN(Vj->E(i), Vk->E(i), &env->fp_status); \ + vec_update_fcsr0(env, GETPC()); \ + } \ +} + +DO_3OP_F(vfadd_s, 32, UW, float32_add) +DO_3OP_F(vfadd_d, 64, UD, float64_add) +DO_3OP_F(vfsub_s, 32, UW, float32_sub) +DO_3OP_F(vfsub_d, 64, UD, float64_sub) +DO_3OP_F(vfmul_s, 32, UW, float32_mul) +DO_3OP_F(vfmul_d, 64, UD, float64_mul) +DO_3OP_F(vfdiv_s, 32, UW, float32_div) +DO_3OP_F(vfdiv_d, 64, UD, float64_div) +DO_3OP_F(vfmax_s, 32, UW, float32_maxnum) +DO_3OP_F(vfmax_d, 64, UD, float64_maxnum) +DO_3OP_F(vfmin_s, 32, UW, float32_minnum) +DO_3OP_F(vfmin_d, 64, UD, float64_minnum) +DO_3OP_F(vfmaxa_s, 32, UW, float32_maxnummag) +DO_3OP_F(vfmaxa_d, 64, UD, float64_maxnummag) +DO_3OP_F(vfmina_s, 32, UW, float32_minnummag) +DO_3OP_F(vfmina_d, 64, UD, float64_minnummag) + +#define DO_4OP_F(NAME, BIT, E, FN, flags) \ +void HELPER(NAME)(void *vd, void *vj, void *vk, void *va, \ + CPULoongArchState *env, uint32_t desc) \ +{ \ + int i; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + VReg *Vk = (VReg *)vk; \ + VReg *Va = (VReg *)va; \ + int oprsz = simd_oprsz(desc); \ + \ + vec_clear_cause(env); \ + for (i = 0; i < oprsz / (BIT / 8); i++) { \ + Vd->E(i) = FN(Vj->E(i), Vk->E(i), Va->E(i), flags, &env->fp_status); \ + vec_update_fcsr0(env, GETPC()); \ + } \ +} + +DO_4OP_F(vfmadd_s, 32, UW, float32_muladd, 0) +DO_4OP_F(vfmadd_d, 64, UD, float64_muladd, 0) +DO_4OP_F(vfmsub_s, 32, UW, float32_muladd, float_muladd_negate_c) +DO_4OP_F(vfmsub_d, 64, UD, float64_muladd, float_muladd_negate_c) +DO_4OP_F(vfnmadd_s, 32, UW, float32_muladd, float_muladd_negate_result) +DO_4OP_F(vfnmadd_d, 64, UD, float64_muladd, float_muladd_negate_result) +DO_4OP_F(vfnmsub_s, 32, UW, float32_muladd, + float_muladd_negate_c | float_muladd_negate_result) +DO_4OP_F(vfnmsub_d, 64, UD, float64_muladd, + float_muladd_negate_c | float_muladd_negate_result) + +#define DO_2OP_F(NAME, BIT, E, FN) \ +void HELPER(NAME)(void *vd, void *vj, \ + CPULoongArchState *env, uint32_t desc) \ +{ \ + int i; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + int oprsz = simd_oprsz(desc); \ + \ + vec_clear_cause(env); \ + for (i = 0; i < oprsz / (BIT / 8); i++) { \ + Vd->E(i) = FN(env, Vj->E(i)); \ + } \ +} + +#define FLOGB(BIT, T) \ +static T do_flogb_## BIT(CPULoongArchState *env, T fj) \ +{ \ + T fp, fd; \ + float_status *status = &env->fp_status; \ + FloatRoundMode old_mode = get_float_rounding_mode(status); \ + \ + set_float_rounding_mode(float_round_down, status); \ + fp = float ## BIT ##_log2(fj, status); \ + fd = float ## BIT ##_round_to_int(fp, status); \ + set_float_rounding_mode(old_mode, status); \ + vec_update_fcsr0_mask(env, GETPC(), float_flag_inexact); \ + return fd; \ +} + +FLOGB(32, uint32_t) +FLOGB(64, uint64_t) + +#define FCLASS(NAME, BIT, E, FN) \ +void HELPER(NAME)(void *vd, void *vj, \ + CPULoongArchState *env, uint32_t desc) \ +{ \ + int i; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + int oprsz = simd_oprsz(desc); \ + \ + for (i = 0; i < oprsz / (BIT / 8); i++) { \ + Vd->E(i) = FN(env, Vj->E(i)); \ + } \ +} + +FCLASS(vfclass_s, 32, UW, helper_fclass_s) +FCLASS(vfclass_d, 64, UD, helper_fclass_d) + +#define FSQRT(BIT, T) \ +static T do_fsqrt_## BIT(CPULoongArchState *env, T fj) \ +{ \ + T fd; \ + fd = float ## BIT ##_sqrt(fj, &env->fp_status); \ + vec_update_fcsr0(env, GETPC()); \ + return fd; \ 
+} + +FSQRT(32, uint32_t) +FSQRT(64, uint64_t) + +#define FRECIP(BIT, T) \ +static T do_frecip_## BIT(CPULoongArchState *env, T fj) \ +{ \ + T fd; \ + fd = float ## BIT ##_div(float ## BIT ##_one, fj, &env->fp_status); \ + vec_update_fcsr0(env, GETPC()); \ + return fd; \ +} + +FRECIP(32, uint32_t) +FRECIP(64, uint64_t) + +#define FRSQRT(BIT, T) \ +static T do_frsqrt_## BIT(CPULoongArchState *env, T fj) \ +{ \ + T fd, fp; \ + fp = float ## BIT ##_sqrt(fj, &env->fp_status); \ + fd = float ## BIT ##_div(float ## BIT ##_one, fp, &env->fp_status); \ + vec_update_fcsr0(env, GETPC()); \ + return fd; \ +} + +FRSQRT(32, uint32_t) +FRSQRT(64, uint64_t) + +DO_2OP_F(vflogb_s, 32, UW, do_flogb_32) +DO_2OP_F(vflogb_d, 64, UD, do_flogb_64) +DO_2OP_F(vfsqrt_s, 32, UW, do_fsqrt_32) +DO_2OP_F(vfsqrt_d, 64, UD, do_fsqrt_64) +DO_2OP_F(vfrecip_s, 32, UW, do_frecip_32) +DO_2OP_F(vfrecip_d, 64, UD, do_frecip_64) +DO_2OP_F(vfrsqrt_s, 32, UW, do_frsqrt_32) +DO_2OP_F(vfrsqrt_d, 64, UD, do_frsqrt_64) + +static uint32_t float16_cvt_float32(uint16_t h, float_status *status) +{ + return float16_to_float32(h, true, status); +} +static uint64_t float32_cvt_float64(uint32_t s, float_status *status) +{ + return float32_to_float64(s, status); +} + +static uint16_t float32_cvt_float16(uint32_t s, float_status *status) +{ + return float32_to_float16(s, true, status); +} +static uint32_t float64_cvt_float32(uint64_t d, float_status *status) +{ + return float64_to_float32(d, status); +} + +void HELPER(vfcvtl_s_h)(void *vd, void *vj, + CPULoongArchState *env, uint32_t desc) +{ + int i, j, ofs; + VReg temp = {}; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + int oprsz = simd_oprsz(desc); + + ofs = LSX_LEN / 32; + vec_clear_cause(env); + for (i = 0; i < oprsz / 16; i++) { + for (j = 0; j < ofs; j++) { + temp.UW(j + ofs * i) =float16_cvt_float32(Vj->UH(j + ofs * 2 * i), + &env->fp_status); + } + vec_update_fcsr0(env, GETPC()); + } + *Vd = temp; +} + +void HELPER(vfcvtl_d_s)(void *vd, void *vj, + CPULoongArchState *env, uint32_t desc) +{ + int i, j, ofs; + VReg temp = {}; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + int oprsz = simd_oprsz(desc); + + ofs = LSX_LEN / 64; + vec_clear_cause(env); + for (i = 0; i < oprsz / 16; i++) { + for (j = 0; j < ofs; j++) { + temp.UD(j + ofs * i) = float32_cvt_float64(Vj->UW(j + ofs * 2 * i), + &env->fp_status); + } + vec_update_fcsr0(env, GETPC()); + } + *Vd = temp; +} + +void HELPER(vfcvth_s_h)(void *vd, void *vj, + CPULoongArchState *env, uint32_t desc) +{ + int i, j, ofs; + VReg temp = {}; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + int oprsz = simd_oprsz(desc); + + ofs = LSX_LEN / 32; + vec_clear_cause(env); + for (i = 0; i < oprsz / 16; i++) { + for (j = 0; j < ofs; j++) { + temp.UW(j + ofs * i) = float16_cvt_float32(Vj->UH(j + ofs * (2 * i + 1)), + &env->fp_status); + } + vec_update_fcsr0(env, GETPC()); + } + *Vd = temp; +} + +void HELPER(vfcvth_d_s)(void *vd, void *vj, + CPULoongArchState *env, uint32_t desc) +{ + int i, j, ofs; + VReg temp = {}; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + int oprsz = simd_oprsz(desc); + + ofs = LSX_LEN / 64; + vec_clear_cause(env); + for (i = 0; i < oprsz / 16; i++) { + for (j = 0; j < ofs; j++) { + temp.UD(j + ofs * i) = float32_cvt_float64(Vj->UW(j + ofs * (2 * i + 1)), + &env->fp_status); + } + vec_update_fcsr0(env, GETPC()); + } + *Vd = temp; +} + +void HELPER(vfcvt_h_s)(void *vd, void *vj, void *vk, + CPULoongArchState *env, uint32_t desc) +{ + int i, j, ofs; + VReg temp = {}; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg 
*)vj; + VReg *Vk = (VReg *)vk; + int oprsz = simd_oprsz(desc); + + ofs = LSX_LEN / 32; + vec_clear_cause(env); + for(i = 0; i < oprsz / 16; i++) { + for (j = 0; j < ofs; j++) { + temp.UH(j + ofs * (2 * i + 1)) = float32_cvt_float16(Vj->UW(j + ofs * i), + &env->fp_status); + temp.UH(j + ofs * 2 * i) = float32_cvt_float16(Vk->UW(j + ofs * i), + &env->fp_status); + } + vec_update_fcsr0(env, GETPC()); + } + *Vd = temp; +} + +void HELPER(vfcvt_s_d)(void *vd, void *vj, void *vk, + CPULoongArchState *env, uint32_t desc) +{ + int i, j, ofs; + VReg temp = {}; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + VReg *Vk = (VReg *)vk; + int oprsz = simd_oprsz(desc); + + ofs = LSX_LEN / 64; + vec_clear_cause(env); + for(i = 0; i < oprsz / 16; i++) { + for (j = 0; j < ofs; j++) { + temp.UW(j + ofs * (2 * i + 1)) = float64_cvt_float32(Vj->UD(j + ofs * i), + &env->fp_status); + temp.UW(j + ofs * 2 * i) = float64_cvt_float32(Vk->UD(j + ofs * i), + &env->fp_status); + } + vec_update_fcsr0(env, GETPC()); + } + *Vd = temp; +} + +void HELPER(vfrint_s)(void *vd, void *vj, + CPULoongArchState *env, uint32_t desc) +{ + int i; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + int oprsz = simd_oprsz(desc); + + vec_clear_cause(env); + for (i = 0; i < oprsz / 4; i++) { + Vd->W(i) = float32_round_to_int(Vj->UW(i), &env->fp_status); + vec_update_fcsr0(env, GETPC()); + } +} + +void HELPER(vfrint_d)(void *vd, void *vj, + CPULoongArchState *env, uint32_t desc) +{ + int i; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + int oprsz = simd_oprsz(desc); + + vec_clear_cause(env); + for (i = 0; i < oprsz / 8; i++) { + Vd->D(i) = float64_round_to_int(Vj->UD(i), &env->fp_status); + vec_update_fcsr0(env, GETPC()); + } +} + +#define FCVT_2OP(NAME, BIT, E, MODE) \ +void HELPER(NAME)(void *vd, void *vj, \ + CPULoongArchState *env, uint32_t desc) \ +{ \ + int i; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + int oprsz = simd_oprsz(desc); \ + \ + vec_clear_cause(env); \ + for (i = 0; i < oprsz / (BIT / 8); i++) { \ + FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); \ + set_float_rounding_mode(MODE, &env->fp_status); \ + Vd->E(i) = float## BIT ## _round_to_int(Vj->E(i), &env->fp_status); \ + set_float_rounding_mode(old_mode, &env->fp_status); \ + vec_update_fcsr0(env, GETPC()); \ + } \ +} + +FCVT_2OP(vfrintrne_s, 32, UW, float_round_nearest_even) +FCVT_2OP(vfrintrne_d, 64, UD, float_round_nearest_even) +FCVT_2OP(vfrintrz_s, 32, UW, float_round_to_zero) +FCVT_2OP(vfrintrz_d, 64, UD, float_round_to_zero) +FCVT_2OP(vfrintrp_s, 32, UW, float_round_up) +FCVT_2OP(vfrintrp_d, 64, UD, float_round_up) +FCVT_2OP(vfrintrm_s, 32, UW, float_round_down) +FCVT_2OP(vfrintrm_d, 64, UD, float_round_down) + +#define FTINT(NAME, FMT1, FMT2, T1, T2, MODE) \ +static T2 do_ftint ## NAME(CPULoongArchState *env, T1 fj) \ +{ \ + T2 fd; \ + FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); \ + \ + set_float_rounding_mode(MODE, &env->fp_status); \ + fd = do_## FMT1 ##_to_## FMT2(env, fj); \ + set_float_rounding_mode(old_mode, &env->fp_status); \ + return fd; \ +} + +#define DO_FTINT(FMT1, FMT2, T1, T2) \ +static T2 do_## FMT1 ##_to_## FMT2(CPULoongArchState *env, T1 fj) \ +{ \ + T2 fd; \ + \ + fd = FMT1 ##_to_## FMT2(fj, &env->fp_status); \ + if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { \ + if (FMT1 ##_is_any_nan(fj)) { \ + fd = 0; \ + } \ + } \ + vec_update_fcsr0(env, GETPC()); \ + return fd; \ +} + +DO_FTINT(float32, int32, uint32_t, uint32_t) +DO_FTINT(float64, int64, uint64_t, 
uint64_t) +DO_FTINT(float32, uint32, uint32_t, uint32_t) +DO_FTINT(float64, uint64, uint64_t, uint64_t) +DO_FTINT(float64, int32, uint64_t, uint32_t) +DO_FTINT(float32, int64, uint32_t, uint64_t) + +FTINT(rne_w_s, float32, int32, uint32_t, uint32_t, float_round_nearest_even) +FTINT(rne_l_d, float64, int64, uint64_t, uint64_t, float_round_nearest_even) +FTINT(rp_w_s, float32, int32, uint32_t, uint32_t, float_round_up) +FTINT(rp_l_d, float64, int64, uint64_t, uint64_t, float_round_up) +FTINT(rz_w_s, float32, int32, uint32_t, uint32_t, float_round_to_zero) +FTINT(rz_l_d, float64, int64, uint64_t, uint64_t, float_round_to_zero) +FTINT(rm_w_s, float32, int32, uint32_t, uint32_t, float_round_down) +FTINT(rm_l_d, float64, int64, uint64_t, uint64_t, float_round_down) + +DO_2OP_F(vftintrne_w_s, 32, UW, do_ftintrne_w_s) +DO_2OP_F(vftintrne_l_d, 64, UD, do_ftintrne_l_d) +DO_2OP_F(vftintrp_w_s, 32, UW, do_ftintrp_w_s) +DO_2OP_F(vftintrp_l_d, 64, UD, do_ftintrp_l_d) +DO_2OP_F(vftintrz_w_s, 32, UW, do_ftintrz_w_s) +DO_2OP_F(vftintrz_l_d, 64, UD, do_ftintrz_l_d) +DO_2OP_F(vftintrm_w_s, 32, UW, do_ftintrm_w_s) +DO_2OP_F(vftintrm_l_d, 64, UD, do_ftintrm_l_d) +DO_2OP_F(vftint_w_s, 32, UW, do_float32_to_int32) +DO_2OP_F(vftint_l_d, 64, UD, do_float64_to_int64) + +FTINT(rz_wu_s, float32, uint32, uint32_t, uint32_t, float_round_to_zero) +FTINT(rz_lu_d, float64, uint64, uint64_t, uint64_t, float_round_to_zero) + +DO_2OP_F(vftintrz_wu_s, 32, UW, do_ftintrz_wu_s) +DO_2OP_F(vftintrz_lu_d, 64, UD, do_ftintrz_lu_d) +DO_2OP_F(vftint_wu_s, 32, UW, do_float32_to_uint32) +DO_2OP_F(vftint_lu_d, 64, UD, do_float64_to_uint64) + +FTINT(rm_w_d, float64, int32, uint64_t, uint32_t, float_round_down) +FTINT(rp_w_d, float64, int32, uint64_t, uint32_t, float_round_up) +FTINT(rz_w_d, float64, int32, uint64_t, uint32_t, float_round_to_zero) +FTINT(rne_w_d, float64, int32, uint64_t, uint32_t, float_round_nearest_even) + +#define FTINT_W_D(NAME, FN) \ +void HELPER(NAME)(void *vd, void *vj, void *vk, \ + CPULoongArchState *env, uint32_t desc) \ +{ \ + int i, j, ofs; \ + VReg temp = {}; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + VReg *Vk = (VReg *)vk; \ + int oprsz = simd_oprsz(desc); \ + \ + ofs = LSX_LEN / 64; \ + vec_clear_cause(env); \ + for (i = 0; i < oprsz / 16; i++) { \ + for (j = 0; j < ofs; j++) { \ + temp.W(j + ofs * (2 * i + 1)) = FN(env, Vj->UD(j + ofs * i)); \ + temp.W(j + ofs * 2 * i) = FN(env, Vk->UD(j + ofs * i)); \ + } \ + } \ + *Vd = temp; \ +} + +FTINT_W_D(vftint_w_d, do_float64_to_int32) +FTINT_W_D(vftintrm_w_d, do_ftintrm_w_d) +FTINT_W_D(vftintrp_w_d, do_ftintrp_w_d) +FTINT_W_D(vftintrz_w_d, do_ftintrz_w_d) +FTINT_W_D(vftintrne_w_d, do_ftintrne_w_d) + +FTINT(rml_l_s, float32, int64, uint32_t, uint64_t, float_round_down) +FTINT(rpl_l_s, float32, int64, uint32_t, uint64_t, float_round_up) +FTINT(rzl_l_s, float32, int64, uint32_t, uint64_t, float_round_to_zero) +FTINT(rnel_l_s, float32, int64, uint32_t, uint64_t, float_round_nearest_even) +FTINT(rmh_l_s, float32, int64, uint32_t, uint64_t, float_round_down) +FTINT(rph_l_s, float32, int64, uint32_t, uint64_t, float_round_up) +FTINT(rzh_l_s, float32, int64, uint32_t, uint64_t, float_round_to_zero) +FTINT(rneh_l_s, float32, int64, uint32_t, uint64_t, float_round_nearest_even) + +#define FTINTL_L_S(NAME, FN) \ +void HELPER(NAME)(void *vd, void *vj, \ + CPULoongArchState *env, uint32_t desc) \ +{ \ + int i, j, ofs; \ + VReg temp; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + int oprsz = simd_oprsz(desc); \ + \ + ofs = LSX_LEN / 64; \ + 
vec_clear_cause(env); \ + for (i = 0; i < oprsz / 16; i++) { \ + for (j = 0; j < ofs; j++) { \ + temp.D(j + ofs * i) = FN(env, Vj->UW(j + ofs * 2 * i)); \ + } \ + } \ + *Vd = temp; \ +} + +FTINTL_L_S(vftintl_l_s, do_float32_to_int64) +FTINTL_L_S(vftintrml_l_s, do_ftintrml_l_s) +FTINTL_L_S(vftintrpl_l_s, do_ftintrpl_l_s) +FTINTL_L_S(vftintrzl_l_s, do_ftintrzl_l_s) +FTINTL_L_S(vftintrnel_l_s, do_ftintrnel_l_s) + +#define FTINTH_L_S(NAME, FN) \ +void HELPER(NAME)(void *vd, void *vj, \ + CPULoongArchState *env, uint32_t desc) \ +{ \ + int i, j, ofs; \ + VReg temp = {}; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + int oprsz = simd_oprsz(desc); \ + \ + ofs = LSX_LEN / 64; \ + vec_clear_cause(env); \ + for (i = 0; i < oprsz / 16; i++) { \ + for (j = 0; j < ofs; j++) { \ + temp.D(j + ofs * i) = FN(env, Vj->UW(j + ofs * (2 * i + 1))); \ + } \ + } \ + *Vd = temp; \ +} + +FTINTH_L_S(vftinth_l_s, do_float32_to_int64) +FTINTH_L_S(vftintrmh_l_s, do_ftintrmh_l_s) +FTINTH_L_S(vftintrph_l_s, do_ftintrph_l_s) +FTINTH_L_S(vftintrzh_l_s, do_ftintrzh_l_s) +FTINTH_L_S(vftintrneh_l_s, do_ftintrneh_l_s) + +#define FFINT(NAME, FMT1, FMT2, T1, T2) \ +static T2 do_ffint_ ## NAME(CPULoongArchState *env, T1 fj) \ +{ \ + T2 fd; \ + \ + fd = FMT1 ##_to_## FMT2(fj, &env->fp_status); \ + vec_update_fcsr0(env, GETPC()); \ + return fd; \ +} + +FFINT(s_w, int32, float32, int32_t, uint32_t) +FFINT(d_l, int64, float64, int64_t, uint64_t) +FFINT(s_wu, uint32, float32, uint32_t, uint32_t) +FFINT(d_lu, uint64, float64, uint64_t, uint64_t) + +DO_2OP_F(vffint_s_w, 32, W, do_ffint_s_w) +DO_2OP_F(vffint_d_l, 64, D, do_ffint_d_l) +DO_2OP_F(vffint_s_wu, 32, UW, do_ffint_s_wu) +DO_2OP_F(vffint_d_lu, 64, UD, do_ffint_d_lu) + +void HELPER(vffintl_d_w)(void *vd, void *vj, + CPULoongArchState *env, uint32_t desc) +{ + int i, j, ofs; + VReg temp = {}; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + int oprsz = simd_oprsz(desc); + + ofs = LSX_LEN / 64; + vec_clear_cause(env); + for (i = 0; i < oprsz / 16; i++) { + for (j = 0; j < ofs; j++) { + temp.D(j + ofs * i) = int32_to_float64(Vj->W(j + ofs * 2 * i), + &env->fp_status); + } + vec_update_fcsr0(env, GETPC()); + } + *Vd = temp; +} + +void HELPER(vffinth_d_w)(void *vd, void *vj, + CPULoongArchState *env, uint32_t desc) +{ + int i, j, ofs; + VReg temp = {}; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + int oprsz = simd_oprsz(desc); + + ofs = LSX_LEN / 64; + vec_clear_cause(env); + for (i = 0; i < oprsz /16; i++) { + for (j = 0; j < ofs; j++) { + temp.D(j + ofs * i) = int32_to_float64(Vj->W(j + ofs * (2 * i + 1)), + &env->fp_status); + } + vec_update_fcsr0(env, GETPC()); + } + *Vd = temp; +} + +void HELPER(vffint_s_l)(void *vd, void *vj, void *vk, + CPULoongArchState *env, uint32_t desc) +{ + int i, j, ofs; + VReg temp = {}; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + VReg *Vk = (VReg *)vk; + int oprsz = simd_oprsz(desc); + + ofs = LSX_LEN / 64; + vec_clear_cause(env); + for (i = 0; i < oprsz / 16; i++) { + for (j = 0; j < ofs; j++) { + temp.W(j + ofs * (2 * i + 1)) = int64_to_float32(Vj->D(j + ofs * i), + &env->fp_status); + temp.W(j + ofs * 2 * i) = int64_to_float32(Vk->D(j + ofs * i), + &env->fp_status); + } + vec_update_fcsr0(env, GETPC()); + } + *Vd = temp; +} + +#define VCMPI(NAME, BIT, E, DO_OP) \ +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ +{ \ + int i; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + typedef __typeof(Vd->E(0)) TD; \ + int oprsz = simd_oprsz(desc); \ + \ + for (i = 0; i < oprsz / (BIT / 8); i++) { \ + 
Vd->E(i) = DO_OP(Vj->E(i), (TD)imm); \ + } \ +} + +VCMPI(vseqi_b, 8, B, VSEQ) +VCMPI(vseqi_h, 16, H, VSEQ) +VCMPI(vseqi_w, 32, W, VSEQ) +VCMPI(vseqi_d, 64, D, VSEQ) +VCMPI(vslei_b, 8, B, VSLE) +VCMPI(vslei_h, 16, H, VSLE) +VCMPI(vslei_w, 32, W, VSLE) +VCMPI(vslei_d, 64, D, VSLE) +VCMPI(vslei_bu, 8, UB, VSLE) +VCMPI(vslei_hu, 16, UH, VSLE) +VCMPI(vslei_wu, 32, UW, VSLE) +VCMPI(vslei_du, 64, UD, VSLE) +VCMPI(vslti_b, 8, B, VSLT) +VCMPI(vslti_h, 16, H, VSLT) +VCMPI(vslti_w, 32, W, VSLT) +VCMPI(vslti_d, 64, D, VSLT) +VCMPI(vslti_bu, 8, UB, VSLT) +VCMPI(vslti_hu, 16, UH, VSLT) +VCMPI(vslti_wu, 32, UW, VSLT) +VCMPI(vslti_du, 64, UD, VSLT) + +static uint64_t vfcmp_common(CPULoongArchState *env, + FloatRelation cmp, uint32_t flags) +{ + uint64_t ret = 0; + + switch (cmp) { + case float_relation_less: + ret = (flags & FCMP_LT); + break; + case float_relation_equal: + ret = (flags & FCMP_EQ); + break; + case float_relation_greater: + ret = (flags & FCMP_GT); + break; + case float_relation_unordered: + ret = (flags & FCMP_UN); + break; + default: + g_assert_not_reached(); + } + + if (ret) { + ret = -1; + } + + return ret; +} + +#define VFCMP(NAME, BIT, E, FN) \ +void HELPER(NAME)(CPULoongArchState *env, uint32_t oprsz, \ + uint32_t vd, uint32_t vj, uint32_t vk, uint32_t flags) \ +{ \ + int i; \ + VReg t; \ + VReg *Vd = &(env->fpr[vd].vreg); \ + VReg *Vj = &(env->fpr[vj].vreg); \ + VReg *Vk = &(env->fpr[vk].vreg); \ + \ + vec_clear_cause(env); \ + for (i = 0; i < oprsz / (BIT / 8); i++) { \ + FloatRelation cmp; \ + cmp = FN(Vj->E(i), Vk->E(i), &env->fp_status); \ + t.E(i) = vfcmp_common(env, cmp, flags); \ + vec_update_fcsr0(env, GETPC()); \ + } \ + *Vd = t; \ +} + +VFCMP(vfcmp_c_s, 32, UW, float32_compare_quiet) +VFCMP(vfcmp_s_s, 32, UW, float32_compare) +VFCMP(vfcmp_c_d, 64, UD, float64_compare_quiet) +VFCMP(vfcmp_s_d, 64, UD, float64_compare) + +void HELPER(vbitseli_b)(void *vd, void *vj, uint64_t imm, uint32_t desc) +{ + int i; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + + for (i = 0; i < simd_oprsz(desc); i++) { + Vd->B(i) = (~Vd->B(i) & Vj->B(i)) | (Vd->B(i) & imm); + } +} + +/* Copy from target/arm/tcg/sve_helper.c */ +static inline bool do_match2(uint64_t n, uint64_t m0, uint64_t m1, int esz) +{ + int bits = 8 << esz; + uint64_t ones = dup_const(esz, 1); + uint64_t signs = ones << (bits - 1); + uint64_t cmp0, cmp1; + + cmp1 = dup_const(esz, n); + cmp0 = cmp1 ^ m0; + cmp1 = cmp1 ^ m1; + cmp0 = (cmp0 - ones) & ~cmp0; + cmp1 = (cmp1 - ones) & ~cmp1; + return (cmp0 | cmp1) & signs; +} + +#define SETANYEQZ(NAME, MO) \ +void HELPER(NAME)(CPULoongArchState *env, \ + uint32_t oprsz, uint32_t cd, uint32_t vj) \ +{ \ + VReg *Vj = &(env->fpr[vj].vreg); \ + \ + env->cf[cd & 0x7] = do_match2(0, Vj->D(0), Vj->D(1), MO); \ + if (oprsz == 32) { \ + env->cf[cd & 0x7] = env->cf[cd & 0x7] || \ + do_match2(0, Vj->D(2), Vj->D(3), MO); \ + } \ +} + +SETANYEQZ(vsetanyeqz_b, MO_8) +SETANYEQZ(vsetanyeqz_h, MO_16) +SETANYEQZ(vsetanyeqz_w, MO_32) +SETANYEQZ(vsetanyeqz_d, MO_64) + +#define SETALLNEZ(NAME, MO) \ +void HELPER(NAME)(CPULoongArchState *env, \ + uint32_t oprsz, uint32_t cd, uint32_t vj) \ +{ \ + VReg *Vj = &(env->fpr[vj].vreg); \ + \ + env->cf[cd & 0x7]= !do_match2(0, Vj->D(0), Vj->D(1), MO); \ + if (oprsz == 32) { \ + env->cf[cd & 0x7] = env->cf[cd & 0x7] && \ + !do_match2(0, Vj->D(2), Vj->D(3), MO); \ + } \ +} + +SETALLNEZ(vsetallnez_b, MO_8) +SETALLNEZ(vsetallnez_h, MO_16) +SETALLNEZ(vsetallnez_w, MO_32) +SETALLNEZ(vsetallnez_d, MO_64) + +#define XVINSVE0(NAME, E, MASK) \ +void HELPER(NAME)(void 
*vd, void *vj, uint64_t imm, uint32_t desc) \ +{ \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + Vd->E(imm & MASK) = Vj->E(0); \ +} + +XVINSVE0(xvinsve0_w, W, 0x7) +XVINSVE0(xvinsve0_d, D, 0x3) + +#define XVPICKVE(NAME, E, BIT, MASK) \ +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ +{ \ + int i; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + int oprsz = simd_oprsz(desc); \ + \ + Vd->E(0) = Vj->E(imm & MASK); \ + for (i = 1; i < oprsz / (BIT / 8); i++) { \ + Vd->E(i) = 0; \ + } \ +} + +XVPICKVE(xvpickve_w, W, 32, 0x7) +XVPICKVE(xvpickve_d, D, 64, 0x3) + +#define VPACKEV(NAME, BIT, E) \ +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ +{ \ + int i; \ + VReg temp = {}; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + VReg *Vk = (VReg *)vk; \ + int oprsz = simd_oprsz(desc); \ + \ + for (i = 0; i < oprsz / (BIT / 8); i++) { \ + temp.E(2 * i + 1) = Vj->E(2 * i); \ + temp.E(2 *i) = Vk->E(2 * i); \ + } \ + *Vd = temp; \ +} + +VPACKEV(vpackev_b, 16, B) +VPACKEV(vpackev_h, 32, H) +VPACKEV(vpackev_w, 64, W) +VPACKEV(vpackev_d, 128, D) + +#define VPACKOD(NAME, BIT, E) \ +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ +{ \ + int i; \ + VReg temp = {}; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + VReg *Vk = (VReg *)vk; \ + int oprsz = simd_oprsz(desc); \ + \ + for (i = 0; i < oprsz / (BIT / 8); i++) { \ + temp.E(2 * i + 1) = Vj->E(2 * i + 1); \ + temp.E(2 * i) = Vk->E(2 * i + 1); \ + } \ + *Vd = temp; \ +} + +VPACKOD(vpackod_b, 16, B) +VPACKOD(vpackod_h, 32, H) +VPACKOD(vpackod_w, 64, W) +VPACKOD(vpackod_d, 128, D) + +#define VPICKEV(NAME, BIT, E) \ +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ +{ \ + int i, j, ofs; \ + VReg temp = {}; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + VReg *Vk = (VReg *)vk; \ + int oprsz = simd_oprsz(desc); \ + \ + ofs = LSX_LEN / BIT; \ + for (i = 0; i < oprsz / 16; i++) { \ + for (j = 0; j < ofs; j++) { \ + temp.E(j + ofs * (2 * i + 1)) = Vj->E(2 * (j + ofs * i)); \ + temp.E(j + ofs * 2 * i) = Vk->E(2 * (j + ofs * i)); \ + } \ + } \ + *Vd = temp; \ +} + +VPICKEV(vpickev_b, 16, B) +VPICKEV(vpickev_h, 32, H) +VPICKEV(vpickev_w, 64, W) +VPICKEV(vpickev_d, 128, D) + +#define VPICKOD(NAME, BIT, E) \ +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ +{ \ + int i, j, ofs; \ + VReg temp = {}; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + VReg *Vk = (VReg *)vk; \ + int oprsz = simd_oprsz(desc); \ + \ + ofs = LSX_LEN / BIT; \ + for (i = 0; i < oprsz / 16; i++) { \ + for (j = 0; j < ofs; j++) { \ + temp.E(j + ofs * (2 * i + 1)) = Vj->E(2 * (j + ofs * i) + 1); \ + temp.E(j + ofs * 2 * i) = Vk->E(2 * (j + ofs * i) + 1); \ + } \ + } \ + *Vd = temp; \ +} + +VPICKOD(vpickod_b, 16, B) +VPICKOD(vpickod_h, 32, H) +VPICKOD(vpickod_w, 64, W) +VPICKOD(vpickod_d, 128, D) + +#define VILVL(NAME, BIT, E) \ +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ +{ \ + int i, j, ofs; \ + VReg temp = {}; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + VReg *Vk = (VReg *)vk; \ + int oprsz = simd_oprsz(desc); \ + \ + ofs = LSX_LEN / BIT; \ + for (i = 0; i < oprsz / 16; i++) { \ + for (j = 0; j < ofs; j++) { \ + temp.E(2 * (j + ofs * i) + 1) = Vj->E(j + ofs * 2 * i); \ + temp.E(2 * (j + ofs * i)) = Vk->E(j + ofs * 2 * i); \ + } \ + } \ + *Vd = temp; \ +} + +VILVL(vilvl_b, 16, B) +VILVL(vilvl_h, 32, H) +VILVL(vilvl_w, 64, W) +VILVL(vilvl_d, 128, D) + +#define VILVH(NAME, BIT, E) \ +void HELPER(NAME)(void *vd, void *vj, void 
*vk, uint32_t desc) \ +{ \ + int i, j, ofs; \ + VReg temp = {}; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + VReg *Vk = (VReg *)vk; \ + int oprsz = simd_oprsz(desc); \ + \ + ofs = LSX_LEN / BIT; \ + for (i = 0; i < oprsz / 16; i++) { \ + for (j = 0; j < ofs; j++) { \ + temp.E(2 * (j + ofs * i) + 1) = Vj->E(j + ofs * (2 * i + 1)); \ + temp.E(2 * (j + ofs * i)) = Vk->E(j + ofs * (2 * i + 1)); \ + } \ + } \ + *Vd = temp; \ +} + +VILVH(vilvh_b, 16, B) +VILVH(vilvh_h, 32, H) +VILVH(vilvh_w, 64, W) +VILVH(vilvh_d, 128, D) + +void HELPER(vshuf_b)(void *vd, void *vj, void *vk, void *va, uint32_t desc) +{ + int i, j, m; + VReg temp = {}; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + VReg *Vk = (VReg *)vk; + VReg *Va = (VReg *)va; + int oprsz = simd_oprsz(desc); + + m = LSX_LEN / 8; + for (i = 0; i < (oprsz / 16) * m; i++) { + j = i < m ? 0 : 1; + uint64_t k = (uint8_t)Va->B(i) % (2 * m); + temp.B(i) = k < m ? Vk->B(k + j * m): Vj->B(k + (j - 1) * m); + } + *Vd = temp; +} + +#define VSHUF(NAME, BIT, E) \ +void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ +{ \ + int i, j, m; \ + VReg temp = {}; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + VReg *Vk = (VReg *)vk; \ + int oprsz = simd_oprsz(desc); \ + \ + m = LSX_LEN / BIT; \ + for (i = 0; i < (oprsz / 16) * m; i++) { \ + j = i < m ? 0 : 1; \ + uint64_t k = ((uint8_t)Vd->E(i)) % (2 * m); \ + temp.E(i) = k < m ? Vk->E(k + j * m) : Vj->E(k + (j - 1) * m); \ + } \ + *Vd = temp; \ +} + +VSHUF(vshuf_h, 16, H) +VSHUF(vshuf_w, 32, W) +VSHUF(vshuf_d, 64, D) + +#define VSHUF4I(NAME, BIT, E) \ +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ +{ \ + int i, j, max; \ + VReg temp = {}; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + int oprsz = simd_oprsz(desc); \ + \ + max = LSX_LEN / BIT; \ + for (i = 0; i < oprsz / (BIT / 8); i++) { \ + j = i < max ? 1 : 2; \ + temp.E(i) = Vj->E(SHF_POS(i - ((j -1)* max), imm) + (j - 1) * max); \ + } \ + *Vd = temp; \ +} + +VSHUF4I(vshuf4i_b, 8, B) +VSHUF4I(vshuf4i_h, 16, H) +VSHUF4I(vshuf4i_w, 32, W) + +void HELPER(vshuf4i_d)(void *vd, void *vj, uint64_t imm, uint32_t desc) +{ + int i; + VReg temp = {}; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + int oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz / 16; i++) { + temp.D(2 * i) = (imm & 2 ? Vj : Vd)->D((imm & 1) + 2 * i); + temp.D(2 * i + 1) = (imm & 8 ? 
Vj : Vd)->D(((imm >> 2) & 1) + 2 * i); + } + *Vd = temp; +} + +void HELPER(vperm_w)(void *vd, void *vj, void *vk, uint32_t desc) +{ + int i, m; + VReg temp = {}; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + VReg *Vk = (VReg *)vk; + + m = LASX_LEN / 32; + for (i = 0; i < m ; i++) { + uint64_t k = (uint8_t)Vk->W(i) % 8; + temp.W(i) = Vj->W(k); + } + *Vd = temp; +} + +void HELPER(vpermi_w)(void *vd, void *vj, uint64_t imm, uint32_t desc) +{ + int i; + VReg temp = {}; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + int oprsz = simd_oprsz(desc); + + for (i = 0; i < oprsz / 16; i++) { + temp.W(4 * i) = Vj->W((imm & 0x3) + 4 * i); + temp.W(4 * i + 1) = Vj->W(((imm >> 2) & 0x3) + 4 * i); + temp.W(4 * i + 2) = Vd->W(((imm >> 4) & 0x3) + 4 * i); + temp.W(4 * i + 3) = Vd->W(((imm >> 6) & 0x3) + 4 * i); + } + *Vd = temp; +} + +void HELPER(vpermi_d)(void *vd, void *vj, uint64_t imm, uint32_t desc) +{ + VReg temp = {}; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + + temp.D(0) = Vj->D(imm & 0x3); + temp.D(1) = Vj->D((imm >> 2) & 0x3); + temp.D(2) = Vj->D((imm >> 4) & 0x3); + temp.D(3) = Vj->D((imm >> 6) & 0x3); + *Vd = temp; +} + +void HELPER(vpermi_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) +{ + int i; + VReg temp; + VReg *Vd = (VReg *)vd; + VReg *Vj = (VReg *)vj; + + for (i = 0; i < 2; i++, imm >>= 4) { + temp.Q(i) = (imm & 2 ? Vd: Vj)->Q(imm & 1); + } + *Vd = temp; +} + +#define VEXTRINS(NAME, BIT, E, MASK) \ +void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ +{ \ + int i, ins, extr, max; \ + VReg *Vd = (VReg *)vd; \ + VReg *Vj = (VReg *)vj; \ + int oprsz = simd_oprsz(desc); \ + \ + max = LSX_LEN / BIT; \ + ins = (imm >> 4) & MASK; \ + extr = imm & MASK; \ + for (i = 0; i < oprsz / 16; i++) { \ + Vd->E(ins + i * max) = Vj->E(extr + i * max); \ + } \ +} + +VEXTRINS(vextrins_b, 8, B, 0xf) +VEXTRINS(vextrins_h, 16, H, 0x7) +VEXTRINS(vextrins_w, 32, W, 0x3) +VEXTRINS(vextrins_d, 64, D, 0x1) diff --git a/target/loongarch/tlb_helper.c b/target/loongarch/tlb_helper.c deleted file mode 100644 index 449043c68b..0000000000 --- a/target/loongarch/tlb_helper.c +++ /dev/null @@ -1,803 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * QEMU LoongArch TLB helpers - * - * Copyright (c) 2021 Loongson Technology Corporation Limited - * - */ - -#include "qemu/osdep.h" -#include "qemu/guest-random.h" - -#include "cpu.h" -#include "internals.h" -#include "exec/helper-proto.h" -#include "exec/exec-all.h" -#include "exec/cpu_ldst.h" -#include "exec/log.h" -#include "cpu-csr.h" - -enum { - TLBRET_MATCH = 0, - TLBRET_BADADDR = 1, - TLBRET_NOMATCH = 2, - TLBRET_INVALID = 3, - TLBRET_DIRTY = 4, - TLBRET_RI = 5, - TLBRET_XI = 6, - TLBRET_PE = 7, -}; - -static int loongarch_map_tlb_entry(CPULoongArchState *env, hwaddr *physical, - int *prot, target_ulong address, - int access_type, int index, int mmu_idx) -{ - LoongArchTLB *tlb = &env->tlb[index]; - uint64_t plv = mmu_idx; - uint64_t tlb_entry, tlb_ppn; - uint8_t tlb_ps, n, tlb_v, tlb_d, tlb_plv, tlb_nx, tlb_nr, tlb_rplv; - - if (index >= LOONGARCH_STLB) { - tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS); - } else { - tlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS); - } - n = (address >> tlb_ps) & 0x1;/* Odd or even */ - - tlb_entry = n ? 
tlb->tlb_entry1 : tlb->tlb_entry0; - tlb_v = FIELD_EX64(tlb_entry, TLBENTRY, V); - tlb_d = FIELD_EX64(tlb_entry, TLBENTRY, D); - tlb_plv = FIELD_EX64(tlb_entry, TLBENTRY, PLV); - if (is_la64(env)) { - tlb_ppn = FIELD_EX64(tlb_entry, TLBENTRY_64, PPN); - tlb_nx = FIELD_EX64(tlb_entry, TLBENTRY_64, NX); - tlb_nr = FIELD_EX64(tlb_entry, TLBENTRY_64, NR); - tlb_rplv = FIELD_EX64(tlb_entry, TLBENTRY_64, RPLV); - } else { - tlb_ppn = FIELD_EX64(tlb_entry, TLBENTRY_32, PPN); - tlb_nx = 0; - tlb_nr = 0; - tlb_rplv = 0; - } - - /* Remove sw bit between bit12 -- bit PS*/ - tlb_ppn = tlb_ppn & ~(((0x1UL << (tlb_ps - 12)) -1)); - - /* Check access rights */ - if (!tlb_v) { - return TLBRET_INVALID; - } - - if (access_type == MMU_INST_FETCH && tlb_nx) { - return TLBRET_XI; - } - - if (access_type == MMU_DATA_LOAD && tlb_nr) { - return TLBRET_RI; - } - - if (((tlb_rplv == 0) && (plv > tlb_plv)) || - ((tlb_rplv == 1) && (plv != tlb_plv))) { - return TLBRET_PE; - } - - if ((access_type == MMU_DATA_STORE) && !tlb_d) { - return TLBRET_DIRTY; - } - - *physical = (tlb_ppn << R_TLBENTRY_64_PPN_SHIFT) | - (address & MAKE_64BIT_MASK(0, tlb_ps)); - *prot = PAGE_READ; - if (tlb_d) { - *prot |= PAGE_WRITE; - } - if (!tlb_nx) { - *prot |= PAGE_EXEC; - } - return TLBRET_MATCH; -} - -/* - * One tlb entry holds an adjacent odd/even pair, the vpn is the - * content of the virtual page number divided by 2. So the - * compare vpn is bit[47:15] for 16KiB page. while the vppn - * field in tlb entry contains bit[47:13], so need adjust. - * virt_vpn = vaddr[47:13] - */ -static bool loongarch_tlb_search(CPULoongArchState *env, target_ulong vaddr, - int *index) -{ - LoongArchTLB *tlb; - uint16_t csr_asid, tlb_asid, stlb_idx; - uint8_t tlb_e, tlb_ps, tlb_g, stlb_ps; - int i, compare_shift; - uint64_t vpn, tlb_vppn; - - csr_asid = FIELD_EX64(env->CSR_ASID, CSR_ASID, ASID); - stlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS); - vpn = (vaddr & TARGET_VIRT_MASK) >> (stlb_ps + 1); - stlb_idx = vpn & 0xff; /* VA[25:15] <==> TLBIDX.index for 16KiB Page */ - compare_shift = stlb_ps + 1 - R_TLB_MISC_VPPN_SHIFT; - - /* Search STLB */ - for (i = 0; i < 8; ++i) { - tlb = &env->tlb[i * 256 + stlb_idx]; - tlb_e = FIELD_EX64(tlb->tlb_misc, TLB_MISC, E); - if (tlb_e) { - tlb_vppn = FIELD_EX64(tlb->tlb_misc, TLB_MISC, VPPN); - tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID); - tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G); - - if ((tlb_g == 1 || tlb_asid == csr_asid) && - (vpn == (tlb_vppn >> compare_shift))) { - *index = i * 256 + stlb_idx; - return true; - } - } - } - - /* Search MTLB */ - for (i = LOONGARCH_STLB; i < LOONGARCH_TLB_MAX; ++i) { - tlb = &env->tlb[i]; - tlb_e = FIELD_EX64(tlb->tlb_misc, TLB_MISC, E); - if (tlb_e) { - tlb_vppn = FIELD_EX64(tlb->tlb_misc, TLB_MISC, VPPN); - tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS); - tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID); - tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G); - compare_shift = tlb_ps + 1 - R_TLB_MISC_VPPN_SHIFT; - vpn = (vaddr & TARGET_VIRT_MASK) >> (tlb_ps + 1); - if ((tlb_g == 1 || tlb_asid == csr_asid) && - (vpn == (tlb_vppn >> compare_shift))) { - *index = i; - return true; - } - } - } - return false; -} - -static int loongarch_map_address(CPULoongArchState *env, hwaddr *physical, - int *prot, target_ulong address, - MMUAccessType access_type, int mmu_idx) -{ - int index, match; - - match = loongarch_tlb_search(env, address, &index); - if (match) { - return loongarch_map_tlb_entry(env, physical, prot, - address, access_type, index, mmu_idx); - } - 
- return TLBRET_NOMATCH; -} - -static hwaddr dmw_va2pa(CPULoongArchState *env, target_ulong va, - target_ulong dmw) -{ - if (is_la64(env)) { - return va & TARGET_VIRT_MASK; - } else { - uint32_t pseg = FIELD_EX32(dmw, CSR_DMW_32, PSEG); - return (va & MAKE_64BIT_MASK(0, R_CSR_DMW_32_VSEG_SHIFT)) | \ - (pseg << R_CSR_DMW_32_VSEG_SHIFT); - } -} - -static int get_physical_address(CPULoongArchState *env, hwaddr *physical, - int *prot, target_ulong address, - MMUAccessType access_type, int mmu_idx) -{ - int user_mode = mmu_idx == MMU_IDX_USER; - int kernel_mode = mmu_idx == MMU_IDX_KERNEL; - uint32_t plv, base_c, base_v; - int64_t addr_high; - uint8_t da = FIELD_EX64(env->CSR_CRMD, CSR_CRMD, DA); - uint8_t pg = FIELD_EX64(env->CSR_CRMD, CSR_CRMD, PG); - - /* Check PG and DA */ - if (da & !pg) { - *physical = address & TARGET_PHYS_MASK; - *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; - return TLBRET_MATCH; - } - - plv = kernel_mode | (user_mode << R_CSR_DMW_PLV3_SHIFT); - if (is_la64(env)) { - base_v = address >> R_CSR_DMW_64_VSEG_SHIFT; - } else { - base_v = address >> R_CSR_DMW_32_VSEG_SHIFT; - } - /* Check direct map window */ - for (int i = 0; i < 4; i++) { - if (is_la64(env)) { - base_c = FIELD_EX64(env->CSR_DMW[i], CSR_DMW_64, VSEG); - } else { - base_c = FIELD_EX64(env->CSR_DMW[i], CSR_DMW_32, VSEG); - } - if ((plv & env->CSR_DMW[i]) && (base_c == base_v)) { - *physical = dmw_va2pa(env, address, env->CSR_DMW[i]); - *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; - return TLBRET_MATCH; - } - } - - /* Check valid extension */ - addr_high = sextract64(address, TARGET_VIRT_ADDR_SPACE_BITS, 16); - if (!(addr_high == 0 || addr_high == -1)) { - return TLBRET_BADADDR; - } - - /* Mapped address */ - return loongarch_map_address(env, physical, prot, address, - access_type, mmu_idx); -} - -hwaddr loongarch_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) -{ - LoongArchCPU *cpu = LOONGARCH_CPU(cs); - CPULoongArchState *env = &cpu->env; - hwaddr phys_addr; - int prot; - - if (get_physical_address(env, &phys_addr, &prot, addr, MMU_DATA_LOAD, - cpu_mmu_index(env, false)) != 0) { - return -1; - } - return phys_addr; -} - -static void raise_mmu_exception(CPULoongArchState *env, target_ulong address, - MMUAccessType access_type, int tlb_error) -{ - CPUState *cs = env_cpu(env); - - switch (tlb_error) { - default: - case TLBRET_BADADDR: - cs->exception_index = access_type == MMU_INST_FETCH - ? 
EXCCODE_ADEF : EXCCODE_ADEM; - break; - case TLBRET_NOMATCH: - /* No TLB match for a mapped address */ - if (access_type == MMU_DATA_LOAD) { - cs->exception_index = EXCCODE_PIL; - } else if (access_type == MMU_DATA_STORE) { - cs->exception_index = EXCCODE_PIS; - } else if (access_type == MMU_INST_FETCH) { - cs->exception_index = EXCCODE_PIF; - } - env->CSR_TLBRERA = FIELD_DP64(env->CSR_TLBRERA, CSR_TLBRERA, ISTLBR, 1); - break; - case TLBRET_INVALID: - /* TLB match with no valid bit */ - if (access_type == MMU_DATA_LOAD) { - cs->exception_index = EXCCODE_PIL; - } else if (access_type == MMU_DATA_STORE) { - cs->exception_index = EXCCODE_PIS; - } else if (access_type == MMU_INST_FETCH) { - cs->exception_index = EXCCODE_PIF; - } - break; - case TLBRET_DIRTY: - /* TLB match but 'D' bit is cleared */ - cs->exception_index = EXCCODE_PME; - break; - case TLBRET_XI: - /* Execute-Inhibit Exception */ - cs->exception_index = EXCCODE_PNX; - break; - case TLBRET_RI: - /* Read-Inhibit Exception */ - cs->exception_index = EXCCODE_PNR; - break; - case TLBRET_PE: - /* Privileged Exception */ - cs->exception_index = EXCCODE_PPI; - break; - } - - if (tlb_error == TLBRET_NOMATCH) { - env->CSR_TLBRBADV = address; - if (is_la64(env)) { - env->CSR_TLBREHI = FIELD_DP64(env->CSR_TLBREHI, CSR_TLBREHI_64, - VPPN, extract64(address, 13, 35)); - } else { - env->CSR_TLBREHI = FIELD_DP64(env->CSR_TLBREHI, CSR_TLBREHI_32, - VPPN, extract64(address, 13, 19)); - } - } else { - if (!FIELD_EX64(env->CSR_DBG, CSR_DBG, DST)) { - env->CSR_BADV = address; - } - env->CSR_TLBEHI = address & (TARGET_PAGE_MASK << 1); - } -} - -static void invalidate_tlb_entry(CPULoongArchState *env, int index) -{ - target_ulong addr, mask, pagesize; - uint8_t tlb_ps; - LoongArchTLB *tlb = &env->tlb[index]; - - int mmu_idx = cpu_mmu_index(env, false); - uint8_t tlb_v0 = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, V); - uint8_t tlb_v1 = FIELD_EX64(tlb->tlb_entry1, TLBENTRY, V); - uint64_t tlb_vppn = FIELD_EX64(tlb->tlb_misc, TLB_MISC, VPPN); - - if (index >= LOONGARCH_STLB) { - tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS); - } else { - tlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS); - } - pagesize = MAKE_64BIT_MASK(tlb_ps, 1); - mask = MAKE_64BIT_MASK(0, tlb_ps + 1); - - if (tlb_v0) { - addr = (tlb_vppn << R_TLB_MISC_VPPN_SHIFT) & ~mask; /* even */ - tlb_flush_range_by_mmuidx(env_cpu(env), addr, pagesize, - mmu_idx, TARGET_LONG_BITS); - } - - if (tlb_v1) { - addr = (tlb_vppn << R_TLB_MISC_VPPN_SHIFT) & pagesize; /* odd */ - tlb_flush_range_by_mmuidx(env_cpu(env), addr, pagesize, - mmu_idx, TARGET_LONG_BITS); - } -} - -static void invalidate_tlb(CPULoongArchState *env, int index) -{ - LoongArchTLB *tlb; - uint16_t csr_asid, tlb_asid, tlb_g; - - csr_asid = FIELD_EX64(env->CSR_ASID, CSR_ASID, ASID); - tlb = &env->tlb[index]; - tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID); - tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G); - if (tlb_g == 0 && tlb_asid != csr_asid) { - return; - } - invalidate_tlb_entry(env, index); -} - -static void fill_tlb_entry(CPULoongArchState *env, int index) -{ - LoongArchTLB *tlb = &env->tlb[index]; - uint64_t lo0, lo1, csr_vppn; - uint16_t csr_asid; - uint8_t csr_ps; - - if (FIELD_EX64(env->CSR_TLBRERA, CSR_TLBRERA, ISTLBR)) { - csr_ps = FIELD_EX64(env->CSR_TLBREHI, CSR_TLBREHI, PS); - if (is_la64(env)) { - csr_vppn = FIELD_EX64(env->CSR_TLBREHI, CSR_TLBREHI_64, VPPN); - } else { - csr_vppn = FIELD_EX64(env->CSR_TLBREHI, CSR_TLBREHI_32, VPPN); - } - lo0 = env->CSR_TLBRELO0; - lo1 = env->CSR_TLBRELO1; - } else { - 
csr_ps = FIELD_EX64(env->CSR_TLBIDX, CSR_TLBIDX, PS); - if (is_la64(env)) { - csr_vppn = FIELD_EX64(env->CSR_TLBEHI, CSR_TLBEHI_64, VPPN); - } else { - csr_vppn = FIELD_EX64(env->CSR_TLBEHI, CSR_TLBEHI_32, VPPN); - } - lo0 = env->CSR_TLBELO0; - lo1 = env->CSR_TLBELO1; - } - - if (csr_ps == 0) { - qemu_log_mask(CPU_LOG_MMU, "page size is 0\n"); - } - - /* Only MTLB has the ps fields */ - if (index >= LOONGARCH_STLB) { - tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, PS, csr_ps); - } - - tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, VPPN, csr_vppn); - tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, E, 1); - csr_asid = FIELD_EX64(env->CSR_ASID, CSR_ASID, ASID); - tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, ASID, csr_asid); - - tlb->tlb_entry0 = lo0; - tlb->tlb_entry1 = lo1; -} - -/* Return an random value between low and high */ -static uint32_t get_random_tlb(uint32_t low, uint32_t high) -{ - uint32_t val; - - qemu_guest_getrandom_nofail(&val, sizeof(val)); - return val % (high - low + 1) + low; -} - -void helper_tlbsrch(CPULoongArchState *env) -{ - int index, match; - - if (FIELD_EX64(env->CSR_TLBRERA, CSR_TLBRERA, ISTLBR)) { - match = loongarch_tlb_search(env, env->CSR_TLBREHI, &index); - } else { - match = loongarch_tlb_search(env, env->CSR_TLBEHI, &index); - } - - if (match) { - env->CSR_TLBIDX = FIELD_DP64(env->CSR_TLBIDX, CSR_TLBIDX, INDEX, index); - env->CSR_TLBIDX = FIELD_DP64(env->CSR_TLBIDX, CSR_TLBIDX, NE, 0); - return; - } - - env->CSR_TLBIDX = FIELD_DP64(env->CSR_TLBIDX, CSR_TLBIDX, NE, 1); -} - -void helper_tlbrd(CPULoongArchState *env) -{ - LoongArchTLB *tlb; - int index; - uint8_t tlb_ps, tlb_e; - - index = FIELD_EX64(env->CSR_TLBIDX, CSR_TLBIDX, INDEX); - tlb = &env->tlb[index]; - - if (index >= LOONGARCH_STLB) { - tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS); - } else { - tlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS); - } - tlb_e = FIELD_EX64(tlb->tlb_misc, TLB_MISC, E); - - if (!tlb_e) { - /* Invalid TLB entry */ - env->CSR_TLBIDX = FIELD_DP64(env->CSR_TLBIDX, CSR_TLBIDX, NE, 1); - env->CSR_ASID = FIELD_DP64(env->CSR_ASID, CSR_ASID, ASID, 0); - env->CSR_TLBEHI = 0; - env->CSR_TLBELO0 = 0; - env->CSR_TLBELO1 = 0; - env->CSR_TLBIDX = FIELD_DP64(env->CSR_TLBIDX, CSR_TLBIDX, PS, 0); - } else { - /* Valid TLB entry */ - env->CSR_TLBIDX = FIELD_DP64(env->CSR_TLBIDX, CSR_TLBIDX, NE, 0); - env->CSR_TLBIDX = FIELD_DP64(env->CSR_TLBIDX, CSR_TLBIDX, - PS, (tlb_ps & 0x3f)); - env->CSR_TLBEHI = FIELD_EX64(tlb->tlb_misc, TLB_MISC, VPPN) << - R_TLB_MISC_VPPN_SHIFT; - env->CSR_TLBELO0 = tlb->tlb_entry0; - env->CSR_TLBELO1 = tlb->tlb_entry1; - } -} - -void helper_tlbwr(CPULoongArchState *env) -{ - int index = FIELD_EX64(env->CSR_TLBIDX, CSR_TLBIDX, INDEX); - - invalidate_tlb(env, index); - - if (FIELD_EX64(env->CSR_TLBIDX, CSR_TLBIDX, NE)) { - env->tlb[index].tlb_misc = FIELD_DP64(env->tlb[index].tlb_misc, - TLB_MISC, E, 0); - return; - } - - fill_tlb_entry(env, index); -} - -void helper_tlbfill(CPULoongArchState *env) -{ - uint64_t address, entryhi; - int index, set, stlb_idx; - uint16_t pagesize, stlb_ps; - - if (FIELD_EX64(env->CSR_TLBRERA, CSR_TLBRERA, ISTLBR)) { - entryhi = env->CSR_TLBREHI; - pagesize = FIELD_EX64(env->CSR_TLBREHI, CSR_TLBREHI, PS); - } else { - entryhi = env->CSR_TLBEHI; - pagesize = FIELD_EX64(env->CSR_TLBIDX, CSR_TLBIDX, PS); - } - - stlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS); - - if (pagesize == stlb_ps) { - /* Only write into STLB bits [47:13] */ - address = entryhi & ~MAKE_64BIT_MASK(0, R_CSR_TLBEHI_64_VPPN_SHIFT); - 
- /* Choose one set ramdomly */ - set = get_random_tlb(0, 7); - - /* Index in one set */ - stlb_idx = (address >> (stlb_ps + 1)) & 0xff; /* [0,255] */ - - index = set * 256 + stlb_idx; - } else { - /* Only write into MTLB */ - index = get_random_tlb(LOONGARCH_STLB, LOONGARCH_TLB_MAX - 1); - } - - invalidate_tlb(env, index); - fill_tlb_entry(env, index); -} - -void helper_tlbclr(CPULoongArchState *env) -{ - LoongArchTLB *tlb; - int i, index; - uint16_t csr_asid, tlb_asid, tlb_g; - - csr_asid = FIELD_EX64(env->CSR_ASID, CSR_ASID, ASID); - index = FIELD_EX64(env->CSR_TLBIDX, CSR_TLBIDX, INDEX); - - if (index < LOONGARCH_STLB) { - /* STLB. One line per operation */ - for (i = 0; i < 8; i++) { - tlb = &env->tlb[i * 256 + (index % 256)]; - tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID); - tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G); - if (!tlb_g && tlb_asid == csr_asid) { - tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, E, 0); - } - } - } else if (index < LOONGARCH_TLB_MAX) { - /* All MTLB entries */ - for (i = LOONGARCH_STLB; i < LOONGARCH_TLB_MAX; i++) { - tlb = &env->tlb[i]; - tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID); - tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G); - if (!tlb_g && tlb_asid == csr_asid) { - tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, E, 0); - } - } - } - - tlb_flush(env_cpu(env)); -} - -void helper_tlbflush(CPULoongArchState *env) -{ - int i, index; - - index = FIELD_EX64(env->CSR_TLBIDX, CSR_TLBIDX, INDEX); - - if (index < LOONGARCH_STLB) { - /* STLB. One line per operation */ - for (i = 0; i < 8; i++) { - int s_idx = i * 256 + (index % 256); - env->tlb[s_idx].tlb_misc = FIELD_DP64(env->tlb[s_idx].tlb_misc, - TLB_MISC, E, 0); - } - } else if (index < LOONGARCH_TLB_MAX) { - /* All MTLB entries */ - for (i = LOONGARCH_STLB; i < LOONGARCH_TLB_MAX; i++) { - env->tlb[i].tlb_misc = FIELD_DP64(env->tlb[i].tlb_misc, - TLB_MISC, E, 0); - } - } - - tlb_flush(env_cpu(env)); -} - -void helper_invtlb_all(CPULoongArchState *env) -{ - for (int i = 0; i < LOONGARCH_TLB_MAX; i++) { - env->tlb[i].tlb_misc = FIELD_DP64(env->tlb[i].tlb_misc, - TLB_MISC, E, 0); - } - tlb_flush(env_cpu(env)); -} - -void helper_invtlb_all_g(CPULoongArchState *env, uint32_t g) -{ - for (int i = 0; i < LOONGARCH_TLB_MAX; i++) { - LoongArchTLB *tlb = &env->tlb[i]; - uint8_t tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G); - - if (tlb_g == g) { - tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, E, 0); - } - } - tlb_flush(env_cpu(env)); -} - -void helper_invtlb_all_asid(CPULoongArchState *env, target_ulong info) -{ - uint16_t asid = info & R_CSR_ASID_ASID_MASK; - - for (int i = 0; i < LOONGARCH_TLB_MAX; i++) { - LoongArchTLB *tlb = &env->tlb[i]; - uint8_t tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G); - uint16_t tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID); - - if (!tlb_g && (tlb_asid == asid)) { - tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, E, 0); - } - } - tlb_flush(env_cpu(env)); -} - -void helper_invtlb_page_asid(CPULoongArchState *env, target_ulong info, - target_ulong addr) -{ - uint16_t asid = info & 0x3ff; - - for (int i = 0; i < LOONGARCH_TLB_MAX; i++) { - LoongArchTLB *tlb = &env->tlb[i]; - uint8_t tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G); - uint16_t tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID); - uint64_t vpn, tlb_vppn; - uint8_t tlb_ps, compare_shift; - - if (i >= LOONGARCH_STLB) { - tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS); - } else { - tlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS); - } - tlb_vppn = 
FIELD_EX64(tlb->tlb_misc, TLB_MISC, VPPN); - vpn = (addr & TARGET_VIRT_MASK) >> (tlb_ps + 1); - compare_shift = tlb_ps + 1 - R_TLB_MISC_VPPN_SHIFT; - - if (!tlb_g && (tlb_asid == asid) && - (vpn == (tlb_vppn >> compare_shift))) { - tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, E, 0); - } - } - tlb_flush(env_cpu(env)); -} - -void helper_invtlb_page_asid_or_g(CPULoongArchState *env, - target_ulong info, target_ulong addr) -{ - uint16_t asid = info & 0x3ff; - - for (int i = 0; i < LOONGARCH_TLB_MAX; i++) { - LoongArchTLB *tlb = &env->tlb[i]; - uint8_t tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G); - uint16_t tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID); - uint64_t vpn, tlb_vppn; - uint8_t tlb_ps, compare_shift; - - if (i >= LOONGARCH_STLB) { - tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS); - } else { - tlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS); - } - tlb_vppn = FIELD_EX64(tlb->tlb_misc, TLB_MISC, VPPN); - vpn = (addr & TARGET_VIRT_MASK) >> (tlb_ps + 1); - compare_shift = tlb_ps + 1 - R_TLB_MISC_VPPN_SHIFT; - - if ((tlb_g || (tlb_asid == asid)) && - (vpn == (tlb_vppn >> compare_shift))) { - tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, E, 0); - } - } - tlb_flush(env_cpu(env)); -} - -bool loongarch_cpu_tlb_fill(CPUState *cs, vaddr address, int size, - MMUAccessType access_type, int mmu_idx, - bool probe, uintptr_t retaddr) -{ - LoongArchCPU *cpu = LOONGARCH_CPU(cs); - CPULoongArchState *env = &cpu->env; - hwaddr physical; - int prot; - int ret; - - /* Data access */ - ret = get_physical_address(env, &physical, &prot, address, - access_type, mmu_idx); - - if (ret == TLBRET_MATCH) { - tlb_set_page(cs, address & TARGET_PAGE_MASK, - physical & TARGET_PAGE_MASK, prot, - mmu_idx, TARGET_PAGE_SIZE); - qemu_log_mask(CPU_LOG_MMU, - "%s address=%" VADDR_PRIx " physical " HWADDR_FMT_plx - " prot %d\n", __func__, address, physical, prot); - return true; - } else { - qemu_log_mask(CPU_LOG_MMU, - "%s address=%" VADDR_PRIx " ret %d\n", __func__, address, - ret); - } - if (probe) { - return false; - } - raise_mmu_exception(env, address, access_type, ret); - cpu_loop_exit_restore(cs, retaddr); -} - -target_ulong helper_lddir(CPULoongArchState *env, target_ulong base, - target_ulong level, uint32_t mem_idx) -{ - CPUState *cs = env_cpu(env); - target_ulong badvaddr, index, phys, ret; - int shift; - uint64_t dir_base, dir_width; - bool huge = (base >> LOONGARCH_PAGE_HUGE_SHIFT) & 0x1; - - badvaddr = env->CSR_TLBRBADV; - base = base & TARGET_PHYS_MASK; - - /* 0:64bit, 1:128bit, 2:192bit, 3:256bit */ - shift = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, PTEWIDTH); - shift = (shift + 1) * 3; - - if (huge) { - return base; - } - switch (level) { - case 1: - dir_base = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, DIR1_BASE); - dir_width = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, DIR1_WIDTH); - break; - case 2: - dir_base = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, DIR2_BASE); - dir_width = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, DIR2_WIDTH); - break; - case 3: - dir_base = FIELD_EX64(env->CSR_PWCH, CSR_PWCH, DIR3_BASE); - dir_width = FIELD_EX64(env->CSR_PWCH, CSR_PWCH, DIR3_WIDTH); - break; - case 4: - dir_base = FIELD_EX64(env->CSR_PWCH, CSR_PWCH, DIR4_BASE); - dir_width = FIELD_EX64(env->CSR_PWCH, CSR_PWCH, DIR4_WIDTH); - break; - default: - do_raise_exception(env, EXCCODE_INE, GETPC()); - return 0; - } - index = (badvaddr >> dir_base) & ((1 << dir_width) - 1); - phys = base | index << shift; - ret = ldq_phys(cs->as, phys) & TARGET_PHYS_MASK; - return ret; -} - -void helper_ldpte(CPULoongArchState *env, 
target_ulong base, target_ulong odd, - uint32_t mem_idx) -{ - CPUState *cs = env_cpu(env); - target_ulong phys, tmp0, ptindex, ptoffset0, ptoffset1, ps, badv; - int shift; - bool huge = (base >> LOONGARCH_PAGE_HUGE_SHIFT) & 0x1; - uint64_t ptbase = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, PTBASE); - uint64_t ptwidth = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, PTWIDTH); - - base = base & TARGET_PHYS_MASK; - - if (huge) { - /* Huge Page. base is paddr */ - tmp0 = base ^ (1 << LOONGARCH_PAGE_HUGE_SHIFT); - /* Move Global bit */ - tmp0 = ((tmp0 & (1 << LOONGARCH_HGLOBAL_SHIFT)) >> - LOONGARCH_HGLOBAL_SHIFT) << R_TLBENTRY_G_SHIFT | - (tmp0 & (~(1 << LOONGARCH_HGLOBAL_SHIFT))); - ps = ptbase + ptwidth - 1; - if (odd) { - tmp0 += MAKE_64BIT_MASK(ps, 1); - } - } else { - /* 0:64bit, 1:128bit, 2:192bit, 3:256bit */ - shift = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, PTEWIDTH); - shift = (shift + 1) * 3; - badv = env->CSR_TLBRBADV; - - ptindex = (badv >> ptbase) & ((1 << ptwidth) - 1); - ptindex = ptindex & ~0x1; /* clear bit 0 */ - ptoffset0 = ptindex << shift; - ptoffset1 = (ptindex + 1) << shift; - - phys = base | (odd ? ptoffset1 : ptoffset0); - tmp0 = ldq_phys(cs->as, phys) & TARGET_PHYS_MASK; - ps = ptbase; - } - - if (odd) { - env->CSR_TLBRELO1 = tmp0; - } else { - env->CSR_TLBRELO0 = tmp0; - } - env->CSR_TLBREHI = FIELD_DP64(env->CSR_TLBREHI, CSR_TLBREHI, PS, ps); -} diff --git a/target/loongarch/translate.c b/target/loongarch/translate.c deleted file mode 100644 index 21f4db6fbd..0000000000 --- a/target/loongarch/translate.c +++ /dev/null @@ -1,370 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * LoongArch emulation for QEMU - main translation routines. - * - * Copyright (c) 2021 Loongson Technology Corporation Limited - */ - -#include "qemu/osdep.h" -#include "cpu.h" -#include "tcg/tcg-op.h" -#include "tcg/tcg-op-gvec.h" -#include "exec/translation-block.h" -#include "exec/translator.h" -#include "exec/helper-proto.h" -#include "exec/helper-gen.h" -#include "exec/log.h" -#include "qemu/qemu-print.h" -#include "fpu/softfloat.h" -#include "translate.h" -#include "internals.h" -#include "vec.h" - -/* Global register indices */ -TCGv cpu_gpr[32], cpu_pc; -static TCGv cpu_lladdr, cpu_llval; - -#define HELPER_H "helper.h" -#include "exec/helper-info.c.inc" -#undef HELPER_H - -#define DISAS_STOP DISAS_TARGET_0 -#define DISAS_EXIT DISAS_TARGET_1 -#define DISAS_EXIT_UPDATE DISAS_TARGET_2 - -static inline int vec_full_offset(int regno) -{ - return offsetof(CPULoongArchState, fpr[regno]); -} - -static inline int vec_reg_offset(int regno, int index, MemOp mop) -{ - const uint8_t size = 1 << mop; - int offs = index * size; - - if (HOST_BIG_ENDIAN && size < 8 ) { - offs ^= (8 - size); - } - - return offs + vec_full_offset(regno); -} - -static inline void get_vreg64(TCGv_i64 dest, int regno, int index) -{ - tcg_gen_ld_i64(dest, tcg_env, - offsetof(CPULoongArchState, fpr[regno].vreg.D(index))); -} - -static inline void set_vreg64(TCGv_i64 src, int regno, int index) -{ - tcg_gen_st_i64(src, tcg_env, - offsetof(CPULoongArchState, fpr[regno].vreg.D(index))); -} - -static inline int plus_1(DisasContext *ctx, int x) -{ - return x + 1; -} - -static inline int shl_1(DisasContext *ctx, int x) -{ - return x << 1; -} - -static inline int shl_2(DisasContext *ctx, int x) -{ - return x << 2; -} - -static inline int shl_3(DisasContext *ctx, int x) -{ - return x << 3; -} - -/* - * LoongArch the upper 32 bits are undefined ("can be any value"). 
- * QEMU chooses to nanbox, because it is most likely to show guest bugs early. - */ -static void gen_nanbox_s(TCGv_i64 out, TCGv_i64 in) -{ - tcg_gen_ori_i64(out, in, MAKE_64BIT_MASK(32, 32)); -} - -void generate_exception(DisasContext *ctx, int excp) -{ - tcg_gen_movi_tl(cpu_pc, ctx->base.pc_next); - gen_helper_raise_exception(tcg_env, tcg_constant_i32(excp)); - ctx->base.is_jmp = DISAS_NORETURN; -} - -static inline void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest) -{ - if (ctx->va32) { - dest = (uint32_t) dest; - } - - if (translator_use_goto_tb(&ctx->base, dest)) { - tcg_gen_goto_tb(n); - tcg_gen_movi_tl(cpu_pc, dest); - tcg_gen_exit_tb(ctx->base.tb, n); - } else { - tcg_gen_movi_tl(cpu_pc, dest); - tcg_gen_lookup_and_goto_ptr(); - } -} - -static void loongarch_tr_init_disas_context(DisasContextBase *dcbase, - CPUState *cs) -{ - int64_t bound; - CPULoongArchState *env = cpu_env(cs); - DisasContext *ctx = container_of(dcbase, DisasContext, base); - - ctx->page_start = ctx->base.pc_first & TARGET_PAGE_MASK; - ctx->plv = ctx->base.tb->flags & HW_FLAGS_PLV_MASK; - if (ctx->base.tb->flags & HW_FLAGS_CRMD_PG) { - ctx->mem_idx = ctx->plv; - } else { - ctx->mem_idx = MMU_IDX_DA; - } - - /* Bound the number of insns to execute to those left on the page. */ - bound = -(ctx->base.pc_first | TARGET_PAGE_MASK) / 4; - ctx->base.max_insns = MIN(ctx->base.max_insns, bound); - - if (FIELD_EX64(env->cpucfg[2], CPUCFG2, LSX)) { - ctx->vl = LSX_LEN; - } - - if (FIELD_EX64(env->cpucfg[2], CPUCFG2, LASX)) { - ctx->vl = LASX_LEN; - } - - ctx->la64 = is_la64(env); - ctx->va32 = (ctx->base.tb->flags & HW_FLAGS_VA32) != 0; - - ctx->zero = tcg_constant_tl(0); - - ctx->cpucfg1 = env->cpucfg[1]; - ctx->cpucfg2 = env->cpucfg[2]; -} - -static void loongarch_tr_tb_start(DisasContextBase *dcbase, CPUState *cs) -{ -} - -static void loongarch_tr_insn_start(DisasContextBase *dcbase, CPUState *cs) -{ - DisasContext *ctx = container_of(dcbase, DisasContext, base); - - tcg_gen_insn_start(ctx->base.pc_next); -} - -/* - * Wrappers for getting reg values. - * - * The $zero register does not have cpu_gpr[0] allocated -- we supply the - * constant zero as a source, and an uninitialized sink as destination. - * - * Further, we may provide an extension for word operations. 
- */ -static TCGv gpr_src(DisasContext *ctx, int reg_num, DisasExtend src_ext) -{ - TCGv t; - - if (reg_num == 0) { - return ctx->zero; - } - - switch (src_ext) { - case EXT_NONE: - return cpu_gpr[reg_num]; - case EXT_SIGN: - t = tcg_temp_new(); - tcg_gen_ext32s_tl(t, cpu_gpr[reg_num]); - return t; - case EXT_ZERO: - t = tcg_temp_new(); - tcg_gen_ext32u_tl(t, cpu_gpr[reg_num]); - return t; - } - g_assert_not_reached(); -} - -static TCGv gpr_dst(DisasContext *ctx, int reg_num, DisasExtend dst_ext) -{ - if (reg_num == 0 || dst_ext) { - return tcg_temp_new(); - } - return cpu_gpr[reg_num]; -} - -static void gen_set_gpr(int reg_num, TCGv t, DisasExtend dst_ext) -{ - if (reg_num != 0) { - switch (dst_ext) { - case EXT_NONE: - tcg_gen_mov_tl(cpu_gpr[reg_num], t); - break; - case EXT_SIGN: - tcg_gen_ext32s_tl(cpu_gpr[reg_num], t); - break; - case EXT_ZERO: - tcg_gen_ext32u_tl(cpu_gpr[reg_num], t); - break; - default: - g_assert_not_reached(); - } - } -} - -static TCGv get_fpr(DisasContext *ctx, int reg_num) -{ - TCGv t = tcg_temp_new(); - tcg_gen_ld_i64(t, tcg_env, - offsetof(CPULoongArchState, fpr[reg_num].vreg.D(0))); - return t; -} - -static void set_fpr(int reg_num, TCGv val) -{ - tcg_gen_st_i64(val, tcg_env, - offsetof(CPULoongArchState, fpr[reg_num].vreg.D(0))); -} - -static TCGv make_address_x(DisasContext *ctx, TCGv base, TCGv addend) -{ - TCGv temp = NULL; - - if (addend || ctx->va32) { - temp = tcg_temp_new(); - } - if (addend) { - tcg_gen_add_tl(temp, base, addend); - base = temp; - } - if (ctx->va32) { - tcg_gen_ext32u_tl(temp, base); - base = temp; - } - return base; -} - -static TCGv make_address_i(DisasContext *ctx, TCGv base, target_long ofs) -{ - TCGv addend = ofs ? tcg_constant_tl(ofs) : NULL; - return make_address_x(ctx, base, addend); -} - -static uint64_t make_address_pc(DisasContext *ctx, uint64_t addr) -{ - if (ctx->va32) { - addr = (int32_t)addr; - } - return addr; -} - -#include "decode-insns.c.inc" -#include "insn_trans/trans_arith.c.inc" -#include "insn_trans/trans_shift.c.inc" -#include "insn_trans/trans_bit.c.inc" -#include "insn_trans/trans_memory.c.inc" -#include "insn_trans/trans_atomic.c.inc" -#include "insn_trans/trans_extra.c.inc" -#include "insn_trans/trans_farith.c.inc" -#include "insn_trans/trans_fcmp.c.inc" -#include "insn_trans/trans_fcnv.c.inc" -#include "insn_trans/trans_fmov.c.inc" -#include "insn_trans/trans_fmemory.c.inc" -#include "insn_trans/trans_branch.c.inc" -#include "insn_trans/trans_privileged.c.inc" -#include "insn_trans/trans_vec.c.inc" - -static void loongarch_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs) -{ - CPULoongArchState *env = cpu_env(cs); - DisasContext *ctx = container_of(dcbase, DisasContext, base); - - ctx->opcode = translator_ldl(env, &ctx->base, ctx->base.pc_next); - - if (!decode(ctx, ctx->opcode)) { - qemu_log_mask(LOG_UNIMP, "Error: unknown opcode. 
" - TARGET_FMT_lx ": 0x%x\n", - ctx->base.pc_next, ctx->opcode); - generate_exception(ctx, EXCCODE_INE); - } - - ctx->base.pc_next += 4; - - if (ctx->va32) { - ctx->base.pc_next = (uint32_t)ctx->base.pc_next; - } -} - -static void loongarch_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs) -{ - DisasContext *ctx = container_of(dcbase, DisasContext, base); - - switch (ctx->base.is_jmp) { - case DISAS_STOP: - tcg_gen_movi_tl(cpu_pc, ctx->base.pc_next); - tcg_gen_lookup_and_goto_ptr(); - break; - case DISAS_TOO_MANY: - gen_goto_tb(ctx, 0, ctx->base.pc_next); - break; - case DISAS_NORETURN: - break; - case DISAS_EXIT_UPDATE: - tcg_gen_movi_tl(cpu_pc, ctx->base.pc_next); - QEMU_FALLTHROUGH; - case DISAS_EXIT: - tcg_gen_exit_tb(NULL, 0); - break; - default: - g_assert_not_reached(); - } -} - -static void loongarch_tr_disas_log(const DisasContextBase *dcbase, - CPUState *cpu, FILE *logfile) -{ - qemu_log("IN: %s\n", lookup_symbol(dcbase->pc_first)); - target_disas(logfile, cpu, dcbase->pc_first, dcbase->tb->size); -} - -static const TranslatorOps loongarch_tr_ops = { - .init_disas_context = loongarch_tr_init_disas_context, - .tb_start = loongarch_tr_tb_start, - .insn_start = loongarch_tr_insn_start, - .translate_insn = loongarch_tr_translate_insn, - .tb_stop = loongarch_tr_tb_stop, - .disas_log = loongarch_tr_disas_log, -}; - -void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns, - target_ulong pc, void *host_pc) -{ - DisasContext ctx; - - translator_loop(cs, tb, max_insns, pc, host_pc, - &loongarch_tr_ops, &ctx.base); -} - -void loongarch_translate_init(void) -{ - int i; - - cpu_gpr[0] = NULL; - for (i = 1; i < 32; i++) { - cpu_gpr[i] = tcg_global_mem_new(tcg_env, - offsetof(CPULoongArchState, gpr[i]), - regnames[i]); - } - - cpu_pc = tcg_global_mem_new(tcg_env, offsetof(CPULoongArchState, pc), "pc"); - cpu_lladdr = tcg_global_mem_new(tcg_env, - offsetof(CPULoongArchState, lladdr), "lladdr"); - cpu_llval = tcg_global_mem_new(tcg_env, - offsetof(CPULoongArchState, llval), "llval"); -} diff --git a/target/loongarch/vec_helper.c b/target/loongarch/vec_helper.c deleted file mode 100644 index 3faf52cbc4..0000000000 --- a/target/loongarch/vec_helper.c +++ /dev/null @@ -1,3494 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * QEMU LoongArch vector helper functions. 
- * - * Copyright (c) 2022-2023 Loongson Technology Corporation Limited - */ - -#include "qemu/osdep.h" -#include "cpu.h" -#include "exec/exec-all.h" -#include "exec/helper-proto.h" -#include "fpu/softfloat.h" -#include "internals.h" -#include "tcg/tcg.h" -#include "vec.h" -#include "tcg/tcg-gvec-desc.h" - -#define DO_ODD_EVEN(NAME, BIT, E1, E2, DO_OP) \ -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ -{ \ - int i; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - VReg *Vk = (VReg *)vk; \ - typedef __typeof(Vd->E1(0)) TD; \ - int oprsz = simd_oprsz(desc); \ - \ - for (i = 0; i < oprsz / (BIT / 8); i++) { \ - Vd->E1(i) = DO_OP((TD)Vj->E2(2 * i + 1), (TD)Vk->E2(2 * i)); \ - } \ -} - -DO_ODD_EVEN(vhaddw_h_b, 16, H, B, DO_ADD) -DO_ODD_EVEN(vhaddw_w_h, 32, W, H, DO_ADD) -DO_ODD_EVEN(vhaddw_d_w, 64, D, W, DO_ADD) - -void HELPER(vhaddw_q_d)(void *vd, void *vj, void *vk, uint32_t desc) -{ - int i; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - VReg *Vk = (VReg *)vk; - int oprsz = simd_oprsz(desc); - - for (i = 0; i < oprsz / 16 ; i++) { - Vd->Q(i) = int128_add(int128_makes64(Vj->D(2 * i + 1)), - int128_makes64(Vk->D(2 * i))); - } -} - -DO_ODD_EVEN(vhsubw_h_b, 16, H, B, DO_SUB) -DO_ODD_EVEN(vhsubw_w_h, 32, W, H, DO_SUB) -DO_ODD_EVEN(vhsubw_d_w, 64, D, W, DO_SUB) - -void HELPER(vhsubw_q_d)(void *vd, void *vj, void *vk, uint32_t desc) -{ - int i; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - VReg *Vk = (VReg *)vk; - int oprsz = simd_oprsz(desc); - - for (i = 0; i < oprsz / 16; i++) { - Vd->Q(i) = int128_sub(int128_makes64(Vj->D(2 * i + 1)), - int128_makes64(Vk->D(2 * i))); - } -} - -DO_ODD_EVEN(vhaddw_hu_bu, 16, UH, UB, DO_ADD) -DO_ODD_EVEN(vhaddw_wu_hu, 32, UW, UH, DO_ADD) -DO_ODD_EVEN(vhaddw_du_wu, 64, UD, UW, DO_ADD) - -void HELPER(vhaddw_qu_du)(void *vd, void *vj, void *vk, uint32_t desc) -{ - int i; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - VReg *Vk = (VReg *)vk; - int oprsz = simd_oprsz(desc); - - for (i = 0; i < oprsz / 16; i ++) { - Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i + 1)), - int128_make64(Vk->UD(2 * i))); - } -} - -DO_ODD_EVEN(vhsubw_hu_bu, 16, UH, UB, DO_SUB) -DO_ODD_EVEN(vhsubw_wu_hu, 32, UW, UH, DO_SUB) -DO_ODD_EVEN(vhsubw_du_wu, 64, UD, UW, DO_SUB) - -void HELPER(vhsubw_qu_du)(void *vd, void *vj, void *vk, uint32_t desc) -{ - int i; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - VReg *Vk = (VReg *)vk; - int oprsz = simd_oprsz(desc); - - for (i = 0; i < oprsz / 16; i++) { - Vd->Q(i) = int128_sub(int128_make64(Vj->UD(2 * i + 1)), - int128_make64(Vk->UD(2 * i))); - } -} - -#define DO_EVEN(NAME, BIT, E1, E2, DO_OP) \ -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ -{ \ - int i; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - VReg *Vk = (VReg *)vk; \ - typedef __typeof(Vd->E1(0)) TD; \ - int oprsz = simd_oprsz(desc); \ - \ - for (i = 0; i < oprsz / (BIT / 8); i++) { \ - Vd->E1(i) = DO_OP((TD)Vj->E2(2 * i) ,(TD)Vk->E2(2 * i)); \ - } \ -} - -#define DO_ODD(NAME, BIT, E1, E2, DO_OP) \ -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ -{ \ - int i; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - VReg *Vk = (VReg *)vk; \ - typedef __typeof(Vd->E1(0)) TD; \ - int oprsz = simd_oprsz(desc); \ - \ - for (i = 0; i < oprsz / (BIT / 8); i++) { \ - Vd->E1(i) = DO_OP((TD)Vj->E2(2 * i + 1), (TD)Vk->E2(2 * i + 1)); \ - } \ -} - -void HELPER(vaddwev_q_d)(void *vd, void *vj, void *vk, uint32_t desc) -{ - int i; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - VReg *Vk = (VReg *)vk; - int 
oprsz = simd_oprsz(desc); - - for (i = 0; i < oprsz / 16; i++) { - Vd->Q(i) = int128_add(int128_makes64(Vj->D(2 * i)), - int128_makes64(Vk->D(2 * i))); - } -} - -DO_EVEN(vaddwev_h_b, 16, H, B, DO_ADD) -DO_EVEN(vaddwev_w_h, 32, W, H, DO_ADD) -DO_EVEN(vaddwev_d_w, 64, D, W, DO_ADD) - -void HELPER(vaddwod_q_d)(void *vd, void *vj, void *vk, uint32_t desc) -{ - int i; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - VReg *Vk = (VReg *)vk; - int oprsz = simd_oprsz(desc); - - for (i = 0; i < oprsz / 16; i++) { - Vd->Q(i) = int128_add(int128_makes64(Vj->D(2 * i +1)), - int128_makes64(Vk->D(2 * i +1))); - } -} - -DO_ODD(vaddwod_h_b, 16, H, B, DO_ADD) -DO_ODD(vaddwod_w_h, 32, W, H, DO_ADD) -DO_ODD(vaddwod_d_w, 64, D, W, DO_ADD) - -void HELPER(vsubwev_q_d)(void *vd, void *vj, void *vk, uint32_t desc) -{ - int i; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - VReg *Vk = (VReg *)vk; - int oprsz = simd_oprsz(desc); - - for (i = 0; i < oprsz / 16; i++) { - Vd->Q(i) = int128_sub(int128_makes64(Vj->D(2 * i)), - int128_makes64(Vk->D(2 * i))); - } -} - -DO_EVEN(vsubwev_h_b, 16, H, B, DO_SUB) -DO_EVEN(vsubwev_w_h, 32, W, H, DO_SUB) -DO_EVEN(vsubwev_d_w, 64, D, W, DO_SUB) - -void HELPER(vsubwod_q_d)(void *vd, void *vj, void *vk, uint32_t desc) -{ - int i; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - VReg *Vk = (VReg *)vk; - int oprsz = simd_oprsz(desc); - - for (i = 0; i < oprsz / 16; i++) { - Vd->Q(i) = int128_sub(int128_makes64(Vj->D(2 * i + 1)), - int128_makes64(Vk->D(2 * i + 1))); - } -} - -DO_ODD(vsubwod_h_b, 16, H, B, DO_SUB) -DO_ODD(vsubwod_w_h, 32, W, H, DO_SUB) -DO_ODD(vsubwod_d_w, 64, D, W, DO_SUB) - -void HELPER(vaddwev_q_du)(void *vd, void *vj, void *vk, uint32_t desc) -{ - int i; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - VReg *Vk = (VReg *)vk; - int oprsz = simd_oprsz(desc); - - for (i = 0; i < oprsz / 16; i++) { - Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i)), - int128_make64(Vk->UD(2 * i))); - } -} - -DO_EVEN(vaddwev_h_bu, 16, UH, UB, DO_ADD) -DO_EVEN(vaddwev_w_hu, 32, UW, UH, DO_ADD) -DO_EVEN(vaddwev_d_wu, 64, UD, UW, DO_ADD) - -void HELPER(vaddwod_q_du)(void *vd, void *vj, void *vk, uint32_t desc) -{ - int i; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - VReg *Vk = (VReg *)vk; - int oprsz = simd_oprsz(desc); - - for (i = 0; i < oprsz / 16; i++) { - Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i + 1)), - int128_make64(Vk->UD(2 * i + 1))); - } -} - -DO_ODD(vaddwod_h_bu, 16, UH, UB, DO_ADD) -DO_ODD(vaddwod_w_hu, 32, UW, UH, DO_ADD) -DO_ODD(vaddwod_d_wu, 64, UD, UW, DO_ADD) - -void HELPER(vsubwev_q_du)(void *vd, void *vj, void *vk, uint32_t desc) -{ - int i; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - VReg *Vk = (VReg *)vk; - int oprsz = simd_oprsz(desc); - - for (i = 0; i < oprsz / 16; i++) { - Vd->Q(i) = int128_sub(int128_make64(Vj->UD(2 * i)), - int128_make64(Vk->UD(2 * i))); - } -} - -DO_EVEN(vsubwev_h_bu, 16, UH, UB, DO_SUB) -DO_EVEN(vsubwev_w_hu, 32, UW, UH, DO_SUB) -DO_EVEN(vsubwev_d_wu, 64, UD, UW, DO_SUB) - -void HELPER(vsubwod_q_du)(void *vd, void *vj, void *vk, uint32_t desc) -{ - int i; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - VReg *Vk = (VReg *)vk; - int oprsz = simd_oprsz(desc); - - for (i = 0; i < oprsz / 16; i++) { - Vd->Q(i) = int128_sub(int128_make64(Vj->UD(2 * i + 1)), - int128_make64(Vk->UD(2 * i + 1))); - } -} - -DO_ODD(vsubwod_h_bu, 16, UH, UB, DO_SUB) -DO_ODD(vsubwod_w_hu, 32, UW, UH, DO_SUB) -DO_ODD(vsubwod_d_wu, 64, UD, UW, DO_SUB) - -#define DO_EVEN_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \ -void HELPER(NAME)(void 
*vd, void *vj, void *vk, uint32_t desc) \ -{ \ - int i; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - VReg *Vk = (VReg *)vk; \ - typedef __typeof(Vd->ES1(0)) TDS; \ - typedef __typeof(Vd->EU1(0)) TDU; \ - int oprsz = simd_oprsz(desc); \ - \ - for (i = 0; i < oprsz / (BIT / 8); i++) { \ - Vd->ES1(i) = DO_OP((TDU)Vj->EU2(2 * i) ,(TDS)Vk->ES2(2 * i)); \ - } \ -} - -#define DO_ODD_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \ -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ -{ \ - int i; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - VReg *Vk = (VReg *)vk; \ - typedef __typeof(Vd->ES1(0)) TDS; \ - typedef __typeof(Vd->EU1(0)) TDU; \ - int oprsz = simd_oprsz(desc); \ - \ - for (i = 0; i < oprsz / (BIT / 8); i++) { \ - Vd->ES1(i) = DO_OP((TDU)Vj->EU2(2 * i + 1), (TDS)Vk->ES2(2 * i + 1)); \ - } \ -} - -void HELPER(vaddwev_q_du_d)(void *vd, void *vj, void *vk, uint32_t desc) -{ - int i; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - VReg *Vk = (VReg *)vk; - int oprsz = simd_oprsz(desc); - - for (i = 0; i < oprsz / 16; i++) { - Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i)), - int128_makes64(Vk->D(2 * i))); - } -} - -DO_EVEN_U_S(vaddwev_h_bu_b, 16, H, UH, B, UB, DO_ADD) -DO_EVEN_U_S(vaddwev_w_hu_h, 32, W, UW, H, UH, DO_ADD) -DO_EVEN_U_S(vaddwev_d_wu_w, 64, D, UD, W, UW, DO_ADD) - -void HELPER(vaddwod_q_du_d)(void *vd, void *vj, void *vk, uint32_t desc) -{ - int i; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - VReg *Vk = (VReg *)vk; - int oprsz = simd_oprsz(desc); - - for (i = 0; i < oprsz / 16; i++) { - Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i + 1)), - int128_makes64(Vk->D(2 * i + 1))); - } -} - -DO_ODD_U_S(vaddwod_h_bu_b, 16, H, UH, B, UB, DO_ADD) -DO_ODD_U_S(vaddwod_w_hu_h, 32, W, UW, H, UH, DO_ADD) -DO_ODD_U_S(vaddwod_d_wu_w, 64, D, UD, W, UW, DO_ADD) - -#define DO_3OP(NAME, BIT, E, DO_OP) \ -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ -{ \ - int i; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - VReg *Vk = (VReg *)vk; \ - int oprsz = simd_oprsz(desc); \ - \ - for (i = 0; i < oprsz / (BIT / 8); i++) { \ - Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)); \ - } \ -} - -DO_3OP(vavg_b, 8, B, DO_VAVG) -DO_3OP(vavg_h, 16, H, DO_VAVG) -DO_3OP(vavg_w, 32, W, DO_VAVG) -DO_3OP(vavg_d, 64, D, DO_VAVG) -DO_3OP(vavgr_b, 8, B, DO_VAVGR) -DO_3OP(vavgr_h, 16, H, DO_VAVGR) -DO_3OP(vavgr_w, 32, W, DO_VAVGR) -DO_3OP(vavgr_d, 64, D, DO_VAVGR) -DO_3OP(vavg_bu, 8, UB, DO_VAVG) -DO_3OP(vavg_hu, 16, UH, DO_VAVG) -DO_3OP(vavg_wu, 32, UW, DO_VAVG) -DO_3OP(vavg_du, 64, UD, DO_VAVG) -DO_3OP(vavgr_bu, 8, UB, DO_VAVGR) -DO_3OP(vavgr_hu, 16, UH, DO_VAVGR) -DO_3OP(vavgr_wu, 32, UW, DO_VAVGR) -DO_3OP(vavgr_du, 64, UD, DO_VAVGR) - -DO_3OP(vabsd_b, 8, B, DO_VABSD) -DO_3OP(vabsd_h, 16, H, DO_VABSD) -DO_3OP(vabsd_w, 32, W, DO_VABSD) -DO_3OP(vabsd_d, 64, D, DO_VABSD) -DO_3OP(vabsd_bu, 8, UB, DO_VABSD) -DO_3OP(vabsd_hu, 16, UH, DO_VABSD) -DO_3OP(vabsd_wu, 32, UW, DO_VABSD) -DO_3OP(vabsd_du, 64, UD, DO_VABSD) - -#define DO_VADDA(NAME, BIT, E) \ -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ -{ \ - int i; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - VReg *Vk = (VReg *)vk; \ - int oprsz = simd_oprsz(desc); \ - \ - for (i = 0; i < oprsz / (BIT / 8); i++) { \ - Vd->E(i) = DO_VABS(Vj->E(i)) + DO_VABS(Vk->E(i)); \ - } \ -} - -DO_VADDA(vadda_b, 8, B) -DO_VADDA(vadda_h, 16, H) -DO_VADDA(vadda_w, 32, W) -DO_VADDA(vadda_d, 64, D) - -#define VMINMAXI(NAME, BIT, E, DO_OP) \ -void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t 
desc) \ -{ \ - int i; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - typedef __typeof(Vd->E(0)) TD; \ - int oprsz = simd_oprsz(desc); \ - \ - for (i = 0; i < oprsz / (BIT / 8); i++) { \ - Vd->E(i) = DO_OP(Vj->E(i), (TD)imm); \ - } \ -} - -VMINMAXI(vmini_b, 8, B, DO_MIN) -VMINMAXI(vmini_h, 16, H, DO_MIN) -VMINMAXI(vmini_w, 32, W, DO_MIN) -VMINMAXI(vmini_d, 64, D, DO_MIN) -VMINMAXI(vmaxi_b, 8, B, DO_MAX) -VMINMAXI(vmaxi_h, 16, H, DO_MAX) -VMINMAXI(vmaxi_w, 32, W, DO_MAX) -VMINMAXI(vmaxi_d, 64, D, DO_MAX) -VMINMAXI(vmini_bu, 8, UB, DO_MIN) -VMINMAXI(vmini_hu, 16, UH, DO_MIN) -VMINMAXI(vmini_wu, 32, UW, DO_MIN) -VMINMAXI(vmini_du, 64, UD, DO_MIN) -VMINMAXI(vmaxi_bu, 8, UB, DO_MAX) -VMINMAXI(vmaxi_hu, 16, UH, DO_MAX) -VMINMAXI(vmaxi_wu, 32, UW, DO_MAX) -VMINMAXI(vmaxi_du, 64, UD, DO_MAX) - -#define DO_VMUH(NAME, BIT, E1, E2, DO_OP) \ -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ -{ \ - int i; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - VReg *Vk = (VReg *)vk; \ - typedef __typeof(Vd->E1(0)) T; \ - int oprsz = simd_oprsz(desc); \ - \ - for (i = 0; i < oprsz / (BIT / 8); i++) { \ - Vd->E2(i) = ((T)Vj->E2(i)) * ((T)Vk->E2(i)) >> BIT; \ - } \ -} - -void HELPER(vmuh_d)(void *vd, void *vj, void *vk, uint32_t desc) -{ - int i; - uint64_t l, h; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - VReg *Vk = (VReg *)vk; - int oprsz = simd_oprsz(desc); - - for (i = 0; i < oprsz / 8; i++) { - muls64(&l, &h, Vj->D(i), Vk->D(i)); - Vd->D(i) = h; - } -} - -DO_VMUH(vmuh_b, 8, H, B, DO_MUH) -DO_VMUH(vmuh_h, 16, W, H, DO_MUH) -DO_VMUH(vmuh_w, 32, D, W, DO_MUH) - -void HELPER(vmuh_du)(void *vd, void *vj, void *vk, uint32_t desc) -{ - int i; - uint64_t l, h; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - VReg *Vk = (VReg *)vk; - int oprsz = simd_oprsz(desc); - - for (i = 0; i < oprsz / 8; i++) { - mulu64(&l, &h, Vj->D(i), Vk->D(i)); - Vd->D(i) = h; - } -} - -DO_VMUH(vmuh_bu, 8, UH, UB, DO_MUH) -DO_VMUH(vmuh_hu, 16, UW, UH, DO_MUH) -DO_VMUH(vmuh_wu, 32, UD, UW, DO_MUH) - -DO_EVEN(vmulwev_h_b, 16, H, B, DO_MUL) -DO_EVEN(vmulwev_w_h, 32, W, H, DO_MUL) -DO_EVEN(vmulwev_d_w, 64, D, W, DO_MUL) - -DO_ODD(vmulwod_h_b, 16, H, B, DO_MUL) -DO_ODD(vmulwod_w_h, 32, W, H, DO_MUL) -DO_ODD(vmulwod_d_w, 64, D, W, DO_MUL) - -DO_EVEN(vmulwev_h_bu, 16, UH, UB, DO_MUL) -DO_EVEN(vmulwev_w_hu, 32, UW, UH, DO_MUL) -DO_EVEN(vmulwev_d_wu, 64, UD, UW, DO_MUL) - -DO_ODD(vmulwod_h_bu, 16, UH, UB, DO_MUL) -DO_ODD(vmulwod_w_hu, 32, UW, UH, DO_MUL) -DO_ODD(vmulwod_d_wu, 64, UD, UW, DO_MUL) - -DO_EVEN_U_S(vmulwev_h_bu_b, 16, H, UH, B, UB, DO_MUL) -DO_EVEN_U_S(vmulwev_w_hu_h, 32, W, UW, H, UH, DO_MUL) -DO_EVEN_U_S(vmulwev_d_wu_w, 64, D, UD, W, UW, DO_MUL) - -DO_ODD_U_S(vmulwod_h_bu_b, 16, H, UH, B, UB, DO_MUL) -DO_ODD_U_S(vmulwod_w_hu_h, 32, W, UW, H, UH, DO_MUL) -DO_ODD_U_S(vmulwod_d_wu_w, 64, D, UD, W, UW, DO_MUL) - -#define VMADDSUB(NAME, BIT, E, DO_OP) \ -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ -{ \ - int i; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - VReg *Vk = (VReg *)vk; \ - int oprsz = simd_oprsz(desc); \ - \ - for (i = 0; i < oprsz / (BIT / 8); i++) { \ - Vd->E(i) = DO_OP(Vd->E(i), Vj->E(i) ,Vk->E(i)); \ - } \ -} - -VMADDSUB(vmadd_b, 8, B, DO_MADD) -VMADDSUB(vmadd_h, 16, H, DO_MADD) -VMADDSUB(vmadd_w, 32, W, DO_MADD) -VMADDSUB(vmadd_d, 64, D, DO_MADD) -VMADDSUB(vmsub_b, 8, B, DO_MSUB) -VMADDSUB(vmsub_h, 16, H, DO_MSUB) -VMADDSUB(vmsub_w, 32, W, DO_MSUB) -VMADDSUB(vmsub_d, 64, D, DO_MSUB) - -#define VMADDWEV(NAME, BIT, E1, E2, DO_OP) \ -void 
HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ -{ \ - int i; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - VReg *Vk = (VReg *)vk; \ - typedef __typeof(Vd->E1(0)) TD; \ - int oprsz = simd_oprsz(desc); \ - \ - for (i = 0; i < oprsz / (BIT / 8); i++) { \ - Vd->E1(i) += DO_OP((TD)Vj->E2(2 * i), (TD)Vk->E2(2 * i)); \ - } \ -} - -VMADDWEV(vmaddwev_h_b, 16, H, B, DO_MUL) -VMADDWEV(vmaddwev_w_h, 32, W, H, DO_MUL) -VMADDWEV(vmaddwev_d_w, 64, D, W, DO_MUL) -VMADDWEV(vmaddwev_h_bu, 16, UH, UB, DO_MUL) -VMADDWEV(vmaddwev_w_hu, 32, UW, UH, DO_MUL) -VMADDWEV(vmaddwev_d_wu, 64, UD, UW, DO_MUL) - -#define VMADDWOD(NAME, BIT, E1, E2, DO_OP) \ -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ -{ \ - int i; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - VReg *Vk = (VReg *)vk; \ - typedef __typeof(Vd->E1(0)) TD; \ - int oprsz = simd_oprsz(desc); \ - \ - for (i = 0; i < oprsz / (BIT / 8); i++) { \ - Vd->E1(i) += DO_OP((TD)Vj->E2(2 * i + 1), \ - (TD)Vk->E2(2 * i + 1)); \ - } \ -} - -VMADDWOD(vmaddwod_h_b, 16, H, B, DO_MUL) -VMADDWOD(vmaddwod_w_h, 32, W, H, DO_MUL) -VMADDWOD(vmaddwod_d_w, 64, D, W, DO_MUL) -VMADDWOD(vmaddwod_h_bu, 16, UH, UB, DO_MUL) -VMADDWOD(vmaddwod_w_hu, 32, UW, UH, DO_MUL) -VMADDWOD(vmaddwod_d_wu, 64, UD, UW, DO_MUL) - -#define VMADDWEV_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \ -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ -{ \ - int i; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - VReg *Vk = (VReg *)vk; \ - typedef __typeof(Vd->ES1(0)) TS1; \ - typedef __typeof(Vd->EU1(0)) TU1; \ - int oprsz = simd_oprsz(desc); \ - \ - for (i = 0; i < oprsz / (BIT / 8); i++) { \ - Vd->ES1(i) += DO_OP((TU1)Vj->EU2(2 * i), \ - (TS1)Vk->ES2(2 * i)); \ - } \ -} - -VMADDWEV_U_S(vmaddwev_h_bu_b, 16, H, UH, B, UB, DO_MUL) -VMADDWEV_U_S(vmaddwev_w_hu_h, 32, W, UW, H, UH, DO_MUL) -VMADDWEV_U_S(vmaddwev_d_wu_w, 64, D, UD, W, UW, DO_MUL) - -#define VMADDWOD_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \ -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ -{ \ - int i; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - VReg *Vk = (VReg *)vk; \ - typedef __typeof(Vd->ES1(0)) TS1; \ - typedef __typeof(Vd->EU1(0)) TU1; \ - int oprsz = simd_oprsz(desc); \ - \ - for (i = 0; i < oprsz / (BIT / 8); i++) { \ - Vd->ES1(i) += DO_OP((TU1)Vj->EU2(2 * i + 1), \ - (TS1)Vk->ES2(2 * i + 1)); \ - } \ -} - -VMADDWOD_U_S(vmaddwod_h_bu_b, 16, H, UH, B, UB, DO_MUL) -VMADDWOD_U_S(vmaddwod_w_hu_h, 32, W, UW, H, UH, DO_MUL) -VMADDWOD_U_S(vmaddwod_d_wu_w, 64, D, UD, W, UW, DO_MUL) - -#define VDIV(NAME, BIT, E, DO_OP) \ -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ -{ \ - int i; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - VReg *Vk = (VReg *)vk; \ - int oprsz = simd_oprsz(desc); \ - \ - for (i = 0; i < oprsz / (BIT / 8); i++) { \ - Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)); \ - } \ -} - -VDIV(vdiv_b, 8, B, DO_DIV) -VDIV(vdiv_h, 16, H, DO_DIV) -VDIV(vdiv_w, 32, W, DO_DIV) -VDIV(vdiv_d, 64, D, DO_DIV) -VDIV(vdiv_bu, 8, UB, DO_DIVU) -VDIV(vdiv_hu, 16, UH, DO_DIVU) -VDIV(vdiv_wu, 32, UW, DO_DIVU) -VDIV(vdiv_du, 64, UD, DO_DIVU) -VDIV(vmod_b, 8, B, DO_REM) -VDIV(vmod_h, 16, H, DO_REM) -VDIV(vmod_w, 32, W, DO_REM) -VDIV(vmod_d, 64, D, DO_REM) -VDIV(vmod_bu, 8, UB, DO_REMU) -VDIV(vmod_hu, 16, UH, DO_REMU) -VDIV(vmod_wu, 32, UW, DO_REMU) -VDIV(vmod_du, 64, UD, DO_REMU) - -#define VSAT_S(NAME, BIT, E) \ -void HELPER(NAME)(void *vd, void *vj, uint64_t max, uint32_t desc) \ -{ \ - int i; \ - VReg *Vd = (VReg *)vd; \ - VReg 
*Vj = (VReg *)vj; \ - typedef __typeof(Vd->E(0)) TD; \ - int oprsz = simd_oprsz(desc); \ - \ - for (i = 0; i < oprsz / (BIT / 8); i++) { \ - Vd->E(i) = Vj->E(i) > (TD)max ? (TD)max : \ - Vj->E(i) < (TD)~max ? (TD)~max: Vj->E(i); \ - } \ -} - -VSAT_S(vsat_b, 8, B) -VSAT_S(vsat_h, 16, H) -VSAT_S(vsat_w, 32, W) -VSAT_S(vsat_d, 64, D) - -#define VSAT_U(NAME, BIT, E) \ -void HELPER(NAME)(void *vd, void *vj, uint64_t max, uint32_t desc) \ -{ \ - int i; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - typedef __typeof(Vd->E(0)) TD; \ - int oprsz = simd_oprsz(desc); \ - \ - for (i = 0; i < oprsz / (BIT / 8); i++) { \ - Vd->E(i) = Vj->E(i) > (TD)max ? (TD)max : Vj->E(i); \ - } \ -} - -VSAT_U(vsat_bu, 8, UB) -VSAT_U(vsat_hu, 16, UH) -VSAT_U(vsat_wu, 32, UW) -VSAT_U(vsat_du, 64, UD) - -#define VEXTH(NAME, BIT, E1, E2) \ -void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \ -{ \ - int i, j, ofs; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - int oprsz = simd_oprsz(desc); \ - \ - ofs = LSX_LEN / BIT; \ - for (i = 0; i < oprsz / 16; i++) { \ - for (j = 0; j < ofs; j++) { \ - Vd->E1(j + i * ofs) = Vj->E2(j + ofs + ofs * 2 * i); \ - } \ - } \ -} - -void HELPER(vexth_q_d)(void *vd, void *vj, uint32_t desc) -{ - int i; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - int oprsz = simd_oprsz(desc); - - for (i = 0; i < oprsz / 16; i++) { - Vd->Q(i) = int128_makes64(Vj->D(2 * i + 1)); - } -} - -void HELPER(vexth_qu_du)(void *vd, void *vj, uint32_t desc) -{ - int i; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - int oprsz = simd_oprsz(desc); - - for (i = 0; i < oprsz / 16; i++) { - Vd->Q(i) = int128_make64(Vj->UD(2 * i + 1)); - } -} - -VEXTH(vexth_h_b, 16, H, B) -VEXTH(vexth_w_h, 32, W, H) -VEXTH(vexth_d_w, 64, D, W) -VEXTH(vexth_hu_bu, 16, UH, UB) -VEXTH(vexth_wu_hu, 32, UW, UH) -VEXTH(vexth_du_wu, 64, UD, UW) - -#define VEXT2XV(NAME, BIT, E1, E2) \ -void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \ -{ \ - int i; \ - VReg temp = {}; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - int oprsz = simd_oprsz(desc); \ - \ - for (i = 0; i < oprsz / (BIT / 8); i++) { \ - temp.E1(i) = Vj->E2(i); \ - } \ - *Vd = temp; \ -} - -VEXT2XV(vext2xv_h_b, 16, H, B) -VEXT2XV(vext2xv_w_b, 32, W, B) -VEXT2XV(vext2xv_d_b, 64, D, B) -VEXT2XV(vext2xv_w_h, 32, W, H) -VEXT2XV(vext2xv_d_h, 64, D, H) -VEXT2XV(vext2xv_d_w, 64, D, W) -VEXT2XV(vext2xv_hu_bu, 16, UH, UB) -VEXT2XV(vext2xv_wu_bu, 32, UW, UB) -VEXT2XV(vext2xv_du_bu, 64, UD, UB) -VEXT2XV(vext2xv_wu_hu, 32, UW, UH) -VEXT2XV(vext2xv_du_hu, 64, UD, UH) -VEXT2XV(vext2xv_du_wu, 64, UD, UW) - -DO_3OP(vsigncov_b, 8, B, DO_SIGNCOV) -DO_3OP(vsigncov_h, 16, H, DO_SIGNCOV) -DO_3OP(vsigncov_w, 32, W, DO_SIGNCOV) -DO_3OP(vsigncov_d, 64, D, DO_SIGNCOV) - -static uint64_t do_vmskltz_b(int64_t val) -{ - uint64_t m = 0x8080808080808080ULL; - uint64_t c = val & m; - c |= c << 7; - c |= c << 14; - c |= c << 28; - return c >> 56; -} - -void HELPER(vmskltz_b)(void *vd, void *vj, uint32_t desc) -{ - int i; - uint16_t temp = 0; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - int oprsz = simd_oprsz(desc); - - for (i = 0; i < oprsz / 16; i++) { - temp = 0; - temp = do_vmskltz_b(Vj->D(2 * i)); - temp |= (do_vmskltz_b(Vj->D(2 * i + 1)) << 8); - Vd->D(2 * i) = temp; - Vd->D(2 * i + 1) = 0; - } -} - -static uint64_t do_vmskltz_h(int64_t val) -{ - uint64_t m = 0x8000800080008000ULL; - uint64_t c = val & m; - c |= c << 15; - c |= c << 30; - return c >> 60; -} - -void HELPER(vmskltz_h)(void *vd, void *vj, uint32_t desc) -{ - int i; - uint16_t temp = 0; - VReg *Vd = 
(VReg *)vd; - VReg *Vj = (VReg *)vj; - int oprsz = simd_oprsz(desc); - - for (i = 0; i < oprsz / 16; i++) { - temp = 0; - temp = do_vmskltz_h(Vj->D(2 * i)); - temp |= (do_vmskltz_h(Vj->D(2 * i + 1)) << 4); - Vd->D(2 * i) = temp; - Vd->D(2 * i + 1) = 0; - } -} - -static uint64_t do_vmskltz_w(int64_t val) -{ - uint64_t m = 0x8000000080000000ULL; - uint64_t c = val & m; - c |= c << 31; - return c >> 62; -} - -void HELPER(vmskltz_w)(void *vd, void *vj, uint32_t desc) -{ - int i; - uint16_t temp = 0; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - int oprsz = simd_oprsz(desc); - - for (i = 0; i < oprsz / 16; i++) { - temp = 0; - temp = do_vmskltz_w(Vj->D(2 * i)); - temp |= (do_vmskltz_w(Vj->D(2 * i + 1)) << 2); - Vd->D(2 * i) = temp; - Vd->D(2 * i + 1) = 0; - } -} - -static uint64_t do_vmskltz_d(int64_t val) -{ - return (uint64_t)val >> 63; -} -void HELPER(vmskltz_d)(void *vd, void *vj, uint32_t desc) -{ - int i; - uint16_t temp = 0; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - int oprsz = simd_oprsz(desc); - - for (i = 0; i < oprsz / 16; i++) { - temp = 0; - temp = do_vmskltz_d(Vj->D(2 * i)); - temp |= (do_vmskltz_d(Vj->D(2 * i + 1)) << 1); - Vd->D(2 * i) = temp; - Vd->D(2 * i + 1) = 0; - } -} - -void HELPER(vmskgez_b)(void *vd, void *vj, uint32_t desc) -{ - int i; - uint16_t temp = 0; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - int oprsz = simd_oprsz(desc); - - for (i = 0; i < oprsz / 16; i++) { - temp = 0; - temp = do_vmskltz_b(Vj->D(2 * i)); - temp |= (do_vmskltz_b(Vj->D(2 * i + 1)) << 8); - Vd->D(2 * i) = (uint16_t)(~temp); - Vd->D(2 * i + 1) = 0; - } -} - -static uint64_t do_vmskez_b(uint64_t a) -{ - uint64_t m = 0x7f7f7f7f7f7f7f7fULL; - uint64_t c = ~(((a & m) + m) | a | m); - c |= c << 7; - c |= c << 14; - c |= c << 28; - return c >> 56; -} - -void HELPER(vmsknz_b)(void *vd, void *vj, uint32_t desc) -{ - int i; - uint16_t temp = 0; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - int oprsz = simd_oprsz(desc); - - for (i = 0; i < oprsz / 16; i++) { - temp = 0; - temp = do_vmskez_b(Vj->D(2 * i)); - temp |= (do_vmskez_b(Vj->D(2 * i + 1)) << 8); - Vd->D(2 * i) = (uint16_t)(~temp); - Vd->D(2 * i + 1) = 0; - } -} - -void HELPER(vnori_b)(void *vd, void *vj, uint64_t imm, uint32_t desc) -{ - int i; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - - for (i = 0; i < simd_oprsz(desc); i++) { - Vd->B(i) = ~(Vj->B(i) | (uint8_t)imm); - } -} - -#define VSLLWIL(NAME, BIT, E1, E2) \ -void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ -{ \ - int i, j, ofs; \ - VReg temp = {}; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - int oprsz = simd_oprsz(desc); \ - typedef __typeof(temp.E1(0)) TD; \ - \ - ofs = LSX_LEN / BIT; \ - for (i = 0; i < oprsz / 16; i++) { \ - for (j = 0; j < ofs; j++) { \ - temp.E1(j + ofs * i) = (TD)Vj->E2(j + ofs * 2 * i) << (imm % BIT); \ - } \ - } \ - *Vd = temp; \ -} - - -void HELPER(vextl_q_d)(void *vd, void *vj, uint32_t desc) -{ - int i; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - int oprsz = simd_oprsz(desc); - - for (i = 0; i < oprsz / 16; i++) { - Vd->Q(i) = int128_makes64(Vj->D(2 * i)); - } -} - -void HELPER(vextl_qu_du)(void *vd, void *vj, uint32_t desc) -{ - int i; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - int oprsz = simd_oprsz(desc); - - for (i = 0; i < oprsz / 16; i++) { - Vd->Q(i) = int128_make64(Vj->UD(2 * i)); - } -} - -VSLLWIL(vsllwil_h_b, 16, H, B) -VSLLWIL(vsllwil_w_h, 32, W, H) -VSLLWIL(vsllwil_d_w, 64, D, W) -VSLLWIL(vsllwil_hu_bu, 16, UH, UB) -VSLLWIL(vsllwil_wu_hu, 32, UW, UH) 
-VSLLWIL(vsllwil_du_wu, 64, UD, UW) - -#define do_vsrlr(E, T) \ -static T do_vsrlr_ ##E(T s1, int sh) \ -{ \ - if (sh == 0) { \ - return s1; \ - } else { \ - return (s1 >> sh) + ((s1 >> (sh - 1)) & 0x1); \ - } \ -} - -do_vsrlr(B, uint8_t) -do_vsrlr(H, uint16_t) -do_vsrlr(W, uint32_t) -do_vsrlr(D, uint64_t) - -#define VSRLR(NAME, BIT, T, E) \ -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ -{ \ - int i; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - VReg *Vk = (VReg *)vk; \ - int oprsz = simd_oprsz(desc); \ - \ - for (i = 0; i < oprsz / (BIT / 8); i++) { \ - Vd->E(i) = do_vsrlr_ ## E(Vj->E(i), ((T)Vk->E(i))%BIT); \ - } \ -} - -VSRLR(vsrlr_b, 8, uint8_t, B) -VSRLR(vsrlr_h, 16, uint16_t, H) -VSRLR(vsrlr_w, 32, uint32_t, W) -VSRLR(vsrlr_d, 64, uint64_t, D) - -#define VSRLRI(NAME, BIT, E) \ -void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ -{ \ - int i; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - int oprsz = simd_oprsz(desc); \ - \ - for (i = 0; i < oprsz / (BIT / 8); i++) { \ - Vd->E(i) = do_vsrlr_ ## E(Vj->E(i), imm); \ - } \ -} - -VSRLRI(vsrlri_b, 8, B) -VSRLRI(vsrlri_h, 16, H) -VSRLRI(vsrlri_w, 32, W) -VSRLRI(vsrlri_d, 64, D) - -#define do_vsrar(E, T) \ -static T do_vsrar_ ##E(T s1, int sh) \ -{ \ - if (sh == 0) { \ - return s1; \ - } else { \ - return (s1 >> sh) + ((s1 >> (sh - 1)) & 0x1); \ - } \ -} - -do_vsrar(B, int8_t) -do_vsrar(H, int16_t) -do_vsrar(W, int32_t) -do_vsrar(D, int64_t) - -#define VSRAR(NAME, BIT, T, E) \ -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ -{ \ - int i; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - VReg *Vk = (VReg *)vk; \ - int oprsz = simd_oprsz(desc); \ - \ - for (i = 0; i < oprsz / (BIT / 8); i++) { \ - Vd->E(i) = do_vsrar_ ## E(Vj->E(i), ((T)Vk->E(i))%BIT); \ - } \ -} - -VSRAR(vsrar_b, 8, uint8_t, B) -VSRAR(vsrar_h, 16, uint16_t, H) -VSRAR(vsrar_w, 32, uint32_t, W) -VSRAR(vsrar_d, 64, uint64_t, D) - -#define VSRARI(NAME, BIT, E) \ -void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ -{ \ - int i; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - int oprsz = simd_oprsz(desc); \ - \ - for (i = 0; i < oprsz / (BIT / 8); i++) { \ - Vd->E(i) = do_vsrar_ ## E(Vj->E(i), imm); \ - } \ -} - -VSRARI(vsrari_b, 8, B) -VSRARI(vsrari_h, 16, H) -VSRARI(vsrari_w, 32, W) -VSRARI(vsrari_d, 64, D) - -#define VSRLN(NAME, BIT, E1, E2) \ -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ -{ \ - int i, j, ofs; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - VReg *Vk = (VReg *)vk; \ - int oprsz = simd_oprsz(desc); \ - \ - ofs = LSX_LEN / BIT; \ - for (i = 0; i < oprsz / 16; i++) { \ - for (j = 0; j < ofs; j++) { \ - Vd->E1(j + ofs * 2 * i) = R_SHIFT(Vj->E2(j + ofs * i), \ - Vk->E2(j + ofs * i) % BIT); \ - } \ - Vd->D(2 * i + 1) = 0; \ - } \ -} - -VSRLN(vsrln_b_h, 16, B, UH) -VSRLN(vsrln_h_w, 32, H, UW) -VSRLN(vsrln_w_d, 64, W, UD) - -#define VSRAN(NAME, BIT, E1, E2, E3) \ -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ -{ \ - int i, j, ofs; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - VReg *Vk = (VReg *)vk; \ - int oprsz = simd_oprsz(desc); \ - \ - ofs = LSX_LEN / BIT; \ - for (i = 0; i < oprsz / 16; i++) { \ - for (j = 0; j < ofs; j++) { \ - Vd->E1(j + ofs * 2 * i) = R_SHIFT(Vj->E2(j + ofs * i), \ - Vk->E3(j + ofs * i) % BIT); \ - } \ - Vd->D(2 * i + 1) = 0; \ - } \ -} - -VSRAN(vsran_b_h, 16, B, H, UH) -VSRAN(vsran_h_w, 32, H, W, UW) -VSRAN(vsran_w_d, 64, W, D, UD) - -#define VSRLNI(NAME, BIT, E1, E2) \ 
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ -{ \ - int i, j, ofs; \ - VReg temp = {}; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - int oprsz = simd_oprsz(desc); \ - \ - ofs = LSX_LEN / BIT; \ - for (i = 0; i < oprsz / 16; i++) { \ - for (j = 0; j < ofs; j++) { \ - temp.E1(j + ofs * 2 * i) = R_SHIFT(Vj->E2(j + ofs * i), imm); \ - temp.E1(j + ofs * (2 * i + 1)) = R_SHIFT(Vd->E2(j + ofs * i), \ - imm); \ - } \ - } \ - *Vd = temp; \ -} - -void HELPER(vsrlni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) -{ - int i; - VReg temp = {}; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - - for (i = 0; i < 2; i++) { - temp.D(2 * i) = int128_getlo(int128_urshift(Vj->Q(i), imm % 128)); - temp.D(2 * i +1) = int128_getlo(int128_urshift(Vd->Q(i), imm % 128)); - } - *Vd = temp; -} - -VSRLNI(vsrlni_b_h, 16, B, UH) -VSRLNI(vsrlni_h_w, 32, H, UW) -VSRLNI(vsrlni_w_d, 64, W, UD) - -#define VSRANI(NAME, BIT, E1, E2) \ -void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ -{ \ - int i, j, ofs; \ - VReg temp = {}; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - int oprsz = simd_oprsz(desc); \ - \ - ofs = LSX_LEN / BIT; \ - for (i = 0; i < oprsz / 16; i++) { \ - for (j = 0; j < ofs; j++) { \ - temp.E1(j + ofs * 2 * i) = R_SHIFT(Vj->E2(j + ofs * i), imm); \ - temp.E1(j + ofs * (2 * i + 1)) = R_SHIFT(Vd->E2(j + ofs * i), \ - imm); \ - } \ - } \ - *Vd = temp; \ -} - -void HELPER(vsrani_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) -{ - int i; - VReg temp = {}; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - - for (i = 0; i < 2; i++) { - temp.D(2 * i) = int128_getlo(int128_rshift(Vj->Q(i), imm % 128)); - temp.D(2 * i + 1) = int128_getlo(int128_rshift(Vd->Q(i), imm % 128)); - } - *Vd = temp; -} - -VSRANI(vsrani_b_h, 16, B, H) -VSRANI(vsrani_h_w, 32, H, W) -VSRANI(vsrani_w_d, 64, W, D) - -#define VSRLRN(NAME, BIT, E1, E2, E3) \ -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ -{ \ - int i, j, ofs; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - VReg *Vk = (VReg *)vk; \ - int oprsz = simd_oprsz(desc); \ - \ - ofs = LSX_LEN / BIT; \ - for (i = 0; i < oprsz / 16; i++) { \ - for (j = 0; j < ofs; j++) { \ - Vd->E1(j + ofs * 2 * i) = do_vsrlr_ ##E2(Vj->E2(j + ofs * i), \ - Vk->E3(j + ofs * i) % BIT); \ - } \ - Vd->D(2 * i + 1) = 0; \ - } \ -} - -VSRLRN(vsrlrn_b_h, 16, B, H, UH) -VSRLRN(vsrlrn_h_w, 32, H, W, UW) -VSRLRN(vsrlrn_w_d, 64, W, D, UD) - -#define VSRARN(NAME, BIT, E1, E2, E3) \ -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ -{ \ - int i, j, ofs; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - VReg *Vk = (VReg *)vk; \ - int oprsz = simd_oprsz(desc); \ - \ - ofs = LSX_LEN / BIT; \ - for (i = 0; i < oprsz / 16; i++) { \ - for (j = 0; j < ofs; j++) { \ - Vd->E1(j + ofs * 2 * i) = do_vsrar_ ## E2(Vj->E2(j + ofs * i), \ - Vk->E3(j + ofs * i) % BIT); \ - } \ - Vd->D(2 * i + 1) = 0; \ - } \ -} - -VSRARN(vsrarn_b_h, 16, B, H, UH) -VSRARN(vsrarn_h_w, 32, H, W, UW) -VSRARN(vsrarn_w_d, 64, W, D, UD) - -#define VSRLRNI(NAME, BIT, E1, E2) \ -void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ -{ \ - int i, j, ofs; \ - VReg temp = {}; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - int oprsz = simd_oprsz(desc); \ - \ - ofs = LSX_LEN / BIT; \ - for (i = 0; i < oprsz / 16; i++) { \ - for (j = 0; j < ofs; j++) { \ - temp.E1(j + ofs * 2 * i) = do_vsrlr_ ## E2(Vj->E2(j + ofs * i), imm); \ - temp.E1(j + ofs * (2 * i + 1)) = do_vsrlr_ ## E2(Vd->E2(j + ofs * i), \ - imm); \ 
- } \ - } \ - *Vd = temp; \ -} - -void HELPER(vsrlrni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) -{ - int i; - VReg temp = {}; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - Int128 r[4]; - int oprsz = simd_oprsz(desc); - - for (i = 0; i < oprsz / 16; i++) { - if (imm == 0) { - temp.D(2 * i) = int128_getlo(Vj->Q(i)); - temp.D(2 * i + 1) = int128_getlo(Vd->Q(i)); - } else { - r[2 * i] = int128_and(int128_urshift(Vj->Q(i), (imm - 1)), - int128_one()); - r[2 * i + 1] = int128_and(int128_urshift(Vd->Q(i), (imm - 1)), - int128_one()); - temp.D(2 * i) = int128_getlo(int128_add(int128_urshift(Vj->Q(i), - imm), r[2 * i])); - temp.D(2 * i + 1) = int128_getlo(int128_add(int128_urshift(Vd->Q(i), - imm), r[ 2 * i + 1])); - } - } - *Vd = temp; -} - -VSRLRNI(vsrlrni_b_h, 16, B, H) -VSRLRNI(vsrlrni_h_w, 32, H, W) -VSRLRNI(vsrlrni_w_d, 64, W, D) - -#define VSRARNI(NAME, BIT, E1, E2) \ -void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ -{ \ - int i, j, ofs; \ - VReg temp = {}; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - int oprsz = simd_oprsz(desc); \ - \ - ofs = LSX_LEN / BIT; \ - for (i = 0; i < oprsz / 16; i++) { \ - for (j = 0; j < ofs; j++) { \ - temp.E1(j + ofs * 2 * i) = do_vsrar_ ## E2(Vj->E2(j + ofs * i), imm); \ - temp.E1(j + ofs * (2 * i + 1)) = do_vsrar_ ## E2(Vd->E2(j + ofs * i), \ - imm); \ - } \ - } \ - *Vd = temp; \ -} - -void HELPER(vsrarni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) -{ - int i; - VReg temp = {}; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - Int128 r[4]; - int oprsz = simd_oprsz(desc); - - for (i = 0; i < oprsz / 16; i++) { - if (imm == 0) { - temp.D(2 * i) = int128_getlo(Vj->Q(i)); - temp.D(2 * i + 1) = int128_getlo(Vd->Q(i)); - } else { - r[2 * i] = int128_and(int128_rshift(Vj->Q(i), (imm - 1)), - int128_one()); - r[2 * i + 1] = int128_and(int128_rshift(Vd->Q(i), (imm - 1)), - int128_one()); - temp.D(2 * i) = int128_getlo(int128_add(int128_rshift(Vj->Q(i), - imm), r[2 * i])); - temp.D(2 * i + 1) = int128_getlo(int128_add(int128_rshift(Vd->Q(i), - imm), r[2 * i + 1])); - } - } - *Vd = temp; -} - -VSRARNI(vsrarni_b_h, 16, B, H) -VSRARNI(vsrarni_h_w, 32, H, W) -VSRARNI(vsrarni_w_d, 64, W, D) - -#define SSRLNS(NAME, T1, T2, T3) \ -static T1 do_ssrlns_ ## NAME(T2 e2, int sa, int sh) \ -{ \ - T1 shft_res; \ - if (sa == 0) { \ - shft_res = e2; \ - } else { \ - shft_res = (((T1)e2) >> sa); \ - } \ - T3 mask; \ - mask = (1ull << sh) -1; \ - if (shft_res > mask) { \ - return mask; \ - } else { \ - return shft_res; \ - } \ -} - -SSRLNS(B, uint16_t, int16_t, uint8_t) -SSRLNS(H, uint32_t, int32_t, uint16_t) -SSRLNS(W, uint64_t, int64_t, uint32_t) - -#define VSSRLN(NAME, BIT, E1, E2, E3) \ -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ -{ \ - int i, j, ofs; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - VReg *Vk = (VReg *)vk; \ - int oprsz = simd_oprsz(desc); \ - \ - ofs = LSX_LEN / BIT; \ - for (i = 0; i < oprsz / 16; i++) { \ - for (j = 0; j < ofs; j++) { \ - Vd->E1(j + ofs * 2 * i) = do_ssrlns_ ## E1(Vj->E2(j + ofs * i), \ - Vk->E3(j + ofs * i) % BIT, \ - BIT / 2 - 1); \ - } \ - Vd->D(2 * i + 1) = 0; \ - } \ -} - -VSSRLN(vssrln_b_h, 16, B, H, UH) -VSSRLN(vssrln_h_w, 32, H, W, UW) -VSSRLN(vssrln_w_d, 64, W, D, UD) - -#define SSRANS(E, T1, T2) \ -static T1 do_ssrans_ ## E(T1 e2, int sa, int sh) \ -{ \ - T1 shft_res; \ - if (sa == 0) { \ - shft_res = e2; \ - } else { \ - shft_res = e2 >> sa; \ - } \ - T2 mask; \ - mask = (1ll << sh) - 1; \ - if (shft_res > mask) { \ - return mask; \ - } else 
if (shft_res < -(mask + 1)) { \ - return ~mask; \ - } else { \ - return shft_res; \ - } \ -} - -SSRANS(B, int16_t, int8_t) -SSRANS(H, int32_t, int16_t) -SSRANS(W, int64_t, int32_t) - -#define VSSRAN(NAME, BIT, E1, E2, E3) \ -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ -{ \ - int i, j, ofs; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - VReg *Vk = (VReg *)vk; \ - int oprsz = simd_oprsz(desc); \ - \ - ofs = LSX_LEN / BIT; \ - for (i = 0; i < oprsz / 16; i++) { \ - for (j = 0; j < ofs; j++) { \ - Vd->E1(j + ofs * 2 * i) = do_ssrans_ ## E1(Vj->E2(j + ofs * i), \ - Vk->E3(j + ofs * i) % BIT, \ - BIT / 2 - 1); \ - } \ - Vd->D(2 * i + 1) = 0; \ - } \ -} - -VSSRAN(vssran_b_h, 16, B, H, UH) -VSSRAN(vssran_h_w, 32, H, W, UW) -VSSRAN(vssran_w_d, 64, W, D, UD) - -#define SSRLNU(E, T1, T2, T3) \ -static T1 do_ssrlnu_ ## E(T3 e2, int sa, int sh) \ -{ \ - T1 shft_res; \ - if (sa == 0) { \ - shft_res = e2; \ - } else { \ - shft_res = (((T1)e2) >> sa); \ - } \ - T2 mask; \ - mask = (1ull << sh) - 1; \ - if (shft_res > mask) { \ - return mask; \ - } else { \ - return shft_res; \ - } \ -} - -SSRLNU(B, uint16_t, uint8_t, int16_t) -SSRLNU(H, uint32_t, uint16_t, int32_t) -SSRLNU(W, uint64_t, uint32_t, int64_t) - -#define VSSRLNU(NAME, BIT, E1, E2, E3) \ -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ -{ \ - int i, j, ofs; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - VReg *Vk = (VReg *)vk; \ - int oprsz = simd_oprsz(desc); \ - \ - ofs = LSX_LEN / BIT; \ - for (i = 0; i < oprsz / 16; i++) { \ - for (j = 0; j < ofs; j++) { \ - Vd->E1(j + ofs * 2 * i) = do_ssrlnu_ ## E1(Vj->E2(j + ofs * i), \ - Vk->E3(j + ofs * i) % BIT, \ - BIT / 2); \ - } \ - Vd->D(2 * i + 1) = 0; \ - } \ -} - -VSSRLNU(vssrln_bu_h, 16, B, H, UH) -VSSRLNU(vssrln_hu_w, 32, H, W, UW) -VSSRLNU(vssrln_wu_d, 64, W, D, UD) - -#define SSRANU(E, T1, T2, T3) \ -static T1 do_ssranu_ ## E(T3 e2, int sa, int sh) \ -{ \ - T1 shft_res; \ - if (sa == 0) { \ - shft_res = e2; \ - } else { \ - shft_res = e2 >> sa; \ - } \ - if (e2 < 0) { \ - shft_res = 0; \ - } \ - T2 mask; \ - mask = (1ull << sh) - 1; \ - if (shft_res > mask) { \ - return mask; \ - } else { \ - return shft_res; \ - } \ -} - -SSRANU(B, uint16_t, uint8_t, int16_t) -SSRANU(H, uint32_t, uint16_t, int32_t) -SSRANU(W, uint64_t, uint32_t, int64_t) - -#define VSSRANU(NAME, BIT, E1, E2, E3) \ -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ -{ \ - int i, j, ofs; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - VReg *Vk = (VReg *)vk; \ - int oprsz = simd_oprsz(desc); \ - \ - ofs = LSX_LEN / BIT; \ - for (i = 0; i < oprsz / 16; i++) { \ - for (j = 0; j < ofs; j++) { \ - Vd->E1(j + ofs * 2 * i) = do_ssranu_ ## E1(Vj->E2(j + ofs * i), \ - Vk->E3(j + ofs * i) % BIT, \ - BIT / 2); \ - } \ - Vd->D(2 * i + 1) = 0; \ - } \ -} - -VSSRANU(vssran_bu_h, 16, B, H, UH) -VSSRANU(vssran_hu_w, 32, H, W, UW) -VSSRANU(vssran_wu_d, 64, W, D, UD) - -#define VSSRLNI(NAME, BIT, E1, E2) \ -void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ -{ \ - int i, j, ofs; \ - VReg temp = {}; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - int oprsz = simd_oprsz(desc); \ - \ - ofs = LSX_LEN / BIT; \ - for (i = 0; i < oprsz / 16; i++) { \ - for (j = 0; j < ofs; j++) { \ - temp.E1(j + ofs * 2 * i) = do_ssrlns_ ## E1(Vj->E2(j + ofs * i), \ - imm, BIT / 2 - 1); \ - temp.E1(j + ofs * (2 * i + 1)) = do_ssrlns_ ## E1(Vd->E2(j + ofs * i), \ - imm, BIT / 2 - 1); \ - } \ - } \ - *Vd = temp; \ -} - -static void do_vssrlni_q(VReg *Vd, VReg 
*Vj, - uint64_t imm, int idx, Int128 mask) -{ - Int128 shft_res1, shft_res2; - - if (imm == 0) { - shft_res1 = Vj->Q(idx); - shft_res2 = Vd->Q(idx); - } else { - shft_res1 = int128_urshift(Vj->Q(idx), imm); - shft_res2 = int128_urshift(Vd->Q(idx), imm); - } - - if (int128_ult(mask, shft_res1)) { - Vd->D(idx * 2) = int128_getlo(mask); - }else { - Vd->D(idx * 2) = int128_getlo(shft_res1); - } - - if (int128_ult(mask, shft_res2)) { - Vd->D(idx * 2 + 1) = int128_getlo(mask); - }else { - Vd->D(idx * 2 + 1) = int128_getlo(shft_res2); - } -} - -void HELPER(vssrlni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) -{ - int i; - Int128 mask; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - int oprsz = simd_oprsz(desc); - - mask = int128_sub(int128_lshift(int128_one(), 63), int128_one()); - - for (i = 0; i < oprsz / 16; i++) { - do_vssrlni_q(Vd, Vj, imm, i, mask); - } -} - -VSSRLNI(vssrlni_b_h, 16, B, H) -VSSRLNI(vssrlni_h_w, 32, H, W) -VSSRLNI(vssrlni_w_d, 64, W, D) - -#define VSSRANI(NAME, BIT, E1, E2) \ -void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ -{ \ - int i, j, ofs; \ - VReg temp = {}; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - int oprsz = simd_oprsz(desc); \ - \ - ofs = LSX_LEN / BIT; \ - for (i = 0; i < oprsz / 16; i++) { \ - for (j = 0; j < ofs; j++) { \ - temp.E1(j + ofs * 2 * i) = do_ssrans_ ## E1(Vj->E2(j + ofs * i), \ - imm, BIT / 2 - 1); \ - temp.E1(j + ofs * (2 * i + 1)) = do_ssrans_ ## E1(Vd->E2(j + ofs * i), \ - imm, BIT / 2 - 1); \ - } \ - } \ - *Vd = temp; \ -} - -static void do_vssrani_d_q(VReg *Vd, VReg *Vj, - uint64_t imm, int idx, Int128 mask, Int128 min) -{ - Int128 shft_res1, shft_res2; - - if (imm == 0) { - shft_res1 = Vj->Q(idx); - shft_res2 = Vd->Q(idx); - } else { - shft_res1 = int128_rshift(Vj->Q(idx), imm); - shft_res2 = int128_rshift(Vd->Q(idx), imm); - } - - if (int128_gt(shft_res1, mask)) { - Vd->D(idx * 2) = int128_getlo(mask); - } else if (int128_lt(shft_res1, int128_neg(min))) { - Vd->D(idx * 2) = int128_getlo(min); - } else { - Vd->D(idx * 2) = int128_getlo(shft_res1); - } - - if (int128_gt(shft_res2, mask)) { - Vd->D(idx * 2 + 1) = int128_getlo(mask); - } else if (int128_lt(shft_res2, int128_neg(min))) { - Vd->D(idx * 2 + 1) = int128_getlo(min); - } else { - Vd->D(idx * 2 + 1) = int128_getlo(shft_res2); - } -} - -void HELPER(vssrani_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) -{ - int i; - Int128 mask, min; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - int oprsz = simd_oprsz(desc); - - mask = int128_sub(int128_lshift(int128_one(), 63), int128_one()); - min = int128_lshift(int128_one(), 63); - - for (i = 0; i < oprsz / 16; i++) { - do_vssrani_d_q(Vd, Vj, imm, i, mask, min); - } -} - - -VSSRANI(vssrani_b_h, 16, B, H) -VSSRANI(vssrani_h_w, 32, H, W) -VSSRANI(vssrani_w_d, 64, W, D) - -#define VSSRLNUI(NAME, BIT, E1, E2) \ -void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ -{ \ - int i, j, ofs; \ - VReg temp = {}; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - int oprsz = simd_oprsz(desc); \ - \ - ofs = LSX_LEN / BIT; \ - for (i = 0; i < oprsz / 16; i++) { \ - for (j = 0; j < ofs; j++) { \ - temp.E1(j + ofs * 2 * i) = do_ssrlnu_ ## E1(Vj->E2(j + ofs * i), \ - imm, BIT / 2); \ - temp.E1(j + ofs * (2 * i + 1)) = do_ssrlnu_ ## E1(Vd->E2(j + ofs * i), \ - imm, BIT / 2); \ - } \ - } \ - *Vd = temp; \ -} - -void HELPER(vssrlni_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) -{ - int i; - Int128 mask; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - int oprsz = 
simd_oprsz(desc); - - mask = int128_sub(int128_lshift(int128_one(), 64), int128_one()); - - for (i = 0; i < oprsz / 16; i++) { - do_vssrlni_q(Vd, Vj, imm, i, mask); - } -} - -VSSRLNUI(vssrlni_bu_h, 16, B, H) -VSSRLNUI(vssrlni_hu_w, 32, H, W) -VSSRLNUI(vssrlni_wu_d, 64, W, D) - -#define VSSRANUI(NAME, BIT, E1, E2) \ -void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ -{ \ - int i, j, ofs; \ - VReg temp = {}; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - int oprsz = simd_oprsz(desc); \ - \ - ofs = LSX_LEN / BIT; \ - for (i = 0; i < oprsz / 16; i++) { \ - for (j = 0; j < ofs; j++) { \ - temp.E1(j + ofs * 2 * i) = do_ssranu_ ## E1(Vj->E2(j + ofs * i), \ - imm, BIT / 2); \ - temp.E1(j + ofs * (2 * i + 1)) = do_ssranu_ ## E1(Vd->E2(j + ofs * i), \ - imm, BIT / 2); \ - } \ - } \ - *Vd = temp; \ -} - -static void do_vssrani_du_q(VReg *Vd, VReg *Vj, - uint64_t imm, int idx, Int128 mask) -{ - Int128 shft_res1, shft_res2; - - if (imm == 0) { - shft_res1 = Vj->Q(idx); - shft_res2 = Vd->Q(idx); - } else { - shft_res1 = int128_rshift(Vj->Q(idx), imm); - shft_res2 = int128_rshift(Vd->Q(idx), imm); - } - - if (int128_lt(Vj->Q(idx), int128_zero())) { - shft_res1 = int128_zero(); - } - - if (int128_lt(Vd->Q(idx), int128_zero())) { - shft_res2 = int128_zero(); - } - if (int128_ult(mask, shft_res1)) { - Vd->D(idx * 2) = int128_getlo(mask); - }else { - Vd->D(idx * 2) = int128_getlo(shft_res1); - } - - if (int128_ult(mask, shft_res2)) { - Vd->D(idx * 2 + 1) = int128_getlo(mask); - }else { - Vd->D(idx * 2 + 1) = int128_getlo(shft_res2); - } - -} - -void HELPER(vssrani_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) -{ - int i; - Int128 mask; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - int oprsz = simd_oprsz(desc); - - mask = int128_sub(int128_lshift(int128_one(), 64), int128_one()); - - for (i = 0; i < oprsz / 16; i++) { - do_vssrani_du_q(Vd, Vj, imm, i, mask); - } -} - -VSSRANUI(vssrani_bu_h, 16, B, H) -VSSRANUI(vssrani_hu_w, 32, H, W) -VSSRANUI(vssrani_wu_d, 64, W, D) - -#define SSRLRNS(E1, E2, T1, T2, T3) \ -static T1 do_ssrlrns_ ## E1(T2 e2, int sa, int sh) \ -{ \ - T1 shft_res; \ - \ - shft_res = do_vsrlr_ ## E2(e2, sa); \ - T1 mask; \ - mask = (1ull << sh) - 1; \ - if (shft_res > mask) { \ - return mask; \ - } else { \ - return shft_res; \ - } \ -} - -SSRLRNS(B, H, uint16_t, int16_t, uint8_t) -SSRLRNS(H, W, uint32_t, int32_t, uint16_t) -SSRLRNS(W, D, uint64_t, int64_t, uint32_t) - -#define VSSRLRN(NAME, BIT, E1, E2, E3) \ -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ -{ \ - int i, j, ofs; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - VReg *Vk = (VReg *)vk; \ - int oprsz = simd_oprsz(desc); \ - \ - ofs = LSX_LEN / BIT; \ - for (i = 0; i < oprsz / 16; i++) { \ - for (j = 0; j < ofs; j++) { \ - Vd->E1(j + ofs * 2 * i) = do_ssrlrns_ ## E1(Vj->E2(j + ofs * i), \ - Vk->E3(j + ofs * i) % BIT, \ - BIT / 2 - 1); \ - } \ - Vd->D(2 * i + 1) = 0; \ - } \ -} - -VSSRLRN(vssrlrn_b_h, 16, B, H, UH) -VSSRLRN(vssrlrn_h_w, 32, H, W, UW) -VSSRLRN(vssrlrn_w_d, 64, W, D, UD) - -#define SSRARNS(E1, E2, T1, T2) \ -static T1 do_ssrarns_ ## E1(T1 e2, int sa, int sh) \ -{ \ - T1 shft_res; \ - \ - shft_res = do_vsrar_ ## E2(e2, sa); \ - T2 mask; \ - mask = (1ll << sh) - 1; \ - if (shft_res > mask) { \ - return mask; \ - } else if (shft_res < -(mask +1)) { \ - return ~mask; \ - } else { \ - return shft_res; \ - } \ -} - -SSRARNS(B, H, int16_t, int8_t) -SSRARNS(H, W, int32_t, int16_t) -SSRARNS(W, D, int64_t, int32_t) - -#define VSSRARN(NAME, BIT, E1, E2, E3) \ 
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ -{ \ - int i, j, ofs; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - VReg *Vk = (VReg *)vk; \ - int oprsz = simd_oprsz(desc); \ - \ - ofs = LSX_LEN / BIT; \ - for (i = 0; i < oprsz / 16; i++) { \ - for (j = 0; j < ofs; j++) { \ - Vd->E1(j + ofs * 2 * i) = do_ssrarns_ ## E1(Vj->E2(j + ofs * i), \ - Vk->E3(j + ofs * i) % BIT, \ - BIT/ 2 - 1); \ - } \ - Vd->D(2 * i + 1) = 0; \ - } \ -} - -VSSRARN(vssrarn_b_h, 16, B, H, UH) -VSSRARN(vssrarn_h_w, 32, H, W, UW) -VSSRARN(vssrarn_w_d, 64, W, D, UD) - -#define SSRLRNU(E1, E2, T1, T2, T3) \ -static T1 do_ssrlrnu_ ## E1(T3 e2, int sa, int sh) \ -{ \ - T1 shft_res; \ - \ - shft_res = do_vsrlr_ ## E2(e2, sa); \ - \ - T2 mask; \ - mask = (1ull << sh) - 1; \ - if (shft_res > mask) { \ - return mask; \ - } else { \ - return shft_res; \ - } \ -} - -SSRLRNU(B, H, uint16_t, uint8_t, int16_t) -SSRLRNU(H, W, uint32_t, uint16_t, int32_t) -SSRLRNU(W, D, uint64_t, uint32_t, int64_t) - -#define VSSRLRNU(NAME, BIT, E1, E2, E3) \ -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ -{ \ - int i, j, ofs; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - VReg *Vk = (VReg *)vk; \ - int oprsz = simd_oprsz(desc); \ - \ - ofs = LSX_LEN / BIT; \ - for (i = 0; i < oprsz / 16; i++) { \ - for (j = 0; j < ofs; j++) { \ - Vd->E1(j + ofs * 2 * i) = do_ssrlrnu_ ## E1(Vj->E2(j + ofs * i), \ - Vk->E3(j + ofs * i) % BIT, \ - BIT / 2); \ - } \ - Vd->D(2 * i + 1) = 0; \ - } \ -} - -VSSRLRNU(vssrlrn_bu_h, 16, B, H, UH) -VSSRLRNU(vssrlrn_hu_w, 32, H, W, UW) -VSSRLRNU(vssrlrn_wu_d, 64, W, D, UD) - -#define SSRARNU(E1, E2, T1, T2, T3) \ -static T1 do_ssrarnu_ ## E1(T3 e2, int sa, int sh) \ -{ \ - T1 shft_res; \ - \ - if (e2 < 0) { \ - shft_res = 0; \ - } else { \ - shft_res = do_vsrar_ ## E2(e2, sa); \ - } \ - T2 mask; \ - mask = (1ull << sh) - 1; \ - if (shft_res > mask) { \ - return mask; \ - } else { \ - return shft_res; \ - } \ -} - -SSRARNU(B, H, uint16_t, uint8_t, int16_t) -SSRARNU(H, W, uint32_t, uint16_t, int32_t) -SSRARNU(W, D, uint64_t, uint32_t, int64_t) - -#define VSSRARNU(NAME, BIT, E1, E2, E3) \ -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ -{ \ - int i, j, ofs; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - VReg *Vk = (VReg *)vk; \ - int oprsz = simd_oprsz(desc); \ - \ - ofs = LSX_LEN / BIT; \ - for (i = 0; i < oprsz / 16; i++) { \ - for (j = 0; j < ofs; j++) { \ - Vd->E1(j + ofs * 2 * i) = do_ssrarnu_ ## E1(Vj->E2(j + ofs * i), \ - Vk->E3(j + ofs * i) % BIT, \ - BIT / 2); \ - } \ - Vd->D(2 * i + 1) = 0; \ - } \ -} - -VSSRARNU(vssrarn_bu_h, 16, B, H, UH) -VSSRARNU(vssrarn_hu_w, 32, H, W, UW) -VSSRARNU(vssrarn_wu_d, 64, W, D, UD) - -#define VSSRLRNI(NAME, BIT, E1, E2) \ -void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ -{ \ - int i, j, ofs; \ - VReg temp = {}; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - int oprsz = simd_oprsz(desc); \ - \ - ofs = LSX_LEN / BIT; \ - for (i = 0; i < oprsz / 16; i++) { \ - for (j = 0; j < ofs; j++) { \ - temp.E1(j + ofs * 2 * i) = do_ssrlrns_ ## E1(Vj->E2(j + ofs * i), \ - imm, BIT / 2 - 1); \ - temp.E1(j + ofs * (2 * i + 1)) = do_ssrlrns_ ## E1(Vd->E2(j + ofs * i), \ - imm, BIT / 2 - 1); \ - } \ - } \ - *Vd = temp; \ -} - -static void do_vssrlrni_q(VReg *Vd, VReg * Vj, - uint64_t imm, int idx, Int128 mask) -{ - Int128 shft_res1, shft_res2, r1, r2; - if (imm == 0) { - shft_res1 = Vj->Q(idx); - shft_res2 = Vd->Q(idx); - } else { - r1 = int128_and(int128_urshift(Vj->Q(idx), (imm - 1)), 
int128_one()); - r2 = int128_and(int128_urshift(Vd->Q(idx), (imm - 1)), int128_one()); - shft_res1 = (int128_add(int128_urshift(Vj->Q(idx), imm), r1)); - shft_res2 = (int128_add(int128_urshift(Vd->Q(idx), imm), r2)); - } - - if (int128_ult(mask, shft_res1)) { - Vd->D(idx * 2) = int128_getlo(mask); - }else { - Vd->D(idx * 2) = int128_getlo(shft_res1); - } - - if (int128_ult(mask, shft_res2)) { - Vd->D(idx * 2 + 1) = int128_getlo(mask); - }else { - Vd->D(idx * 2 + 1) = int128_getlo(shft_res2); - } -} - -void HELPER(vssrlrni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) -{ - int i; - Int128 mask; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - int oprsz = simd_oprsz(desc); - - mask = int128_sub(int128_lshift(int128_one(), 63), int128_one()); - - for (i = 0; i < oprsz / 16; i++) { - do_vssrlrni_q(Vd, Vj, imm, i, mask); - } -} - -VSSRLRNI(vssrlrni_b_h, 16, B, H) -VSSRLRNI(vssrlrni_h_w, 32, H, W) -VSSRLRNI(vssrlrni_w_d, 64, W, D) - -#define VSSRARNI(NAME, BIT, E1, E2) \ -void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ -{ \ - int i, j, ofs; \ - VReg temp = {}; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - int oprsz = simd_oprsz(desc); \ - \ - ofs = LSX_LEN / BIT; \ - for (i = 0; i < oprsz / 16; i++) { \ - for (j = 0; j < ofs; j++) { \ - temp.E1(j + ofs * 2 * i) = do_ssrarns_ ## E1(Vj->E2(j + ofs * i), \ - imm, BIT / 2 - 1); \ - temp.E1(j + ofs * (2 * i + 1)) = do_ssrarns_ ## E1(Vd->E2(j + ofs * i), \ - imm, BIT / 2 - 1); \ - } \ - } \ - *Vd = temp; \ -} - -static void do_vssrarni_d_q(VReg *Vd, VReg *Vj, - uint64_t imm, int idx, Int128 mask1, Int128 mask2) -{ - Int128 shft_res1, shft_res2, r1, r2; - - if (imm == 0) { - shft_res1 = Vj->Q(idx); - shft_res2 = Vd->Q(idx); - } else { - r1 = int128_and(int128_rshift(Vj->Q(idx), (imm - 1)), int128_one()); - r2 = int128_and(int128_rshift(Vd->Q(idx), (imm - 1)), int128_one()); - shft_res1 = int128_add(int128_rshift(Vj->Q(idx), imm), r1); - shft_res2 = int128_add(int128_rshift(Vd->Q(idx), imm), r2); - } - if (int128_gt(shft_res1, mask1)) { - Vd->D(idx * 2) = int128_getlo(mask1); - } else if (int128_lt(shft_res1, int128_neg(mask2))) { - Vd->D(idx * 2) = int128_getlo(mask2); - } else { - Vd->D(idx * 2) = int128_getlo(shft_res1); - } - - if (int128_gt(shft_res2, mask1)) { - Vd->D(idx * 2 + 1) = int128_getlo(mask1); - } else if (int128_lt(shft_res2, int128_neg(mask2))) { - Vd->D(idx * 2 + 1) = int128_getlo(mask2); - } else { - Vd->D(idx * 2 + 1) = int128_getlo(shft_res2); - } -} - -void HELPER(vssrarni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) -{ - int i; - Int128 mask1, mask2; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - int oprsz = simd_oprsz(desc); - - mask1 = int128_sub(int128_lshift(int128_one(), 63), int128_one()); - mask2 = int128_lshift(int128_one(), 63); - - for (i = 0; i < oprsz / 16; i++) { - do_vssrarni_d_q(Vd, Vj, imm, i, mask1, mask2); - } -} - -VSSRARNI(vssrarni_b_h, 16, B, H) -VSSRARNI(vssrarni_h_w, 32, H, W) -VSSRARNI(vssrarni_w_d, 64, W, D) - -#define VSSRLRNUI(NAME, BIT, E1, E2) \ -void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ -{ \ - int i, j, ofs; \ - VReg temp = {}; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - int oprsz = simd_oprsz(desc); \ - \ - ofs = LSX_LEN / BIT; \ - for (i = 0; i < oprsz / 16; i++) { \ - for (j = 0; j < ofs; j++) { \ - temp.E1(j + ofs * 2 * i) = do_ssrlrnu_ ## E1(Vj->E2(j + ofs * i), \ - imm, BIT / 2); \ - temp.E1(j + ofs * (2 * i + 1)) = do_ssrlrnu_ ## E1(Vd->E2(j + ofs * i), \ - imm, BIT / 2); \ - } \ - } \ - *Vd = 
temp; \ -} - -void HELPER(vssrlrni_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) -{ - int i; - Int128 mask; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - int oprsz = simd_oprsz(desc); - - mask = int128_sub(int128_lshift(int128_one(), 64), int128_one()); - - for (i = 0; i < oprsz / 16; i++) { - do_vssrlrni_q(Vd, Vj, imm, i, mask); - } -} - -VSSRLRNUI(vssrlrni_bu_h, 16, B, H) -VSSRLRNUI(vssrlrni_hu_w, 32, H, W) -VSSRLRNUI(vssrlrni_wu_d, 64, W, D) - -#define VSSRARNUI(NAME, BIT, E1, E2) \ -void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ -{ \ - int i, j, ofs; \ - VReg temp = {}; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - int oprsz = simd_oprsz(desc); \ - \ - ofs = LSX_LEN / BIT; \ - for (i = 0; i < oprsz / 16; i++) { \ - for (j = 0; j < ofs; j++) { \ - temp.E1(j + ofs * 2 * i) = do_ssrarnu_ ## E1(Vj->E2(j + ofs * i), \ - imm, BIT / 2); \ - temp.E1(j + ofs * (2 * i + 1)) = do_ssrarnu_ ## E1(Vd->E2(j + ofs * i), \ - imm, BIT / 2); \ - } \ - } \ - *Vd = temp; \ -} - -static void do_vssrarni_du_q(VReg *Vd, VReg *Vj, - uint64_t imm, int idx, Int128 mask1, Int128 mask2) -{ - Int128 shft_res1, shft_res2, r1, r2; - - if (imm == 0) { - shft_res1 = Vj->Q(idx); - shft_res2 = Vd->Q(idx); - } else { - r1 = int128_and(int128_rshift(Vj->Q(idx), (imm - 1)), int128_one()); - r2 = int128_and(int128_rshift(Vd->Q(idx), (imm - 1)), int128_one()); - shft_res1 = int128_add(int128_rshift(Vj->Q(idx), imm), r1); - shft_res2 = int128_add(int128_rshift(Vd->Q(idx), imm), r2); - } - - if (int128_lt(Vj->Q(idx), int128_zero())) { - shft_res1 = int128_zero(); - } - if (int128_lt(Vd->Q(idx), int128_zero())) { - shft_res2 = int128_zero(); - } - - if (int128_gt(shft_res1, mask1)) { - Vd->D(idx * 2) = int128_getlo(mask1); - } else if (int128_lt(shft_res1, int128_neg(mask2))) { - Vd->D(idx * 2) = int128_getlo(mask2); - } else { - Vd->D(idx * 2) = int128_getlo(shft_res1); - } - - if (int128_gt(shft_res2, mask1)) { - Vd->D(idx * 2 + 1) = int128_getlo(mask1); - } else if (int128_lt(shft_res2, int128_neg(mask2))) { - Vd->D(idx * 2 + 1) = int128_getlo(mask2); - } else { - Vd->D(idx * 2 + 1) = int128_getlo(shft_res2); - } -} - -void HELPER(vssrarni_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) -{ - int i; - Int128 mask1, mask2; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - int oprsz = simd_oprsz(desc); - - mask1 = int128_sub(int128_lshift(int128_one(), 64), int128_one()); - mask2 = int128_lshift(int128_one(), 64); - - for (i = 0; i < oprsz / 16; i++) { - do_vssrarni_du_q(Vd, Vj, imm, i, mask1, mask2); - } -} - -VSSRARNUI(vssrarni_bu_h, 16, B, H) -VSSRARNUI(vssrarni_hu_w, 32, H, W) -VSSRARNUI(vssrarni_wu_d, 64, W, D) - -#define DO_2OP(NAME, BIT, E, DO_OP) \ -void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \ -{ \ - int i; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - int oprsz = simd_oprsz(desc); \ - \ - for (i = 0; i < oprsz / (BIT / 8); i++) \ - { \ - Vd->E(i) = DO_OP(Vj->E(i)); \ - } \ -} - -DO_2OP(vclo_b, 8, UB, DO_CLO_B) -DO_2OP(vclo_h, 16, UH, DO_CLO_H) -DO_2OP(vclo_w, 32, UW, DO_CLO_W) -DO_2OP(vclo_d, 64, UD, DO_CLO_D) -DO_2OP(vclz_b, 8, UB, DO_CLZ_B) -DO_2OP(vclz_h, 16, UH, DO_CLZ_H) -DO_2OP(vclz_w, 32, UW, DO_CLZ_W) -DO_2OP(vclz_d, 64, UD, DO_CLZ_D) - -#define VPCNT(NAME, BIT, E, FN) \ -void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \ -{ \ - int i; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - int oprsz = simd_oprsz(desc); \ - \ - for (i = 0; i < oprsz / (BIT / 8); i++) \ - { \ - Vd->E(i) = FN(Vj->E(i)); \ - } \ -} - 
-VPCNT(vpcnt_b, 8, UB, ctpop8) -VPCNT(vpcnt_h, 16, UH, ctpop16) -VPCNT(vpcnt_w, 32, UW, ctpop32) -VPCNT(vpcnt_d, 64, UD, ctpop64) - -#define DO_BIT(NAME, BIT, E, DO_OP) \ -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ -{ \ - int i; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - VReg *Vk = (VReg *)vk; \ - int oprsz = simd_oprsz(desc); \ - \ - for (i = 0; i < oprsz / (BIT / 8); i++) { \ - Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)%BIT); \ - } \ -} - -DO_BIT(vbitclr_b, 8, UB, DO_BITCLR) -DO_BIT(vbitclr_h, 16, UH, DO_BITCLR) -DO_BIT(vbitclr_w, 32, UW, DO_BITCLR) -DO_BIT(vbitclr_d, 64, UD, DO_BITCLR) -DO_BIT(vbitset_b, 8, UB, DO_BITSET) -DO_BIT(vbitset_h, 16, UH, DO_BITSET) -DO_BIT(vbitset_w, 32, UW, DO_BITSET) -DO_BIT(vbitset_d, 64, UD, DO_BITSET) -DO_BIT(vbitrev_b, 8, UB, DO_BITREV) -DO_BIT(vbitrev_h, 16, UH, DO_BITREV) -DO_BIT(vbitrev_w, 32, UW, DO_BITREV) -DO_BIT(vbitrev_d, 64, UD, DO_BITREV) - -#define DO_BITI(NAME, BIT, E, DO_OP) \ -void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ -{ \ - int i; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - int oprsz = simd_oprsz(desc); \ - \ - for (i = 0; i < oprsz / (BIT / 8); i++) { \ - Vd->E(i) = DO_OP(Vj->E(i), imm); \ - } \ -} - -DO_BITI(vbitclri_b, 8, UB, DO_BITCLR) -DO_BITI(vbitclri_h, 16, UH, DO_BITCLR) -DO_BITI(vbitclri_w, 32, UW, DO_BITCLR) -DO_BITI(vbitclri_d, 64, UD, DO_BITCLR) -DO_BITI(vbitseti_b, 8, UB, DO_BITSET) -DO_BITI(vbitseti_h, 16, UH, DO_BITSET) -DO_BITI(vbitseti_w, 32, UW, DO_BITSET) -DO_BITI(vbitseti_d, 64, UD, DO_BITSET) -DO_BITI(vbitrevi_b, 8, UB, DO_BITREV) -DO_BITI(vbitrevi_h, 16, UH, DO_BITREV) -DO_BITI(vbitrevi_w, 32, UW, DO_BITREV) -DO_BITI(vbitrevi_d, 64, UD, DO_BITREV) - -#define VFRSTP(NAME, BIT, MASK, E) \ -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ -{ \ - int i, j, m, ofs; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - VReg *Vk = (VReg *)vk; \ - int oprsz = simd_oprsz(desc); \ - \ - ofs = LSX_LEN / BIT; \ - for (i = 0; i < oprsz / 16; i++) { \ - m = Vk->E(i * ofs) & MASK; \ - for (j = 0; j < ofs; j++) { \ - if (Vj->E(j + ofs * i) < 0) { \ - break; \ - } \ - } \ - Vd->E(m + i * ofs) = j; \ - } \ -} - -VFRSTP(vfrstp_b, 8, 0xf, B) -VFRSTP(vfrstp_h, 16, 0x7, H) - -#define VFRSTPI(NAME, BIT, E) \ -void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ -{ \ - int i, j, m, ofs; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - int oprsz = simd_oprsz(desc); \ - \ - ofs = LSX_LEN / BIT; \ - m = imm % ofs; \ - for (i = 0; i < oprsz / 16; i++) { \ - for (j = 0; j < ofs; j++) { \ - if (Vj->E(j + ofs * i) < 0) { \ - break; \ - } \ - } \ - Vd->E(m + i * ofs) = j; \ - } \ -} - -VFRSTPI(vfrstpi_b, 8, B) -VFRSTPI(vfrstpi_h, 16, H) - -static void vec_update_fcsr0_mask(CPULoongArchState *env, - uintptr_t pc, int mask) -{ - int flags = get_float_exception_flags(&env->fp_status); - - set_float_exception_flags(0, &env->fp_status); - - flags &= ~mask; - - if (flags) { - flags = ieee_ex_to_loongarch(flags); - UPDATE_FP_CAUSE(env->fcsr0, flags); - } - - if (GET_FP_ENABLES(env->fcsr0) & flags) { - do_raise_exception(env, EXCCODE_FPE, pc); - } else { - UPDATE_FP_FLAGS(env->fcsr0, flags); - } -} - -static void vec_update_fcsr0(CPULoongArchState *env, uintptr_t pc) -{ - vec_update_fcsr0_mask(env, pc, 0); -} - -static inline void vec_clear_cause(CPULoongArchState *env) -{ - SET_FP_CAUSE(env->fcsr0, 0); -} - -#define DO_3OP_F(NAME, BIT, E, FN) \ -void HELPER(NAME)(void *vd, void *vj, void *vk, \ - CPULoongArchState *env, uint32_t desc) \ -{ \ 
- int i; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - VReg *Vk = (VReg *)vk; \ - int oprsz = simd_oprsz(desc); \ - \ - vec_clear_cause(env); \ - for (i = 0; i < oprsz / (BIT / 8); i++) { \ - Vd->E(i) = FN(Vj->E(i), Vk->E(i), &env->fp_status); \ - vec_update_fcsr0(env, GETPC()); \ - } \ -} - -DO_3OP_F(vfadd_s, 32, UW, float32_add) -DO_3OP_F(vfadd_d, 64, UD, float64_add) -DO_3OP_F(vfsub_s, 32, UW, float32_sub) -DO_3OP_F(vfsub_d, 64, UD, float64_sub) -DO_3OP_F(vfmul_s, 32, UW, float32_mul) -DO_3OP_F(vfmul_d, 64, UD, float64_mul) -DO_3OP_F(vfdiv_s, 32, UW, float32_div) -DO_3OP_F(vfdiv_d, 64, UD, float64_div) -DO_3OP_F(vfmax_s, 32, UW, float32_maxnum) -DO_3OP_F(vfmax_d, 64, UD, float64_maxnum) -DO_3OP_F(vfmin_s, 32, UW, float32_minnum) -DO_3OP_F(vfmin_d, 64, UD, float64_minnum) -DO_3OP_F(vfmaxa_s, 32, UW, float32_maxnummag) -DO_3OP_F(vfmaxa_d, 64, UD, float64_maxnummag) -DO_3OP_F(vfmina_s, 32, UW, float32_minnummag) -DO_3OP_F(vfmina_d, 64, UD, float64_minnummag) - -#define DO_4OP_F(NAME, BIT, E, FN, flags) \ -void HELPER(NAME)(void *vd, void *vj, void *vk, void *va, \ - CPULoongArchState *env, uint32_t desc) \ -{ \ - int i; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - VReg *Vk = (VReg *)vk; \ - VReg *Va = (VReg *)va; \ - int oprsz = simd_oprsz(desc); \ - \ - vec_clear_cause(env); \ - for (i = 0; i < oprsz / (BIT / 8); i++) { \ - Vd->E(i) = FN(Vj->E(i), Vk->E(i), Va->E(i), flags, &env->fp_status); \ - vec_update_fcsr0(env, GETPC()); \ - } \ -} - -DO_4OP_F(vfmadd_s, 32, UW, float32_muladd, 0) -DO_4OP_F(vfmadd_d, 64, UD, float64_muladd, 0) -DO_4OP_F(vfmsub_s, 32, UW, float32_muladd, float_muladd_negate_c) -DO_4OP_F(vfmsub_d, 64, UD, float64_muladd, float_muladd_negate_c) -DO_4OP_F(vfnmadd_s, 32, UW, float32_muladd, float_muladd_negate_result) -DO_4OP_F(vfnmadd_d, 64, UD, float64_muladd, float_muladd_negate_result) -DO_4OP_F(vfnmsub_s, 32, UW, float32_muladd, - float_muladd_negate_c | float_muladd_negate_result) -DO_4OP_F(vfnmsub_d, 64, UD, float64_muladd, - float_muladd_negate_c | float_muladd_negate_result) - -#define DO_2OP_F(NAME, BIT, E, FN) \ -void HELPER(NAME)(void *vd, void *vj, \ - CPULoongArchState *env, uint32_t desc) \ -{ \ - int i; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - int oprsz = simd_oprsz(desc); \ - \ - vec_clear_cause(env); \ - for (i = 0; i < oprsz / (BIT / 8); i++) { \ - Vd->E(i) = FN(env, Vj->E(i)); \ - } \ -} - -#define FLOGB(BIT, T) \ -static T do_flogb_## BIT(CPULoongArchState *env, T fj) \ -{ \ - T fp, fd; \ - float_status *status = &env->fp_status; \ - FloatRoundMode old_mode = get_float_rounding_mode(status); \ - \ - set_float_rounding_mode(float_round_down, status); \ - fp = float ## BIT ##_log2(fj, status); \ - fd = float ## BIT ##_round_to_int(fp, status); \ - set_float_rounding_mode(old_mode, status); \ - vec_update_fcsr0_mask(env, GETPC(), float_flag_inexact); \ - return fd; \ -} - -FLOGB(32, uint32_t) -FLOGB(64, uint64_t) - -#define FCLASS(NAME, BIT, E, FN) \ -void HELPER(NAME)(void *vd, void *vj, \ - CPULoongArchState *env, uint32_t desc) \ -{ \ - int i; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - int oprsz = simd_oprsz(desc); \ - \ - for (i = 0; i < oprsz / (BIT / 8); i++) { \ - Vd->E(i) = FN(env, Vj->E(i)); \ - } \ -} - -FCLASS(vfclass_s, 32, UW, helper_fclass_s) -FCLASS(vfclass_d, 64, UD, helper_fclass_d) - -#define FSQRT(BIT, T) \ -static T do_fsqrt_## BIT(CPULoongArchState *env, T fj) \ -{ \ - T fd; \ - fd = float ## BIT ##_sqrt(fj, &env->fp_status); \ - vec_update_fcsr0(env, GETPC()); \ - return fd; \ 
-} - -FSQRT(32, uint32_t) -FSQRT(64, uint64_t) - -#define FRECIP(BIT, T) \ -static T do_frecip_## BIT(CPULoongArchState *env, T fj) \ -{ \ - T fd; \ - fd = float ## BIT ##_div(float ## BIT ##_one, fj, &env->fp_status); \ - vec_update_fcsr0(env, GETPC()); \ - return fd; \ -} - -FRECIP(32, uint32_t) -FRECIP(64, uint64_t) - -#define FRSQRT(BIT, T) \ -static T do_frsqrt_## BIT(CPULoongArchState *env, T fj) \ -{ \ - T fd, fp; \ - fp = float ## BIT ##_sqrt(fj, &env->fp_status); \ - fd = float ## BIT ##_div(float ## BIT ##_one, fp, &env->fp_status); \ - vec_update_fcsr0(env, GETPC()); \ - return fd; \ -} - -FRSQRT(32, uint32_t) -FRSQRT(64, uint64_t) - -DO_2OP_F(vflogb_s, 32, UW, do_flogb_32) -DO_2OP_F(vflogb_d, 64, UD, do_flogb_64) -DO_2OP_F(vfsqrt_s, 32, UW, do_fsqrt_32) -DO_2OP_F(vfsqrt_d, 64, UD, do_fsqrt_64) -DO_2OP_F(vfrecip_s, 32, UW, do_frecip_32) -DO_2OP_F(vfrecip_d, 64, UD, do_frecip_64) -DO_2OP_F(vfrsqrt_s, 32, UW, do_frsqrt_32) -DO_2OP_F(vfrsqrt_d, 64, UD, do_frsqrt_64) - -static uint32_t float16_cvt_float32(uint16_t h, float_status *status) -{ - return float16_to_float32(h, true, status); -} -static uint64_t float32_cvt_float64(uint32_t s, float_status *status) -{ - return float32_to_float64(s, status); -} - -static uint16_t float32_cvt_float16(uint32_t s, float_status *status) -{ - return float32_to_float16(s, true, status); -} -static uint32_t float64_cvt_float32(uint64_t d, float_status *status) -{ - return float64_to_float32(d, status); -} - -void HELPER(vfcvtl_s_h)(void *vd, void *vj, - CPULoongArchState *env, uint32_t desc) -{ - int i, j, ofs; - VReg temp = {}; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - int oprsz = simd_oprsz(desc); - - ofs = LSX_LEN / 32; - vec_clear_cause(env); - for (i = 0; i < oprsz / 16; i++) { - for (j = 0; j < ofs; j++) { - temp.UW(j + ofs * i) =float16_cvt_float32(Vj->UH(j + ofs * 2 * i), - &env->fp_status); - } - vec_update_fcsr0(env, GETPC()); - } - *Vd = temp; -} - -void HELPER(vfcvtl_d_s)(void *vd, void *vj, - CPULoongArchState *env, uint32_t desc) -{ - int i, j, ofs; - VReg temp = {}; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - int oprsz = simd_oprsz(desc); - - ofs = LSX_LEN / 64; - vec_clear_cause(env); - for (i = 0; i < oprsz / 16; i++) { - for (j = 0; j < ofs; j++) { - temp.UD(j + ofs * i) = float32_cvt_float64(Vj->UW(j + ofs * 2 * i), - &env->fp_status); - } - vec_update_fcsr0(env, GETPC()); - } - *Vd = temp; -} - -void HELPER(vfcvth_s_h)(void *vd, void *vj, - CPULoongArchState *env, uint32_t desc) -{ - int i, j, ofs; - VReg temp = {}; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - int oprsz = simd_oprsz(desc); - - ofs = LSX_LEN / 32; - vec_clear_cause(env); - for (i = 0; i < oprsz / 16; i++) { - for (j = 0; j < ofs; j++) { - temp.UW(j + ofs * i) = float16_cvt_float32(Vj->UH(j + ofs * (2 * i + 1)), - &env->fp_status); - } - vec_update_fcsr0(env, GETPC()); - } - *Vd = temp; -} - -void HELPER(vfcvth_d_s)(void *vd, void *vj, - CPULoongArchState *env, uint32_t desc) -{ - int i, j, ofs; - VReg temp = {}; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - int oprsz = simd_oprsz(desc); - - ofs = LSX_LEN / 64; - vec_clear_cause(env); - for (i = 0; i < oprsz / 16; i++) { - for (j = 0; j < ofs; j++) { - temp.UD(j + ofs * i) = float32_cvt_float64(Vj->UW(j + ofs * (2 * i + 1)), - &env->fp_status); - } - vec_update_fcsr0(env, GETPC()); - } - *Vd = temp; -} - -void HELPER(vfcvt_h_s)(void *vd, void *vj, void *vk, - CPULoongArchState *env, uint32_t desc) -{ - int i, j, ofs; - VReg temp = {}; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg 
*)vj; - VReg *Vk = (VReg *)vk; - int oprsz = simd_oprsz(desc); - - ofs = LSX_LEN / 32; - vec_clear_cause(env); - for(i = 0; i < oprsz / 16; i++) { - for (j = 0; j < ofs; j++) { - temp.UH(j + ofs * (2 * i + 1)) = float32_cvt_float16(Vj->UW(j + ofs * i), - &env->fp_status); - temp.UH(j + ofs * 2 * i) = float32_cvt_float16(Vk->UW(j + ofs * i), - &env->fp_status); - } - vec_update_fcsr0(env, GETPC()); - } - *Vd = temp; -} - -void HELPER(vfcvt_s_d)(void *vd, void *vj, void *vk, - CPULoongArchState *env, uint32_t desc) -{ - int i, j, ofs; - VReg temp = {}; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - VReg *Vk = (VReg *)vk; - int oprsz = simd_oprsz(desc); - - ofs = LSX_LEN / 64; - vec_clear_cause(env); - for(i = 0; i < oprsz / 16; i++) { - for (j = 0; j < ofs; j++) { - temp.UW(j + ofs * (2 * i + 1)) = float64_cvt_float32(Vj->UD(j + ofs * i), - &env->fp_status); - temp.UW(j + ofs * 2 * i) = float64_cvt_float32(Vk->UD(j + ofs * i), - &env->fp_status); - } - vec_update_fcsr0(env, GETPC()); - } - *Vd = temp; -} - -void HELPER(vfrint_s)(void *vd, void *vj, - CPULoongArchState *env, uint32_t desc) -{ - int i; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - int oprsz = simd_oprsz(desc); - - vec_clear_cause(env); - for (i = 0; i < oprsz / 4; i++) { - Vd->W(i) = float32_round_to_int(Vj->UW(i), &env->fp_status); - vec_update_fcsr0(env, GETPC()); - } -} - -void HELPER(vfrint_d)(void *vd, void *vj, - CPULoongArchState *env, uint32_t desc) -{ - int i; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - int oprsz = simd_oprsz(desc); - - vec_clear_cause(env); - for (i = 0; i < oprsz / 8; i++) { - Vd->D(i) = float64_round_to_int(Vj->UD(i), &env->fp_status); - vec_update_fcsr0(env, GETPC()); - } -} - -#define FCVT_2OP(NAME, BIT, E, MODE) \ -void HELPER(NAME)(void *vd, void *vj, \ - CPULoongArchState *env, uint32_t desc) \ -{ \ - int i; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - int oprsz = simd_oprsz(desc); \ - \ - vec_clear_cause(env); \ - for (i = 0; i < oprsz / (BIT / 8); i++) { \ - FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); \ - set_float_rounding_mode(MODE, &env->fp_status); \ - Vd->E(i) = float## BIT ## _round_to_int(Vj->E(i), &env->fp_status); \ - set_float_rounding_mode(old_mode, &env->fp_status); \ - vec_update_fcsr0(env, GETPC()); \ - } \ -} - -FCVT_2OP(vfrintrne_s, 32, UW, float_round_nearest_even) -FCVT_2OP(vfrintrne_d, 64, UD, float_round_nearest_even) -FCVT_2OP(vfrintrz_s, 32, UW, float_round_to_zero) -FCVT_2OP(vfrintrz_d, 64, UD, float_round_to_zero) -FCVT_2OP(vfrintrp_s, 32, UW, float_round_up) -FCVT_2OP(vfrintrp_d, 64, UD, float_round_up) -FCVT_2OP(vfrintrm_s, 32, UW, float_round_down) -FCVT_2OP(vfrintrm_d, 64, UD, float_round_down) - -#define FTINT(NAME, FMT1, FMT2, T1, T2, MODE) \ -static T2 do_ftint ## NAME(CPULoongArchState *env, T1 fj) \ -{ \ - T2 fd; \ - FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); \ - \ - set_float_rounding_mode(MODE, &env->fp_status); \ - fd = do_## FMT1 ##_to_## FMT2(env, fj); \ - set_float_rounding_mode(old_mode, &env->fp_status); \ - return fd; \ -} - -#define DO_FTINT(FMT1, FMT2, T1, T2) \ -static T2 do_## FMT1 ##_to_## FMT2(CPULoongArchState *env, T1 fj) \ -{ \ - T2 fd; \ - \ - fd = FMT1 ##_to_## FMT2(fj, &env->fp_status); \ - if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { \ - if (FMT1 ##_is_any_nan(fj)) { \ - fd = 0; \ - } \ - } \ - vec_update_fcsr0(env, GETPC()); \ - return fd; \ -} - -DO_FTINT(float32, int32, uint32_t, uint32_t) -DO_FTINT(float64, int64, uint64_t, 
uint64_t) -DO_FTINT(float32, uint32, uint32_t, uint32_t) -DO_FTINT(float64, uint64, uint64_t, uint64_t) -DO_FTINT(float64, int32, uint64_t, uint32_t) -DO_FTINT(float32, int64, uint32_t, uint64_t) - -FTINT(rne_w_s, float32, int32, uint32_t, uint32_t, float_round_nearest_even) -FTINT(rne_l_d, float64, int64, uint64_t, uint64_t, float_round_nearest_even) -FTINT(rp_w_s, float32, int32, uint32_t, uint32_t, float_round_up) -FTINT(rp_l_d, float64, int64, uint64_t, uint64_t, float_round_up) -FTINT(rz_w_s, float32, int32, uint32_t, uint32_t, float_round_to_zero) -FTINT(rz_l_d, float64, int64, uint64_t, uint64_t, float_round_to_zero) -FTINT(rm_w_s, float32, int32, uint32_t, uint32_t, float_round_down) -FTINT(rm_l_d, float64, int64, uint64_t, uint64_t, float_round_down) - -DO_2OP_F(vftintrne_w_s, 32, UW, do_ftintrne_w_s) -DO_2OP_F(vftintrne_l_d, 64, UD, do_ftintrne_l_d) -DO_2OP_F(vftintrp_w_s, 32, UW, do_ftintrp_w_s) -DO_2OP_F(vftintrp_l_d, 64, UD, do_ftintrp_l_d) -DO_2OP_F(vftintrz_w_s, 32, UW, do_ftintrz_w_s) -DO_2OP_F(vftintrz_l_d, 64, UD, do_ftintrz_l_d) -DO_2OP_F(vftintrm_w_s, 32, UW, do_ftintrm_w_s) -DO_2OP_F(vftintrm_l_d, 64, UD, do_ftintrm_l_d) -DO_2OP_F(vftint_w_s, 32, UW, do_float32_to_int32) -DO_2OP_F(vftint_l_d, 64, UD, do_float64_to_int64) - -FTINT(rz_wu_s, float32, uint32, uint32_t, uint32_t, float_round_to_zero) -FTINT(rz_lu_d, float64, uint64, uint64_t, uint64_t, float_round_to_zero) - -DO_2OP_F(vftintrz_wu_s, 32, UW, do_ftintrz_wu_s) -DO_2OP_F(vftintrz_lu_d, 64, UD, do_ftintrz_lu_d) -DO_2OP_F(vftint_wu_s, 32, UW, do_float32_to_uint32) -DO_2OP_F(vftint_lu_d, 64, UD, do_float64_to_uint64) - -FTINT(rm_w_d, float64, int32, uint64_t, uint32_t, float_round_down) -FTINT(rp_w_d, float64, int32, uint64_t, uint32_t, float_round_up) -FTINT(rz_w_d, float64, int32, uint64_t, uint32_t, float_round_to_zero) -FTINT(rne_w_d, float64, int32, uint64_t, uint32_t, float_round_nearest_even) - -#define FTINT_W_D(NAME, FN) \ -void HELPER(NAME)(void *vd, void *vj, void *vk, \ - CPULoongArchState *env, uint32_t desc) \ -{ \ - int i, j, ofs; \ - VReg temp = {}; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - VReg *Vk = (VReg *)vk; \ - int oprsz = simd_oprsz(desc); \ - \ - ofs = LSX_LEN / 64; \ - vec_clear_cause(env); \ - for (i = 0; i < oprsz / 16; i++) { \ - for (j = 0; j < ofs; j++) { \ - temp.W(j + ofs * (2 * i + 1)) = FN(env, Vj->UD(j + ofs * i)); \ - temp.W(j + ofs * 2 * i) = FN(env, Vk->UD(j + ofs * i)); \ - } \ - } \ - *Vd = temp; \ -} - -FTINT_W_D(vftint_w_d, do_float64_to_int32) -FTINT_W_D(vftintrm_w_d, do_ftintrm_w_d) -FTINT_W_D(vftintrp_w_d, do_ftintrp_w_d) -FTINT_W_D(vftintrz_w_d, do_ftintrz_w_d) -FTINT_W_D(vftintrne_w_d, do_ftintrne_w_d) - -FTINT(rml_l_s, float32, int64, uint32_t, uint64_t, float_round_down) -FTINT(rpl_l_s, float32, int64, uint32_t, uint64_t, float_round_up) -FTINT(rzl_l_s, float32, int64, uint32_t, uint64_t, float_round_to_zero) -FTINT(rnel_l_s, float32, int64, uint32_t, uint64_t, float_round_nearest_even) -FTINT(rmh_l_s, float32, int64, uint32_t, uint64_t, float_round_down) -FTINT(rph_l_s, float32, int64, uint32_t, uint64_t, float_round_up) -FTINT(rzh_l_s, float32, int64, uint32_t, uint64_t, float_round_to_zero) -FTINT(rneh_l_s, float32, int64, uint32_t, uint64_t, float_round_nearest_even) - -#define FTINTL_L_S(NAME, FN) \ -void HELPER(NAME)(void *vd, void *vj, \ - CPULoongArchState *env, uint32_t desc) \ -{ \ - int i, j, ofs; \ - VReg temp; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - int oprsz = simd_oprsz(desc); \ - \ - ofs = LSX_LEN / 64; \ - 
vec_clear_cause(env); \ - for (i = 0; i < oprsz / 16; i++) { \ - for (j = 0; j < ofs; j++) { \ - temp.D(j + ofs * i) = FN(env, Vj->UW(j + ofs * 2 * i)); \ - } \ - } \ - *Vd = temp; \ -} - -FTINTL_L_S(vftintl_l_s, do_float32_to_int64) -FTINTL_L_S(vftintrml_l_s, do_ftintrml_l_s) -FTINTL_L_S(vftintrpl_l_s, do_ftintrpl_l_s) -FTINTL_L_S(vftintrzl_l_s, do_ftintrzl_l_s) -FTINTL_L_S(vftintrnel_l_s, do_ftintrnel_l_s) - -#define FTINTH_L_S(NAME, FN) \ -void HELPER(NAME)(void *vd, void *vj, \ - CPULoongArchState *env, uint32_t desc) \ -{ \ - int i, j, ofs; \ - VReg temp = {}; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - int oprsz = simd_oprsz(desc); \ - \ - ofs = LSX_LEN / 64; \ - vec_clear_cause(env); \ - for (i = 0; i < oprsz / 16; i++) { \ - for (j = 0; j < ofs; j++) { \ - temp.D(j + ofs * i) = FN(env, Vj->UW(j + ofs * (2 * i + 1))); \ - } \ - } \ - *Vd = temp; \ -} - -FTINTH_L_S(vftinth_l_s, do_float32_to_int64) -FTINTH_L_S(vftintrmh_l_s, do_ftintrmh_l_s) -FTINTH_L_S(vftintrph_l_s, do_ftintrph_l_s) -FTINTH_L_S(vftintrzh_l_s, do_ftintrzh_l_s) -FTINTH_L_S(vftintrneh_l_s, do_ftintrneh_l_s) - -#define FFINT(NAME, FMT1, FMT2, T1, T2) \ -static T2 do_ffint_ ## NAME(CPULoongArchState *env, T1 fj) \ -{ \ - T2 fd; \ - \ - fd = FMT1 ##_to_## FMT2(fj, &env->fp_status); \ - vec_update_fcsr0(env, GETPC()); \ - return fd; \ -} - -FFINT(s_w, int32, float32, int32_t, uint32_t) -FFINT(d_l, int64, float64, int64_t, uint64_t) -FFINT(s_wu, uint32, float32, uint32_t, uint32_t) -FFINT(d_lu, uint64, float64, uint64_t, uint64_t) - -DO_2OP_F(vffint_s_w, 32, W, do_ffint_s_w) -DO_2OP_F(vffint_d_l, 64, D, do_ffint_d_l) -DO_2OP_F(vffint_s_wu, 32, UW, do_ffint_s_wu) -DO_2OP_F(vffint_d_lu, 64, UD, do_ffint_d_lu) - -void HELPER(vffintl_d_w)(void *vd, void *vj, - CPULoongArchState *env, uint32_t desc) -{ - int i, j, ofs; - VReg temp = {}; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - int oprsz = simd_oprsz(desc); - - ofs = LSX_LEN / 64; - vec_clear_cause(env); - for (i = 0; i < oprsz / 16; i++) { - for (j = 0; j < ofs; j++) { - temp.D(j + ofs * i) = int32_to_float64(Vj->W(j + ofs * 2 * i), - &env->fp_status); - } - vec_update_fcsr0(env, GETPC()); - } - *Vd = temp; -} - -void HELPER(vffinth_d_w)(void *vd, void *vj, - CPULoongArchState *env, uint32_t desc) -{ - int i, j, ofs; - VReg temp = {}; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - int oprsz = simd_oprsz(desc); - - ofs = LSX_LEN / 64; - vec_clear_cause(env); - for (i = 0; i < oprsz /16; i++) { - for (j = 0; j < ofs; j++) { - temp.D(j + ofs * i) = int32_to_float64(Vj->W(j + ofs * (2 * i + 1)), - &env->fp_status); - } - vec_update_fcsr0(env, GETPC()); - } - *Vd = temp; -} - -void HELPER(vffint_s_l)(void *vd, void *vj, void *vk, - CPULoongArchState *env, uint32_t desc) -{ - int i, j, ofs; - VReg temp = {}; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - VReg *Vk = (VReg *)vk; - int oprsz = simd_oprsz(desc); - - ofs = LSX_LEN / 64; - vec_clear_cause(env); - for (i = 0; i < oprsz / 16; i++) { - for (j = 0; j < ofs; j++) { - temp.W(j + ofs * (2 * i + 1)) = int64_to_float32(Vj->D(j + ofs * i), - &env->fp_status); - temp.W(j + ofs * 2 * i) = int64_to_float32(Vk->D(j + ofs * i), - &env->fp_status); - } - vec_update_fcsr0(env, GETPC()); - } - *Vd = temp; -} - -#define VCMPI(NAME, BIT, E, DO_OP) \ -void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ -{ \ - int i; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - typedef __typeof(Vd->E(0)) TD; \ - int oprsz = simd_oprsz(desc); \ - \ - for (i = 0; i < oprsz / (BIT / 8); i++) { \ - 
Vd->E(i) = DO_OP(Vj->E(i), (TD)imm); \ - } \ -} - -VCMPI(vseqi_b, 8, B, VSEQ) -VCMPI(vseqi_h, 16, H, VSEQ) -VCMPI(vseqi_w, 32, W, VSEQ) -VCMPI(vseqi_d, 64, D, VSEQ) -VCMPI(vslei_b, 8, B, VSLE) -VCMPI(vslei_h, 16, H, VSLE) -VCMPI(vslei_w, 32, W, VSLE) -VCMPI(vslei_d, 64, D, VSLE) -VCMPI(vslei_bu, 8, UB, VSLE) -VCMPI(vslei_hu, 16, UH, VSLE) -VCMPI(vslei_wu, 32, UW, VSLE) -VCMPI(vslei_du, 64, UD, VSLE) -VCMPI(vslti_b, 8, B, VSLT) -VCMPI(vslti_h, 16, H, VSLT) -VCMPI(vslti_w, 32, W, VSLT) -VCMPI(vslti_d, 64, D, VSLT) -VCMPI(vslti_bu, 8, UB, VSLT) -VCMPI(vslti_hu, 16, UH, VSLT) -VCMPI(vslti_wu, 32, UW, VSLT) -VCMPI(vslti_du, 64, UD, VSLT) - -static uint64_t vfcmp_common(CPULoongArchState *env, - FloatRelation cmp, uint32_t flags) -{ - uint64_t ret = 0; - - switch (cmp) { - case float_relation_less: - ret = (flags & FCMP_LT); - break; - case float_relation_equal: - ret = (flags & FCMP_EQ); - break; - case float_relation_greater: - ret = (flags & FCMP_GT); - break; - case float_relation_unordered: - ret = (flags & FCMP_UN); - break; - default: - g_assert_not_reached(); - } - - if (ret) { - ret = -1; - } - - return ret; -} - -#define VFCMP(NAME, BIT, E, FN) \ -void HELPER(NAME)(CPULoongArchState *env, uint32_t oprsz, \ - uint32_t vd, uint32_t vj, uint32_t vk, uint32_t flags) \ -{ \ - int i; \ - VReg t; \ - VReg *Vd = &(env->fpr[vd].vreg); \ - VReg *Vj = &(env->fpr[vj].vreg); \ - VReg *Vk = &(env->fpr[vk].vreg); \ - \ - vec_clear_cause(env); \ - for (i = 0; i < oprsz / (BIT / 8); i++) { \ - FloatRelation cmp; \ - cmp = FN(Vj->E(i), Vk->E(i), &env->fp_status); \ - t.E(i) = vfcmp_common(env, cmp, flags); \ - vec_update_fcsr0(env, GETPC()); \ - } \ - *Vd = t; \ -} - -VFCMP(vfcmp_c_s, 32, UW, float32_compare_quiet) -VFCMP(vfcmp_s_s, 32, UW, float32_compare) -VFCMP(vfcmp_c_d, 64, UD, float64_compare_quiet) -VFCMP(vfcmp_s_d, 64, UD, float64_compare) - -void HELPER(vbitseli_b)(void *vd, void *vj, uint64_t imm, uint32_t desc) -{ - int i; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - - for (i = 0; i < simd_oprsz(desc); i++) { - Vd->B(i) = (~Vd->B(i) & Vj->B(i)) | (Vd->B(i) & imm); - } -} - -/* Copy from target/arm/tcg/sve_helper.c */ -static inline bool do_match2(uint64_t n, uint64_t m0, uint64_t m1, int esz) -{ - int bits = 8 << esz; - uint64_t ones = dup_const(esz, 1); - uint64_t signs = ones << (bits - 1); - uint64_t cmp0, cmp1; - - cmp1 = dup_const(esz, n); - cmp0 = cmp1 ^ m0; - cmp1 = cmp1 ^ m1; - cmp0 = (cmp0 - ones) & ~cmp0; - cmp1 = (cmp1 - ones) & ~cmp1; - return (cmp0 | cmp1) & signs; -} - -#define SETANYEQZ(NAME, MO) \ -void HELPER(NAME)(CPULoongArchState *env, \ - uint32_t oprsz, uint32_t cd, uint32_t vj) \ -{ \ - VReg *Vj = &(env->fpr[vj].vreg); \ - \ - env->cf[cd & 0x7] = do_match2(0, Vj->D(0), Vj->D(1), MO); \ - if (oprsz == 32) { \ - env->cf[cd & 0x7] = env->cf[cd & 0x7] || \ - do_match2(0, Vj->D(2), Vj->D(3), MO); \ - } \ -} - -SETANYEQZ(vsetanyeqz_b, MO_8) -SETANYEQZ(vsetanyeqz_h, MO_16) -SETANYEQZ(vsetanyeqz_w, MO_32) -SETANYEQZ(vsetanyeqz_d, MO_64) - -#define SETALLNEZ(NAME, MO) \ -void HELPER(NAME)(CPULoongArchState *env, \ - uint32_t oprsz, uint32_t cd, uint32_t vj) \ -{ \ - VReg *Vj = &(env->fpr[vj].vreg); \ - \ - env->cf[cd & 0x7]= !do_match2(0, Vj->D(0), Vj->D(1), MO); \ - if (oprsz == 32) { \ - env->cf[cd & 0x7] = env->cf[cd & 0x7] && \ - !do_match2(0, Vj->D(2), Vj->D(3), MO); \ - } \ -} - -SETALLNEZ(vsetallnez_b, MO_8) -SETALLNEZ(vsetallnez_h, MO_16) -SETALLNEZ(vsetallnez_w, MO_32) -SETALLNEZ(vsetallnez_d, MO_64) - -#define XVINSVE0(NAME, E, MASK) \ -void HELPER(NAME)(void 
*vd, void *vj, uint64_t imm, uint32_t desc) \ -{ \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - Vd->E(imm & MASK) = Vj->E(0); \ -} - -XVINSVE0(xvinsve0_w, W, 0x7) -XVINSVE0(xvinsve0_d, D, 0x3) - -#define XVPICKVE(NAME, E, BIT, MASK) \ -void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ -{ \ - int i; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - int oprsz = simd_oprsz(desc); \ - \ - Vd->E(0) = Vj->E(imm & MASK); \ - for (i = 1; i < oprsz / (BIT / 8); i++) { \ - Vd->E(i) = 0; \ - } \ -} - -XVPICKVE(xvpickve_w, W, 32, 0x7) -XVPICKVE(xvpickve_d, D, 64, 0x3) - -#define VPACKEV(NAME, BIT, E) \ -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ -{ \ - int i; \ - VReg temp = {}; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - VReg *Vk = (VReg *)vk; \ - int oprsz = simd_oprsz(desc); \ - \ - for (i = 0; i < oprsz / (BIT / 8); i++) { \ - temp.E(2 * i + 1) = Vj->E(2 * i); \ - temp.E(2 *i) = Vk->E(2 * i); \ - } \ - *Vd = temp; \ -} - -VPACKEV(vpackev_b, 16, B) -VPACKEV(vpackev_h, 32, H) -VPACKEV(vpackev_w, 64, W) -VPACKEV(vpackev_d, 128, D) - -#define VPACKOD(NAME, BIT, E) \ -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ -{ \ - int i; \ - VReg temp = {}; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - VReg *Vk = (VReg *)vk; \ - int oprsz = simd_oprsz(desc); \ - \ - for (i = 0; i < oprsz / (BIT / 8); i++) { \ - temp.E(2 * i + 1) = Vj->E(2 * i + 1); \ - temp.E(2 * i) = Vk->E(2 * i + 1); \ - } \ - *Vd = temp; \ -} - -VPACKOD(vpackod_b, 16, B) -VPACKOD(vpackod_h, 32, H) -VPACKOD(vpackod_w, 64, W) -VPACKOD(vpackod_d, 128, D) - -#define VPICKEV(NAME, BIT, E) \ -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ -{ \ - int i, j, ofs; \ - VReg temp = {}; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - VReg *Vk = (VReg *)vk; \ - int oprsz = simd_oprsz(desc); \ - \ - ofs = LSX_LEN / BIT; \ - for (i = 0; i < oprsz / 16; i++) { \ - for (j = 0; j < ofs; j++) { \ - temp.E(j + ofs * (2 * i + 1)) = Vj->E(2 * (j + ofs * i)); \ - temp.E(j + ofs * 2 * i) = Vk->E(2 * (j + ofs * i)); \ - } \ - } \ - *Vd = temp; \ -} - -VPICKEV(vpickev_b, 16, B) -VPICKEV(vpickev_h, 32, H) -VPICKEV(vpickev_w, 64, W) -VPICKEV(vpickev_d, 128, D) - -#define VPICKOD(NAME, BIT, E) \ -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ -{ \ - int i, j, ofs; \ - VReg temp = {}; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - VReg *Vk = (VReg *)vk; \ - int oprsz = simd_oprsz(desc); \ - \ - ofs = LSX_LEN / BIT; \ - for (i = 0; i < oprsz / 16; i++) { \ - for (j = 0; j < ofs; j++) { \ - temp.E(j + ofs * (2 * i + 1)) = Vj->E(2 * (j + ofs * i) + 1); \ - temp.E(j + ofs * 2 * i) = Vk->E(2 * (j + ofs * i) + 1); \ - } \ - } \ - *Vd = temp; \ -} - -VPICKOD(vpickod_b, 16, B) -VPICKOD(vpickod_h, 32, H) -VPICKOD(vpickod_w, 64, W) -VPICKOD(vpickod_d, 128, D) - -#define VILVL(NAME, BIT, E) \ -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ -{ \ - int i, j, ofs; \ - VReg temp = {}; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - VReg *Vk = (VReg *)vk; \ - int oprsz = simd_oprsz(desc); \ - \ - ofs = LSX_LEN / BIT; \ - for (i = 0; i < oprsz / 16; i++) { \ - for (j = 0; j < ofs; j++) { \ - temp.E(2 * (j + ofs * i) + 1) = Vj->E(j + ofs * 2 * i); \ - temp.E(2 * (j + ofs * i)) = Vk->E(j + ofs * 2 * i); \ - } \ - } \ - *Vd = temp; \ -} - -VILVL(vilvl_b, 16, B) -VILVL(vilvl_h, 32, H) -VILVL(vilvl_w, 64, W) -VILVL(vilvl_d, 128, D) - -#define VILVH(NAME, BIT, E) \ -void HELPER(NAME)(void *vd, void *vj, void 
*vk, uint32_t desc) \ -{ \ - int i, j, ofs; \ - VReg temp = {}; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - VReg *Vk = (VReg *)vk; \ - int oprsz = simd_oprsz(desc); \ - \ - ofs = LSX_LEN / BIT; \ - for (i = 0; i < oprsz / 16; i++) { \ - for (j = 0; j < ofs; j++) { \ - temp.E(2 * (j + ofs * i) + 1) = Vj->E(j + ofs * (2 * i + 1)); \ - temp.E(2 * (j + ofs * i)) = Vk->E(j + ofs * (2 * i + 1)); \ - } \ - } \ - *Vd = temp; \ -} - -VILVH(vilvh_b, 16, B) -VILVH(vilvh_h, 32, H) -VILVH(vilvh_w, 64, W) -VILVH(vilvh_d, 128, D) - -void HELPER(vshuf_b)(void *vd, void *vj, void *vk, void *va, uint32_t desc) -{ - int i, j, m; - VReg temp = {}; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - VReg *Vk = (VReg *)vk; - VReg *Va = (VReg *)va; - int oprsz = simd_oprsz(desc); - - m = LSX_LEN / 8; - for (i = 0; i < (oprsz / 16) * m; i++) { - j = i < m ? 0 : 1; - uint64_t k = (uint8_t)Va->B(i) % (2 * m); - temp.B(i) = k < m ? Vk->B(k + j * m): Vj->B(k + (j - 1) * m); - } - *Vd = temp; -} - -#define VSHUF(NAME, BIT, E) \ -void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \ -{ \ - int i, j, m; \ - VReg temp = {}; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - VReg *Vk = (VReg *)vk; \ - int oprsz = simd_oprsz(desc); \ - \ - m = LSX_LEN / BIT; \ - for (i = 0; i < (oprsz / 16) * m; i++) { \ - j = i < m ? 0 : 1; \ - uint64_t k = ((uint8_t)Vd->E(i)) % (2 * m); \ - temp.E(i) = k < m ? Vk->E(k + j * m) : Vj->E(k + (j - 1) * m); \ - } \ - *Vd = temp; \ -} - -VSHUF(vshuf_h, 16, H) -VSHUF(vshuf_w, 32, W) -VSHUF(vshuf_d, 64, D) - -#define VSHUF4I(NAME, BIT, E) \ -void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ -{ \ - int i, j, max; \ - VReg temp = {}; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - int oprsz = simd_oprsz(desc); \ - \ - max = LSX_LEN / BIT; \ - for (i = 0; i < oprsz / (BIT / 8); i++) { \ - j = i < max ? 1 : 2; \ - temp.E(i) = Vj->E(SHF_POS(i - ((j -1)* max), imm) + (j - 1) * max); \ - } \ - *Vd = temp; \ -} - -VSHUF4I(vshuf4i_b, 8, B) -VSHUF4I(vshuf4i_h, 16, H) -VSHUF4I(vshuf4i_w, 32, W) - -void HELPER(vshuf4i_d)(void *vd, void *vj, uint64_t imm, uint32_t desc) -{ - int i; - VReg temp = {}; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - int oprsz = simd_oprsz(desc); - - for (i = 0; i < oprsz / 16; i++) { - temp.D(2 * i) = (imm & 2 ? Vj : Vd)->D((imm & 1) + 2 * i); - temp.D(2 * i + 1) = (imm & 8 ? 
Vj : Vd)->D(((imm >> 2) & 1) + 2 * i); - } - *Vd = temp; -} - -void HELPER(vperm_w)(void *vd, void *vj, void *vk, uint32_t desc) -{ - int i, m; - VReg temp = {}; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - VReg *Vk = (VReg *)vk; - - m = LASX_LEN / 32; - for (i = 0; i < m ; i++) { - uint64_t k = (uint8_t)Vk->W(i) % 8; - temp.W(i) = Vj->W(k); - } - *Vd = temp; -} - -void HELPER(vpermi_w)(void *vd, void *vj, uint64_t imm, uint32_t desc) -{ - int i; - VReg temp = {}; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - int oprsz = simd_oprsz(desc); - - for (i = 0; i < oprsz / 16; i++) { - temp.W(4 * i) = Vj->W((imm & 0x3) + 4 * i); - temp.W(4 * i + 1) = Vj->W(((imm >> 2) & 0x3) + 4 * i); - temp.W(4 * i + 2) = Vd->W(((imm >> 4) & 0x3) + 4 * i); - temp.W(4 * i + 3) = Vd->W(((imm >> 6) & 0x3) + 4 * i); - } - *Vd = temp; -} - -void HELPER(vpermi_d)(void *vd, void *vj, uint64_t imm, uint32_t desc) -{ - VReg temp = {}; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - - temp.D(0) = Vj->D(imm & 0x3); - temp.D(1) = Vj->D((imm >> 2) & 0x3); - temp.D(2) = Vj->D((imm >> 4) & 0x3); - temp.D(3) = Vj->D((imm >> 6) & 0x3); - *Vd = temp; -} - -void HELPER(vpermi_q)(void *vd, void *vj, uint64_t imm, uint32_t desc) -{ - int i; - VReg temp; - VReg *Vd = (VReg *)vd; - VReg *Vj = (VReg *)vj; - - for (i = 0; i < 2; i++, imm >>= 4) { - temp.Q(i) = (imm & 2 ? Vd: Vj)->Q(imm & 1); - } - *Vd = temp; -} - -#define VEXTRINS(NAME, BIT, E, MASK) \ -void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \ -{ \ - int i, ins, extr, max; \ - VReg *Vd = (VReg *)vd; \ - VReg *Vj = (VReg *)vj; \ - int oprsz = simd_oprsz(desc); \ - \ - max = LSX_LEN / BIT; \ - ins = (imm >> 4) & MASK; \ - extr = imm & MASK; \ - for (i = 0; i < oprsz / 16; i++) { \ - Vd->E(ins + i * max) = Vj->E(extr + i * max); \ - } \ -} - -VEXTRINS(vextrins_b, 8, B, 0xf) -VEXTRINS(vextrins_h, 16, H, 0x7) -VEXTRINS(vextrins_w, 32, W, 0x3) -VEXTRINS(vextrins_d, 64, D, 0x1)
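[Editor's note, not part of the patch: the VEXTRINS helpers deleted above copy one element of Vj into one element of Vd, independently in each 128-bit LSX lane, with the source and destination indices packed into the immediate (low nibble = extract index, high nibble = insert index). The following standalone sketch illustrates that behaviour for the byte variant; it models a vector as a plain byte array and uses a made-up name (lsx_vextrins_b_demo), so it is an assumption-laden illustration rather than the QEMU implementation.]

```c
/*
 * Illustrative sketch only (not the QEMU source): per-lane element
 * copy as performed by the vextrins.b helper.  A "vector" is modelled
 * as a flat array of bytes grouped into 128-bit (16-byte) lanes.
 */
#include <stdint.h>
#include <stdio.h>

#define LANE_BYTES 16   /* one 128-bit LSX lane */

/* For each 128-bit lane, copy element (imm & 0xf) of vj into
 * element ((imm >> 4) & 0xf) of vd; oprsz is the vector size in bytes. */
static void lsx_vextrins_b_demo(uint8_t *vd, const uint8_t *vj,
                                unsigned imm, int oprsz)
{
    int ins = (imm >> 4) & 0xf;
    int ext = imm & 0xf;

    for (int i = 0; i < oprsz / LANE_BYTES; i++) {
        vd[ins + i * LANE_BYTES] = vj[ext + i * LANE_BYTES];
    }
}

int main(void)
{
    uint8_t vd[16] = { 0 }, vj[16];

    for (int i = 0; i < 16; i++) {
        vj[i] = (uint8_t)i;
    }
    lsx_vextrins_b_demo(vd, vj, 0x3a, 16);   /* vd[3] = vj[10] */
    printf("vd[3] = %u\n", vd[3]);           /* prints 10 */
    return 0;
}
```

The 32- and 64-bit variants follow the same pattern with narrower index masks (0x3 and 0x1), matching the element count per 128-bit lane.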