+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * QEMU LoongArch constant timer support
- *
- * Copyright (c) 2021 Loongson Technology Corporation Limited
- */
-
-#include "qemu/osdep.h"
-#include "qemu/timer.h"
-#include "cpu.h"
-#include "internals.h"
-#include "cpu-csr.h"
-
-#define TIMER_PERIOD 10 /* 10 ns period for 100 MHz frequency */
-#define CONSTANT_TIMER_TICK_MASK 0xfffffffffffcUL
-#define CONSTANT_TIMER_ENABLE 0x1UL
-
-uint64_t cpu_loongarch_get_constant_timer_counter(LoongArchCPU *cpu)
-{
- return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) / TIMER_PERIOD;
-}
-
-uint64_t cpu_loongarch_get_constant_timer_ticks(LoongArchCPU *cpu)
-{
- uint64_t now, expire;
-
- now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
- expire = timer_expire_time_ns(&cpu->timer);
-
- return (expire - now) / TIMER_PERIOD;
-}
-
-void cpu_loongarch_store_constant_timer_config(LoongArchCPU *cpu,
- uint64_t value)
-{
- CPULoongArchState *env = &cpu->env;
- uint64_t now, next;
-
- env->CSR_TCFG = value;
- if (value & CONSTANT_TIMER_ENABLE) {
- now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
- next = now + (value & CONSTANT_TIMER_TICK_MASK) * TIMER_PERIOD;
- timer_mod(&cpu->timer, next);
- } else {
- timer_del(&cpu->timer);
- }
-}
-
-void loongarch_constant_timer_cb(void *opaque)
-{
- LoongArchCPU *cpu = opaque;
- CPULoongArchState *env = &cpu->env;
- uint64_t now, next;
-
- if (FIELD_EX64(env->CSR_TCFG, CSR_TCFG, PERIODIC)) {
- now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
- next = now + (env->CSR_TCFG & CONSTANT_TIMER_TICK_MASK) * TIMER_PERIOD;
- timer_mod(&cpu->timer, next);
- } else {
- env->CSR_TCFG = FIELD_DP64(env->CSR_TCFG, CSR_TCFG, EN, 0);
- }
-
- loongarch_cpu_set_irq(opaque, IRQ_TIMER, 1);
-}
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * LoongArch emulation helpers for CSRs
- *
- * Copyright (c) 2021 Loongson Technology Corporation Limited
- */
-
-#include "qemu/osdep.h"
-#include "qemu/main-loop.h"
-#include "cpu.h"
-#include "internals.h"
-#include "qemu/host-utils.h"
-#include "exec/helper-proto.h"
-#include "exec/exec-all.h"
-#include "exec/cpu_ldst.h"
-#include "hw/irq.h"
-#include "cpu-csr.h"
-
-target_ulong helper_csrrd_pgd(CPULoongArchState *env)
-{
- int64_t v;
-
- if (env->CSR_TLBRERA & 0x1) {
- v = env->CSR_TLBRBADV;
- } else {
- v = env->CSR_BADV;
- }
-
- if ((v >> 63) & 0x1) {
- v = env->CSR_PGDH;
- } else {
- v = env->CSR_PGDL;
- }
-
- return v;
-}
-
-target_ulong helper_csrrd_cpuid(CPULoongArchState *env)
-{
- LoongArchCPU *lac = env_archcpu(env);
-
- env->CSR_CPUID = CPU(lac)->cpu_index;
-
- return env->CSR_CPUID;
-}
-
-target_ulong helper_csrrd_tval(CPULoongArchState *env)
-{
- LoongArchCPU *cpu = env_archcpu(env);
-
- return cpu_loongarch_get_constant_timer_ticks(cpu);
-}
-
-target_ulong helper_csrwr_estat(CPULoongArchState *env, target_ulong val)
-{
- int64_t old_v = env->CSR_ESTAT;
-
- /* Only IS[1:0] can be written */
- env->CSR_ESTAT = deposit64(env->CSR_ESTAT, 0, 2, val);
-
- return old_v;
-}
-
-target_ulong helper_csrwr_asid(CPULoongArchState *env, target_ulong val)
-{
- int64_t old_v = env->CSR_ASID;
-
- /* Only the ASID field of CSR_ASID can be written */
- env->CSR_ASID = deposit64(env->CSR_ASID, 0, 10, val);
- if (old_v != env->CSR_ASID) {
- tlb_flush(env_cpu(env));
- }
- return old_v;
-}
-
-target_ulong helper_csrwr_tcfg(CPULoongArchState *env, target_ulong val)
-{
- LoongArchCPU *cpu = env_archcpu(env);
- int64_t old_v = env->CSR_TCFG;
-
- cpu_loongarch_store_constant_timer_config(cpu, val);
-
- return old_v;
-}
-
-target_ulong helper_csrwr_ticlr(CPULoongArchState *env, target_ulong val)
-{
- LoongArchCPU *cpu = env_archcpu(env);
- int64_t old_v = 0;
-
- if (val & 0x1) {
- qemu_mutex_lock_iothread();
- loongarch_cpu_set_irq(cpu, IRQ_TIMER, 0);
- qemu_mutex_unlock_iothread();
- }
- return old_v;
-}
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * LoongArch floating point emulation helpers for QEMU
- *
- * Copyright (c) 2021 Loongson Technology Corporation Limited
- */
-
-#include "qemu/osdep.h"
-#include "cpu.h"
-#include "exec/helper-proto.h"
-#include "exec/exec-all.h"
-#include "exec/cpu_ldst.h"
-#include "fpu/softfloat.h"
-#include "internals.h"
-
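-/*
- * Single-precision results are kept NaN-boxed in the 64-bit FP registers:
- * the value occupies the low 32 bits and the high 32 bits are set to ones.
- */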
-static inline uint64_t nanbox_s(float32 fp)
-{
- return fp | MAKE_64BIT_MASK(32, 32);
-}
-
-/* Convert the LoongArch rounding mode in fcsr0 to the IEEE softfloat rounding mode */
-static const FloatRoundMode ieee_rm[4] = {
- float_round_nearest_even,
- float_round_to_zero,
- float_round_up,
- float_round_down
-};
-
-void restore_fp_status(CPULoongArchState *env)
-{
- set_float_rounding_mode(ieee_rm[(env->fcsr0 >> FCSR0_RM) & 0x3],
- &env->fp_status);
- set_flush_to_zero(0, &env->fp_status);
-}
-
-int ieee_ex_to_loongarch(int xcpt)
-{
- int ret = 0;
- if (xcpt & float_flag_invalid) {
- ret |= FP_INVALID;
- }
- if (xcpt & float_flag_overflow) {
- ret |= FP_OVERFLOW;
- }
- if (xcpt & float_flag_underflow) {
- ret |= FP_UNDERFLOW;
- }
- if (xcpt & float_flag_divbyzero) {
- ret |= FP_DIV0;
- }
- if (xcpt & float_flag_inexact) {
- ret |= FP_INEXACT;
- }
- return ret;
-}
-
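-/*
- * Fold the softfloat exception flags accumulated in fp_status (minus @mask)
- * into the fcsr0 CAUSE field; raise EXCCODE_FPE if an enabled exception is
- * pending, otherwise accumulate the flags into the FLAGS field.
- */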
-static void update_fcsr0_mask(CPULoongArchState *env, uintptr_t pc, int mask)
-{
- int flags = get_float_exception_flags(&env->fp_status);
-
- set_float_exception_flags(0, &env->fp_status);
-
- flags &= ~mask;
-
- if (!flags) {
- SET_FP_CAUSE(env->fcsr0, flags);
- return;
- } else {
- flags = ieee_ex_to_loongarch(flags);
- SET_FP_CAUSE(env->fcsr0, flags);
- }
-
- if (GET_FP_ENABLES(env->fcsr0) & flags) {
- do_raise_exception(env, EXCCODE_FPE, pc);
- } else {
- UPDATE_FP_FLAGS(env->fcsr0, flags);
- }
-}
-
-static void update_fcsr0(CPULoongArchState *env, uintptr_t pc)
-{
- update_fcsr0_mask(env, pc, 0);
-}
-
-uint64_t helper_fadd_s(CPULoongArchState *env, uint64_t fj, uint64_t fk)
-{
- uint64_t fd;
-
- fd = nanbox_s(float32_add((uint32_t)fj, (uint32_t)fk, &env->fp_status));
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_fadd_d(CPULoongArchState *env, uint64_t fj, uint64_t fk)
-{
- uint64_t fd;
-
- fd = float64_add(fj, fk, &env->fp_status);
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_fsub_s(CPULoongArchState *env, uint64_t fj, uint64_t fk)
-{
- uint64_t fd;
-
- fd = nanbox_s(float32_sub((uint32_t)fj, (uint32_t)fk, &env->fp_status));
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_fsub_d(CPULoongArchState *env, uint64_t fj, uint64_t fk)
-{
- uint64_t fd;
-
- fd = float64_sub(fj, fk, &env->fp_status);
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_fmul_s(CPULoongArchState *env, uint64_t fj, uint64_t fk)
-{
- uint64_t fd;
-
- fd = nanbox_s(float32_mul((uint32_t)fj, (uint32_t)fk, &env->fp_status));
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_fmul_d(CPULoongArchState *env, uint64_t fj, uint64_t fk)
-{
- uint64_t fd;
-
- fd = float64_mul(fj, fk, &env->fp_status);
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_fdiv_s(CPULoongArchState *env, uint64_t fj, uint64_t fk)
-{
- uint64_t fd;
-
- fd = nanbox_s(float32_div((uint32_t)fj, (uint32_t)fk, &env->fp_status));
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_fdiv_d(CPULoongArchState *env, uint64_t fj, uint64_t fk)
-{
- uint64_t fd;
-
- fd = float64_div(fj, fk, &env->fp_status);
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_fmax_s(CPULoongArchState *env, uint64_t fj, uint64_t fk)
-{
- uint64_t fd;
-
- fd = nanbox_s(float32_maxnum((uint32_t)fj, (uint32_t)fk, &env->fp_status));
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_fmax_d(CPULoongArchState *env, uint64_t fj, uint64_t fk)
-{
- uint64_t fd;
-
- fd = float64_maxnum(fj, fk, &env->fp_status);
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_fmin_s(CPULoongArchState *env, uint64_t fj, uint64_t fk)
-{
- uint64_t fd;
-
- fd = nanbox_s(float32_minnum((uint32_t)fj, (uint32_t)fk, &env->fp_status));
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_fmin_d(CPULoongArchState *env, uint64_t fj, uint64_t fk)
-{
- uint64_t fd;
-
- fd = float64_minnum(fj, fk, &env->fp_status);
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_fmaxa_s(CPULoongArchState *env, uint64_t fj, uint64_t fk)
-{
- uint64_t fd;
-
- fd = nanbox_s(float32_maxnummag((uint32_t)fj,
- (uint32_t)fk, &env->fp_status));
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_fmaxa_d(CPULoongArchState *env, uint64_t fj, uint64_t fk)
-{
- uint64_t fd;
-
- fd = float64_maxnummag(fj, fk, &env->fp_status);
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_fmina_s(CPULoongArchState *env, uint64_t fj, uint64_t fk)
-{
- uint64_t fd;
-
- fd = nanbox_s(float32_minnummag((uint32_t)fj,
- (uint32_t)fk, &env->fp_status));
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_fmina_d(CPULoongArchState *env, uint64_t fj, uint64_t fk)
-{
- uint64_t fd;
-
- fd = float64_minnummag(fj, fk, &env->fp_status);
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_fscaleb_s(CPULoongArchState *env, uint64_t fj, uint64_t fk)
-{
- uint64_t fd;
- int32_t n = (int32_t)fk;
-
- fd = nanbox_s(float32_scalbn((uint32_t)fj,
- n > 0x200 ? 0x200 :
- n < -0x200 ? -0x200 : n,
- &env->fp_status));
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_fscaleb_d(CPULoongArchState *env, uint64_t fj, uint64_t fk)
-{
- uint64_t fd;
- int64_t n = (int64_t)fk;
-
- fd = float64_scalbn(fj,
- n > 0x1000 ? 0x1000 :
- n < -0x1000 ? -0x1000 : n,
- &env->fp_status);
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_fsqrt_s(CPULoongArchState *env, uint64_t fj)
-{
- uint64_t fd;
-
- fd = nanbox_s(float32_sqrt((uint32_t)fj, &env->fp_status));
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_fsqrt_d(CPULoongArchState *env, uint64_t fj)
-{
- uint64_t fd;
-
- fd = float64_sqrt(fj, &env->fp_status);
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_frecip_s(CPULoongArchState *env, uint64_t fj)
-{
- uint64_t fd;
-
- fd = nanbox_s(float32_div(float32_one, (uint32_t)fj, &env->fp_status));
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_frecip_d(CPULoongArchState *env, uint64_t fj)
-{
- uint64_t fd;
-
- fd = float64_div(float64_one, fj, &env->fp_status);
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_frsqrt_s(CPULoongArchState *env, uint64_t fj)
-{
- uint64_t fd;
- uint32_t fp;
-
- fp = float32_sqrt((uint32_t)fj, &env->fp_status);
- fd = nanbox_s(float32_div(float32_one, fp, &env->fp_status));
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_frsqrt_d(CPULoongArchState *env, uint64_t fj)
-{
- uint64_t fp, fd;
-
- fp = float64_sqrt(fj, &env->fp_status);
- fd = float64_div(float64_one, fp, &env->fp_status);
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_flogb_s(CPULoongArchState *env, uint64_t fj)
-{
- uint64_t fd;
- uint32_t fp;
- float_status *status = &env->fp_status;
- FloatRoundMode old_mode = get_float_rounding_mode(status);
-
- set_float_rounding_mode(float_round_down, status);
- fp = float32_log2((uint32_t)fj, status);
- fd = nanbox_s(float32_round_to_int(fp, status));
- set_float_rounding_mode(old_mode, status);
- update_fcsr0_mask(env, GETPC(), float_flag_inexact);
- return fd;
-}
-
-uint64_t helper_flogb_d(CPULoongArchState *env, uint64_t fj)
-{
- uint64_t fd;
- float_status *status = &env->fp_status;
- FloatRoundMode old_mode = get_float_rounding_mode(status);
-
- set_float_rounding_mode(float_round_down, status);
- fd = float64_log2(fj, status);
- fd = float64_round_to_int(fd, status);
- set_float_rounding_mode(old_mode, status);
- update_fcsr0_mask(env, GETPC(), float_flag_inexact);
- return fd;
-}
-
-uint64_t helper_fclass_s(CPULoongArchState *env, uint64_t fj)
-{
- float32 f = fj;
- bool sign = float32_is_neg(f);
-
- if (float32_is_infinity(f)) {
- return sign ? 1 << 2 : 1 << 6;
- } else if (float32_is_zero(f)) {
- return sign ? 1 << 5 : 1 << 9;
- } else if (float32_is_zero_or_denormal(f)) {
- return sign ? 1 << 4 : 1 << 8;
- } else if (float32_is_any_nan(f)) {
- float_status s = { }; /* for snan_bit_is_one */
- return float32_is_quiet_nan(f, &s) ? 1 << 1 : 1 << 0;
- } else {
- return sign ? 1 << 3 : 1 << 7;
- }
-}
-
-uint64_t helper_fclass_d(CPULoongArchState *env, uint64_t fj)
-{
- float64 f = fj;
- bool sign = float64_is_neg(f);
-
- if (float64_is_infinity(f)) {
- return sign ? 1 << 2 : 1 << 6;
- } else if (float64_is_zero(f)) {
- return sign ? 1 << 5 : 1 << 9;
- } else if (float64_is_zero_or_denormal(f)) {
- return sign ? 1 << 4 : 1 << 8;
- } else if (float64_is_any_nan(f)) {
- float_status s = { }; /* for snan_bit_is_one */
- return float64_is_quiet_nan(f, &s) ? 1 << 1 : 1 << 0;
- } else {
- return sign ? 1 << 3 : 1 << 7;
- }
-}
-
-uint64_t helper_fmuladd_s(CPULoongArchState *env, uint64_t fj,
- uint64_t fk, uint64_t fa, uint32_t flag)
-{
- uint64_t fd;
-
- fd = nanbox_s(float32_muladd((uint32_t)fj, (uint32_t)fk,
- (uint32_t)fa, flag, &env->fp_status));
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_fmuladd_d(CPULoongArchState *env, uint64_t fj,
- uint64_t fk, uint64_t fa, uint32_t flag)
-{
- uint64_t fd;
-
- fd = float64_muladd(fj, fk, fa, flag, &env->fp_status);
- update_fcsr0(env, GETPC());
- return fd;
-}
-
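-/*
- * Return whether the softfloat comparison result satisfies any of the
- * FCMP_* conditions requested in @flags.
- */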
-static uint64_t fcmp_common(CPULoongArchState *env, FloatRelation cmp,
- uint32_t flags)
-{
- bool ret;
-
- switch (cmp) {
- case float_relation_less:
- ret = (flags & FCMP_LT);
- break;
- case float_relation_equal:
- ret = (flags & FCMP_EQ);
- break;
- case float_relation_greater:
- ret = (flags & FCMP_GT);
- break;
- case float_relation_unordered:
- ret = (flags & FCMP_UN);
- break;
- default:
- g_assert_not_reached();
- }
- update_fcsr0(env, GETPC());
-
- return ret;
-}
-
-/* fcmp_cXXX_s */
-uint64_t helper_fcmp_c_s(CPULoongArchState *env, uint64_t fj,
- uint64_t fk, uint32_t flags)
-{
- FloatRelation cmp = float32_compare_quiet((uint32_t)fj,
- (uint32_t)fk, &env->fp_status);
- return fcmp_common(env, cmp, flags);
-}
-
-/* fcmp_sXXX_s */
-uint64_t helper_fcmp_s_s(CPULoongArchState *env, uint64_t fj,
- uint64_t fk, uint32_t flags)
-{
- FloatRelation cmp = float32_compare((uint32_t)fj,
- (uint32_t)fk, &env->fp_status);
- return fcmp_common(env, cmp, flags);
-}
-
-/* fcmp_cXXX_d */
-uint64_t helper_fcmp_c_d(CPULoongArchState *env, uint64_t fj,
- uint64_t fk, uint32_t flags)
-{
- FloatRelation cmp = float64_compare_quiet(fj, fk, &env->fp_status);
- return fcmp_common(env, cmp, flags);
-}
-
-/* fcmp_sXXX_d */
-uint64_t helper_fcmp_s_d(CPULoongArchState *env, uint64_t fj,
- uint64_t fk, uint32_t flags)
-{
- FloatRelation cmp = float64_compare(fj, fk, &env->fp_status);
- return fcmp_common(env, cmp, flags);
-}
-
-/* floating point conversion */
-uint64_t helper_fcvt_s_d(CPULoongArchState *env, uint64_t fj)
-{
- uint64_t fd;
-
- fd = nanbox_s(float64_to_float32(fj, &env->fp_status));
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_fcvt_d_s(CPULoongArchState *env, uint64_t fj)
-{
- uint64_t fd;
-
- fd = float32_to_float64((uint32_t)fj, &env->fp_status);
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_ffint_s_w(CPULoongArchState *env, uint64_t fj)
-{
- uint64_t fd;
-
- fd = nanbox_s(int32_to_float32((int32_t)fj, &env->fp_status));
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_ffint_s_l(CPULoongArchState *env, uint64_t fj)
-{
- uint64_t fd;
-
- fd = nanbox_s(int64_to_float32(fj, &env->fp_status));
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_ffint_d_w(CPULoongArchState *env, uint64_t fj)
-{
- uint64_t fd;
-
- fd = int32_to_float64((int32_t)fj, &env->fp_status);
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_ffint_d_l(CPULoongArchState *env, uint64_t fj)
-{
- uint64_t fd;
-
- fd = int64_to_float64(fj, &env->fp_status);
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_frint_s(CPULoongArchState *env, uint64_t fj)
-{
- uint64_t fd;
-
- fd = (uint64_t)(float32_round_to_int((uint32_t)fj, &env->fp_status));
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_frint_d(CPULoongArchState *env, uint64_t fj)
-{
- uint64_t fd;
-
- fd = float64_round_to_int(fj, &env->fp_status);
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_ftintrm_l_d(CPULoongArchState *env, uint64_t fj)
-{
- uint64_t fd;
- FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status);
-
- set_float_rounding_mode(float_round_down, &env->fp_status);
- fd = float64_to_int64(fj, &env->fp_status);
- set_float_rounding_mode(old_mode, &env->fp_status);
-
- if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) {
- if (float64_is_any_nan(fj)) {
- fd = 0;
- }
- }
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_ftintrm_l_s(CPULoongArchState *env, uint64_t fj)
-{
- uint64_t fd;
- FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status);
-
- set_float_rounding_mode(float_round_down, &env->fp_status);
- fd = float32_to_int64((uint32_t)fj, &env->fp_status);
- set_float_rounding_mode(old_mode, &env->fp_status);
-
- if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) {
- if (float32_is_any_nan((uint32_t)fj)) {
- fd = 0;
- }
- }
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_ftintrm_w_d(CPULoongArchState *env, uint64_t fj)
-{
- uint64_t fd;
- FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status);
-
- set_float_rounding_mode(float_round_down, &env->fp_status);
- fd = (uint64_t)float64_to_int32(fj, &env->fp_status);
- set_float_rounding_mode(old_mode, &env->fp_status);
-
- if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) {
- if (float64_is_any_nan(fj)) {
- fd = 0;
- }
- }
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_ftintrm_w_s(CPULoongArchState *env, uint64_t fj)
-{
- uint64_t fd;
- FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status);
-
- set_float_rounding_mode(float_round_down, &env->fp_status);
- fd = (uint64_t)float32_to_int32((uint32_t)fj, &env->fp_status);
- set_float_rounding_mode(old_mode, &env->fp_status);
-
- if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) {
- if (float32_is_any_nan((uint32_t)fj)) {
- fd = 0;
- }
- }
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_ftintrp_l_d(CPULoongArchState *env, uint64_t fj)
-{
- uint64_t fd;
- FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status);
-
- set_float_rounding_mode(float_round_up, &env->fp_status);
- fd = float64_to_int64(fj, &env->fp_status);
- set_float_rounding_mode(old_mode, &env->fp_status);
-
- if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) {
- if (float64_is_any_nan(fj)) {
- fd = 0;
- }
- }
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_ftintrp_l_s(CPULoongArchState *env, uint64_t fj)
-{
- uint64_t fd;
- FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status);
-
- set_float_rounding_mode(float_round_up, &env->fp_status);
- fd = float32_to_int64((uint32_t)fj, &env->fp_status);
- set_float_rounding_mode(old_mode, &env->fp_status);
-
- if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) {
- if (float32_is_any_nan((uint32_t)fj)) {
- fd = 0;
- }
- }
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_ftintrp_w_d(CPULoongArchState *env, uint64_t fj)
-{
- uint64_t fd;
- FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status);
-
- set_float_rounding_mode(float_round_up, &env->fp_status);
- fd = (uint64_t)float64_to_int32(fj, &env->fp_status);
- set_float_rounding_mode(old_mode, &env->fp_status);
-
- if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) {
- if (float64_is_any_nan(fj)) {
- fd = 0;
- }
- }
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_ftintrp_w_s(CPULoongArchState *env, uint64_t fj)
-{
- uint64_t fd;
- FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status);
-
- set_float_rounding_mode(float_round_up, &env->fp_status);
- fd = (uint64_t)float32_to_int32((uint32_t)fj, &env->fp_status);
- set_float_rounding_mode(old_mode, &env->fp_status);
-
- if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) {
- if (float32_is_any_nan((uint32_t)fj)) {
- fd = 0;
- }
- }
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_ftintrz_l_d(CPULoongArchState *env, uint64_t fj)
-{
- uint64_t fd;
- FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status);
-
- fd = float64_to_int64_round_to_zero(fj, &env->fp_status);
- set_float_rounding_mode(old_mode, &env->fp_status);
-
- if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) {
- if (float64_is_any_nan(fj)) {
- fd = 0;
- }
- }
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_ftintrz_l_s(CPULoongArchState *env, uint64_t fj)
-{
- uint64_t fd;
- FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status);
-
- fd = float32_to_int64_round_to_zero((uint32_t)fj, &env->fp_status);
- set_float_rounding_mode(old_mode, &env->fp_status);
-
- if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) {
- if (float32_is_any_nan((uint32_t)fj)) {
- fd = 0;
- }
- }
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_ftintrz_w_d(CPULoongArchState *env, uint64_t fj)
-{
- uint64_t fd;
- FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status);
-
- fd = (uint64_t)float64_to_int32_round_to_zero(fj, &env->fp_status);
- set_float_rounding_mode(old_mode, &env->fp_status);
-
- if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) {
- if (float64_is_any_nan(fj)) {
- fd = 0;
- }
- }
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_ftintrz_w_s(CPULoongArchState *env, uint64_t fj)
-{
- uint32_t fd;
- FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status);
-
- fd = float32_to_int32_round_to_zero((uint32_t)fj, &env->fp_status);
- set_float_rounding_mode(old_mode, &env->fp_status);
-
- if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) {
- if (float32_is_any_nan((uint32_t)fj)) {
- fd = 0;
- }
- }
- update_fcsr0(env, GETPC());
- return (uint64_t)fd;
-}
-
-uint64_t helper_ftintrne_l_d(CPULoongArchState *env, uint64_t fj)
-{
- uint64_t fd;
- FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status);
-
- set_float_rounding_mode(float_round_nearest_even, &env->fp_status);
- fd = float64_to_int64(fj, &env->fp_status);
- set_float_rounding_mode(old_mode, &env->fp_status);
-
- if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) {
- if (float64_is_any_nan(fj)) {
- fd = 0;
- }
- }
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_ftintrne_l_s(CPULoongArchState *env, uint64_t fj)
-{
- uint64_t fd;
- FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status);
-
- set_float_rounding_mode(float_round_nearest_even, &env->fp_status);
- fd = float32_to_int64((uint32_t)fj, &env->fp_status);
- set_float_rounding_mode(old_mode, &env->fp_status);
-
- if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) {
- if (float32_is_any_nan((uint32_t)fj)) {
- fd = 0;
- }
- }
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_ftintrne_w_d(CPULoongArchState *env, uint64_t fj)
-{
- uint64_t fd;
- FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status);
-
- set_float_rounding_mode(float_round_nearest_even, &env->fp_status);
- fd = (uint64_t)float64_to_int32(fj, &env->fp_status);
- set_float_rounding_mode(old_mode, &env->fp_status);
-
- if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) {
- if (float64_is_any_nan(fj)) {
- fd = 0;
- }
- }
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_ftintrne_w_s(CPULoongArchState *env, uint64_t fj)
-{
- uint32_t fd;
- FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status);
-
- set_float_rounding_mode(float_round_nearest_even, &env->fp_status);
- fd = float32_to_int32((uint32_t)fj, &env->fp_status);
- set_float_rounding_mode(old_mode, &env->fp_status);
-
- if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) {
- if (float32_is_any_nan((uint32_t)fj)) {
- fd = 0;
- }
- }
- update_fcsr0(env, GETPC());
- return (uint64_t)fd;
-}
-
-uint64_t helper_ftint_l_d(CPULoongArchState *env, uint64_t fj)
-{
- uint64_t fd;
-
- fd = float64_to_int64(fj, &env->fp_status);
- if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) {
- if (float64_is_any_nan(fj)) {
- fd = 0;
- }
- }
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_ftint_l_s(CPULoongArchState *env, uint64_t fj)
-{
- uint64_t fd;
-
- fd = float32_to_int64((uint32_t)fj, &env->fp_status);
- if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) {
- if (float32_is_any_nan((uint32_t)fj)) {
- fd = 0;
- }
- }
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_ftint_w_s(CPULoongArchState *env, uint64_t fj)
-{
- uint64_t fd;
-
- fd = (uint64_t)float32_to_int32((uint32_t)fj, &env->fp_status);
- if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) {
- if (float32_is_any_nan((uint32_t)fj)) {
- fd = 0;
- }
- }
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-uint64_t helper_ftint_w_d(CPULoongArchState *env, uint64_t fj)
-{
- uint64_t fd;
-
- fd = (uint64_t)float64_to_int32(fj, &env->fp_status);
- if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) {
- if (float64_is_any_nan(fj)) {
- fd = 0;
- }
- }
- update_fcsr0(env, GETPC());
- return fd;
-}
-
-void helper_set_rounding_mode(CPULoongArchState *env)
-{
- set_float_rounding_mode(ieee_rm[(env->fcsr0 >> FCSR0_RM) & 0x3],
- &env->fp_status);
-}
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (c) 2021 Loongson Technology Corporation Limited
- */
-
-static bool gen_rrr(DisasContext *ctx, arg_rrr *a,
- DisasExtend src1_ext, DisasExtend src2_ext,
- DisasExtend dst_ext, void (*func)(TCGv, TCGv, TCGv))
-{
- TCGv dest = gpr_dst(ctx, a->rd, dst_ext);
- TCGv src1 = gpr_src(ctx, a->rj, src1_ext);
- TCGv src2 = gpr_src(ctx, a->rk, src2_ext);
-
- func(dest, src1, src2);
- gen_set_gpr(a->rd, dest, dst_ext);
-
- return true;
-}
-
-static bool gen_rri_v(DisasContext *ctx, arg_rr_i *a,
- DisasExtend src_ext, DisasExtend dst_ext,
- void (*func)(TCGv, TCGv, TCGv))
-{
- TCGv dest = gpr_dst(ctx, a->rd, dst_ext);
- TCGv src1 = gpr_src(ctx, a->rj, src_ext);
- TCGv src2 = tcg_constant_tl(a->imm);
-
- func(dest, src1, src2);
- gen_set_gpr(a->rd, dest, dst_ext);
-
- return true;
-}
-
-static bool gen_rri_c(DisasContext *ctx, arg_rr_i *a,
- DisasExtend src_ext, DisasExtend dst_ext,
- void (*func)(TCGv, TCGv, target_long))
-{
- TCGv dest = gpr_dst(ctx, a->rd, dst_ext);
- TCGv src1 = gpr_src(ctx, a->rj, src_ext);
-
- func(dest, src1, a->imm);
- gen_set_gpr(a->rd, dest, dst_ext);
-
- return true;
-}
-
-static bool gen_rrr_sa(DisasContext *ctx, arg_rrr_sa *a,
- DisasExtend src_ext, DisasExtend dst_ext,
- void (*func)(TCGv, TCGv, TCGv, target_long))
-{
- TCGv dest = gpr_dst(ctx, a->rd, dst_ext);
- TCGv src1 = gpr_src(ctx, a->rj, src_ext);
- TCGv src2 = gpr_src(ctx, a->rk, src_ext);
-
- func(dest, src1, src2, a->sa);
- gen_set_gpr(a->rd, dest, dst_ext);
-
- return true;
-}
-
-static bool trans_lu12i_w(DisasContext *ctx, arg_lu12i_w *a)
-{
- TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
-
- tcg_gen_movi_tl(dest, a->imm << 12);
- gen_set_gpr(a->rd, dest, EXT_NONE);
-
- return true;
-}
-
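-/*
- * Compute a PC-relative value with the instruction-specific @func and
- * write it to rd.
- */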
-static bool gen_pc(DisasContext *ctx, arg_r_i *a,
- target_ulong (*func)(target_ulong, int))
-{
- TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
- target_ulong addr = make_address_pc(ctx, func(ctx->base.pc_next, a->imm));
-
- tcg_gen_movi_tl(dest, addr);
- gen_set_gpr(a->rd, dest, EXT_NONE);
-
- return true;
-}
-
-static void gen_slt(TCGv dest, TCGv src1, TCGv src2)
-{
- tcg_gen_setcond_tl(TCG_COND_LT, dest, src1, src2);
-}
-
-static void gen_sltu(TCGv dest, TCGv src1, TCGv src2)
-{
- tcg_gen_setcond_tl(TCG_COND_LTU, dest, src1, src2);
-}
-
-static void gen_mulh_w(TCGv dest, TCGv src1, TCGv src2)
-{
- tcg_gen_mul_i64(dest, src1, src2);
- tcg_gen_sari_i64(dest, dest, 32);
-}
-
-static void gen_mulh_d(TCGv dest, TCGv src1, TCGv src2)
-{
- TCGv discard = tcg_temp_new();
- tcg_gen_muls2_tl(discard, dest, src1, src2);
-}
-
-static void gen_mulh_du(TCGv dest, TCGv src1, TCGv src2)
-{
- TCGv discard = tcg_temp_new();
- tcg_gen_mulu2_tl(discard, dest, src1, src2);
-}
-
-static void prep_divisor_d(TCGv ret, TCGv src1, TCGv src2)
-{
- TCGv t0 = tcg_temp_new();
- TCGv t1 = tcg_temp_new();
- TCGv zero = tcg_constant_tl(0);
-
- /*
- * If min / -1, set the divisor to 1.
- * This avoids potential host overflow trap and produces min.
- * If x / 0, set the divisor to 1.
- * This avoids potential host overflow trap;
- * the required result is undefined.
- */
- tcg_gen_setcondi_tl(TCG_COND_EQ, ret, src1, INT64_MIN);
- tcg_gen_setcondi_tl(TCG_COND_EQ, t0, src2, -1);
- tcg_gen_setcondi_tl(TCG_COND_EQ, t1, src2, 0);
- tcg_gen_and_tl(ret, ret, t0);
- tcg_gen_or_tl(ret, ret, t1);
- tcg_gen_movcond_tl(TCG_COND_NE, ret, ret, zero, ret, src2);
-}
-
-static void prep_divisor_du(TCGv ret, TCGv src2)
-{
- TCGv zero = tcg_constant_tl(0);
- TCGv one = tcg_constant_tl(1);
-
- /*
- * If x / 0, set the divisor to 1.
- * This avoids potential host overflow trap;
- * the required result is undefined.
- */
- tcg_gen_movcond_tl(TCG_COND_EQ, ret, src2, zero, one, src2);
-}
-
-static void gen_div_d(TCGv dest, TCGv src1, TCGv src2)
-{
- TCGv t0 = tcg_temp_new();
- prep_divisor_d(t0, src1, src2);
- tcg_gen_div_tl(dest, src1, t0);
-}
-
-static void gen_rem_d(TCGv dest, TCGv src1, TCGv src2)
-{
- TCGv t0 = tcg_temp_new();
- prep_divisor_d(t0, src1, src2);
- tcg_gen_rem_tl(dest, src1, t0);
-}
-
-static void gen_div_du(TCGv dest, TCGv src1, TCGv src2)
-{
- TCGv t0 = tcg_temp_new();
- prep_divisor_du(t0, src2);
- tcg_gen_divu_tl(dest, src1, t0);
-}
-
-static void gen_rem_du(TCGv dest, TCGv src1, TCGv src2)
-{
- TCGv t0 = tcg_temp_new();
- prep_divisor_du(t0, src2);
- tcg_gen_remu_tl(dest, src1, t0);
-}
-
-static void gen_div_w(TCGv dest, TCGv src1, TCGv src2)
-{
- TCGv t0 = tcg_temp_new();
- /* We need not check for integer overflow for div_w. */
- prep_divisor_du(t0, src2);
- tcg_gen_div_tl(dest, src1, t0);
-}
-
-static void gen_rem_w(TCGv dest, TCGv src1, TCGv src2)
-{
- TCGv t0 = tcg_temp_new();
- /* We need not check for integer overflow for rem_w. */
- prep_divisor_du(t0, src2);
- tcg_gen_rem_tl(dest, src1, t0);
-}
-
-static void gen_alsl(TCGv dest, TCGv src1, TCGv src2, target_long sa)
-{
- TCGv t0 = tcg_temp_new();
- tcg_gen_shli_tl(t0, src1, sa);
- tcg_gen_add_tl(dest, t0, src2);
-}
-
-static bool trans_lu32i_d(DisasContext *ctx, arg_lu32i_d *a)
-{
- TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
- TCGv src1 = gpr_src(ctx, a->rd, EXT_NONE);
- TCGv src2 = tcg_constant_tl(a->imm);
-
- if (!avail_64(ctx)) {
- return false;
- }
-
- tcg_gen_deposit_tl(dest, src1, src2, 32, 32);
- gen_set_gpr(a->rd, dest, EXT_NONE);
-
- return true;
-}
-
-static bool trans_lu52i_d(DisasContext *ctx, arg_lu52i_d *a)
-{
- TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
- TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
- TCGv src2 = tcg_constant_tl(a->imm);
-
- if (!avail_64(ctx)) {
- return false;
- }
-
- tcg_gen_deposit_tl(dest, src1, src2, 52, 12);
- gen_set_gpr(a->rd, dest, EXT_NONE);
-
- return true;
-}
-
-static target_ulong gen_pcaddi(target_ulong pc, int imm)
-{
- return pc + (imm << 2);
-}
-
-static target_ulong gen_pcalau12i(target_ulong pc, int imm)
-{
- return (pc + (imm << 12)) & ~0xfff;
-}
-
-static target_ulong gen_pcaddu12i(target_ulong pc, int imm)
-{
- return pc + (imm << 12);
-}
-
-static target_ulong gen_pcaddu18i(target_ulong pc, int imm)
-{
- return pc + ((target_ulong)(imm) << 18);
-}
-
-static bool trans_addu16i_d(DisasContext *ctx, arg_addu16i_d *a)
-{
- TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
- TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
-
- if (!avail_64(ctx)) {
- return false;
- }
-
- tcg_gen_addi_tl(dest, src1, a->imm << 16);
- gen_set_gpr(a->rd, dest, EXT_NONE);
-
- return true;
-}
-
-TRANS(add_w, ALL, gen_rrr, EXT_NONE, EXT_NONE, EXT_SIGN, tcg_gen_add_tl)
-TRANS(add_d, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, tcg_gen_add_tl)
-TRANS(sub_w, ALL, gen_rrr, EXT_NONE, EXT_NONE, EXT_SIGN, tcg_gen_sub_tl)
-TRANS(sub_d, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, tcg_gen_sub_tl)
-TRANS(and, ALL, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, tcg_gen_and_tl)
-TRANS(or, ALL, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, tcg_gen_or_tl)
-TRANS(xor, ALL, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, tcg_gen_xor_tl)
-TRANS(nor, ALL, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, tcg_gen_nor_tl)
-TRANS(andn, ALL, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, tcg_gen_andc_tl)
-TRANS(orn, ALL, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, tcg_gen_orc_tl)
-TRANS(slt, ALL, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_slt)
-TRANS(sltu, ALL, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_sltu)
-TRANS(mul_w, ALL, gen_rrr, EXT_SIGN, EXT_SIGN, EXT_SIGN, tcg_gen_mul_tl)
-TRANS(mul_d, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, tcg_gen_mul_tl)
-TRANS(mulh_w, ALL, gen_rrr, EXT_SIGN, EXT_SIGN, EXT_NONE, gen_mulh_w)
-TRANS(mulh_wu, ALL, gen_rrr, EXT_ZERO, EXT_ZERO, EXT_NONE, gen_mulh_w)
-TRANS(mulh_d, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_mulh_d)
-TRANS(mulh_du, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_mulh_du)
-TRANS(mulw_d_w, 64, gen_rrr, EXT_SIGN, EXT_SIGN, EXT_NONE, tcg_gen_mul_tl)
-TRANS(mulw_d_wu, 64, gen_rrr, EXT_ZERO, EXT_ZERO, EXT_NONE, tcg_gen_mul_tl)
-TRANS(div_w, ALL, gen_rrr, EXT_SIGN, EXT_SIGN, EXT_SIGN, gen_div_w)
-TRANS(mod_w, ALL, gen_rrr, EXT_SIGN, EXT_SIGN, EXT_SIGN, gen_rem_w)
-TRANS(div_wu, ALL, gen_rrr, EXT_ZERO, EXT_ZERO, EXT_SIGN, gen_div_du)
-TRANS(mod_wu, ALL, gen_rrr, EXT_ZERO, EXT_ZERO, EXT_SIGN, gen_rem_du)
-TRANS(div_d, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_div_d)
-TRANS(mod_d, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_rem_d)
-TRANS(div_du, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_div_du)
-TRANS(mod_du, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_rem_du)
-TRANS(slti, ALL, gen_rri_v, EXT_NONE, EXT_NONE, gen_slt)
-TRANS(sltui, ALL, gen_rri_v, EXT_NONE, EXT_NONE, gen_sltu)
-TRANS(addi_w, ALL, gen_rri_c, EXT_NONE, EXT_SIGN, tcg_gen_addi_tl)
-TRANS(addi_d, 64, gen_rri_c, EXT_NONE, EXT_NONE, tcg_gen_addi_tl)
-TRANS(alsl_w, ALL, gen_rrr_sa, EXT_NONE, EXT_SIGN, gen_alsl)
-TRANS(alsl_wu, 64, gen_rrr_sa, EXT_NONE, EXT_ZERO, gen_alsl)
-TRANS(alsl_d, 64, gen_rrr_sa, EXT_NONE, EXT_NONE, gen_alsl)
-TRANS(pcaddi, ALL, gen_pc, gen_pcaddi)
-TRANS(pcalau12i, ALL, gen_pc, gen_pcalau12i)
-TRANS(pcaddu12i, ALL, gen_pc, gen_pcaddu12i)
-TRANS(pcaddu18i, 64, gen_pc, gen_pcaddu18i)
-TRANS(andi, ALL, gen_rri_c, EXT_NONE, EXT_NONE, tcg_gen_andi_tl)
-TRANS(ori, ALL, gen_rri_c, EXT_NONE, EXT_NONE, tcg_gen_ori_tl)
-TRANS(xori, ALL, gen_rri_c, EXT_NONE, EXT_NONE, tcg_gen_xori_tl)
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (c) 2021 Loongson Technology Corporation Limited
- */
-
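-/*
- * LL: load the value and record the address/value pair in lladdr/llval
- * so that the matching SC can check them.
- */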
-static bool gen_ll(DisasContext *ctx, arg_rr_i *a, MemOp mop)
-{
- TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
- TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
- TCGv t0 = make_address_i(ctx, src1, a->imm);
-
- tcg_gen_qemu_ld_i64(dest, t0, ctx->mem_idx, mop);
- tcg_gen_st_tl(t0, tcg_env, offsetof(CPULoongArchState, lladdr));
- tcg_gen_st_tl(dest, tcg_env, offsetof(CPULoongArchState, llval));
- gen_set_gpr(a->rd, dest, EXT_NONE);
-
- return true;
-}
-
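-/*
- * SC: succeed only if the address matches lladdr and the cmpxchg still
- * finds llval in memory; rd is set to 1 on success, 0 on failure.
- */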
-static bool gen_sc(DisasContext *ctx, arg_rr_i *a, MemOp mop)
-{
- TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
- TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
- TCGv src2 = gpr_src(ctx, a->rd, EXT_NONE);
- TCGv t0 = tcg_temp_new();
- TCGv val = tcg_temp_new();
-
- TCGLabel *l1 = gen_new_label();
- TCGLabel *done = gen_new_label();
-
- tcg_gen_addi_tl(t0, src1, a->imm);
- tcg_gen_brcond_tl(TCG_COND_EQ, t0, cpu_lladdr, l1);
- tcg_gen_movi_tl(dest, 0);
- tcg_gen_br(done);
-
- gen_set_label(l1);
- tcg_gen_mov_tl(val, src2);
- /* generate cmpxchg */
- tcg_gen_atomic_cmpxchg_tl(t0, cpu_lladdr, cpu_llval,
- val, ctx->mem_idx, mop);
- tcg_gen_setcond_tl(TCG_COND_EQ, dest, t0, cpu_llval);
- gen_set_label(done);
- gen_set_gpr(a->rd, dest, EXT_NONE);
-
- return true;
-}
-
-static bool gen_am(DisasContext *ctx, arg_rrr *a,
- void (*func)(TCGv, TCGv, TCGv, TCGArg, MemOp),
- MemOp mop)
-{
- TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
- TCGv addr = gpr_src(ctx, a->rj, EXT_NONE);
- TCGv val = gpr_src(ctx, a->rk, EXT_NONE);
-
- if (a->rd != 0 && (a->rj == a->rd || a->rk == a->rd)) {
- qemu_log_mask(LOG_GUEST_ERROR,
- "Warning: source register overlaps destination register"
- "in atomic insn at pc=0x" TARGET_FMT_lx "\n",
- ctx->base.pc_next - 4);
- return false;
- }
-
- addr = make_address_i(ctx, addr, 0);
-
- func(dest, addr, val, ctx->mem_idx, mop);
- gen_set_gpr(a->rd, dest, EXT_NONE);
-
- return true;
-}
-
-TRANS(ll_w, ALL, gen_ll, MO_TESL)
-TRANS(sc_w, ALL, gen_sc, MO_TESL)
-TRANS(ll_d, 64, gen_ll, MO_TEUQ)
-TRANS(sc_d, 64, gen_sc, MO_TEUQ)
-TRANS(amswap_w, LAM, gen_am, tcg_gen_atomic_xchg_tl, MO_TESL)
-TRANS(amswap_d, LAM, gen_am, tcg_gen_atomic_xchg_tl, MO_TEUQ)
-TRANS(amadd_w, LAM, gen_am, tcg_gen_atomic_fetch_add_tl, MO_TESL)
-TRANS(amadd_d, LAM, gen_am, tcg_gen_atomic_fetch_add_tl, MO_TEUQ)
-TRANS(amand_w, LAM, gen_am, tcg_gen_atomic_fetch_and_tl, MO_TESL)
-TRANS(amand_d, LAM, gen_am, tcg_gen_atomic_fetch_and_tl, MO_TEUQ)
-TRANS(amor_w, LAM, gen_am, tcg_gen_atomic_fetch_or_tl, MO_TESL)
-TRANS(amor_d, LAM, gen_am, tcg_gen_atomic_fetch_or_tl, MO_TEUQ)
-TRANS(amxor_w, LAM, gen_am, tcg_gen_atomic_fetch_xor_tl, MO_TESL)
-TRANS(amxor_d, LAM, gen_am, tcg_gen_atomic_fetch_xor_tl, MO_TEUQ)
-TRANS(ammax_w, LAM, gen_am, tcg_gen_atomic_fetch_smax_tl, MO_TESL)
-TRANS(ammax_d, LAM, gen_am, tcg_gen_atomic_fetch_smax_tl, MO_TEUQ)
-TRANS(ammin_w, LAM, gen_am, tcg_gen_atomic_fetch_smin_tl, MO_TESL)
-TRANS(ammin_d, LAM, gen_am, tcg_gen_atomic_fetch_smin_tl, MO_TEUQ)
-TRANS(ammax_wu, LAM, gen_am, tcg_gen_atomic_fetch_umax_tl, MO_TESL)
-TRANS(ammax_du, LAM, gen_am, tcg_gen_atomic_fetch_umax_tl, MO_TEUQ)
-TRANS(ammin_wu, LAM, gen_am, tcg_gen_atomic_fetch_umin_tl, MO_TESL)
-TRANS(ammin_du, LAM, gen_am, tcg_gen_atomic_fetch_umin_tl, MO_TEUQ)
-TRANS(amswap_db_w, LAM, gen_am, tcg_gen_atomic_xchg_tl, MO_TESL)
-TRANS(amswap_db_d, LAM, gen_am, tcg_gen_atomic_xchg_tl, MO_TEUQ)
-TRANS(amadd_db_w, LAM, gen_am, tcg_gen_atomic_fetch_add_tl, MO_TESL)
-TRANS(amadd_db_d, LAM, gen_am, tcg_gen_atomic_fetch_add_tl, MO_TEUQ)
-TRANS(amand_db_w, LAM, gen_am, tcg_gen_atomic_fetch_and_tl, MO_TESL)
-TRANS(amand_db_d, LAM, gen_am, tcg_gen_atomic_fetch_and_tl, MO_TEUQ)
-TRANS(amor_db_w, LAM, gen_am, tcg_gen_atomic_fetch_or_tl, MO_TESL)
-TRANS(amor_db_d, LAM, gen_am, tcg_gen_atomic_fetch_or_tl, MO_TEUQ)
-TRANS(amxor_db_w, LAM, gen_am, tcg_gen_atomic_fetch_xor_tl, MO_TESL)
-TRANS(amxor_db_d, LAM, gen_am, tcg_gen_atomic_fetch_xor_tl, MO_TEUQ)
-TRANS(ammax_db_w, LAM, gen_am, tcg_gen_atomic_fetch_smax_tl, MO_TESL)
-TRANS(ammax_db_d, LAM, gen_am, tcg_gen_atomic_fetch_smax_tl, MO_TEUQ)
-TRANS(ammin_db_w, LAM, gen_am, tcg_gen_atomic_fetch_smin_tl, MO_TESL)
-TRANS(ammin_db_d, LAM, gen_am, tcg_gen_atomic_fetch_smin_tl, MO_TEUQ)
-TRANS(ammax_db_wu, LAM, gen_am, tcg_gen_atomic_fetch_umax_tl, MO_TESL)
-TRANS(ammax_db_du, LAM, gen_am, tcg_gen_atomic_fetch_umax_tl, MO_TEUQ)
-TRANS(ammin_db_wu, LAM, gen_am, tcg_gen_atomic_fetch_umin_tl, MO_TESL)
-TRANS(ammin_db_du, LAM, gen_am, tcg_gen_atomic_fetch_umin_tl, MO_TEUQ)
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (c) 2021 Loongson Technology Corporation Limited
- */
-
-static bool gen_rr(DisasContext *ctx, arg_rr *a,
- DisasExtend src_ext, DisasExtend dst_ext,
- void (*func)(TCGv, TCGv))
-{
- TCGv dest = gpr_dst(ctx, a->rd, dst_ext);
- TCGv src1 = gpr_src(ctx, a->rj, src_ext);
-
- func(dest, src1);
- gen_set_gpr(a->rd, dest, dst_ext);
-
- return true;
-}
-
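-/*
- * bytepick: extract a word (or doubleword) from the concatenation of
- * rj (low part) and rk (high part) at a byte offset selected by sa.
- */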
-static void gen_bytepick_w(TCGv dest, TCGv src1, TCGv src2, target_long sa)
-{
- tcg_gen_concat_tl_i64(dest, src1, src2);
- tcg_gen_sextract_i64(dest, dest, (32 - sa * 8), 32);
-}
-
-static void gen_bytepick_d(TCGv dest, TCGv src1, TCGv src2, target_long sa)
-{
- tcg_gen_extract2_i64(dest, src1, src2, (64 - sa * 8));
-}
-
-static bool gen_bstrins(DisasContext *ctx, arg_rr_ms_ls *a,
- DisasExtend dst_ext)
-{
- TCGv src1 = gpr_src(ctx, a->rd, EXT_NONE);
- TCGv src2 = gpr_src(ctx, a->rj, EXT_NONE);
- TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
-
- if (a->ls > a->ms) {
- return false;
- }
-
- tcg_gen_deposit_tl(dest, src1, src2, a->ls, a->ms - a->ls + 1);
- gen_set_gpr(a->rd, dest, dst_ext);
- return true;
-}
-
-static bool gen_bstrpick(DisasContext *ctx, arg_rr_ms_ls *a,
- DisasExtend dst_ext)
-{
- TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
- TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
-
- if (a->ls > a->ms) {
- return false;
- }
-
- tcg_gen_extract_tl(dest, src1, a->ls, a->ms - a->ls + 1);
- gen_set_gpr(a->rd, dest, dst_ext);
- return true;
-}
-
-static void gen_clz_w(TCGv dest, TCGv src1)
-{
- tcg_gen_clzi_tl(dest, src1, TARGET_LONG_BITS);
- tcg_gen_subi_tl(dest, dest, TARGET_LONG_BITS - 32);
-}
-
-static void gen_clo_w(TCGv dest, TCGv src1)
-{
- tcg_gen_not_tl(dest, src1);
- tcg_gen_ext32u_tl(dest, dest);
- gen_clz_w(dest, dest);
-}
-
-static void gen_ctz_w(TCGv dest, TCGv src1)
-{
- tcg_gen_ori_tl(dest, src1, (target_ulong)MAKE_64BIT_MASK(32, 32));
- tcg_gen_ctzi_tl(dest, dest, TARGET_LONG_BITS);
-}
-
-static void gen_cto_w(TCGv dest, TCGv src1)
-{
- tcg_gen_not_tl(dest, src1);
- gen_ctz_w(dest, dest);
-}
-
-static void gen_clz_d(TCGv dest, TCGv src1)
-{
- tcg_gen_clzi_i64(dest, src1, TARGET_LONG_BITS);
-}
-
-static void gen_clo_d(TCGv dest, TCGv src1)
-{
- tcg_gen_not_tl(dest, src1);
- gen_clz_d(dest, dest);
-}
-
-static void gen_ctz_d(TCGv dest, TCGv src1)
-{
- tcg_gen_ctzi_tl(dest, src1, TARGET_LONG_BITS);
-}
-
-static void gen_cto_d(TCGv dest, TCGv src1)
-{
- tcg_gen_not_tl(dest, src1);
- gen_ctz_d(dest, dest);
-}
-
-static void gen_revb_2w(TCGv dest, TCGv src1)
-{
- tcg_gen_bswap64_i64(dest, src1);
- tcg_gen_rotri_i64(dest, dest, 32);
-}
-
-static void gen_revb_2h(TCGv dest, TCGv src1)
-{
- TCGv mask = tcg_constant_tl(0x00FF00FF);
- TCGv t0 = tcg_temp_new();
- TCGv t1 = tcg_temp_new();
-
- tcg_gen_shri_tl(t0, src1, 8);
- tcg_gen_and_tl(t0, t0, mask);
- tcg_gen_and_tl(t1, src1, mask);
- tcg_gen_shli_tl(t1, t1, 8);
- tcg_gen_or_tl(dest, t0, t1);
-}
-
-static void gen_revb_4h(TCGv dest, TCGv src1)
-{
- TCGv mask = tcg_constant_tl(0x00FF00FF00FF00FFULL);
- TCGv t0 = tcg_temp_new();
- TCGv t1 = tcg_temp_new();
-
- tcg_gen_shri_tl(t0, src1, 8);
- tcg_gen_and_tl(t0, t0, mask);
- tcg_gen_and_tl(t1, src1, mask);
- tcg_gen_shli_tl(t1, t1, 8);
- tcg_gen_or_tl(dest, t0, t1);
-}
-
-static void gen_revh_2w(TCGv dest, TCGv src1)
-{
- TCGv_i64 t0 = tcg_temp_new_i64();
- TCGv_i64 t1 = tcg_temp_new_i64();
- TCGv_i64 mask = tcg_constant_i64(0x0000ffff0000ffffull);
-
- tcg_gen_shri_i64(t0, src1, 16);
- tcg_gen_and_i64(t1, src1, mask);
- tcg_gen_and_i64(t0, t0, mask);
- tcg_gen_shli_i64(t1, t1, 16);
- tcg_gen_or_i64(dest, t1, t0);
-}
-
-static void gen_revh_d(TCGv dest, TCGv src1)
-{
- TCGv t0 = tcg_temp_new();
- TCGv t1 = tcg_temp_new();
- TCGv mask = tcg_constant_tl(0x0000FFFF0000FFFFULL);
-
- tcg_gen_shri_tl(t1, src1, 16);
- tcg_gen_and_tl(t1, t1, mask);
- tcg_gen_and_tl(t0, src1, mask);
- tcg_gen_shli_tl(t0, t0, 16);
- tcg_gen_or_tl(t0, t0, t1);
- tcg_gen_rotri_tl(dest, t0, 32);
-}
-
-static void gen_maskeqz(TCGv dest, TCGv src1, TCGv src2)
-{
- TCGv zero = tcg_constant_tl(0);
-
- tcg_gen_movcond_tl(TCG_COND_EQ, dest, src2, zero, zero, src1);
-}
-
-static void gen_masknez(TCGv dest, TCGv src1, TCGv src2)
-{
- TCGv zero = tcg_constant_tl(0);
-
- tcg_gen_movcond_tl(TCG_COND_NE, dest, src2, zero, zero, src1);
-}
-
-TRANS(ext_w_h, ALL, gen_rr, EXT_NONE, EXT_NONE, tcg_gen_ext16s_tl)
-TRANS(ext_w_b, ALL, gen_rr, EXT_NONE, EXT_NONE, tcg_gen_ext8s_tl)
-TRANS(clo_w, ALL, gen_rr, EXT_NONE, EXT_NONE, gen_clo_w)
-TRANS(clz_w, ALL, gen_rr, EXT_ZERO, EXT_NONE, gen_clz_w)
-TRANS(cto_w, ALL, gen_rr, EXT_NONE, EXT_NONE, gen_cto_w)
-TRANS(ctz_w, ALL, gen_rr, EXT_NONE, EXT_NONE, gen_ctz_w)
-TRANS(clo_d, 64, gen_rr, EXT_NONE, EXT_NONE, gen_clo_d)
-TRANS(clz_d, 64, gen_rr, EXT_NONE, EXT_NONE, gen_clz_d)
-TRANS(cto_d, 64, gen_rr, EXT_NONE, EXT_NONE, gen_cto_d)
-TRANS(ctz_d, 64, gen_rr, EXT_NONE, EXT_NONE, gen_ctz_d)
-TRANS(revb_2h, ALL, gen_rr, EXT_NONE, EXT_SIGN, gen_revb_2h)
-TRANS(revb_4h, 64, gen_rr, EXT_NONE, EXT_NONE, gen_revb_4h)
-TRANS(revb_2w, 64, gen_rr, EXT_NONE, EXT_NONE, gen_revb_2w)
-TRANS(revb_d, 64, gen_rr, EXT_NONE, EXT_NONE, tcg_gen_bswap64_i64)
-TRANS(revh_2w, 64, gen_rr, EXT_NONE, EXT_NONE, gen_revh_2w)
-TRANS(revh_d, 64, gen_rr, EXT_NONE, EXT_NONE, gen_revh_d)
-TRANS(bitrev_4b, ALL, gen_rr, EXT_ZERO, EXT_SIGN, gen_helper_bitswap)
-TRANS(bitrev_8b, 64, gen_rr, EXT_NONE, EXT_NONE, gen_helper_bitswap)
-TRANS(bitrev_w, ALL, gen_rr, EXT_NONE, EXT_SIGN, gen_helper_bitrev_w)
-TRANS(bitrev_d, 64, gen_rr, EXT_NONE, EXT_NONE, gen_helper_bitrev_d)
-TRANS(maskeqz, ALL, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_maskeqz)
-TRANS(masknez, ALL, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_masknez)
-TRANS(bytepick_w, ALL, gen_rrr_sa, EXT_NONE, EXT_NONE, gen_bytepick_w)
-TRANS(bytepick_d, 64, gen_rrr_sa, EXT_NONE, EXT_NONE, gen_bytepick_d)
-TRANS(bstrins_w, ALL, gen_bstrins, EXT_SIGN)
-TRANS(bstrins_d, 64, gen_bstrins, EXT_NONE)
-TRANS(bstrpick_w, ALL, gen_bstrpick, EXT_SIGN)
-TRANS(bstrpick_d, 64, gen_bstrpick, EXT_NONE)
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (c) 2021 Loongson Technology Corporation Limited
- */
-
-static bool trans_b(DisasContext *ctx, arg_b *a)
-{
- gen_goto_tb(ctx, 0, ctx->base.pc_next + a->offs);
- ctx->base.is_jmp = DISAS_NORETURN;
- return true;
-}
-
-static bool trans_bl(DisasContext *ctx, arg_bl *a)
-{
- tcg_gen_movi_tl(cpu_gpr[1], make_address_pc(ctx, ctx->base.pc_next + 4));
- gen_goto_tb(ctx, 0, ctx->base.pc_next + a->offs);
- ctx->base.is_jmp = DISAS_NORETURN;
- return true;
-}
-
-static bool trans_jirl(DisasContext *ctx, arg_jirl *a)
-{
- TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
- TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
-
- TCGv addr = make_address_i(ctx, src1, a->imm);
- tcg_gen_mov_tl(cpu_pc, addr);
- tcg_gen_movi_tl(dest, make_address_pc(ctx, ctx->base.pc_next + 4));
- gen_set_gpr(a->rd, dest, EXT_NONE);
- tcg_gen_lookup_and_goto_ptr();
- ctx->base.is_jmp = DISAS_NORETURN;
- return true;
-}
-
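-/*
- * Conditional branch: jump to pc + offs when @cond holds for src1/src2,
- * otherwise continue at the next instruction (pc + 4).
- */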
-static void gen_bc(DisasContext *ctx, TCGv src1, TCGv src2,
- target_long offs, TCGCond cond)
-{
- TCGLabel *l = gen_new_label();
- tcg_gen_brcond_tl(cond, src1, src2, l);
- gen_goto_tb(ctx, 1, ctx->base.pc_next + 4);
- gen_set_label(l);
- gen_goto_tb(ctx, 0, ctx->base.pc_next + offs);
- ctx->base.is_jmp = DISAS_NORETURN;
-}
-
-static bool gen_rr_bc(DisasContext *ctx, arg_rr_offs *a, TCGCond cond)
-{
- TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
- TCGv src2 = gpr_src(ctx, a->rd, EXT_NONE);
-
- gen_bc(ctx, src1, src2, a->offs, cond);
- return true;
-}
-
-static bool gen_rz_bc(DisasContext *ctx, arg_r_offs *a, TCGCond cond)
-{
- TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
- TCGv src2 = tcg_constant_tl(0);
-
- gen_bc(ctx, src1, src2, a->offs, cond);
- return true;
-}
-
-static bool gen_cz_bc(DisasContext *ctx, arg_c_offs *a, TCGCond cond)
-{
- TCGv src1 = tcg_temp_new();
- TCGv src2 = tcg_constant_tl(0);
-
- tcg_gen_ld8u_tl(src1, tcg_env,
- offsetof(CPULoongArchState, cf[a->cj]));
- gen_bc(ctx, src1, src2, a->offs, cond);
- return true;
-}
-
-TRANS(beq, ALL, gen_rr_bc, TCG_COND_EQ)
-TRANS(bne, ALL, gen_rr_bc, TCG_COND_NE)
-TRANS(blt, ALL, gen_rr_bc, TCG_COND_LT)
-TRANS(bge, ALL, gen_rr_bc, TCG_COND_GE)
-TRANS(bltu, ALL, gen_rr_bc, TCG_COND_LTU)
-TRANS(bgeu, ALL, gen_rr_bc, TCG_COND_GEU)
-TRANS(beqz, ALL, gen_rz_bc, TCG_COND_EQ)
-TRANS(bnez, ALL, gen_rz_bc, TCG_COND_NE)
-TRANS(bceqz, 64, gen_cz_bc, TCG_COND_EQ)
-TRANS(bcnez, 64, gen_cz_bc, TCG_COND_NE)
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (c) 2021 Loongson Technology Corporation Limited
- */
-
-static bool trans_break(DisasContext *ctx, arg_break *a)
-{
- generate_exception(ctx, EXCCODE_BRK);
- return true;
-}
-
-static bool trans_syscall(DisasContext *ctx, arg_syscall *a)
-{
- generate_exception(ctx, EXCCODE_SYS);
- return true;
-}
-
-static bool trans_asrtle_d(DisasContext *ctx, arg_asrtle_d *a)
-{
- TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
- TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE);
-
- if (!avail_64(ctx)) {
- return false;
- }
-
- gen_helper_asrtle_d(tcg_env, src1, src2);
- return true;
-}
-
-static bool trans_asrtgt_d(DisasContext *ctx, arg_asrtgt_d *a)
-{
- TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
- TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE);
-
- if (!avail_64(ctx)) {
- return false;
- }
-
- gen_helper_asrtgt_d(tcg_env, src1, src2);
- return true;
-}
-
-static bool gen_rdtime(DisasContext *ctx, arg_rr *a,
- bool word, bool high)
-{
- TCGv dst1 = gpr_dst(ctx, a->rd, EXT_NONE);
- TCGv dst2 = gpr_dst(ctx, a->rj, EXT_NONE);
-
- translator_io_start(&ctx->base);
- gen_helper_rdtime_d(dst1, tcg_env);
- if (word) {
- tcg_gen_sextract_tl(dst1, dst1, high ? 32 : 0, 32);
- }
- tcg_gen_ld_i64(dst2, tcg_env, offsetof(CPULoongArchState, CSR_TID));
-
- return true;
-}
-
-static bool trans_rdtimel_w(DisasContext *ctx, arg_rdtimel_w *a)
-{
- return gen_rdtime(ctx, a, 1, 0);
-}
-
-static bool trans_rdtimeh_w(DisasContext *ctx, arg_rdtimeh_w *a)
-{
- return gen_rdtime(ctx, a, 1, 1);
-}
-
-static bool trans_rdtime_d(DisasContext *ctx, arg_rdtime_d *a)
-{
- return gen_rdtime(ctx, a, 0, 0);
-}
-
-static bool trans_cpucfg(DisasContext *ctx, arg_cpucfg *a)
-{
- TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
- TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
-
- gen_helper_cpucfg(dest, tcg_env, src1);
- gen_set_gpr(a->rd, dest, EXT_NONE);
-
- return true;
-}
-
-static bool gen_crc(DisasContext *ctx, arg_rrr *a,
- void (*func)(TCGv, TCGv, TCGv, TCGv),
- TCGv tsz)
-{
- TCGv dest = gpr_dst(ctx, a->rd, EXT_SIGN);
- TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
- TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE);
-
- func(dest, src2, src1, tsz);
- gen_set_gpr(a->rd, dest, EXT_SIGN);
-
- return true;
-}
-
-TRANS(crc_w_b_w, 64, gen_crc, gen_helper_crc32, tcg_constant_tl(1))
-TRANS(crc_w_h_w, 64, gen_crc, gen_helper_crc32, tcg_constant_tl(2))
-TRANS(crc_w_w_w, 64, gen_crc, gen_helper_crc32, tcg_constant_tl(4))
-TRANS(crc_w_d_w, 64, gen_crc, gen_helper_crc32, tcg_constant_tl(8))
-TRANS(crcc_w_b_w, 64, gen_crc, gen_helper_crc32c, tcg_constant_tl(1))
-TRANS(crcc_w_h_w, 64, gen_crc, gen_helper_crc32c, tcg_constant_tl(2))
-TRANS(crcc_w_w_w, 64, gen_crc, gen_helper_crc32c, tcg_constant_tl(4))
-TRANS(crcc_w_d_w, 64, gen_crc, gen_helper_crc32c, tcg_constant_tl(8))
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (c) 2021 Loongson Technology Corporation Limited
- */
-
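-/*
- * CHECK_FPE: in system mode, raise the FP-disabled exception (EXCCODE_FPD)
- * when EUEN.FPE is clear; it expands to nothing for user mode.
- */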
-#ifndef CONFIG_USER_ONLY
-#define CHECK_FPE do { \
- if ((ctx->base.tb->flags & HW_FLAGS_EUEN_FPE) == 0) { \
- generate_exception(ctx, EXCCODE_FPD); \
- return true; \
- } \
-} while (0)
-#else
-#define CHECK_FPE
-#endif
-
-static bool gen_fff(DisasContext *ctx, arg_fff *a,
- void (*func)(TCGv, TCGv_env, TCGv, TCGv))
-{
- TCGv dest = get_fpr(ctx, a->fd);
- TCGv src1 = get_fpr(ctx, a->fj);
- TCGv src2 = get_fpr(ctx, a->fk);
-
- CHECK_FPE;
-
- func(dest, tcg_env, src1, src2);
- set_fpr(a->fd, dest);
-
- return true;
-}
-
-static bool gen_ff(DisasContext *ctx, arg_ff *a,
- void (*func)(TCGv, TCGv_env, TCGv))
-{
- TCGv dest = get_fpr(ctx, a->fd);
- TCGv src = get_fpr(ctx, a->fj);
-
- CHECK_FPE;
-
- func(dest, tcg_env, src);
- set_fpr(a->fd, dest);
-
- return true;
-}
-
-static bool gen_muladd(DisasContext *ctx, arg_ffff *a,
- void (*func)(TCGv, TCGv_env, TCGv, TCGv, TCGv, TCGv_i32),
- int flag)
-{
- TCGv_i32 tflag = tcg_constant_i32(flag);
- TCGv dest = get_fpr(ctx, a->fd);
- TCGv src1 = get_fpr(ctx, a->fj);
- TCGv src2 = get_fpr(ctx, a->fk);
- TCGv src3 = get_fpr(ctx, a->fa);
-
- CHECK_FPE;
-
- func(dest, tcg_env, src1, src2, src3, tflag);
- set_fpr(a->fd, dest);
-
- return true;
-}
-
-static bool trans_fcopysign_s(DisasContext *ctx, arg_fcopysign_s *a)
-{
- TCGv dest = get_fpr(ctx, a->fd);
- TCGv src1 = get_fpr(ctx, a->fk);
- TCGv src2 = get_fpr(ctx, a->fj);
-
- if (!avail_FP_SP(ctx)) {
- return false;
- }
-
- CHECK_FPE;
-
- tcg_gen_deposit_i64(dest, src1, src2, 0, 31);
- set_fpr(a->fd, dest);
-
- return true;
-}
-
-static bool trans_fcopysign_d(DisasContext *ctx, arg_fcopysign_d *a)
-{
- TCGv dest = get_fpr(ctx, a->fd);
- TCGv src1 = get_fpr(ctx, a->fk);
- TCGv src2 = get_fpr(ctx, a->fj);
-
- if (!avail_FP_DP(ctx)) {
- return false;
- }
-
- CHECK_FPE;
-
- tcg_gen_deposit_i64(dest, src1, src2, 0, 63);
- set_fpr(a->fd, dest);
-
- return true;
-}
-
-static bool trans_fabs_s(DisasContext *ctx, arg_fabs_s *a)
-{
- TCGv dest = get_fpr(ctx, a->fd);
- TCGv src = get_fpr(ctx, a->fj);
-
- if (!avail_FP_SP(ctx)) {
- return false;
- }
-
- CHECK_FPE;
-
- tcg_gen_andi_i64(dest, src, MAKE_64BIT_MASK(0, 31));
- gen_nanbox_s(dest, dest);
- set_fpr(a->fd, dest);
-
- return true;
-}
-
-static bool trans_fabs_d(DisasContext *ctx, arg_fabs_d *a)
-{
- TCGv dest = get_fpr(ctx, a->fd);
- TCGv src = get_fpr(ctx, a->fj);
-
- if (!avail_FP_DP(ctx)) {
- return false;
- }
-
- CHECK_FPE;
-
- tcg_gen_andi_i64(dest, src, MAKE_64BIT_MASK(0, 63));
- set_fpr(a->fd, dest);
-
- return true;
-}
-
-static bool trans_fneg_s(DisasContext *ctx, arg_fneg_s *a)
-{
- TCGv dest = get_fpr(ctx, a->fd);
- TCGv src = get_fpr(ctx, a->fj);
-
- if (!avail_FP_SP(ctx)) {
- return false;
- }
-
- CHECK_FPE;
-
- tcg_gen_xori_i64(dest, src, 0x80000000);
- gen_nanbox_s(dest, dest);
- set_fpr(a->fd, dest);
-
- return true;
-}
-
-static bool trans_fneg_d(DisasContext *ctx, arg_fneg_d *a)
-{
- TCGv dest = get_fpr(ctx, a->fd);
- TCGv src = get_fpr(ctx, a->fj);
-
- if (!avail_FP_DP(ctx)) {
- return false;
- }
-
- CHECK_FPE;
-
- tcg_gen_xori_i64(dest, src, 0x8000000000000000LL);
- set_fpr(a->fd, dest);
-
- return true;
-}
-
-TRANS(fadd_s, FP_SP, gen_fff, gen_helper_fadd_s)
-TRANS(fadd_d, FP_DP, gen_fff, gen_helper_fadd_d)
-TRANS(fsub_s, FP_SP, gen_fff, gen_helper_fsub_s)
-TRANS(fsub_d, FP_DP, gen_fff, gen_helper_fsub_d)
-TRANS(fmul_s, FP_SP, gen_fff, gen_helper_fmul_s)
-TRANS(fmul_d, FP_DP, gen_fff, gen_helper_fmul_d)
-TRANS(fdiv_s, FP_SP, gen_fff, gen_helper_fdiv_s)
-TRANS(fdiv_d, FP_DP, gen_fff, gen_helper_fdiv_d)
-TRANS(fmax_s, FP_SP, gen_fff, gen_helper_fmax_s)
-TRANS(fmax_d, FP_DP, gen_fff, gen_helper_fmax_d)
-TRANS(fmin_s, FP_SP, gen_fff, gen_helper_fmin_s)
-TRANS(fmin_d, FP_DP, gen_fff, gen_helper_fmin_d)
-TRANS(fmaxa_s, FP_SP, gen_fff, gen_helper_fmaxa_s)
-TRANS(fmaxa_d, FP_DP, gen_fff, gen_helper_fmaxa_d)
-TRANS(fmina_s, FP_SP, gen_fff, gen_helper_fmina_s)
-TRANS(fmina_d, FP_DP, gen_fff, gen_helper_fmina_d)
-TRANS(fscaleb_s, FP_SP, gen_fff, gen_helper_fscaleb_s)
-TRANS(fscaleb_d, FP_DP, gen_fff, gen_helper_fscaleb_d)
-TRANS(fsqrt_s, FP_SP, gen_ff, gen_helper_fsqrt_s)
-TRANS(fsqrt_d, FP_DP, gen_ff, gen_helper_fsqrt_d)
-TRANS(frecip_s, FP_SP, gen_ff, gen_helper_frecip_s)
-TRANS(frecip_d, FP_DP, gen_ff, gen_helper_frecip_d)
-TRANS(frsqrt_s, FP_SP, gen_ff, gen_helper_frsqrt_s)
-TRANS(frsqrt_d, FP_DP, gen_ff, gen_helper_frsqrt_d)
-TRANS(flogb_s, FP_SP, gen_ff, gen_helper_flogb_s)
-TRANS(flogb_d, FP_DP, gen_ff, gen_helper_flogb_d)
-TRANS(fclass_s, FP_SP, gen_ff, gen_helper_fclass_s)
-TRANS(fclass_d, FP_DP, gen_ff, gen_helper_fclass_d)
-TRANS(fmadd_s, FP_SP, gen_muladd, gen_helper_fmuladd_s, 0)
-TRANS(fmadd_d, FP_DP, gen_muladd, gen_helper_fmuladd_d, 0)
-TRANS(fmsub_s, FP_SP, gen_muladd, gen_helper_fmuladd_s, float_muladd_negate_c)
-TRANS(fmsub_d, FP_DP, gen_muladd, gen_helper_fmuladd_d, float_muladd_negate_c)
-TRANS(fnmadd_s, FP_SP, gen_muladd, gen_helper_fmuladd_s, float_muladd_negate_result)
-TRANS(fnmadd_d, FP_DP, gen_muladd, gen_helper_fmuladd_d, float_muladd_negate_result)
-TRANS(fnmsub_s, FP_SP, gen_muladd, gen_helper_fmuladd_s,
- float_muladd_negate_c | float_muladd_negate_result)
-TRANS(fnmsub_d, FP_DP, gen_muladd, gen_helper_fmuladd_d,
- float_muladd_negate_c | float_muladd_negate_result)
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (c) 2021 Loongson Technology Corporation Limited
- */
-
-/* bit0(signaling/quiet) bit1(lt) bit2(eq) bit3(un) bit4(neq) */
-static uint32_t get_fcmp_flags(int cond)
-{
- uint32_t flags = 0;
-
- if (cond & 0x1) {
- flags |= FCMP_LT;
- }
- if (cond & 0x2) {
- flags |= FCMP_EQ;
- }
- if (cond & 0x4) {
- flags |= FCMP_UN;
- }
- if (cond & 0x8) {
- flags |= FCMP_GT | FCMP_LT;
- }
- return flags;
-}
-
-static bool trans_fcmp_cond_s(DisasContext *ctx, arg_fcmp_cond_s *a)
-{
- TCGv var, src1, src2;
- uint32_t flags;
- void (*fn)(TCGv, TCGv_env, TCGv, TCGv, TCGv_i32);
-
- if (!avail_FP_SP(ctx)) {
- return false;
- }
-
- CHECK_FPE;
-
- var = tcg_temp_new();
- src1 = get_fpr(ctx, a->fj);
- src2 = get_fpr(ctx, a->fk);
- fn = (a->fcond & 1 ? gen_helper_fcmp_s_s : gen_helper_fcmp_c_s);
- flags = get_fcmp_flags(a->fcond >> 1);
-
- fn(var, tcg_env, src1, src2, tcg_constant_i32(flags));
-
- tcg_gen_st8_tl(var, tcg_env, offsetof(CPULoongArchState, cf[a->cd]));
- return true;
-}
-
-static bool trans_fcmp_cond_d(DisasContext *ctx, arg_fcmp_cond_d *a)
-{
- TCGv var, src1, src2;
- uint32_t flags;
- void (*fn)(TCGv, TCGv_env, TCGv, TCGv, TCGv_i32);
-
- if (!avail_FP_DP(ctx)) {
- return false;
- }
-
- CHECK_FPE;
-
- var = tcg_temp_new();
- src1 = get_fpr(ctx, a->fj);
- src2 = get_fpr(ctx, a->fk);
- fn = (a->fcond & 1 ? gen_helper_fcmp_s_d : gen_helper_fcmp_c_d);
- flags = get_fcmp_flags(a->fcond >> 1);
-
- fn(var, tcg_env, src1, src2, tcg_constant_i32(flags));
-
- tcg_gen_st8_tl(var, tcg_env, offsetof(CPULoongArchState, cf[a->cd]));
- return true;
-}
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (c) 2021 Loongson Technology Corporation Limited
- */
-
-TRANS(fcvt_s_d, FP_DP, gen_ff, gen_helper_fcvt_s_d)
-TRANS(fcvt_d_s, FP_DP, gen_ff, gen_helper_fcvt_d_s)
-TRANS(ftintrm_w_s, FP_SP, gen_ff, gen_helper_ftintrm_w_s)
-TRANS(ftintrm_w_d, FP_DP, gen_ff, gen_helper_ftintrm_w_d)
-TRANS(ftintrm_l_s, FP_SP, gen_ff, gen_helper_ftintrm_l_s)
-TRANS(ftintrm_l_d, FP_DP, gen_ff, gen_helper_ftintrm_l_d)
-TRANS(ftintrp_w_s, FP_SP, gen_ff, gen_helper_ftintrp_w_s)
-TRANS(ftintrp_w_d, FP_DP, gen_ff, gen_helper_ftintrp_w_d)
-TRANS(ftintrp_l_s, FP_SP, gen_ff, gen_helper_ftintrp_l_s)
-TRANS(ftintrp_l_d, FP_DP, gen_ff, gen_helper_ftintrp_l_d)
-TRANS(ftintrz_w_s, FP_SP, gen_ff, gen_helper_ftintrz_w_s)
-TRANS(ftintrz_w_d, FP_DP, gen_ff, gen_helper_ftintrz_w_d)
-TRANS(ftintrz_l_s, FP_SP, gen_ff, gen_helper_ftintrz_l_s)
-TRANS(ftintrz_l_d, FP_DP, gen_ff, gen_helper_ftintrz_l_d)
-TRANS(ftintrne_w_s, FP_SP, gen_ff, gen_helper_ftintrne_w_s)
-TRANS(ftintrne_w_d, FP_DP, gen_ff, gen_helper_ftintrne_w_d)
-TRANS(ftintrne_l_s, FP_SP, gen_ff, gen_helper_ftintrne_l_s)
-TRANS(ftintrne_l_d, FP_DP, gen_ff, gen_helper_ftintrne_l_d)
-TRANS(ftint_w_s, FP_SP, gen_ff, gen_helper_ftint_w_s)
-TRANS(ftint_w_d, FP_DP, gen_ff, gen_helper_ftint_w_d)
-TRANS(ftint_l_s, FP_SP, gen_ff, gen_helper_ftint_l_s)
-TRANS(ftint_l_d, FP_DP, gen_ff, gen_helper_ftint_l_d)
-TRANS(ffint_s_w, FP_SP, gen_ff, gen_helper_ffint_s_w)
-TRANS(ffint_s_l, FP_SP, gen_ff, gen_helper_ffint_s_l)
-TRANS(ffint_d_w, FP_DP, gen_ff, gen_helper_ffint_d_w)
-TRANS(ffint_d_l, FP_DP, gen_ff, gen_helper_ffint_d_l)
-TRANS(frint_s, FP_SP, gen_ff, gen_helper_frint_s)
-TRANS(frint_d, FP_DP, gen_ff, gen_helper_frint_d)
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (c) 2021 Loongson Technology Corporation Limited
- */
-
-static void maybe_nanbox_load(TCGv freg, MemOp mop)
-{
- if ((mop & MO_SIZE) == MO_32) {
- gen_nanbox_s(freg, freg);
- }
-}
-
-static bool gen_fload_i(DisasContext *ctx, arg_fr_i *a, MemOp mop)
-{
- TCGv addr = gpr_src(ctx, a->rj, EXT_NONE);
- TCGv dest = get_fpr(ctx, a->fd);
-
- CHECK_FPE;
-
- addr = make_address_i(ctx, addr, a->imm);
-
- tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, mop);
- maybe_nanbox_load(dest, mop);
- set_fpr(a->fd, dest);
-
- return true;
-}
-
-static bool gen_fstore_i(DisasContext *ctx, arg_fr_i *a, MemOp mop)
-{
- TCGv addr = gpr_src(ctx, a->rj, EXT_NONE);
- TCGv src = get_fpr(ctx, a->fd);
-
- CHECK_FPE;
-
- addr = make_address_i(ctx, addr, a->imm);
-
- tcg_gen_qemu_st_tl(src, addr, ctx->mem_idx, mop);
-
- return true;
-}
-
-static bool gen_floadx(DisasContext *ctx, arg_frr *a, MemOp mop)
-{
- TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
- TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE);
- TCGv dest = get_fpr(ctx, a->fd);
- TCGv addr;
-
- CHECK_FPE;
-
- addr = make_address_x(ctx, src1, src2);
- tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, mop);
- maybe_nanbox_load(dest, mop);
- set_fpr(a->fd, dest);
-
- return true;
-}
-
-static bool gen_fstorex(DisasContext *ctx, arg_frr *a, MemOp mop)
-{
- TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
- TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE);
- TCGv src3 = get_fpr(ctx, a->fd);
- TCGv addr;
-
- CHECK_FPE;
-
- addr = make_address_x(ctx, src1, src2);
- tcg_gen_qemu_st_tl(src3, addr, ctx->mem_idx, mop);
-
- return true;
-}
-
-static bool gen_fload_gt(DisasContext *ctx, arg_frr *a, MemOp mop)
-{
- TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
- TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE);
- TCGv dest = get_fpr(ctx, a->fd);
- TCGv addr;
-
- CHECK_FPE;
-
- gen_helper_asrtgt_d(tcg_env, src1, src2);
- addr = make_address_x(ctx, src1, src2);
- tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, mop);
- maybe_nanbox_load(dest, mop);
- set_fpr(a->fd, dest);
-
- return true;
-}
-
-static bool gen_fstore_gt(DisasContext *ctx, arg_frr *a, MemOp mop)
-{
- TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
- TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE);
- TCGv src3 = get_fpr(ctx, a->fd);
- TCGv addr;
-
- CHECK_FPE;
-
- gen_helper_asrtgt_d(tcg_env, src1, src2);
- addr = make_address_x(ctx, src1, src2);
- tcg_gen_qemu_st_tl(src3, addr, ctx->mem_idx, mop);
-
- return true;
-}
-
-static bool gen_fload_le(DisasContext *ctx, arg_frr *a, MemOp mop)
-{
- TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
- TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE);
- TCGv dest = get_fpr(ctx, a->fd);
- TCGv addr;
-
- CHECK_FPE;
-
- gen_helper_asrtle_d(tcg_env, src1, src2);
- addr = make_address_x(ctx, src1, src2);
- tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, mop);
- maybe_nanbox_load(dest, mop);
- set_fpr(a->fd, dest);
-
- return true;
-}
-
-static bool gen_fstore_le(DisasContext *ctx, arg_frr *a, MemOp mop)
-{
- TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
- TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE);
- TCGv src3 = get_fpr(ctx, a->fd);
- TCGv addr;
-
- CHECK_FPE;
-
- gen_helper_asrtle_d(tcg_env, src1, src2);
- addr = make_address_x(ctx, src1, src2);
- tcg_gen_qemu_st_tl(src3, addr, ctx->mem_idx, mop);
-
- return true;
-}
-
-TRANS(fld_s, FP_SP, gen_fload_i, MO_TEUL)
-TRANS(fst_s, FP_SP, gen_fstore_i, MO_TEUL)
-TRANS(fld_d, FP_DP, gen_fload_i, MO_TEUQ)
-TRANS(fst_d, FP_DP, gen_fstore_i, MO_TEUQ)
-TRANS(fldx_s, FP_SP, gen_floadx, MO_TEUL)
-TRANS(fldx_d, FP_DP, gen_floadx, MO_TEUQ)
-TRANS(fstx_s, FP_SP, gen_fstorex, MO_TEUL)
-TRANS(fstx_d, FP_DP, gen_fstorex, MO_TEUQ)
-TRANS(fldgt_s, FP_SP, gen_fload_gt, MO_TEUL)
-TRANS(fldgt_d, FP_DP, gen_fload_gt, MO_TEUQ)
-TRANS(fldle_s, FP_SP, gen_fload_le, MO_TEUL)
-TRANS(fldle_d, FP_DP, gen_fload_le, MO_TEUQ)
-TRANS(fstgt_s, FP_SP, gen_fstore_gt, MO_TEUL)
-TRANS(fstgt_d, FP_DP, gen_fstore_gt, MO_TEUQ)
-TRANS(fstle_s, FP_SP, gen_fstore_le, MO_TEUL)
-TRANS(fstle_d, FP_DP, gen_fstore_le, MO_TEUQ)
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (c) 2021 Loongson Technology Corporation Limited
- */
-
-static const uint32_t fcsr_mask[4] = {
- UINT32_MAX, FCSR0_M1, FCSR0_M2, FCSR0_M3
-};
-
-static bool trans_fsel(DisasContext *ctx, arg_fsel *a)
-{
- TCGv zero = tcg_constant_tl(0);
- TCGv dest = get_fpr(ctx, a->fd);
- TCGv src1 = get_fpr(ctx, a->fj);
- TCGv src2 = get_fpr(ctx, a->fk);
- TCGv cond;
-
- if (!avail_FP(ctx)) {
- return false;
- }
-
- CHECK_FPE;
-
- cond = tcg_temp_new();
- tcg_gen_ld8u_tl(cond, tcg_env, offsetof(CPULoongArchState, cf[a->ca]));
- tcg_gen_movcond_tl(TCG_COND_EQ, dest, cond, zero, src1, src2);
- set_fpr(a->fd, dest);
-
- return true;
-}
-
-static bool gen_f2f(DisasContext *ctx, arg_ff *a,
- void (*func)(TCGv, TCGv), bool nanbox)
-{
- TCGv dest = get_fpr(ctx, a->fd);
- TCGv src = get_fpr(ctx, a->fj);
-
- CHECK_FPE;
-
- func(dest, src);
- if (nanbox) {
- gen_nanbox_s(dest, dest);
- }
- set_fpr(a->fd, dest);
-
- return true;
-}
-
-static bool gen_r2f(DisasContext *ctx, arg_fr *a,
- void (*func)(TCGv, TCGv))
-{
- TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
- TCGv dest = get_fpr(ctx, a->fd);
-
- if (!avail_FP(ctx)) {
- return false;
- }
-
- CHECK_FPE;
-
- func(dest, src);
- set_fpr(a->fd, dest);
-
- return true;
-}
-
-static bool gen_f2r(DisasContext *ctx, arg_rf *a,
- void (*func)(TCGv, TCGv))
-{
- TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
- TCGv src = get_fpr(ctx, a->fj);
-
- if (!avail_FP(ctx)) {
- return false;
- }
-
- CHECK_FPE;
-
- func(dest, src);
- gen_set_gpr(a->rd, dest, EXT_NONE);
-
- return true;
-}
-
-static bool trans_movgr2fcsr(DisasContext *ctx, arg_movgr2fcsr *a)
-{
- uint32_t mask = fcsr_mask[a->fcsrd];
- TCGv Rj = gpr_src(ctx, a->rj, EXT_NONE);
-
- if (!avail_FP(ctx)) {
- return false;
- }
-
- CHECK_FPE;
-
- if (mask == UINT32_MAX) {
- tcg_gen_st32_i64(Rj, tcg_env, offsetof(CPULoongArchState, fcsr0));
- } else {
- TCGv_i32 fcsr0 = tcg_temp_new_i32();
- TCGv_i32 temp = tcg_temp_new_i32();
-
- tcg_gen_ld_i32(fcsr0, tcg_env, offsetof(CPULoongArchState, fcsr0));
- tcg_gen_extrl_i64_i32(temp, Rj);
- tcg_gen_andi_i32(temp, temp, mask);
- tcg_gen_andi_i32(fcsr0, fcsr0, ~mask);
- tcg_gen_or_i32(fcsr0, fcsr0, temp);
- tcg_gen_st_i32(fcsr0, tcg_env, offsetof(CPULoongArchState, fcsr0));
- }
-
- /*
- * Install the new rounding mode to fpu_status, if changed.
- * Note that FCSR3 is exactly the rounding mode field.
- */
- if (mask & FCSR0_M3) {
- gen_helper_set_rounding_mode(tcg_env);
- }
- return true;
-}
-
-static bool trans_movfcsr2gr(DisasContext *ctx, arg_movfcsr2gr *a)
-{
- TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
-
- if (!avail_FP(ctx)) {
- return false;
- }
-
- CHECK_FPE;
-
- tcg_gen_ld32u_i64(dest, tcg_env, offsetof(CPULoongArchState, fcsr0));
- tcg_gen_andi_i64(dest, dest, fcsr_mask[a->fcsrs]);
- gen_set_gpr(a->rd, dest, EXT_NONE);
-
- return true;
-}
-
-static void gen_movgr2fr_w(TCGv dest, TCGv src)
-{
- tcg_gen_deposit_i64(dest, dest, src, 0, 32);
-}
-
-static void gen_movgr2frh_w(TCGv dest, TCGv src)
-{
- tcg_gen_deposit_i64(dest, dest, src, 32, 32);
-}
-
-static void gen_movfrh2gr_s(TCGv dest, TCGv src)
-{
- tcg_gen_sextract_tl(dest, src, 32, 32);
-}
-
-static bool trans_movfr2cf(DisasContext *ctx, arg_movfr2cf *a)
-{
- TCGv t0;
- TCGv src = get_fpr(ctx, a->fj);
-
- if (!avail_FP(ctx)) {
- return false;
- }
-
- CHECK_FPE;
-
- t0 = tcg_temp_new();
- tcg_gen_andi_tl(t0, src, 0x1);
- tcg_gen_st8_tl(t0, tcg_env, offsetof(CPULoongArchState, cf[a->cd & 0x7]));
-
- return true;
-}
-
-static bool trans_movcf2fr(DisasContext *ctx, arg_movcf2fr *a)
-{
- TCGv dest = get_fpr(ctx, a->fd);
-
- if (!avail_FP(ctx)) {
- return false;
- }
-
- CHECK_FPE;
-
- tcg_gen_ld8u_tl(dest, tcg_env,
- offsetof(CPULoongArchState, cf[a->cj & 0x7]));
- set_fpr(a->fd, dest);
-
- return true;
-}
-
-static bool trans_movgr2cf(DisasContext *ctx, arg_movgr2cf *a)
-{
- TCGv t0;
-
- if (!avail_FP(ctx)) {
- return false;
- }
-
- CHECK_FPE;
-
- t0 = tcg_temp_new();
- tcg_gen_andi_tl(t0, gpr_src(ctx, a->rj, EXT_NONE), 0x1);
- tcg_gen_st8_tl(t0, tcg_env, offsetof(CPULoongArchState, cf[a->cd & 0x7]));
-
- return true;
-}
-
-static bool trans_movcf2gr(DisasContext *ctx, arg_movcf2gr *a)
-{
- if (!avail_FP(ctx)) {
- return false;
- }
-
- CHECK_FPE;
-
- tcg_gen_ld8u_tl(gpr_dst(ctx, a->rd, EXT_NONE), tcg_env,
- offsetof(CPULoongArchState, cf[a->cj & 0x7]));
- return true;
-}
-
-TRANS(fmov_s, FP_SP, gen_f2f, tcg_gen_mov_tl, true)
-TRANS(fmov_d, FP_DP, gen_f2f, tcg_gen_mov_tl, false)
-TRANS(movgr2fr_w, FP_SP, gen_r2f, gen_movgr2fr_w)
-TRANS(movgr2fr_d, 64, gen_r2f, tcg_gen_mov_tl)
-TRANS(movgr2frh_w, FP_DP, gen_r2f, gen_movgr2frh_w)
-TRANS(movfr2gr_s, FP_SP, gen_f2r, tcg_gen_ext32s_tl)
-TRANS(movfr2gr_d, 64, gen_f2r, tcg_gen_mov_tl)
-TRANS(movfrh2gr_s, FP_DP, gen_f2r, gen_movfrh2gr_s)
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (c) 2021 Loongson Technology Corporation Limited
- */
-
-static bool gen_load(DisasContext *ctx, arg_rr_i *a, MemOp mop)
-{
- TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
- TCGv addr = gpr_src(ctx, a->rj, EXT_NONE);
-
- addr = make_address_i(ctx, addr, a->imm);
-
- tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, mop);
- gen_set_gpr(a->rd, dest, EXT_NONE);
- return true;
-}
-
-static bool gen_store(DisasContext *ctx, arg_rr_i *a, MemOp mop)
-{
- TCGv data = gpr_src(ctx, a->rd, EXT_NONE);
- TCGv addr = gpr_src(ctx, a->rj, EXT_NONE);
-
- addr = make_address_i(ctx, addr, a->imm);
-
- tcg_gen_qemu_st_tl(data, addr, ctx->mem_idx, mop);
- return true;
-}
-
-static bool gen_loadx(DisasContext *ctx, arg_rrr *a, MemOp mop)
-{
- TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
- TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
- TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE);
- TCGv addr = make_address_x(ctx, src1, src2);
-
- tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, mop);
- gen_set_gpr(a->rd, dest, EXT_NONE);
-
- return true;
-}
-
-static bool gen_storex(DisasContext *ctx, arg_rrr *a, MemOp mop)
-{
- TCGv data = gpr_src(ctx, a->rd, EXT_NONE);
- TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
- TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE);
- TCGv addr = make_address_x(ctx, src1, src2);
-
- tcg_gen_qemu_st_tl(data, addr, ctx->mem_idx, mop);
-
- return true;
-}
-
-static bool gen_load_gt(DisasContext *ctx, arg_rrr *a, MemOp mop)
-{
- TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
- TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
- TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE);
-
- gen_helper_asrtgt_d(tcg_env, src1, src2);
- src1 = make_address_i(ctx, src1, 0);
- tcg_gen_qemu_ld_tl(dest, src1, ctx->mem_idx, mop);
- gen_set_gpr(a->rd, dest, EXT_NONE);
-
- return true;
-}
-
-static bool gen_load_le(DisasContext *ctx, arg_rrr *a, MemOp mop)
-{
- TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
- TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
- TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE);
-
- gen_helper_asrtle_d(tcg_env, src1, src2);
- src1 = make_address_i(ctx, src1, 0);
- tcg_gen_qemu_ld_tl(dest, src1, ctx->mem_idx, mop);
- gen_set_gpr(a->rd, dest, EXT_NONE);
-
- return true;
-}
-
-static bool gen_store_gt(DisasContext *ctx, arg_rrr *a, MemOp mop)
-{
- TCGv data = gpr_src(ctx, a->rd, EXT_NONE);
- TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
- TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE);
-
- gen_helper_asrtgt_d(tcg_env, src1, src2);
- src1 = make_address_i(ctx, src1, 0);
- tcg_gen_qemu_st_tl(data, src1, ctx->mem_idx, mop);
-
- return true;
-}
-
-static bool gen_store_le(DisasContext *ctx, arg_rrr *a, MemOp mop)
-{
- TCGv data = gpr_src(ctx, a->rd, EXT_NONE);
- TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
- TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE);
-
- gen_helper_asrtle_d(tcg_env, src1, src2);
- src1 = make_address_i(ctx, src1, 0);
- tcg_gen_qemu_st_tl(data, src1, ctx->mem_idx, mop);
-
- return true;
-}
-
-static bool trans_preld(DisasContext *ctx, arg_preld *a)
-{
- return true;
-}
-
-static bool trans_preldx(DisasContext *ctx, arg_preldx *a)
-{
- return true;
-}
-
-static bool trans_dbar(DisasContext *ctx, arg_dbar *a)
-{
- tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL);
- return true;
-}
-
-static bool trans_ibar(DisasContext *ctx, arg_ibar *a)
-{
- ctx->base.is_jmp = DISAS_STOP;
- return true;
-}
-
-static bool gen_ldptr(DisasContext *ctx, arg_rr_i *a, MemOp mop)
-{
- TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
- TCGv addr = gpr_src(ctx, a->rj, EXT_NONE);
-
- addr = make_address_i(ctx, addr, a->imm);
-
- tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, mop);
- gen_set_gpr(a->rd, dest, EXT_NONE);
- return true;
-}
-
-static bool gen_stptr(DisasContext *ctx, arg_rr_i *a, MemOp mop)
-{
- TCGv data = gpr_src(ctx, a->rd, EXT_NONE);
- TCGv addr = gpr_src(ctx, a->rj, EXT_NONE);
-
- addr = make_address_i(ctx, addr, a->imm);
-
- tcg_gen_qemu_st_tl(data, addr, ctx->mem_idx, mop);
- return true;
-}
-
-TRANS(ld_b, ALL, gen_load, MO_SB)
-TRANS(ld_h, ALL, gen_load, MO_TESW)
-TRANS(ld_w, ALL, gen_load, MO_TESL)
-TRANS(ld_d, 64, gen_load, MO_TEUQ)
-TRANS(st_b, ALL, gen_store, MO_UB)
-TRANS(st_h, ALL, gen_store, MO_TEUW)
-TRANS(st_w, ALL, gen_store, MO_TEUL)
-TRANS(st_d, 64, gen_store, MO_TEUQ)
-TRANS(ld_bu, ALL, gen_load, MO_UB)
-TRANS(ld_hu, ALL, gen_load, MO_TEUW)
-TRANS(ld_wu, 64, gen_load, MO_TEUL)
-TRANS(ldx_b, 64, gen_loadx, MO_SB)
-TRANS(ldx_h, 64, gen_loadx, MO_TESW)
-TRANS(ldx_w, 64, gen_loadx, MO_TESL)
-TRANS(ldx_d, 64, gen_loadx, MO_TEUQ)
-TRANS(stx_b, 64, gen_storex, MO_UB)
-TRANS(stx_h, 64, gen_storex, MO_TEUW)
-TRANS(stx_w, 64, gen_storex, MO_TEUL)
-TRANS(stx_d, 64, gen_storex, MO_TEUQ)
-TRANS(ldx_bu, 64, gen_loadx, MO_UB)
-TRANS(ldx_hu, 64, gen_loadx, MO_TEUW)
-TRANS(ldx_wu, 64, gen_loadx, MO_TEUL)
-TRANS(ldptr_w, 64, gen_ldptr, MO_TESL)
-TRANS(stptr_w, 64, gen_stptr, MO_TEUL)
-TRANS(ldptr_d, 64, gen_ldptr, MO_TEUQ)
-TRANS(stptr_d, 64, gen_stptr, MO_TEUQ)
-TRANS(ldgt_b, 64, gen_load_gt, MO_SB)
-TRANS(ldgt_h, 64, gen_load_gt, MO_TESW)
-TRANS(ldgt_w, 64, gen_load_gt, MO_TESL)
-TRANS(ldgt_d, 64, gen_load_gt, MO_TEUQ)
-TRANS(ldle_b, 64, gen_load_le, MO_SB)
-TRANS(ldle_h, 64, gen_load_le, MO_TESW)
-TRANS(ldle_w, 64, gen_load_le, MO_TESL)
-TRANS(ldle_d, 64, gen_load_le, MO_TEUQ)
-TRANS(stgt_b, 64, gen_store_gt, MO_UB)
-TRANS(stgt_h, 64, gen_store_gt, MO_TEUW)
-TRANS(stgt_w, 64, gen_store_gt, MO_TEUL)
-TRANS(stgt_d, 64, gen_store_gt, MO_TEUQ)
-TRANS(stle_b, 64, gen_store_le, MO_UB)
-TRANS(stle_h, 64, gen_store_le, MO_TEUW)
-TRANS(stle_w, 64, gen_store_le, MO_TEUL)
-TRANS(stle_d, 64, gen_store_le, MO_TEUQ)
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (c) 2021 Loongson Technology Corporation Limited
- *
- * LoongArch translation routines for the privileged instructions.
- */
-
-#include "cpu-csr.h"
-
-#ifdef CONFIG_USER_ONLY
-
-#define GEN_FALSE_TRANS(name) \
-static bool trans_##name(DisasContext *ctx, arg_##name *a) \
-{ \
- return false; \
-}
-
-GEN_FALSE_TRANS(csrrd)
-GEN_FALSE_TRANS(csrwr)
-GEN_FALSE_TRANS(csrxchg)
-GEN_FALSE_TRANS(iocsrrd_b)
-GEN_FALSE_TRANS(iocsrrd_h)
-GEN_FALSE_TRANS(iocsrrd_w)
-GEN_FALSE_TRANS(iocsrrd_d)
-GEN_FALSE_TRANS(iocsrwr_b)
-GEN_FALSE_TRANS(iocsrwr_h)
-GEN_FALSE_TRANS(iocsrwr_w)
-GEN_FALSE_TRANS(iocsrwr_d)
-GEN_FALSE_TRANS(tlbsrch)
-GEN_FALSE_TRANS(tlbrd)
-GEN_FALSE_TRANS(tlbwr)
-GEN_FALSE_TRANS(tlbfill)
-GEN_FALSE_TRANS(tlbclr)
-GEN_FALSE_TRANS(tlbflush)
-GEN_FALSE_TRANS(invtlb)
-GEN_FALSE_TRANS(cacop)
-GEN_FALSE_TRANS(ldpte)
-GEN_FALSE_TRANS(lddir)
-GEN_FALSE_TRANS(ertn)
-GEN_FALSE_TRANS(dbcl)
-GEN_FALSE_TRANS(idle)
-
-#else
-
-typedef void (*GenCSRRead)(TCGv dest, TCGv_ptr env);
-typedef void (*GenCSRWrite)(TCGv dest, TCGv_ptr env, TCGv src);
-
-typedef struct {
- int offset;
- int flags;
- GenCSRRead readfn;
- GenCSRWrite writefn;
-} CSRInfo;
-
-enum {
- CSRFL_READONLY = (1 << 0),
- CSRFL_EXITTB = (1 << 1),
- CSRFL_IO = (1 << 2),
-};
-
-#define CSR_OFF_FUNCS(NAME, FL, RD, WR) \
- [LOONGARCH_CSR_##NAME] = { \
- .offset = offsetof(CPULoongArchState, CSR_##NAME), \
- .flags = FL, .readfn = RD, .writefn = WR \
- }
-
-#define CSR_OFF_ARRAY(NAME, N) \
- [LOONGARCH_CSR_##NAME(N)] = { \
- .offset = offsetof(CPULoongArchState, CSR_##NAME[N]), \
- .flags = 0, .readfn = NULL, .writefn = NULL \
- }
-
-#define CSR_OFF_FLAGS(NAME, FL) \
- CSR_OFF_FUNCS(NAME, FL, NULL, NULL)
-
-#define CSR_OFF(NAME) \
- CSR_OFF_FLAGS(NAME, 0)
-
-static const CSRInfo csr_info[] = {
- CSR_OFF_FLAGS(CRMD, CSRFL_EXITTB),
- CSR_OFF(PRMD),
- CSR_OFF_FLAGS(EUEN, CSRFL_EXITTB),
- CSR_OFF_FLAGS(MISC, CSRFL_READONLY),
- CSR_OFF(ECFG),
- CSR_OFF_FUNCS(ESTAT, CSRFL_EXITTB, NULL, gen_helper_csrwr_estat),
- CSR_OFF(ERA),
- CSR_OFF(BADV),
- CSR_OFF_FLAGS(BADI, CSRFL_READONLY),
- CSR_OFF(EENTRY),
- CSR_OFF(TLBIDX),
- CSR_OFF(TLBEHI),
- CSR_OFF(TLBELO0),
- CSR_OFF(TLBELO1),
- CSR_OFF_FUNCS(ASID, CSRFL_EXITTB, NULL, gen_helper_csrwr_asid),
- CSR_OFF(PGDL),
- CSR_OFF(PGDH),
- CSR_OFF_FUNCS(PGD, CSRFL_READONLY, gen_helper_csrrd_pgd, NULL),
- CSR_OFF(PWCL),
- CSR_OFF(PWCH),
- CSR_OFF(STLBPS),
- CSR_OFF(RVACFG),
- CSR_OFF_FUNCS(CPUID, CSRFL_READONLY, gen_helper_csrrd_cpuid, NULL),
- CSR_OFF_FLAGS(PRCFG1, CSRFL_READONLY),
- CSR_OFF_FLAGS(PRCFG2, CSRFL_READONLY),
- CSR_OFF_FLAGS(PRCFG3, CSRFL_READONLY),
- CSR_OFF_ARRAY(SAVE, 0),
- CSR_OFF_ARRAY(SAVE, 1),
- CSR_OFF_ARRAY(SAVE, 2),
- CSR_OFF_ARRAY(SAVE, 3),
- CSR_OFF_ARRAY(SAVE, 4),
- CSR_OFF_ARRAY(SAVE, 5),
- CSR_OFF_ARRAY(SAVE, 6),
- CSR_OFF_ARRAY(SAVE, 7),
- CSR_OFF_ARRAY(SAVE, 8),
- CSR_OFF_ARRAY(SAVE, 9),
- CSR_OFF_ARRAY(SAVE, 10),
- CSR_OFF_ARRAY(SAVE, 11),
- CSR_OFF_ARRAY(SAVE, 12),
- CSR_OFF_ARRAY(SAVE, 13),
- CSR_OFF_ARRAY(SAVE, 14),
- CSR_OFF_ARRAY(SAVE, 15),
- CSR_OFF(TID),
- CSR_OFF_FUNCS(TCFG, CSRFL_IO, NULL, gen_helper_csrwr_tcfg),
- CSR_OFF_FUNCS(TVAL, CSRFL_READONLY | CSRFL_IO, gen_helper_csrrd_tval, NULL),
- CSR_OFF(CNTC),
- CSR_OFF_FUNCS(TICLR, CSRFL_IO, NULL, gen_helper_csrwr_ticlr),
- CSR_OFF(LLBCTL),
- CSR_OFF(IMPCTL1),
- CSR_OFF(IMPCTL2),
- CSR_OFF(TLBRENTRY),
- CSR_OFF(TLBRBADV),
- CSR_OFF(TLBRERA),
- CSR_OFF(TLBRSAVE),
- CSR_OFF(TLBRELO0),
- CSR_OFF(TLBRELO1),
- CSR_OFF(TLBREHI),
- CSR_OFF(TLBRPRMD),
- CSR_OFF(MERRCTL),
- CSR_OFF(MERRINFO1),
- CSR_OFF(MERRINFO2),
- CSR_OFF(MERRENTRY),
- CSR_OFF(MERRERA),
- CSR_OFF(MERRSAVE),
- CSR_OFF(CTAG),
- CSR_OFF_ARRAY(DMW, 0),
- CSR_OFF_ARRAY(DMW, 1),
- CSR_OFF_ARRAY(DMW, 2),
- CSR_OFF_ARRAY(DMW, 3),
- CSR_OFF(DBG),
- CSR_OFF(DERA),
- CSR_OFF(DSAVE),
-};
-
-static bool check_plv(DisasContext *ctx)
-{
- if (ctx->plv == MMU_PLV_USER) {
- generate_exception(ctx, EXCCODE_IPE);
- return true;
- }
- return false;
-}
-
-static const CSRInfo *get_csr(unsigned csr_num)
-{
- const CSRInfo *csr;
-
- if (csr_num >= ARRAY_SIZE(csr_info)) {
- return NULL;
- }
- csr = &csr_info[csr_num];
- if (csr->offset == 0) {
- return NULL;
- }
- return csr;
-}
-
-static bool check_csr_flags(DisasContext *ctx, const CSRInfo *csr, bool write)
-{
- if ((csr->flags & CSRFL_READONLY) && write) {
- return false;
- }
- if ((csr->flags & CSRFL_IO) && translator_io_start(&ctx->base)) {
- ctx->base.is_jmp = DISAS_EXIT_UPDATE;
- } else if ((csr->flags & CSRFL_EXITTB) && write) {
- ctx->base.is_jmp = DISAS_EXIT_UPDATE;
- }
- return true;
-}
-
-static bool trans_csrrd(DisasContext *ctx, arg_csrrd *a)
-{
- TCGv dest;
- const CSRInfo *csr;
-
- if (check_plv(ctx)) {
- return false;
- }
- csr = get_csr(a->csr);
- if (csr == NULL) {
- /* CSR is undefined: read as 0. */
- dest = tcg_constant_tl(0);
- } else {
- check_csr_flags(ctx, csr, false);
- dest = gpr_dst(ctx, a->rd, EXT_NONE);
- if (csr->readfn) {
- csr->readfn(dest, tcg_env);
- } else {
- tcg_gen_ld_tl(dest, tcg_env, csr->offset);
- }
- }
- gen_set_gpr(a->rd, dest, EXT_NONE);
- return true;
-}
-
-static bool trans_csrwr(DisasContext *ctx, arg_csrwr *a)
-{
- TCGv dest, src1;
- const CSRInfo *csr;
-
- if (check_plv(ctx)) {
- return false;
- }
- csr = get_csr(a->csr);
- if (csr == NULL) {
- /* CSR is undefined: write ignored, read old_value as 0. */
- gen_set_gpr(a->rd, tcg_constant_tl(0), EXT_NONE);
- return true;
- }
- if (!check_csr_flags(ctx, csr, true)) {
- /* CSR is readonly: trap. */
- return false;
- }
- src1 = gpr_src(ctx, a->rd, EXT_NONE);
- if (csr->writefn) {
- dest = gpr_dst(ctx, a->rd, EXT_NONE);
- csr->writefn(dest, tcg_env, src1);
- } else {
- dest = tcg_temp_new();
- tcg_gen_ld_tl(dest, tcg_env, csr->offset);
- tcg_gen_st_tl(src1, tcg_env, csr->offset);
- }
- gen_set_gpr(a->rd, dest, EXT_NONE);
- return true;
-}
-
-static bool trans_csrxchg(DisasContext *ctx, arg_csrxchg *a)
-{
- TCGv src1, mask, oldv, newv, temp;
- const CSRInfo *csr;
-
- if (check_plv(ctx)) {
- return false;
- }
- csr = get_csr(a->csr);
- if (csr == NULL) {
- /* CSR is undefined: write ignored, read old_value as 0. */
- gen_set_gpr(a->rd, tcg_constant_tl(0), EXT_NONE);
- return true;
- }
-
- if (!check_csr_flags(ctx, csr, true)) {
- /* CSR is readonly: trap. */
- return false;
- }
-
- /* So far only readonly csrs have readfn. */
- assert(csr->readfn == NULL);
-
- src1 = gpr_src(ctx, a->rd, EXT_NONE);
- mask = gpr_src(ctx, a->rj, EXT_NONE);
- oldv = tcg_temp_new();
- newv = tcg_temp_new();
- temp = tcg_temp_new();
-
- tcg_gen_ld_tl(oldv, tcg_env, csr->offset);
- tcg_gen_and_tl(newv, src1, mask);
- tcg_gen_andc_tl(temp, oldv, mask);
- tcg_gen_or_tl(newv, newv, temp);
-
- if (csr->writefn) {
- csr->writefn(oldv, tcg_env, newv);
- } else {
- tcg_gen_st_tl(newv, tcg_env, csr->offset);
- }
- gen_set_gpr(a->rd, oldv, EXT_NONE);
- return true;
-}
-
-static bool gen_iocsrrd(DisasContext *ctx, arg_rr *a,
- void (*func)(TCGv, TCGv_ptr, TCGv))
-{
- TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
- TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
-
- if (check_plv(ctx)) {
- return false;
- }
- func(dest, tcg_env, src1);
- return true;
-}
-
-static bool gen_iocsrwr(DisasContext *ctx, arg_rr *a,
- void (*func)(TCGv_ptr, TCGv, TCGv))
-{
- TCGv val = gpr_src(ctx, a->rd, EXT_NONE);
- TCGv addr = gpr_src(ctx, a->rj, EXT_NONE);
-
- if (check_plv(ctx)) {
- return false;
- }
- func(tcg_env, addr, val);
- return true;
-}
-
-TRANS(iocsrrd_b, IOCSR, gen_iocsrrd, gen_helper_iocsrrd_b)
-TRANS(iocsrrd_h, IOCSR, gen_iocsrrd, gen_helper_iocsrrd_h)
-TRANS(iocsrrd_w, IOCSR, gen_iocsrrd, gen_helper_iocsrrd_w)
-TRANS(iocsrrd_d, IOCSR, gen_iocsrrd, gen_helper_iocsrrd_d)
-TRANS(iocsrwr_b, IOCSR, gen_iocsrwr, gen_helper_iocsrwr_b)
-TRANS(iocsrwr_h, IOCSR, gen_iocsrwr, gen_helper_iocsrwr_h)
-TRANS(iocsrwr_w, IOCSR, gen_iocsrwr, gen_helper_iocsrwr_w)
-TRANS(iocsrwr_d, IOCSR, gen_iocsrwr, gen_helper_iocsrwr_d)
-
-static void check_mmu_idx(DisasContext *ctx)
-{
- if (ctx->mem_idx != MMU_IDX_DA) {
- tcg_gen_movi_tl(cpu_pc, ctx->base.pc_next + 4);
- ctx->base.is_jmp = DISAS_EXIT;
- }
-}
-
-static bool trans_tlbsrch(DisasContext *ctx, arg_tlbsrch *a)
-{
- if (check_plv(ctx)) {
- return false;
- }
- gen_helper_tlbsrch(tcg_env);
- return true;
-}
-
-static bool trans_tlbrd(DisasContext *ctx, arg_tlbrd *a)
-{
- if (check_plv(ctx)) {
- return false;
- }
- gen_helper_tlbrd(tcg_env);
- return true;
-}
-
-static bool trans_tlbwr(DisasContext *ctx, arg_tlbwr *a)
-{
- if (check_plv(ctx)) {
- return false;
- }
- gen_helper_tlbwr(tcg_env);
- check_mmu_idx(ctx);
- return true;
-}
-
-static bool trans_tlbfill(DisasContext *ctx, arg_tlbfill *a)
-{
- if (check_plv(ctx)) {
- return false;
- }
- gen_helper_tlbfill(tcg_env);
- check_mmu_idx(ctx);
- return true;
-}
-
-static bool trans_tlbclr(DisasContext *ctx, arg_tlbclr *a)
-{
- if (check_plv(ctx)) {
- return false;
- }
- gen_helper_tlbclr(tcg_env);
- check_mmu_idx(ctx);
- return true;
-}
-
-static bool trans_tlbflush(DisasContext *ctx, arg_tlbflush *a)
-{
- if (check_plv(ctx)) {
- return false;
- }
- gen_helper_tlbflush(tcg_env);
- check_mmu_idx(ctx);
- return true;
-}
-
-static bool trans_invtlb(DisasContext *ctx, arg_invtlb *a)
-{
- TCGv rj = gpr_src(ctx, a->rj, EXT_NONE);
- TCGv rk = gpr_src(ctx, a->rk, EXT_NONE);
-
- if (check_plv(ctx)) {
- return false;
- }
-
- switch (a->imm) {
- case 0:
- case 1:
- gen_helper_invtlb_all(tcg_env);
- break;
- case 2:
- gen_helper_invtlb_all_g(tcg_env, tcg_constant_i32(1));
- break;
- case 3:
- gen_helper_invtlb_all_g(tcg_env, tcg_constant_i32(0));
- break;
- case 4:
- gen_helper_invtlb_all_asid(tcg_env, rj);
- break;
- case 5:
- gen_helper_invtlb_page_asid(tcg_env, rj, rk);
- break;
- case 6:
- gen_helper_invtlb_page_asid_or_g(tcg_env, rj, rk);
- break;
- default:
- return false;
- }
- ctx->base.is_jmp = DISAS_STOP;
- return true;
-}
-
-static bool trans_cacop(DisasContext *ctx, arg_cacop *a)
-{
- /* Treat the cacop as a nop */
- if (check_plv(ctx)) {
- return false;
- }
- return true;
-}
-
-static bool trans_ldpte(DisasContext *ctx, arg_ldpte *a)
-{
- TCGv_i32 mem_idx = tcg_constant_i32(ctx->mem_idx);
- TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
-
- if (!avail_LSPW(ctx)) {
- return true;
- }
-
- if (check_plv(ctx)) {
- return false;
- }
- gen_helper_ldpte(tcg_env, src1, tcg_constant_tl(a->imm), mem_idx);
- return true;
-}
-
-static bool trans_lddir(DisasContext *ctx, arg_lddir *a)
-{
- TCGv_i32 mem_idx = tcg_constant_i32(ctx->mem_idx);
- TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
- TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
-
- if (!avail_LSPW(ctx)) {
- return true;
- }
-
- if (check_plv(ctx)) {
- return false;
- }
- gen_helper_lddir(dest, tcg_env, src, tcg_constant_tl(a->imm), mem_idx);
- return true;
-}
-
-static bool trans_ertn(DisasContext *ctx, arg_ertn *a)
-{
- if (check_plv(ctx)) {
- return false;
- }
- gen_helper_ertn(tcg_env);
- ctx->base.is_jmp = DISAS_EXIT;
- return true;
-}
-
-static bool trans_dbcl(DisasContext *ctx, arg_dbcl *a)
-{
- if (check_plv(ctx)) {
- return false;
- }
- generate_exception(ctx, EXCCODE_DBP);
- return true;
-}
-
-static bool trans_idle(DisasContext *ctx, arg_idle *a)
-{
- if (check_plv(ctx)) {
- return false;
- }
-
- tcg_gen_movi_tl(cpu_pc, ctx->base.pc_next + 4);
- gen_helper_idle(tcg_env);
- ctx->base.is_jmp = DISAS_NORETURN;
- return true;
-}
-#endif
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (c) 2021 Loongson Technology Corporation Limited
- */
-
-static void gen_sll_w(TCGv dest, TCGv src1, TCGv src2)
-{
- TCGv t0 = tcg_temp_new();
- tcg_gen_andi_tl(t0, src2, 0x1f);
- tcg_gen_shl_tl(dest, src1, t0);
-}
-
-static void gen_srl_w(TCGv dest, TCGv src1, TCGv src2)
-{
- TCGv t0 = tcg_temp_new();
- tcg_gen_andi_tl(t0, src2, 0x1f);
- tcg_gen_shr_tl(dest, src1, t0);
-}
-
-static void gen_sra_w(TCGv dest, TCGv src1, TCGv src2)
-{
- TCGv t0 = tcg_temp_new();
- tcg_gen_andi_tl(t0, src2, 0x1f);
- tcg_gen_sar_tl(dest, src1, t0);
-}
-
-static void gen_sll_d(TCGv dest, TCGv src1, TCGv src2)
-{
- TCGv t0 = tcg_temp_new();
- tcg_gen_andi_tl(t0, src2, 0x3f);
- tcg_gen_shl_tl(dest, src1, t0);
-}
-
-static void gen_srl_d(TCGv dest, TCGv src1, TCGv src2)
-{
- TCGv t0 = tcg_temp_new();
- tcg_gen_andi_tl(t0, src2, 0x3f);
- tcg_gen_shr_tl(dest, src1, t0);
-}
-
-static void gen_sra_d(TCGv dest, TCGv src1, TCGv src2)
-{
- TCGv t0 = tcg_temp_new();
- tcg_gen_andi_tl(t0, src2, 0x3f);
- tcg_gen_sar_tl(dest, src1, t0);
-}
-
-static void gen_rotr_w(TCGv dest, TCGv src1, TCGv src2)
-{
- TCGv_i32 t1 = tcg_temp_new_i32();
- TCGv_i32 t2 = tcg_temp_new_i32();
- TCGv t0 = tcg_temp_new();
-
- tcg_gen_andi_tl(t0, src2, 0x1f);
-
- tcg_gen_trunc_tl_i32(t1, src1);
- tcg_gen_trunc_tl_i32(t2, t0);
-
- tcg_gen_rotr_i32(t1, t1, t2);
- tcg_gen_ext_i32_tl(dest, t1);
-}
-
-static void gen_rotr_d(TCGv dest, TCGv src1, TCGv src2)
-{
- TCGv t0 = tcg_temp_new();
- tcg_gen_andi_tl(t0, src2, 0x3f);
- tcg_gen_rotr_tl(dest, src1, t0);
-}
-
-static bool trans_srai_w(DisasContext *ctx, arg_srai_w *a)
-{
- TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
- TCGv src1 = gpr_src(ctx, a->rj, EXT_ZERO);
-
- if (!avail_64(ctx)) {
- return false;
- }
-
- tcg_gen_sextract_tl(dest, src1, a->imm, 32 - a->imm);
- gen_set_gpr(a->rd, dest, EXT_NONE);
-
- return true;
-}
-
-TRANS(sll_w, ALL, gen_rrr, EXT_ZERO, EXT_NONE, EXT_SIGN, gen_sll_w)
-TRANS(srl_w, ALL, gen_rrr, EXT_ZERO, EXT_NONE, EXT_SIGN, gen_srl_w)
-TRANS(sra_w, ALL, gen_rrr, EXT_SIGN, EXT_NONE, EXT_SIGN, gen_sra_w)
-TRANS(sll_d, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_sll_d)
-TRANS(srl_d, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_srl_d)
-TRANS(sra_d, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_sra_d)
-TRANS(rotr_w, 64, gen_rrr, EXT_ZERO, EXT_NONE, EXT_SIGN, gen_rotr_w)
-TRANS(rotr_d, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_rotr_d)
-TRANS(slli_w, ALL, gen_rri_c, EXT_NONE, EXT_SIGN, tcg_gen_shli_tl)
-TRANS(slli_d, 64, gen_rri_c, EXT_NONE, EXT_NONE, tcg_gen_shli_tl)
-TRANS(srli_w, ALL, gen_rri_c, EXT_ZERO, EXT_SIGN, tcg_gen_shri_tl)
-TRANS(srli_d, 64, gen_rri_c, EXT_NONE, EXT_NONE, tcg_gen_shri_tl)
-TRANS(srai_d, 64, gen_rri_c, EXT_NONE, EXT_NONE, tcg_gen_sari_tl)
-TRANS(rotri_w, 64, gen_rri_v, EXT_NONE, EXT_NONE, gen_rotr_w)
-TRANS(rotri_d, 64, gen_rri_c, EXT_NONE, EXT_NONE, tcg_gen_rotri_tl)
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * LoongArch vector translate functions
- * Copyright (c) 2022-2023 Loongson Technology Corporation Limited
- */
-
-static bool check_vec(DisasContext *ctx, uint32_t oprsz)
-{
- if ((oprsz == 16) && ((ctx->base.tb->flags & HW_FLAGS_EUEN_SXE) == 0)) {
- generate_exception(ctx, EXCCODE_SXD);
- return false;
- }
-
- if ((oprsz == 32) && ((ctx->base.tb->flags & HW_FLAGS_EUEN_ASXE) == 0)) {
- generate_exception(ctx, EXCCODE_ASXD);
- return false;
- }
-
- return true;
-}
-
-static bool gen_vvvv_ptr_vl(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz,
- gen_helper_gvec_4_ptr *fn)
-{
- if (!check_vec(ctx, oprsz)) {
- return true;
- }
-
- tcg_gen_gvec_4_ptr(vec_full_offset(a->vd),
- vec_full_offset(a->vj),
- vec_full_offset(a->vk),
- vec_full_offset(a->va),
- tcg_env,
- oprsz, ctx->vl / 8, 0, fn);
- return true;
-}
-
-static bool gen_vvvv_ptr(DisasContext *ctx, arg_vvvv *a,
- gen_helper_gvec_4_ptr *fn)
-{
- return gen_vvvv_ptr_vl(ctx, a, 16, fn);
-}
-
-static bool gen_xxxx_ptr(DisasContext *ctx, arg_vvvv *a,
- gen_helper_gvec_4_ptr *fn)
-{
- return gen_vvvv_ptr_vl(ctx, a, 32, fn);
-}
-
-static bool gen_vvvv_vl(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz,
- gen_helper_gvec_4 *fn)
-{
- if (!check_vec(ctx, oprsz)) {
- return true;
- }
-
- tcg_gen_gvec_4_ool(vec_full_offset(a->vd),
- vec_full_offset(a->vj),
- vec_full_offset(a->vk),
- vec_full_offset(a->va),
- oprsz, ctx->vl / 8, 0, fn);
- return true;
-}
-
-static bool gen_vvvv(DisasContext *ctx, arg_vvvv *a,
- gen_helper_gvec_4 *fn)
-{
- return gen_vvvv_vl(ctx, a, 16, fn);
-}
-
-static bool gen_xxxx(DisasContext *ctx, arg_vvvv *a,
- gen_helper_gvec_4 *fn)
-{
- return gen_vvvv_vl(ctx, a, 32, fn);
-}
-
-static bool gen_vvv_ptr_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz,
- gen_helper_gvec_3_ptr *fn)
-{
- if (!check_vec(ctx, oprsz)) {
- return true;
- }
- tcg_gen_gvec_3_ptr(vec_full_offset(a->vd),
- vec_full_offset(a->vj),
- vec_full_offset(a->vk),
- tcg_env,
- oprsz, ctx->vl / 8, 0, fn);
- return true;
-}
-
-static bool gen_vvv_ptr(DisasContext *ctx, arg_vvv *a,
- gen_helper_gvec_3_ptr *fn)
-{
- return gen_vvv_ptr_vl(ctx, a, 16, fn);
-}
-
-static bool gen_xxx_ptr(DisasContext *ctx, arg_vvv *a,
- gen_helper_gvec_3_ptr *fn)
-{
- return gen_vvv_ptr_vl(ctx, a, 32, fn);
-}
-
-static bool gen_vvv_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz,
- gen_helper_gvec_3 *fn)
-{
- if (!check_vec(ctx, oprsz)) {
- return true;
- }
-
- tcg_gen_gvec_3_ool(vec_full_offset(a->vd),
- vec_full_offset(a->vj),
- vec_full_offset(a->vk),
- oprsz, ctx->vl / 8, 0, fn);
- return true;
-}
-
-static bool gen_vvv(DisasContext *ctx, arg_vvv *a, gen_helper_gvec_3 *fn)
-{
- return gen_vvv_vl(ctx, a, 16, fn);
-}
-
-static bool gen_xxx(DisasContext *ctx, arg_vvv *a, gen_helper_gvec_3 *fn)
-{
- return gen_vvv_vl(ctx, a, 32, fn);
-}
-
-static bool gen_vv_ptr_vl(DisasContext *ctx, arg_vv *a, uint32_t oprsz,
- gen_helper_gvec_2_ptr *fn)
-{
- if (!check_vec(ctx, oprsz)) {
- return true;
- }
-
- tcg_gen_gvec_2_ptr(vec_full_offset(a->vd),
- vec_full_offset(a->vj),
- tcg_env,
- oprsz, ctx->vl / 8, 0, fn);
- return true;
-}
-
-static bool gen_vv_ptr(DisasContext *ctx, arg_vv *a,
- gen_helper_gvec_2_ptr *fn)
-{
- return gen_vv_ptr_vl(ctx, a, 16, fn);
-}
-
-static bool gen_xx_ptr(DisasContext *ctx, arg_vv *a,
- gen_helper_gvec_2_ptr *fn)
-{
- return gen_vv_ptr_vl(ctx, a, 32, fn);
-}
-
-static bool gen_vv_vl(DisasContext *ctx, arg_vv *a, uint32_t oprsz,
- gen_helper_gvec_2 *fn)
-{
- if (!check_vec(ctx, oprsz)) {
- return true;
- }
-
- tcg_gen_gvec_2_ool(vec_full_offset(a->vd),
- vec_full_offset(a->vj),
- oprsz, ctx->vl / 8, 0, fn);
- return true;
-}
-
-static bool gen_vv(DisasContext *ctx, arg_vv *a, gen_helper_gvec_2 *fn)
-{
- return gen_vv_vl(ctx, a, 16, fn);
-}
-
-static bool gen_xx(DisasContext *ctx, arg_vv *a, gen_helper_gvec_2 *fn)
-{
- return gen_vv_vl(ctx, a, 32, fn);
-}
-
-static bool gen_vv_i_vl(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz,
- gen_helper_gvec_2i *fn)
-{
- if (!check_vec(ctx, oprsz)) {
- return true;
- }
-
- tcg_gen_gvec_2i_ool(vec_full_offset(a->vd),
- vec_full_offset(a->vj),
- tcg_constant_i64(a->imm),
- oprsz, ctx->vl / 8, 0, fn);
- return true;
-}
-
-static bool gen_vv_i(DisasContext *ctx, arg_vv_i *a, gen_helper_gvec_2i *fn)
-{
- return gen_vv_i_vl(ctx, a, 16, fn);
-}
-
-static bool gen_xx_i(DisasContext *ctx, arg_vv_i *a, gen_helper_gvec_2i *fn)
-{
- return gen_vv_i_vl(ctx, a, 32, fn);
-}
-
-static bool gen_cv_vl(DisasContext *ctx, arg_cv *a, uint32_t sz,
- void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32))
-{
- if (!check_vec(ctx, sz)) {
- return true;
- }
-
- TCGv_i32 vj = tcg_constant_i32(a->vj);
- TCGv_i32 cd = tcg_constant_i32(a->cd);
- TCGv_i32 oprsz = tcg_constant_i32(sz);
-
- func(tcg_env, oprsz, cd, vj);
- return true;
-}
-
-static bool gen_cv(DisasContext *ctx, arg_cv *a,
- void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32))
-{
- return gen_cv_vl(ctx, a, 16, func);
-}
-
-static bool gen_cx(DisasContext *ctx, arg_cv *a,
- void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32))
-{
- return gen_cv_vl(ctx, a, 32, func);
-}
-
-static bool gvec_vvv_vl(DisasContext *ctx, arg_vvv *a,
- uint32_t oprsz, MemOp mop,
- void (*func)(unsigned, uint32_t, uint32_t,
- uint32_t, uint32_t, uint32_t))
-{
- uint32_t vd_ofs = vec_full_offset(a->vd);
- uint32_t vj_ofs = vec_full_offset(a->vj);
- uint32_t vk_ofs = vec_full_offset(a->vk);
-
- if (!check_vec(ctx, oprsz)) {
- return true;
- }
-
- func(mop, vd_ofs, vj_ofs, vk_ofs, oprsz, ctx->vl / 8);
- return true;
-}
-
-static bool gvec_vvv(DisasContext *ctx, arg_vvv *a, MemOp mop,
- void (*func)(unsigned, uint32_t, uint32_t,
- uint32_t, uint32_t, uint32_t))
-{
- return gvec_vvv_vl(ctx, a, 16, mop, func);
-}
-
-static bool gvec_xxx(DisasContext *ctx, arg_vvv *a, MemOp mop,
- void (*func)(unsigned, uint32_t, uint32_t,
- uint32_t, uint32_t, uint32_t))
-{
- return gvec_vvv_vl(ctx, a, 32, mop, func);
-}
-
-static bool gvec_vv_vl(DisasContext *ctx, arg_vv *a,
- uint32_t oprsz, MemOp mop,
- void (*func)(unsigned, uint32_t, uint32_t,
- uint32_t, uint32_t))
-{
- uint32_t vd_ofs = vec_full_offset(a->vd);
- uint32_t vj_ofs = vec_full_offset(a->vj);
-
- if (!check_vec(ctx, oprsz)) {
- return true;
- }
-
- func(mop, vd_ofs, vj_ofs, oprsz, ctx->vl / 8);
- return true;
-}
-
-static bool gvec_vv(DisasContext *ctx, arg_vv *a, MemOp mop,
- void (*func)(unsigned, uint32_t, uint32_t,
- uint32_t, uint32_t))
-{
- return gvec_vv_vl(ctx, a, 16, mop, func);
-}
-
-static bool gvec_xx(DisasContext *ctx, arg_vv *a, MemOp mop,
- void (*func)(unsigned, uint32_t, uint32_t,
- uint32_t, uint32_t))
-{
- return gvec_vv_vl(ctx, a, 32, mop, func);
-}
-
-static bool gvec_vv_i_vl(DisasContext *ctx, arg_vv_i *a,
- uint32_t oprsz, MemOp mop,
- void (*func)(unsigned, uint32_t, uint32_t,
- int64_t, uint32_t, uint32_t))
-{
- uint32_t vd_ofs = vec_full_offset(a->vd);
- uint32_t vj_ofs = vec_full_offset(a->vj);
-
- if (!check_vec(ctx, oprsz)) {
- return true;
- }
-
- func(mop, vd_ofs, vj_ofs, a->imm, oprsz, ctx->vl / 8);
- return true;
-}
-
-static bool gvec_vv_i(DisasContext *ctx, arg_vv_i *a, MemOp mop,
- void (*func)(unsigned, uint32_t, uint32_t,
- int64_t, uint32_t, uint32_t))
-{
- return gvec_vv_i_vl(ctx, a, 16, mop, func);
-}
-
-static bool gvec_xx_i(DisasContext *ctx, arg_vv_i *a, MemOp mop,
- void (*func)(unsigned, uint32_t, uint32_t,
- int64_t, uint32_t, uint32_t))
-{
- return gvec_vv_i_vl(ctx, a, 32, mop, func);
-}
-
-static bool gvec_subi_vl(DisasContext *ctx, arg_vv_i *a,
- uint32_t oprsz, MemOp mop)
-{
- uint32_t vd_ofs = vec_full_offset(a->vd);
- uint32_t vj_ofs = vec_full_offset(a->vj);
-
- if (!check_vec(ctx, oprsz)) {
- return true;
- }
-
- tcg_gen_gvec_addi(mop, vd_ofs, vj_ofs, -a->imm, oprsz, ctx->vl / 8);
- return true;
-}
-
-static bool gvec_subi(DisasContext *ctx, arg_vv_i *a, MemOp mop)
-{
- return gvec_subi_vl(ctx, a, 16, mop);
-}
-
-static bool gvec_xsubi(DisasContext *ctx, arg_vv_i *a, MemOp mop)
-{
- return gvec_subi_vl(ctx, a, 32, mop);
-}
-
-TRANS(vadd_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_add)
-TRANS(vadd_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_add)
-TRANS(vadd_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_add)
-TRANS(vadd_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_add)
-TRANS(xvadd_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_add)
-TRANS(xvadd_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_add)
-TRANS(xvadd_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_add)
-TRANS(xvadd_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_add)
-
-static bool gen_vaddsub_q_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz,
- void (*func)(TCGv_i64, TCGv_i64, TCGv_i64,
- TCGv_i64, TCGv_i64, TCGv_i64))
-{
- int i;
- TCGv_i64 rh, rl, ah, al, bh, bl;
-
- if (!check_vec(ctx, oprsz)) {
- return true;
- }
-
- rh = tcg_temp_new_i64();
- rl = tcg_temp_new_i64();
- ah = tcg_temp_new_i64();
- al = tcg_temp_new_i64();
- bh = tcg_temp_new_i64();
- bl = tcg_temp_new_i64();
-
- for (i = 0; i < oprsz / 16; i++) {
- get_vreg64(ah, a->vj, 1 + i * 2);
- get_vreg64(al, a->vj, i * 2);
- get_vreg64(bh, a->vk, 1 + i * 2);
- get_vreg64(bl, a->vk, i * 2);
-
- func(rl, rh, al, ah, bl, bh);
-
- set_vreg64(rh, a->vd, 1 + i * 2);
- set_vreg64(rl, a->vd, i * 2);
- }
- return true;
-}
-
-static bool gen_vaddsub_q(DisasContext *ctx, arg_vvv *a,
- void (*func)(TCGv_i64, TCGv_i64, TCGv_i64,
- TCGv_i64, TCGv_i64, TCGv_i64))
-{
- return gen_vaddsub_q_vl(ctx, a, 16, func);
-}
-
-static bool gen_xvaddsub_q(DisasContext *ctx, arg_vvv *a,
- void (*func)(TCGv_i64, TCGv_i64, TCGv_i64,
- TCGv_i64, TCGv_i64, TCGv_i64))
-{
- return gen_vaddsub_q_vl(ctx, a, 32, func);
-}
-
-TRANS(vsub_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_sub)
-TRANS(vsub_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_sub)
-TRANS(vsub_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_sub)
-TRANS(vsub_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_sub)
-TRANS(xvsub_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_sub)
-TRANS(xvsub_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_sub)
-TRANS(xvsub_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_sub)
-TRANS(xvsub_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_sub)
-
-TRANS(vadd_q, LSX, gen_vaddsub_q, tcg_gen_add2_i64)
-TRANS(vsub_q, LSX, gen_vaddsub_q, tcg_gen_sub2_i64)
-TRANS(xvadd_q, LASX, gen_xvaddsub_q, tcg_gen_add2_i64)
-TRANS(xvsub_q, LASX, gen_xvaddsub_q, tcg_gen_sub2_i64)
-
-TRANS(vaddi_bu, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_addi)
-TRANS(vaddi_hu, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_addi)
-TRANS(vaddi_wu, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_addi)
-TRANS(vaddi_du, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_addi)
-TRANS(vsubi_bu, LSX, gvec_subi, MO_8)
-TRANS(vsubi_hu, LSX, gvec_subi, MO_16)
-TRANS(vsubi_wu, LSX, gvec_subi, MO_32)
-TRANS(vsubi_du, LSX, gvec_subi, MO_64)
-TRANS(xvaddi_bu, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_addi)
-TRANS(xvaddi_hu, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_addi)
-TRANS(xvaddi_wu, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_addi)
-TRANS(xvaddi_du, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_addi)
-TRANS(xvsubi_bu, LASX, gvec_xsubi, MO_8)
-TRANS(xvsubi_hu, LASX, gvec_xsubi, MO_16)
-TRANS(xvsubi_wu, LASX, gvec_xsubi, MO_32)
-TRANS(xvsubi_du, LASX, gvec_xsubi, MO_64)
-
-TRANS(vneg_b, LSX, gvec_vv, MO_8, tcg_gen_gvec_neg)
-TRANS(vneg_h, LSX, gvec_vv, MO_16, tcg_gen_gvec_neg)
-TRANS(vneg_w, LSX, gvec_vv, MO_32, tcg_gen_gvec_neg)
-TRANS(vneg_d, LSX, gvec_vv, MO_64, tcg_gen_gvec_neg)
-TRANS(xvneg_b, LASX, gvec_xx, MO_8, tcg_gen_gvec_neg)
-TRANS(xvneg_h, LASX, gvec_xx, MO_16, tcg_gen_gvec_neg)
-TRANS(xvneg_w, LASX, gvec_xx, MO_32, tcg_gen_gvec_neg)
-TRANS(xvneg_d, LASX, gvec_xx, MO_64, tcg_gen_gvec_neg)
-
-TRANS(vsadd_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_ssadd)
-TRANS(vsadd_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_ssadd)
-TRANS(vsadd_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_ssadd)
-TRANS(vsadd_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_ssadd)
-TRANS(vsadd_bu, LSX, gvec_vvv, MO_8, tcg_gen_gvec_usadd)
-TRANS(vsadd_hu, LSX, gvec_vvv, MO_16, tcg_gen_gvec_usadd)
-TRANS(vsadd_wu, LSX, gvec_vvv, MO_32, tcg_gen_gvec_usadd)
-TRANS(vsadd_du, LSX, gvec_vvv, MO_64, tcg_gen_gvec_usadd)
-TRANS(vssub_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_sssub)
-TRANS(vssub_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_sssub)
-TRANS(vssub_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_sssub)
-TRANS(vssub_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_sssub)
-TRANS(vssub_bu, LSX, gvec_vvv, MO_8, tcg_gen_gvec_ussub)
-TRANS(vssub_hu, LSX, gvec_vvv, MO_16, tcg_gen_gvec_ussub)
-TRANS(vssub_wu, LSX, gvec_vvv, MO_32, tcg_gen_gvec_ussub)
-TRANS(vssub_du, LSX, gvec_vvv, MO_64, tcg_gen_gvec_ussub)
-
-TRANS(xvsadd_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_ssadd)
-TRANS(xvsadd_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_ssadd)
-TRANS(xvsadd_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_ssadd)
-TRANS(xvsadd_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_ssadd)
-TRANS(xvsadd_bu, LASX, gvec_xxx, MO_8, tcg_gen_gvec_usadd)
-TRANS(xvsadd_hu, LASX, gvec_xxx, MO_16, tcg_gen_gvec_usadd)
-TRANS(xvsadd_wu, LASX, gvec_xxx, MO_32, tcg_gen_gvec_usadd)
-TRANS(xvsadd_du, LASX, gvec_xxx, MO_64, tcg_gen_gvec_usadd)
-TRANS(xvssub_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_sssub)
-TRANS(xvssub_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_sssub)
-TRANS(xvssub_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_sssub)
-TRANS(xvssub_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_sssub)
-TRANS(xvssub_bu, LASX, gvec_xxx, MO_8, tcg_gen_gvec_ussub)
-TRANS(xvssub_hu, LASX, gvec_xxx, MO_16, tcg_gen_gvec_ussub)
-TRANS(xvssub_wu, LASX, gvec_xxx, MO_32, tcg_gen_gvec_ussub)
-TRANS(xvssub_du, LASX, gvec_xxx, MO_64, tcg_gen_gvec_ussub)
-
-TRANS(vhaddw_h_b, LSX, gen_vvv, gen_helper_vhaddw_h_b)
-TRANS(vhaddw_w_h, LSX, gen_vvv, gen_helper_vhaddw_w_h)
-TRANS(vhaddw_d_w, LSX, gen_vvv, gen_helper_vhaddw_d_w)
-TRANS(vhaddw_q_d, LSX, gen_vvv, gen_helper_vhaddw_q_d)
-TRANS(vhaddw_hu_bu, LSX, gen_vvv, gen_helper_vhaddw_hu_bu)
-TRANS(vhaddw_wu_hu, LSX, gen_vvv, gen_helper_vhaddw_wu_hu)
-TRANS(vhaddw_du_wu, LSX, gen_vvv, gen_helper_vhaddw_du_wu)
-TRANS(vhaddw_qu_du, LSX, gen_vvv, gen_helper_vhaddw_qu_du)
-TRANS(vhsubw_h_b, LSX, gen_vvv, gen_helper_vhsubw_h_b)
-TRANS(vhsubw_w_h, LSX, gen_vvv, gen_helper_vhsubw_w_h)
-TRANS(vhsubw_d_w, LSX, gen_vvv, gen_helper_vhsubw_d_w)
-TRANS(vhsubw_q_d, LSX, gen_vvv, gen_helper_vhsubw_q_d)
-TRANS(vhsubw_hu_bu, LSX, gen_vvv, gen_helper_vhsubw_hu_bu)
-TRANS(vhsubw_wu_hu, LSX, gen_vvv, gen_helper_vhsubw_wu_hu)
-TRANS(vhsubw_du_wu, LSX, gen_vvv, gen_helper_vhsubw_du_wu)
-TRANS(vhsubw_qu_du, LSX, gen_vvv, gen_helper_vhsubw_qu_du)
-
-TRANS(xvhaddw_h_b, LASX, gen_xxx, gen_helper_vhaddw_h_b)
-TRANS(xvhaddw_w_h, LASX, gen_xxx, gen_helper_vhaddw_w_h)
-TRANS(xvhaddw_d_w, LASX, gen_xxx, gen_helper_vhaddw_d_w)
-TRANS(xvhaddw_q_d, LASX, gen_xxx, gen_helper_vhaddw_q_d)
-TRANS(xvhaddw_hu_bu, LASX, gen_xxx, gen_helper_vhaddw_hu_bu)
-TRANS(xvhaddw_wu_hu, LASX, gen_xxx, gen_helper_vhaddw_wu_hu)
-TRANS(xvhaddw_du_wu, LASX, gen_xxx, gen_helper_vhaddw_du_wu)
-TRANS(xvhaddw_qu_du, LASX, gen_xxx, gen_helper_vhaddw_qu_du)
-TRANS(xvhsubw_h_b, LASX, gen_xxx, gen_helper_vhsubw_h_b)
-TRANS(xvhsubw_w_h, LASX, gen_xxx, gen_helper_vhsubw_w_h)
-TRANS(xvhsubw_d_w, LASX, gen_xxx, gen_helper_vhsubw_d_w)
-TRANS(xvhsubw_q_d, LASX, gen_xxx, gen_helper_vhsubw_q_d)
-TRANS(xvhsubw_hu_bu, LASX, gen_xxx, gen_helper_vhsubw_hu_bu)
-TRANS(xvhsubw_wu_hu, LASX, gen_xxx, gen_helper_vhsubw_wu_hu)
-TRANS(xvhsubw_du_wu, LASX, gen_xxx, gen_helper_vhsubw_du_wu)
-TRANS(xvhsubw_qu_du, LASX, gen_xxx, gen_helper_vhsubw_qu_du)
-
-static void gen_vaddwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
-{
- TCGv_vec t1, t2;
-
- int halfbits = 4 << vece;
-
- t1 = tcg_temp_new_vec_matching(a);
- t2 = tcg_temp_new_vec_matching(b);
-
- /* Sign-extend the even elements from a */
- tcg_gen_shli_vec(vece, t1, a, halfbits);
- tcg_gen_sari_vec(vece, t1, t1, halfbits);
-
- /* Sign-extend the even elements from b */
- tcg_gen_shli_vec(vece, t2, b, halfbits);
- tcg_gen_sari_vec(vece, t2, t2, halfbits);
-
- tcg_gen_add_vec(vece, t, t1, t2);
-}
-
-static void gen_vaddwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
-{
- TCGv_i32 t1, t2;
-
- t1 = tcg_temp_new_i32();
- t2 = tcg_temp_new_i32();
- tcg_gen_ext16s_i32(t1, a);
- tcg_gen_ext16s_i32(t2, b);
- tcg_gen_add_i32(t, t1, t2);
-}
-
-static void gen_vaddwev_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
-{
- TCGv_i64 t1, t2;
-
- t1 = tcg_temp_new_i64();
- t2 = tcg_temp_new_i64();
- tcg_gen_ext32s_i64(t1, a);
- tcg_gen_ext32s_i64(t2, b);
- tcg_gen_add_i64(t, t1, t2);
-}
-
-static void do_vaddwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
- uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
- };
- static const GVecGen3 op[4] = {
- {
- .fniv = gen_vaddwev_s,
- .fno = gen_helper_vaddwev_h_b,
- .opt_opc = vecop_list,
- .vece = MO_16
- },
- {
- .fni4 = gen_vaddwev_w_h,
- .fniv = gen_vaddwev_s,
- .fno = gen_helper_vaddwev_w_h,
- .opt_opc = vecop_list,
- .vece = MO_32
- },
- {
- .fni8 = gen_vaddwev_d_w,
- .fniv = gen_vaddwev_s,
- .fno = gen_helper_vaddwev_d_w,
- .opt_opc = vecop_list,
- .vece = MO_64
- },
- {
- .fno = gen_helper_vaddwev_q_d,
- .vece = MO_128
- },
- };
-
- tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
-}
-
-TRANS(vaddwev_h_b, LSX, gvec_vvv, MO_8, do_vaddwev_s)
-TRANS(vaddwev_w_h, LSX, gvec_vvv, MO_16, do_vaddwev_s)
-TRANS(vaddwev_d_w, LSX, gvec_vvv, MO_32, do_vaddwev_s)
-TRANS(vaddwev_q_d, LSX, gvec_vvv, MO_64, do_vaddwev_s)
-TRANS(xvaddwev_h_b, LASX, gvec_xxx, MO_8, do_vaddwev_s)
-TRANS(xvaddwev_w_h, LASX, gvec_xxx, MO_16, do_vaddwev_s)
-TRANS(xvaddwev_d_w, LASX, gvec_xxx, MO_32, do_vaddwev_s)
-TRANS(xvaddwev_q_d, LASX, gvec_xxx, MO_64, do_vaddwev_s)
-
-static void gen_vaddwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
-{
- TCGv_i32 t1, t2;
-
- t1 = tcg_temp_new_i32();
- t2 = tcg_temp_new_i32();
- tcg_gen_sari_i32(t1, a, 16);
- tcg_gen_sari_i32(t2, b, 16);
- tcg_gen_add_i32(t, t1, t2);
-}
-
-static void gen_vaddwod_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
-{
- TCGv_i64 t1, t2;
-
- t1 = tcg_temp_new_i64();
- t2 = tcg_temp_new_i64();
- tcg_gen_sari_i64(t1, a, 32);
- tcg_gen_sari_i64(t2, b, 32);
- tcg_gen_add_i64(t, t1, t2);
-}
-
-static void gen_vaddwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
-{
- TCGv_vec t1, t2;
-
- int halfbits = 4 << vece;
-
- t1 = tcg_temp_new_vec_matching(a);
- t2 = tcg_temp_new_vec_matching(b);
-
- /* Sign-extend the odd elements of each vector */
- tcg_gen_sari_vec(vece, t1, a, halfbits);
- tcg_gen_sari_vec(vece, t2, b, halfbits);
-
- tcg_gen_add_vec(vece, t, t1, t2);
-}
-
-static void do_vaddwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
- uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_sari_vec, INDEX_op_add_vec, 0
- };
- static const GVecGen3 op[4] = {
- {
- .fniv = gen_vaddwod_s,
- .fno = gen_helper_vaddwod_h_b,
- .opt_opc = vecop_list,
- .vece = MO_16
- },
- {
- .fni4 = gen_vaddwod_w_h,
- .fniv = gen_vaddwod_s,
- .fno = gen_helper_vaddwod_w_h,
- .opt_opc = vecop_list,
- .vece = MO_32
- },
- {
- .fni8 = gen_vaddwod_d_w,
- .fniv = gen_vaddwod_s,
- .fno = gen_helper_vaddwod_d_w,
- .opt_opc = vecop_list,
- .vece = MO_64
- },
- {
- .fno = gen_helper_vaddwod_q_d,
- .vece = MO_128
- },
- };
-
- tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
-}
-
-TRANS(vaddwod_h_b, LSX, gvec_vvv, MO_8, do_vaddwod_s)
-TRANS(vaddwod_w_h, LSX, gvec_vvv, MO_16, do_vaddwod_s)
-TRANS(vaddwod_d_w, LSX, gvec_vvv, MO_32, do_vaddwod_s)
-TRANS(vaddwod_q_d, LSX, gvec_vvv, MO_64, do_vaddwod_s)
-TRANS(xvaddwod_h_b, LASX, gvec_xxx, MO_8, do_vaddwod_s)
-TRANS(xvaddwod_w_h, LASX, gvec_xxx, MO_16, do_vaddwod_s)
-TRANS(xvaddwod_d_w, LASX, gvec_xxx, MO_32, do_vaddwod_s)
-TRANS(xvaddwod_q_d, LASX, gvec_xxx, MO_64, do_vaddwod_s)
-
-static void gen_vsubwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
-{
- TCGv_vec t1, t2;
-
- int halfbits = 4 << vece;
-
- t1 = tcg_temp_new_vec_matching(a);
- t2 = tcg_temp_new_vec_matching(b);
-
- /* Sign-extend the even elements from a */
- tcg_gen_shli_vec(vece, t1, a, halfbits);
- tcg_gen_sari_vec(vece, t1, t1, halfbits);
-
- /* Sign-extend the even elements from b */
- tcg_gen_shli_vec(vece, t2, b, halfbits);
- tcg_gen_sari_vec(vece, t2, t2, halfbits);
-
- tcg_gen_sub_vec(vece, t, t1, t2);
-}
-
-static void gen_vsubwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
-{
- TCGv_i32 t1, t2;
-
- t1 = tcg_temp_new_i32();
- t2 = tcg_temp_new_i32();
- tcg_gen_ext16s_i32(t1, a);
- tcg_gen_ext16s_i32(t2, b);
- tcg_gen_sub_i32(t, t1, t2);
-}
-
-static void gen_vsubwev_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
-{
- TCGv_i64 t1, t2;
-
- t1 = tcg_temp_new_i64();
- t2 = tcg_temp_new_i64();
- tcg_gen_ext32s_i64(t1, a);
- tcg_gen_ext32s_i64(t2, b);
- tcg_gen_sub_i64(t, t1, t2);
-}
-
-static void do_vsubwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
- uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_sub_vec, 0
- };
- static const GVecGen3 op[4] = {
- {
- .fniv = gen_vsubwev_s,
- .fno = gen_helper_vsubwev_h_b,
- .opt_opc = vecop_list,
- .vece = MO_16
- },
- {
- .fni4 = gen_vsubwev_w_h,
- .fniv = gen_vsubwev_s,
- .fno = gen_helper_vsubwev_w_h,
- .opt_opc = vecop_list,
- .vece = MO_32
- },
- {
- .fni8 = gen_vsubwev_d_w,
- .fniv = gen_vsubwev_s,
- .fno = gen_helper_vsubwev_d_w,
- .opt_opc = vecop_list,
- .vece = MO_64
- },
- {
- .fno = gen_helper_vsubwev_q_d,
- .vece = MO_128
- },
- };
-
- tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
-}
-
-TRANS(vsubwev_h_b, LSX, gvec_vvv, MO_8, do_vsubwev_s)
-TRANS(vsubwev_w_h, LSX, gvec_vvv, MO_16, do_vsubwev_s)
-TRANS(vsubwev_d_w, LSX, gvec_vvv, MO_32, do_vsubwev_s)
-TRANS(vsubwev_q_d, LSX, gvec_vvv, MO_64, do_vsubwev_s)
-TRANS(xvsubwev_h_b, LASX, gvec_xxx, MO_8, do_vsubwev_s)
-TRANS(xvsubwev_w_h, LASX, gvec_xxx, MO_16, do_vsubwev_s)
-TRANS(xvsubwev_d_w, LASX, gvec_xxx, MO_32, do_vsubwev_s)
-TRANS(xvsubwev_q_d, LASX, gvec_xxx, MO_64, do_vsubwev_s)
-
-static void gen_vsubwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
-{
- TCGv_vec t1, t2;
-
- int halfbits = 4 << vece;
-
- t1 = tcg_temp_new_vec_matching(a);
- t2 = tcg_temp_new_vec_matching(b);
-
- /* Sign-extend the odd elements of each vector */
- tcg_gen_sari_vec(vece, t1, a, halfbits);
- tcg_gen_sari_vec(vece, t2, b, halfbits);
-
- tcg_gen_sub_vec(vece, t, t1, t2);
-}
-
-static void gen_vsubwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
-{
- TCGv_i32 t1, t2;
-
- t1 = tcg_temp_new_i32();
- t2 = tcg_temp_new_i32();
- tcg_gen_sari_i32(t1, a, 16);
- tcg_gen_sari_i32(t2, b, 16);
- tcg_gen_sub_i32(t, t1, t2);
-}
-
-static void gen_vsubwod_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
-{
- TCGv_i64 t1, t2;
-
- t1 = tcg_temp_new_i64();
- t2 = tcg_temp_new_i64();
- tcg_gen_sari_i64(t1, a, 32);
- tcg_gen_sari_i64(t2, b, 32);
- tcg_gen_sub_i64(t, t1, t2);
-}
-
-static void do_vsubwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
- uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_sari_vec, INDEX_op_sub_vec, 0
- };
- static const GVecGen3 op[4] = {
- {
- .fniv = gen_vsubwod_s,
- .fno = gen_helper_vsubwod_h_b,
- .opt_opc = vecop_list,
- .vece = MO_16
- },
- {
- .fni4 = gen_vsubwod_w_h,
- .fniv = gen_vsubwod_s,
- .fno = gen_helper_vsubwod_w_h,
- .opt_opc = vecop_list,
- .vece = MO_32
- },
- {
- .fni8 = gen_vsubwod_d_w,
- .fniv = gen_vsubwod_s,
- .fno = gen_helper_vsubwod_d_w,
- .opt_opc = vecop_list,
- .vece = MO_64
- },
- {
- .fno = gen_helper_vsubwod_q_d,
- .vece = MO_128
- },
- };
-
- tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
-}
-
-TRANS(vsubwod_h_b, LSX, gvec_vvv, MO_8, do_vsubwod_s)
-TRANS(vsubwod_w_h, LSX, gvec_vvv, MO_16, do_vsubwod_s)
-TRANS(vsubwod_d_w, LSX, gvec_vvv, MO_32, do_vsubwod_s)
-TRANS(vsubwod_q_d, LSX, gvec_vvv, MO_64, do_vsubwod_s)
-TRANS(xvsubwod_h_b, LASX, gvec_xxx, MO_8, do_vsubwod_s)
-TRANS(xvsubwod_w_h, LASX, gvec_xxx, MO_16, do_vsubwod_s)
-TRANS(xvsubwod_d_w, LASX, gvec_xxx, MO_32, do_vsubwod_s)
-TRANS(xvsubwod_q_d, LASX, gvec_xxx, MO_64, do_vsubwod_s)
-
-static void gen_vaddwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
-{
- TCGv_vec t1, t2, t3;
-
- t1 = tcg_temp_new_vec_matching(a);
- t2 = tcg_temp_new_vec_matching(b);
- t3 = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
- tcg_gen_and_vec(vece, t1, a, t3);
- tcg_gen_and_vec(vece, t2, b, t3);
- tcg_gen_add_vec(vece, t, t1, t2);
-}
-
-static void gen_vaddwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
-{
- TCGv_i32 t1, t2;
-
- t1 = tcg_temp_new_i32();
- t2 = tcg_temp_new_i32();
- tcg_gen_ext16u_i32(t1, a);
- tcg_gen_ext16u_i32(t2, b);
- tcg_gen_add_i32(t, t1, t2);
-}
-
-static void gen_vaddwev_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
-{
- TCGv_i64 t1, t2;
-
- t1 = tcg_temp_new_i64();
- t2 = tcg_temp_new_i64();
- tcg_gen_ext32u_i64(t1, a);
- tcg_gen_ext32u_i64(t2, b);
- tcg_gen_add_i64(t, t1, t2);
-}
-
-static void do_vaddwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
- uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_add_vec, 0
- };
- static const GVecGen3 op[4] = {
- {
- .fniv = gen_vaddwev_u,
- .fno = gen_helper_vaddwev_h_bu,
- .opt_opc = vecop_list,
- .vece = MO_16
- },
- {
- .fni4 = gen_vaddwev_w_hu,
- .fniv = gen_vaddwev_u,
- .fno = gen_helper_vaddwev_w_hu,
- .opt_opc = vecop_list,
- .vece = MO_32
- },
- {
- .fni8 = gen_vaddwev_d_wu,
- .fniv = gen_vaddwev_u,
- .fno = gen_helper_vaddwev_d_wu,
- .opt_opc = vecop_list,
- .vece = MO_64
- },
- {
- .fno = gen_helper_vaddwev_q_du,
- .vece = MO_128
- },
- };
-
- tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
-}
-
-TRANS(vaddwev_h_bu, LSX, gvec_vvv, MO_8, do_vaddwev_u)
-TRANS(vaddwev_w_hu, LSX, gvec_vvv, MO_16, do_vaddwev_u)
-TRANS(vaddwev_d_wu, LSX, gvec_vvv, MO_32, do_vaddwev_u)
-TRANS(vaddwev_q_du, LSX, gvec_vvv, MO_64, do_vaddwev_u)
-TRANS(xvaddwev_h_bu, LASX, gvec_xxx, MO_8, do_vaddwev_u)
-TRANS(xvaddwev_w_hu, LASX, gvec_xxx, MO_16, do_vaddwev_u)
-TRANS(xvaddwev_d_wu, LASX, gvec_xxx, MO_32, do_vaddwev_u)
-TRANS(xvaddwev_q_du, LASX, gvec_xxx, MO_64, do_vaddwev_u)
-
-static void gen_vaddwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
-{
- TCGv_vec t1, t2;
-
- int halfbits = 4 << vece;
-
- t1 = tcg_temp_new_vec_matching(a);
- t2 = tcg_temp_new_vec_matching(b);
-
- /* Zero-extend the odd elements of each source vector */
- tcg_gen_shri_vec(vece, t1, a, halfbits);
- tcg_gen_shri_vec(vece, t2, b, halfbits);
-
- tcg_gen_add_vec(vece, t, t1, t2);
-}
-
-static void gen_vaddwod_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
-{
- TCGv_i32 t1, t2;
-
- t1 = tcg_temp_new_i32();
- t2 = tcg_temp_new_i32();
- tcg_gen_shri_i32(t1, a, 16);
- tcg_gen_shri_i32(t2, b, 16);
- tcg_gen_add_i32(t, t1, t2);
-}
-
-static void gen_vaddwod_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
-{
- TCGv_i64 t1, t2;
-
- t1 = tcg_temp_new_i64();
- t2 = tcg_temp_new_i64();
- tcg_gen_shri_i64(t1, a, 32);
- tcg_gen_shri_i64(t2, b, 32);
- tcg_gen_add_i64(t, t1, t2);
-}
-
-static void do_vaddwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
- uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_shri_vec, INDEX_op_add_vec, 0
- };
- static const GVecGen3 op[4] = {
- {
- .fniv = gen_vaddwod_u,
- .fno = gen_helper_vaddwod_h_bu,
- .opt_opc = vecop_list,
- .vece = MO_16
- },
- {
- .fni4 = gen_vaddwod_w_hu,
- .fniv = gen_vaddwod_u,
- .fno = gen_helper_vaddwod_w_hu,
- .opt_opc = vecop_list,
- .vece = MO_32
- },
- {
- .fni8 = gen_vaddwod_d_wu,
- .fniv = gen_vaddwod_u,
- .fno = gen_helper_vaddwod_d_wu,
- .opt_opc = vecop_list,
- .vece = MO_64
- },
- {
- .fno = gen_helper_vaddwod_q_du,
- .vece = MO_128
- },
- };
-
- tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
-}
-
-TRANS(vaddwod_h_bu, LSX, gvec_vvv, MO_8, do_vaddwod_u)
-TRANS(vaddwod_w_hu, LSX, gvec_vvv, MO_16, do_vaddwod_u)
-TRANS(vaddwod_d_wu, LSX, gvec_vvv, MO_32, do_vaddwod_u)
-TRANS(vaddwod_q_du, LSX, gvec_vvv, MO_64, do_vaddwod_u)
-TRANS(xvaddwod_h_bu, LASX, gvec_xxx, MO_8, do_vaddwod_u)
-TRANS(xvaddwod_w_hu, LASX, gvec_xxx, MO_16, do_vaddwod_u)
-TRANS(xvaddwod_d_wu, LASX, gvec_xxx, MO_32, do_vaddwod_u)
-TRANS(xvaddwod_q_du, LASX, gvec_xxx, MO_64, do_vaddwod_u)
-
-static void gen_vsubwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
-{
- TCGv_vec t1, t2, t3;
-
- t1 = tcg_temp_new_vec_matching(a);
- t2 = tcg_temp_new_vec_matching(b);
- t3 = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
- tcg_gen_and_vec(vece, t1, a, t3);
- tcg_gen_and_vec(vece, t2, b, t3);
- tcg_gen_sub_vec(vece, t, t1, t2);
-}
-
-static void gen_vsubwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
-{
- TCGv_i32 t1, t2;
-
- t1 = tcg_temp_new_i32();
- t2 = tcg_temp_new_i32();
- tcg_gen_ext16u_i32(t1, a);
- tcg_gen_ext16u_i32(t2, b);
- tcg_gen_sub_i32(t, t1, t2);
-}
-
-static void gen_vsubwev_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
-{
- TCGv_i64 t1, t2;
-
- t1 = tcg_temp_new_i64();
- t2 = tcg_temp_new_i64();
- tcg_gen_ext32u_i64(t1, a);
- tcg_gen_ext32u_i64(t2, b);
- tcg_gen_sub_i64(t, t1, t2);
-}
-
-static void do_vsubwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
- uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_sub_vec, 0
- };
- static const GVecGen3 op[4] = {
- {
- .fniv = gen_vsubwev_u,
- .fno = gen_helper_vsubwev_h_bu,
- .opt_opc = vecop_list,
- .vece = MO_16
- },
- {
- .fni4 = gen_vsubwev_w_hu,
- .fniv = gen_vsubwev_u,
- .fno = gen_helper_vsubwev_w_hu,
- .opt_opc = vecop_list,
- .vece = MO_32
- },
- {
- .fni8 = gen_vsubwev_d_wu,
- .fniv = gen_vsubwev_u,
- .fno = gen_helper_vsubwev_d_wu,
- .opt_opc = vecop_list,
- .vece = MO_64
- },
- {
- .fno = gen_helper_vsubwev_q_du,
- .vece = MO_128
- },
- };
-
- tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
-}
-
-TRANS(vsubwev_h_bu, LSX, gvec_vvv, MO_8, do_vsubwev_u)
-TRANS(vsubwev_w_hu, LSX, gvec_vvv, MO_16, do_vsubwev_u)
-TRANS(vsubwev_d_wu, LSX, gvec_vvv, MO_32, do_vsubwev_u)
-TRANS(vsubwev_q_du, LSX, gvec_vvv, MO_64, do_vsubwev_u)
-TRANS(xvsubwev_h_bu, LASX, gvec_xxx, MO_8, do_vsubwev_u)
-TRANS(xvsubwev_w_hu, LASX, gvec_xxx, MO_16, do_vsubwev_u)
-TRANS(xvsubwev_d_wu, LASX, gvec_xxx, MO_32, do_vsubwev_u)
-TRANS(xvsubwev_q_du, LASX, gvec_xxx, MO_64, do_vsubwev_u)
-
-static void gen_vsubwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
-{
- TCGv_vec t1, t2;
-
- int halfbits = 4 << vece;
-
- t1 = tcg_temp_new_vec_matching(a);
- t2 = tcg_temp_new_vec_matching(b);
-
- /* Zero-extend the odd elements of each source vector */
- tcg_gen_shri_vec(vece, t1, a, halfbits);
- tcg_gen_shri_vec(vece, t2, b, halfbits);
-
- tcg_gen_sub_vec(vece, t, t1, t2);
-}
-
-static void gen_vsubwod_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
-{
- TCGv_i32 t1, t2;
-
- t1 = tcg_temp_new_i32();
- t2 = tcg_temp_new_i32();
- tcg_gen_shri_i32(t1, a, 16);
- tcg_gen_shri_i32(t2, b, 16);
- tcg_gen_sub_i32(t, t1, t2);
-}
-
-static void gen_vsubwod_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
-{
- TCGv_i64 t1, t2;
-
- t1 = tcg_temp_new_i64();
- t2 = tcg_temp_new_i64();
- tcg_gen_shri_i64(t1, a, 32);
- tcg_gen_shri_i64(t2, b, 32);
- tcg_gen_sub_i64(t, t1, t2);
-}
-
-static void do_vsubwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
- uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_shri_vec, INDEX_op_sub_vec, 0
- };
- static const GVecGen3 op[4] = {
- {
- .fniv = gen_vsubwod_u,
- .fno = gen_helper_vsubwod_h_bu,
- .opt_opc = vecop_list,
- .vece = MO_16
- },
- {
- .fni4 = gen_vsubwod_w_hu,
- .fniv = gen_vsubwod_u,
- .fno = gen_helper_vsubwod_w_hu,
- .opt_opc = vecop_list,
- .vece = MO_32
- },
- {
- .fni8 = gen_vsubwod_d_wu,
- .fniv = gen_vsubwod_u,
- .fno = gen_helper_vsubwod_d_wu,
- .opt_opc = vecop_list,
- .vece = MO_64
- },
- {
- .fno = gen_helper_vsubwod_q_du,
- .vece = MO_128
- },
- };
-
- tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
-}
-
-TRANS(vsubwod_h_bu, LSX, gvec_vvv, MO_8, do_vsubwod_u)
-TRANS(vsubwod_w_hu, LSX, gvec_vvv, MO_16, do_vsubwod_u)
-TRANS(vsubwod_d_wu, LSX, gvec_vvv, MO_32, do_vsubwod_u)
-TRANS(vsubwod_q_du, LSX, gvec_vvv, MO_64, do_vsubwod_u)
-TRANS(xvsubwod_h_bu, LASX, gvec_xxx, MO_8, do_vsubwod_u)
-TRANS(xvsubwod_w_hu, LASX, gvec_xxx, MO_16, do_vsubwod_u)
-TRANS(xvsubwod_d_wu, LASX, gvec_xxx, MO_32, do_vsubwod_u)
-TRANS(xvsubwod_q_du, LASX, gvec_xxx, MO_64, do_vsubwod_u)
-
-static void gen_vaddwev_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
-{
- TCGv_vec t1, t2, t3;
-
- int halfbits = 4 << vece;
-
- t1 = tcg_temp_new_vec_matching(a);
- t2 = tcg_temp_new_vec_matching(b);
- t3 = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, halfbits));
-
- /* Zero-extend the even elements from a */
- tcg_gen_and_vec(vece, t1, a, t3);
-
- /* Sign-extend the even elements from b */
- tcg_gen_shli_vec(vece, t2, b, halfbits);
- tcg_gen_sari_vec(vece, t2, t2, halfbits);
-
- tcg_gen_add_vec(vece, t, t1, t2);
-}
-
-static void gen_vaddwev_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
-{
- TCGv_i32 t1, t2;
-
- t1 = tcg_temp_new_i32();
- t2 = tcg_temp_new_i32();
- tcg_gen_ext16u_i32(t1, a);
- tcg_gen_ext16s_i32(t2, b);
- tcg_gen_add_i32(t, t1, t2);
-}
-
-static void gen_vaddwev_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
-{
- TCGv_i64 t1, t2;
-
- t1 = tcg_temp_new_i64();
- t2 = tcg_temp_new_i64();
- tcg_gen_ext32u_i64(t1, a);
- tcg_gen_ext32s_i64(t2, b);
- tcg_gen_add_i64(t, t1, t2);
-}
-
-static void do_vaddwev_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
- uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
- };
- static const GVecGen3 op[4] = {
- {
- .fniv = gen_vaddwev_u_s,
- .fno = gen_helper_vaddwev_h_bu_b,
- .opt_opc = vecop_list,
- .vece = MO_16
- },
- {
- .fni4 = gen_vaddwev_w_hu_h,
- .fniv = gen_vaddwev_u_s,
- .fno = gen_helper_vaddwev_w_hu_h,
- .opt_opc = vecop_list,
- .vece = MO_32
- },
- {
- .fni8 = gen_vaddwev_d_wu_w,
- .fniv = gen_vaddwev_u_s,
- .fno = gen_helper_vaddwev_d_wu_w,
- .opt_opc = vecop_list,
- .vece = MO_64
- },
- {
- .fno = gen_helper_vaddwev_q_du_d,
- .vece = MO_128
- },
- };
-
- tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
-}
-
-TRANS(vaddwev_h_bu_b, LSX, gvec_vvv, MO_8, do_vaddwev_u_s)
-TRANS(vaddwev_w_hu_h, LSX, gvec_vvv, MO_16, do_vaddwev_u_s)
-TRANS(vaddwev_d_wu_w, LSX, gvec_vvv, MO_32, do_vaddwev_u_s)
-TRANS(vaddwev_q_du_d, LSX, gvec_vvv, MO_64, do_vaddwev_u_s)
-TRANS(xvaddwev_h_bu_b, LASX, gvec_xxx, MO_8, do_vaddwev_u_s)
-TRANS(xvaddwev_w_hu_h, LASX, gvec_xxx, MO_16, do_vaddwev_u_s)
-TRANS(xvaddwev_d_wu_w, LASX, gvec_xxx, MO_32, do_vaddwev_u_s)
-TRANS(xvaddwev_q_du_d, LASX, gvec_xxx, MO_64, do_vaddwev_u_s)
-
-static void gen_vaddwod_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
-{
- TCGv_vec t1, t2;
-
- int halfbits = 4 << vece;
-
- t1 = tcg_temp_new_vec_matching(a);
- t2 = tcg_temp_new_vec_matching(b);
-
- /* Zero-extend the odd elements from a */
- tcg_gen_shri_vec(vece, t1, a, halfbits);
- /* Sign-extend the odd elements from b */
- tcg_gen_sari_vec(vece, t2, b, halfbits);
-
- tcg_gen_add_vec(vece, t, t1, t2);
-}
-
-static void gen_vaddwod_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
-{
- TCGv_i32 t1, t2;
-
- t1 = tcg_temp_new_i32();
- t2 = tcg_temp_new_i32();
- tcg_gen_shri_i32(t1, a, 16);
- tcg_gen_sari_i32(t2, b, 16);
- tcg_gen_add_i32(t, t1, t2);
-}
-
-static void gen_vaddwod_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
-{
- TCGv_i64 t1, t2;
-
- t1 = tcg_temp_new_i64();
- t2 = tcg_temp_new_i64();
- tcg_gen_shri_i64(t1, a, 32);
- tcg_gen_sari_i64(t2, b, 32);
- tcg_gen_add_i64(t, t1, t2);
-}
-
-static void do_vaddwod_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
- uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
- };
- static const GVecGen3 op[4] = {
- {
- .fniv = gen_vaddwod_u_s,
- .fno = gen_helper_vaddwod_h_bu_b,
- .opt_opc = vecop_list,
- .vece = MO_16
- },
- {
- .fni4 = gen_vaddwod_w_hu_h,
- .fniv = gen_vaddwod_u_s,
- .fno = gen_helper_vaddwod_w_hu_h,
- .opt_opc = vecop_list,
- .vece = MO_32
- },
- {
- .fni8 = gen_vaddwod_d_wu_w,
- .fniv = gen_vaddwod_u_s,
- .fno = gen_helper_vaddwod_d_wu_w,
- .opt_opc = vecop_list,
- .vece = MO_64
- },
- {
- .fno = gen_helper_vaddwod_q_du_d,
- .vece = MO_128
- },
- };
-
- tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
-}
-
-TRANS(vaddwod_h_bu_b, LSX, gvec_vvv, MO_8, do_vaddwod_u_s)
-TRANS(vaddwod_w_hu_h, LSX, gvec_vvv, MO_16, do_vaddwod_u_s)
-TRANS(vaddwod_d_wu_w, LSX, gvec_vvv, MO_32, do_vaddwod_u_s)
-TRANS(vaddwod_q_du_d, LSX, gvec_vvv, MO_64, do_vaddwod_u_s)
-TRANS(xvaddwod_h_bu_b, LASX, gvec_xxx, MO_8, do_vaddwod_u_s)
-TRANS(xvaddwod_w_hu_h, LASX, gvec_xxx, MO_16, do_vaddwod_u_s)
-TRANS(xvaddwod_d_wu_w, LASX, gvec_xxx, MO_32, do_vaddwod_u_s)
-TRANS(xvaddwod_q_du_d, LASX, gvec_xxx, MO_64, do_vaddwod_u_s)
-
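-/*
- * Overflow-free average:
- *   vavg:  (a >> 1) + (b >> 1) + ((a & b) & 1)
- *   vavgr: (a >> 1) + (b >> 1) + ((a | b) & 1)   (rounding variant)
- */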
-static void do_vavg(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b,
- void (*gen_shr_vec)(unsigned, TCGv_vec,
- TCGv_vec, int64_t),
- void (*gen_round_vec)(unsigned, TCGv_vec,
- TCGv_vec, TCGv_vec))
-{
- TCGv_vec tmp = tcg_temp_new_vec_matching(t);
- gen_round_vec(vece, tmp, a, b);
- tcg_gen_and_vec(vece, tmp, tmp, tcg_constant_vec_matching(t, vece, 1));
- gen_shr_vec(vece, a, a, 1);
- gen_shr_vec(vece, b, b, 1);
- tcg_gen_add_vec(vece, t, a, b);
- tcg_gen_add_vec(vece, t, t, tmp);
-}
-
-static void gen_vavg_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
-{
- do_vavg(vece, t, a, b, tcg_gen_sari_vec, tcg_gen_and_vec);
-}
-
-static void gen_vavg_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
-{
- do_vavg(vece, t, a, b, tcg_gen_shri_vec, tcg_gen_and_vec);
-}
-
-static void gen_vavgr_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
-{
- do_vavg(vece, t, a, b, tcg_gen_sari_vec, tcg_gen_or_vec);
-}
-
-static void gen_vavgr_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
-{
- do_vavg(vece, t, a, b, tcg_gen_shri_vec, tcg_gen_or_vec);
-}
-
-static void do_vavg_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
- uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_sari_vec, INDEX_op_add_vec, 0
- };
- static const GVecGen3 op[4] = {
- {
- .fniv = gen_vavg_s,
- .fno = gen_helper_vavg_b,
- .opt_opc = vecop_list,
- .vece = MO_8
- },
- {
- .fniv = gen_vavg_s,
- .fno = gen_helper_vavg_h,
- .opt_opc = vecop_list,
- .vece = MO_16
- },
- {
- .fniv = gen_vavg_s,
- .fno = gen_helper_vavg_w,
- .opt_opc = vecop_list,
- .vece = MO_32
- },
- {
- .fniv = gen_vavg_s,
- .fno = gen_helper_vavg_d,
- .opt_opc = vecop_list,
- .vece = MO_64
- },
- };
-
- tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
-}
-
-static void do_vavg_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
- uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_shri_vec, INDEX_op_add_vec, 0
- };
- static const GVecGen3 op[4] = {
- {
- .fniv = gen_vavg_u,
- .fno = gen_helper_vavg_bu,
- .opt_opc = vecop_list,
- .vece = MO_8
- },
- {
- .fniv = gen_vavg_u,
- .fno = gen_helper_vavg_hu,
- .opt_opc = vecop_list,
- .vece = MO_16
- },
- {
- .fniv = gen_vavg_u,
- .fno = gen_helper_vavg_wu,
- .opt_opc = vecop_list,
- .vece = MO_32
- },
- {
- .fniv = gen_vavg_u,
- .fno = gen_helper_vavg_du,
- .opt_opc = vecop_list,
- .vece = MO_64
- },
- };
-
- tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
-}
-
-TRANS(vavg_b, LSX, gvec_vvv, MO_8, do_vavg_s)
-TRANS(vavg_h, LSX, gvec_vvv, MO_16, do_vavg_s)
-TRANS(vavg_w, LSX, gvec_vvv, MO_32, do_vavg_s)
-TRANS(vavg_d, LSX, gvec_vvv, MO_64, do_vavg_s)
-TRANS(vavg_bu, LSX, gvec_vvv, MO_8, do_vavg_u)
-TRANS(vavg_hu, LSX, gvec_vvv, MO_16, do_vavg_u)
-TRANS(vavg_wu, LSX, gvec_vvv, MO_32, do_vavg_u)
-TRANS(vavg_du, LSX, gvec_vvv, MO_64, do_vavg_u)
-TRANS(xvavg_b, LASX, gvec_xxx, MO_8, do_vavg_s)
-TRANS(xvavg_h, LASX, gvec_xxx, MO_16, do_vavg_s)
-TRANS(xvavg_w, LASX, gvec_xxx, MO_32, do_vavg_s)
-TRANS(xvavg_d, LASX, gvec_xxx, MO_64, do_vavg_s)
-TRANS(xvavg_bu, LASX, gvec_xxx, MO_8, do_vavg_u)
-TRANS(xvavg_hu, LASX, gvec_xxx, MO_16, do_vavg_u)
-TRANS(xvavg_wu, LASX, gvec_xxx, MO_32, do_vavg_u)
-TRANS(xvavg_du, LASX, gvec_xxx, MO_64, do_vavg_u)
-
-static void do_vavgr_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
- uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_sari_vec, INDEX_op_add_vec, 0
- };
- static const GVecGen3 op[4] = {
- {
- .fniv = gen_vavgr_s,
- .fno = gen_helper_vavgr_b,
- .opt_opc = vecop_list,
- .vece = MO_8
- },
- {
- .fniv = gen_vavgr_s,
- .fno = gen_helper_vavgr_h,
- .opt_opc = vecop_list,
- .vece = MO_16
- },
- {
- .fniv = gen_vavgr_s,
- .fno = gen_helper_vavgr_w,
- .opt_opc = vecop_list,
- .vece = MO_32
- },
- {
- .fniv = gen_vavgr_s,
- .fno = gen_helper_vavgr_d,
- .opt_opc = vecop_list,
- .vece = MO_64
- },
- };
-
- tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
-}
-
-static void do_vavgr_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
- uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_shri_vec, INDEX_op_add_vec, 0
- };
- static const GVecGen3 op[4] = {
- {
- .fniv = gen_vavgr_u,
- .fno = gen_helper_vavgr_bu,
- .opt_opc = vecop_list,
- .vece = MO_8
- },
- {
- .fniv = gen_vavgr_u,
- .fno = gen_helper_vavgr_hu,
- .opt_opc = vecop_list,
- .vece = MO_16
- },
- {
- .fniv = gen_vavgr_u,
- .fno = gen_helper_vavgr_wu,
- .opt_opc = vecop_list,
- .vece = MO_32
- },
- {
- .fniv = gen_vavgr_u,
- .fno = gen_helper_vavgr_du,
- .opt_opc = vecop_list,
- .vece = MO_64
- },
- };
-
- tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
-}
-
-TRANS(vavgr_b, LSX, gvec_vvv, MO_8, do_vavgr_s)
-TRANS(vavgr_h, LSX, gvec_vvv, MO_16, do_vavgr_s)
-TRANS(vavgr_w, LSX, gvec_vvv, MO_32, do_vavgr_s)
-TRANS(vavgr_d, LSX, gvec_vvv, MO_64, do_vavgr_s)
-TRANS(vavgr_bu, LSX, gvec_vvv, MO_8, do_vavgr_u)
-TRANS(vavgr_hu, LSX, gvec_vvv, MO_16, do_vavgr_u)
-TRANS(vavgr_wu, LSX, gvec_vvv, MO_32, do_vavgr_u)
-TRANS(vavgr_du, LSX, gvec_vvv, MO_64, do_vavgr_u)
-TRANS(xvavgr_b, LASX, gvec_xxx, MO_8, do_vavgr_s)
-TRANS(xvavgr_h, LASX, gvec_xxx, MO_16, do_vavgr_s)
-TRANS(xvavgr_w, LASX, gvec_xxx, MO_32, do_vavgr_s)
-TRANS(xvavgr_d, LASX, gvec_xxx, MO_64, do_vavgr_s)
-TRANS(xvavgr_bu, LASX, gvec_xxx, MO_8, do_vavgr_u)
-TRANS(xvavgr_hu, LASX, gvec_xxx, MO_16, do_vavgr_u)
-TRANS(xvavgr_wu, LASX, gvec_xxx, MO_32, do_vavgr_u)
-TRANS(xvavgr_du, LASX, gvec_xxx, MO_64, do_vavgr_u)
-
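-/* vabsd: absolute difference, computed as max(a, b) - min(a, b) */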
-static void gen_vabsd_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
-{
- tcg_gen_smax_vec(vece, t, a, b);
- tcg_gen_smin_vec(vece, a, a, b);
- tcg_gen_sub_vec(vece, t, t, a);
-}
-
-static void do_vabsd_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
- uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_smax_vec, INDEX_op_smin_vec, INDEX_op_sub_vec, 0
- };
- static const GVecGen3 op[4] = {
- {
- .fniv = gen_vabsd_s,
- .fno = gen_helper_vabsd_b,
- .opt_opc = vecop_list,
- .vece = MO_8
- },
- {
- .fniv = gen_vabsd_s,
- .fno = gen_helper_vabsd_h,
- .opt_opc = vecop_list,
- .vece = MO_16
- },
- {
- .fniv = gen_vabsd_s,
- .fno = gen_helper_vabsd_w,
- .opt_opc = vecop_list,
- .vece = MO_32
- },
- {
- .fniv = gen_vabsd_s,
- .fno = gen_helper_vabsd_d,
- .opt_opc = vecop_list,
- .vece = MO_64
- },
- };
-
- tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
-}
-
-static void gen_vabsd_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
-{
- tcg_gen_umax_vec(vece, t, a, b);
- tcg_gen_umin_vec(vece, a, a, b);
- tcg_gen_sub_vec(vece, t, t, a);
-}
-
-static void do_vabsd_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
- uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_umax_vec, INDEX_op_umin_vec, INDEX_op_sub_vec, 0
- };
- static const GVecGen3 op[4] = {
- {
- .fniv = gen_vabsd_u,
- .fno = gen_helper_vabsd_bu,
- .opt_opc = vecop_list,
- .vece = MO_8
- },
- {
- .fniv = gen_vabsd_u,
- .fno = gen_helper_vabsd_hu,
- .opt_opc = vecop_list,
- .vece = MO_16
- },
- {
- .fniv = gen_vabsd_u,
- .fno = gen_helper_vabsd_wu,
- .opt_opc = vecop_list,
- .vece = MO_32
- },
- {
- .fniv = gen_vabsd_u,
- .fno = gen_helper_vabsd_du,
- .opt_opc = vecop_list,
- .vece = MO_64
- },
- };
-
- tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
-}
-
-TRANS(vabsd_b, LSX, gvec_vvv, MO_8, do_vabsd_s)
-TRANS(vabsd_h, LSX, gvec_vvv, MO_16, do_vabsd_s)
-TRANS(vabsd_w, LSX, gvec_vvv, MO_32, do_vabsd_s)
-TRANS(vabsd_d, LSX, gvec_vvv, MO_64, do_vabsd_s)
-TRANS(vabsd_bu, LSX, gvec_vvv, MO_8, do_vabsd_u)
-TRANS(vabsd_hu, LSX, gvec_vvv, MO_16, do_vabsd_u)
-TRANS(vabsd_wu, LSX, gvec_vvv, MO_32, do_vabsd_u)
-TRANS(vabsd_du, LSX, gvec_vvv, MO_64, do_vabsd_u)
-TRANS(xvabsd_b, LASX, gvec_xxx, MO_8, do_vabsd_s)
-TRANS(xvabsd_h, LASX, gvec_xxx, MO_16, do_vabsd_s)
-TRANS(xvabsd_w, LASX, gvec_xxx, MO_32, do_vabsd_s)
-TRANS(xvabsd_d, LASX, gvec_xxx, MO_64, do_vabsd_s)
-TRANS(xvabsd_bu, LASX, gvec_xxx, MO_8, do_vabsd_u)
-TRANS(xvabsd_hu, LASX, gvec_xxx, MO_16, do_vabsd_u)
-TRANS(xvabsd_wu, LASX, gvec_xxx, MO_32, do_vabsd_u)
-TRANS(xvabsd_du, LASX, gvec_xxx, MO_64, do_vabsd_u)
-
-static void gen_vadda(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
-{
- TCGv_vec t1, t2;
-
- t1 = tcg_temp_new_vec_matching(a);
- t2 = tcg_temp_new_vec_matching(b);
-
- tcg_gen_abs_vec(vece, t1, a);
- tcg_gen_abs_vec(vece, t2, b);
- tcg_gen_add_vec(vece, t, t1, t2);
-}
-
-static void do_vadda(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
- uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_abs_vec, INDEX_op_add_vec, 0
- };
- static const GVecGen3 op[4] = {
- {
- .fniv = gen_vadda,
- .fno = gen_helper_vadda_b,
- .opt_opc = vecop_list,
- .vece = MO_8
- },
- {
- .fniv = gen_vadda,
- .fno = gen_helper_vadda_h,
- .opt_opc = vecop_list,
- .vece = MO_16
- },
- {
- .fniv = gen_vadda,
- .fno = gen_helper_vadda_w,
- .opt_opc = vecop_list,
- .vece = MO_32
- },
- {
- .fniv = gen_vadda,
- .fno = gen_helper_vadda_d,
- .opt_opc = vecop_list,
- .vece = MO_64
- },
- };
-
- tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
-}
-
-TRANS(vadda_b, LSX, gvec_vvv, MO_8, do_vadda)
-TRANS(vadda_h, LSX, gvec_vvv, MO_16, do_vadda)
-TRANS(vadda_w, LSX, gvec_vvv, MO_32, do_vadda)
-TRANS(vadda_d, LSX, gvec_vvv, MO_64, do_vadda)
-TRANS(xvadda_b, LASX, gvec_xxx, MO_8, do_vadda)
-TRANS(xvadda_h, LASX, gvec_xxx, MO_16, do_vadda)
-TRANS(xvadda_w, LASX, gvec_xxx, MO_32, do_vadda)
-TRANS(xvadda_d, LASX, gvec_xxx, MO_64, do_vadda)
-
-TRANS(vmax_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_smax)
-TRANS(vmax_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_smax)
-TRANS(vmax_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_smax)
-TRANS(vmax_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_smax)
-TRANS(vmax_bu, LSX, gvec_vvv, MO_8, tcg_gen_gvec_umax)
-TRANS(vmax_hu, LSX, gvec_vvv, MO_16, tcg_gen_gvec_umax)
-TRANS(vmax_wu, LSX, gvec_vvv, MO_32, tcg_gen_gvec_umax)
-TRANS(vmax_du, LSX, gvec_vvv, MO_64, tcg_gen_gvec_umax)
-TRANS(xvmax_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_smax)
-TRANS(xvmax_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_smax)
-TRANS(xvmax_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_smax)
-TRANS(xvmax_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_smax)
-TRANS(xvmax_bu, LASX, gvec_xxx, MO_8, tcg_gen_gvec_umax)
-TRANS(xvmax_hu, LASX, gvec_xxx, MO_16, tcg_gen_gvec_umax)
-TRANS(xvmax_wu, LASX, gvec_xxx, MO_32, tcg_gen_gvec_umax)
-TRANS(xvmax_du, LASX, gvec_xxx, MO_64, tcg_gen_gvec_umax)
-
-TRANS(vmin_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_smin)
-TRANS(vmin_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_smin)
-TRANS(vmin_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_smin)
-TRANS(vmin_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_smin)
-TRANS(vmin_bu, LSX, gvec_vvv, MO_8, tcg_gen_gvec_umin)
-TRANS(vmin_hu, LSX, gvec_vvv, MO_16, tcg_gen_gvec_umin)
-TRANS(vmin_wu, LSX, gvec_vvv, MO_32, tcg_gen_gvec_umin)
-TRANS(vmin_du, LSX, gvec_vvv, MO_64, tcg_gen_gvec_umin)
-TRANS(xvmin_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_smin)
-TRANS(xvmin_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_smin)
-TRANS(xvmin_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_smin)
-TRANS(xvmin_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_smin)
-TRANS(xvmin_bu, LASX, gvec_xxx, MO_8, tcg_gen_gvec_umin)
-TRANS(xvmin_hu, LASX, gvec_xxx, MO_16, tcg_gen_gvec_umin)
-TRANS(xvmin_wu, LASX, gvec_xxx, MO_32, tcg_gen_gvec_umin)
-TRANS(xvmin_du, LASX, gvec_xxx, MO_64, tcg_gen_gvec_umin)
-
-static void gen_vmini_s(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
-{
- tcg_gen_smin_vec(vece, t, a, tcg_constant_vec_matching(t, vece, imm));
-}
-
-static void gen_vmini_u(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
-{
- tcg_gen_umin_vec(vece, t, a, tcg_constant_vec_matching(t, vece, imm));
-}
-
-static void gen_vmaxi_s(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
-{
- tcg_gen_smax_vec(vece, t, a, tcg_constant_vec_matching(t, vece, imm));
-}
-
-static void gen_vmaxi_u(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
-{
- tcg_gen_umax_vec(vece, t, a, tcg_constant_vec_matching(t, vece, imm));
-}
-
-static void do_vmini_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
- int64_t imm, uint32_t oprsz, uint32_t maxsz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_smin_vec, 0
- };
- static const GVecGen2i op[4] = {
- {
- .fniv = gen_vmini_s,
- .fnoi = gen_helper_vmini_b,
- .opt_opc = vecop_list,
- .vece = MO_8
- },
- {
- .fniv = gen_vmini_s,
- .fnoi = gen_helper_vmini_h,
- .opt_opc = vecop_list,
- .vece = MO_16
- },
- {
- .fniv = gen_vmini_s,
- .fnoi = gen_helper_vmini_w,
- .opt_opc = vecop_list,
- .vece = MO_32
- },
- {
- .fniv = gen_vmini_s,
- .fnoi = gen_helper_vmini_d,
- .opt_opc = vecop_list,
- .vece = MO_64
- },
- };
-
- tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
-}
-
-static void do_vmini_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
- int64_t imm, uint32_t oprsz, uint32_t maxsz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_umin_vec, 0
- };
- static const GVecGen2i op[4] = {
- {
- .fniv = gen_vmini_u,
- .fnoi = gen_helper_vmini_bu,
- .opt_opc = vecop_list,
- .vece = MO_8
- },
- {
- .fniv = gen_vmini_u,
- .fnoi = gen_helper_vmini_hu,
- .opt_opc = vecop_list,
- .vece = MO_16
- },
- {
- .fniv = gen_vmini_u,
- .fnoi = gen_helper_vmini_wu,
- .opt_opc = vecop_list,
- .vece = MO_32
- },
- {
- .fniv = gen_vmini_u,
- .fnoi = gen_helper_vmini_du,
- .opt_opc = vecop_list,
- .vece = MO_64
- },
- };
-
- tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
-}
-
-TRANS(vmini_b, LSX, gvec_vv_i, MO_8, do_vmini_s)
-TRANS(vmini_h, LSX, gvec_vv_i, MO_16, do_vmini_s)
-TRANS(vmini_w, LSX, gvec_vv_i, MO_32, do_vmini_s)
-TRANS(vmini_d, LSX, gvec_vv_i, MO_64, do_vmini_s)
-TRANS(vmini_bu, LSX, gvec_vv_i, MO_8, do_vmini_u)
-TRANS(vmini_hu, LSX, gvec_vv_i, MO_16, do_vmini_u)
-TRANS(vmini_wu, LSX, gvec_vv_i, MO_32, do_vmini_u)
-TRANS(vmini_du, LSX, gvec_vv_i, MO_64, do_vmini_u)
-TRANS(xvmini_b, LASX, gvec_xx_i, MO_8, do_vmini_s)
-TRANS(xvmini_h, LASX, gvec_xx_i, MO_16, do_vmini_s)
-TRANS(xvmini_w, LASX, gvec_xx_i, MO_32, do_vmini_s)
-TRANS(xvmini_d, LASX, gvec_xx_i, MO_64, do_vmini_s)
-TRANS(xvmini_bu, LASX, gvec_xx_i, MO_8, do_vmini_u)
-TRANS(xvmini_hu, LASX, gvec_xx_i, MO_16, do_vmini_u)
-TRANS(xvmini_wu, LASX, gvec_xx_i, MO_32, do_vmini_u)
-TRANS(xvmini_du, LASX, gvec_xx_i, MO_64, do_vmini_u)
-
-static void do_vmaxi_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
- int64_t imm, uint32_t oprsz, uint32_t maxsz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_smax_vec, 0
- };
- static const GVecGen2i op[4] = {
- {
- .fniv = gen_vmaxi_s,
- .fnoi = gen_helper_vmaxi_b,
- .opt_opc = vecop_list,
- .vece = MO_8
- },
- {
- .fniv = gen_vmaxi_s,
- .fnoi = gen_helper_vmaxi_h,
- .opt_opc = vecop_list,
- .vece = MO_16
- },
- {
- .fniv = gen_vmaxi_s,
- .fnoi = gen_helper_vmaxi_w,
- .opt_opc = vecop_list,
- .vece = MO_32
- },
- {
- .fniv = gen_vmaxi_s,
- .fnoi = gen_helper_vmaxi_d,
- .opt_opc = vecop_list,
- .vece = MO_64
- },
- };
-
- tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
-}
-
-static void do_vmaxi_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
- int64_t imm, uint32_t oprsz, uint32_t maxsz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_umax_vec, 0
- };
- static const GVecGen2i op[4] = {
- {
- .fniv = gen_vmaxi_u,
- .fnoi = gen_helper_vmaxi_bu,
- .opt_opc = vecop_list,
- .vece = MO_8
- },
- {
- .fniv = gen_vmaxi_u,
- .fnoi = gen_helper_vmaxi_hu,
- .opt_opc = vecop_list,
- .vece = MO_16
- },
- {
- .fniv = gen_vmaxi_u,
- .fnoi = gen_helper_vmaxi_wu,
- .opt_opc = vecop_list,
- .vece = MO_32
- },
- {
- .fniv = gen_vmaxi_u,
- .fnoi = gen_helper_vmaxi_du,
- .opt_opc = vecop_list,
- .vece = MO_64
- },
- };
-
- tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
-}
-
-TRANS(vmaxi_b, LSX, gvec_vv_i, MO_8, do_vmaxi_s)
-TRANS(vmaxi_h, LSX, gvec_vv_i, MO_16, do_vmaxi_s)
-TRANS(vmaxi_w, LSX, gvec_vv_i, MO_32, do_vmaxi_s)
-TRANS(vmaxi_d, LSX, gvec_vv_i, MO_64, do_vmaxi_s)
-TRANS(vmaxi_bu, LSX, gvec_vv_i, MO_8, do_vmaxi_u)
-TRANS(vmaxi_hu, LSX, gvec_vv_i, MO_16, do_vmaxi_u)
-TRANS(vmaxi_wu, LSX, gvec_vv_i, MO_32, do_vmaxi_u)
-TRANS(vmaxi_du, LSX, gvec_vv_i, MO_64, do_vmaxi_u)
-TRANS(xvmaxi_b, LASX, gvec_xx_i, MO_8, do_vmaxi_s)
-TRANS(xvmaxi_h, LASX, gvec_xx_i, MO_16, do_vmaxi_s)
-TRANS(xvmaxi_w, LASX, gvec_xx_i, MO_32, do_vmaxi_s)
-TRANS(xvmaxi_d, LASX, gvec_xx_i, MO_64, do_vmaxi_s)
-TRANS(xvmaxi_bu, LASX, gvec_xx_i, MO_8, do_vmaxi_u)
-TRANS(xvmaxi_hu, LASX, gvec_xx_i, MO_16, do_vmaxi_u)
-TRANS(xvmaxi_wu, LASX, gvec_xx_i, MO_32, do_vmaxi_u)
-TRANS(xvmaxi_du, LASX, gvec_xx_i, MO_64, do_vmaxi_u)
-
-TRANS(vmul_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_mul)
-TRANS(vmul_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_mul)
-TRANS(vmul_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_mul)
-TRANS(vmul_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_mul)
-TRANS(xvmul_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_mul)
-TRANS(xvmul_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_mul)
-TRANS(xvmul_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_mul)
-TRANS(xvmul_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_mul)
-
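-/* vmuh: keep only the high half of the widened product */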
-static void gen_vmuh_w(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
-{
- TCGv_i32 discard = tcg_temp_new_i32();
- tcg_gen_muls2_i32(discard, t, a, b);
-}
-
-static void gen_vmuh_d(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
-{
- TCGv_i64 discard = tcg_temp_new_i64();
- tcg_gen_muls2_i64(discard, t, a, b);
-}
-
-static void do_vmuh_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
- uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
-{
- static const GVecGen3 op[4] = {
- {
- .fno = gen_helper_vmuh_b,
- .vece = MO_8
- },
- {
- .fno = gen_helper_vmuh_h,
- .vece = MO_16
- },
- {
- .fni4 = gen_vmuh_w,
- .fno = gen_helper_vmuh_w,
- .vece = MO_32
- },
- {
- .fni8 = gen_vmuh_d,
- .fno = gen_helper_vmuh_d,
- .vece = MO_64
- },
- };
-
- tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
-}
-
-TRANS(vmuh_b, LSX, gvec_vvv, MO_8, do_vmuh_s)
-TRANS(vmuh_h, LSX, gvec_vvv, MO_16, do_vmuh_s)
-TRANS(vmuh_w, LSX, gvec_vvv, MO_32, do_vmuh_s)
-TRANS(vmuh_d, LSX, gvec_vvv, MO_64, do_vmuh_s)
-TRANS(xvmuh_b, LASX, gvec_xxx, MO_8, do_vmuh_s)
-TRANS(xvmuh_h, LASX, gvec_xxx, MO_16, do_vmuh_s)
-TRANS(xvmuh_w, LASX, gvec_xxx, MO_32, do_vmuh_s)
-TRANS(xvmuh_d, LASX, gvec_xxx, MO_64, do_vmuh_s)
-
-static void gen_vmuh_wu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
-{
- TCGv_i32 discard = tcg_temp_new_i32();
- tcg_gen_mulu2_i32(discard, t, a, b);
-}
-
-static void gen_vmuh_du(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
-{
- TCGv_i64 discard = tcg_temp_new_i64();
- tcg_gen_mulu2_i64(discard, t, a, b);
-}
-
-static void do_vmuh_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
- uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
-{
- static const GVecGen3 op[4] = {
- {
- .fno = gen_helper_vmuh_bu,
- .vece = MO_8
- },
- {
- .fno = gen_helper_vmuh_hu,
- .vece = MO_16
- },
- {
- .fni4 = gen_vmuh_wu,
- .fno = gen_helper_vmuh_wu,
- .vece = MO_32
- },
- {
- .fni8 = gen_vmuh_du,
- .fno = gen_helper_vmuh_du,
- .vece = MO_64
- },
- };
-
- tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
-}
-
-TRANS(vmuh_bu, LSX, gvec_vvv, MO_8, do_vmuh_u)
-TRANS(vmuh_hu, LSX, gvec_vvv, MO_16, do_vmuh_u)
-TRANS(vmuh_wu, LSX, gvec_vvv, MO_32, do_vmuh_u)
-TRANS(vmuh_du, LSX, gvec_vvv, MO_64, do_vmuh_u)
-TRANS(xvmuh_bu, LASX, gvec_xxx, MO_8, do_vmuh_u)
-TRANS(xvmuh_hu, LASX, gvec_xxx, MO_16, do_vmuh_u)
-TRANS(xvmuh_wu, LASX, gvec_xxx, MO_32, do_vmuh_u)
-TRANS(xvmuh_du, LASX, gvec_xxx, MO_64, do_vmuh_u)
-
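-/* vmulwev: widening multiply of the sign-extended even-numbered elements */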
-static void gen_vmulwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
-{
- TCGv_vec t1, t2;
- int halfbits = 4 << vece;
-
- t1 = tcg_temp_new_vec_matching(a);
- t2 = tcg_temp_new_vec_matching(b);
- tcg_gen_shli_vec(vece, t1, a, halfbits);
- tcg_gen_sari_vec(vece, t1, t1, halfbits);
- tcg_gen_shli_vec(vece, t2, b, halfbits);
- tcg_gen_sari_vec(vece, t2, t2, halfbits);
- tcg_gen_mul_vec(vece, t, t1, t2);
-}
-
-static void gen_vmulwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
-{
- TCGv_i32 t1, t2;
-
- t1 = tcg_temp_new_i32();
- t2 = tcg_temp_new_i32();
- tcg_gen_ext16s_i32(t1, a);
- tcg_gen_ext16s_i32(t2, b);
- tcg_gen_mul_i32(t, t1, t2);
-}
-
-static void gen_vmulwev_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
-{
- TCGv_i64 t1, t2;
-
- t1 = tcg_temp_new_i64();
- t2 = tcg_temp_new_i64();
- tcg_gen_ext32s_i64(t1, a);
- tcg_gen_ext32s_i64(t2, b);
- tcg_gen_mul_i64(t, t1, t2);
-}
-
-static void do_vmulwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
- uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_mul_vec, 0
- };
- static const GVecGen3 op[3] = {
- {
- .fniv = gen_vmulwev_s,
- .fno = gen_helper_vmulwev_h_b,
- .opt_opc = vecop_list,
- .vece = MO_16
- },
- {
- .fni4 = gen_vmulwev_w_h,
- .fniv = gen_vmulwev_s,
- .fno = gen_helper_vmulwev_w_h,
- .opt_opc = vecop_list,
- .vece = MO_32
- },
- {
- .fni8 = gen_vmulwev_d_w,
- .fniv = gen_vmulwev_s,
- .fno = gen_helper_vmulwev_d_w,
- .opt_opc = vecop_list,
- .vece = MO_64
- },
- };
-
- tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
-}
-
-TRANS(vmulwev_h_b, LSX, gvec_vvv, MO_8, do_vmulwev_s)
-TRANS(vmulwev_w_h, LSX, gvec_vvv, MO_16, do_vmulwev_s)
-TRANS(vmulwev_d_w, LSX, gvec_vvv, MO_32, do_vmulwev_s)
-TRANS(xvmulwev_h_b, LASX, gvec_xxx, MO_8, do_vmulwev_s)
-TRANS(xvmulwev_w_h, LASX, gvec_xxx, MO_16, do_vmulwev_s)
-TRANS(xvmulwev_d_w, LASX, gvec_xxx, MO_32, do_vmulwev_s)
-
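-/* unsigned * signed: swap the operands and reuse tcg_gen_mulsu2_i64 */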
-static void tcg_gen_mulus2_i64(TCGv_i64 rl, TCGv_i64 rh,
- TCGv_i64 arg1, TCGv_i64 arg2)
-{
- tcg_gen_mulsu2_i64(rl, rh, arg2, arg1);
-}
-
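-/*
- * 128-bit widening multiply: for each 128-bit lane, pick one 64-bit
- * element from vj and vk (selected by idx1/idx2) and write the full
- * 128-bit product back to the lane of vd.
- */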
-static bool gen_vmul_q_vl(DisasContext *ctx,
- arg_vvv *a, uint32_t oprsz, int idx1, int idx2,
- void (*func)(TCGv_i64, TCGv_i64,
- TCGv_i64, TCGv_i64))
-{
- TCGv_i64 rh, rl, arg1, arg2;
- int i;
-
- if (!check_vec(ctx, oprsz)) {
- return true;
- }
-
- rh = tcg_temp_new_i64();
- rl = tcg_temp_new_i64();
- arg1 = tcg_temp_new_i64();
- arg2 = tcg_temp_new_i64();
-
- for (i = 0; i < oprsz / 16; i++) {
- get_vreg64(arg1, a->vj, 2 * i + idx1);
- get_vreg64(arg2, a->vk, 2 * i + idx2);
-
- func(rl, rh, arg1, arg2);
-
- set_vreg64(rh, a->vd, 2 * i + 1);
- set_vreg64(rl, a->vd, 2 * i);
- }
-
- return true;
-}
-
-static bool gen_vmul_q(DisasContext *ctx, arg_vvv *a, int idx1, int idx2,
- void (*func)(TCGv_i64, TCGv_i64,
- TCGv_i64, TCGv_i64))
-{
- return gen_vmul_q_vl(ctx, a, 16, idx1, idx2, func);
-}
-
-static bool gen_xvmul_q(DisasContext *ctx, arg_vvv *a, int idx1, int idx2,
- void (*func)(TCGv_i64, TCGv_i64,
- TCGv_i64, TCGv_i64))
-{
- return gen_vmul_q_vl(ctx, a, 32, idx1, idx2, func);
-}
-
-TRANS(vmulwev_q_d, LSX, gen_vmul_q, 0, 0, tcg_gen_muls2_i64)
-TRANS(vmulwod_q_d, LSX, gen_vmul_q, 1, 1, tcg_gen_muls2_i64)
-TRANS(vmulwev_q_du, LSX, gen_vmul_q, 0, 0, tcg_gen_mulu2_i64)
-TRANS(vmulwod_q_du, LSX, gen_vmul_q, 1, 1, tcg_gen_mulu2_i64)
-TRANS(vmulwev_q_du_d, LSX, gen_vmul_q, 0, 0, tcg_gen_mulus2_i64)
-TRANS(vmulwod_q_du_d, LSX, gen_vmul_q, 1, 1, tcg_gen_mulus2_i64)
-TRANS(xvmulwev_q_d, LASX, gen_xvmul_q, 0, 0, tcg_gen_muls2_i64)
-TRANS(xvmulwod_q_d, LASX, gen_xvmul_q, 1, 1, tcg_gen_muls2_i64)
-TRANS(xvmulwev_q_du, LASX, gen_xvmul_q, 0, 0, tcg_gen_mulu2_i64)
-TRANS(xvmulwod_q_du, LASX, gen_xvmul_q, 1, 1, tcg_gen_mulu2_i64)
-TRANS(xvmulwev_q_du_d, LASX, gen_xvmul_q, 0, 0, tcg_gen_mulus2_i64)
-TRANS(xvmulwod_q_du_d, LASX, gen_xvmul_q, 1, 1, tcg_gen_mulus2_i64)
-
-static void gen_vmulwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
-{
- TCGv_vec t1, t2;
- int halfbits = 4 << vece;
-
- t1 = tcg_temp_new_vec_matching(a);
- t2 = tcg_temp_new_vec_matching(b);
- tcg_gen_sari_vec(vece, t1, a, halfbits);
- tcg_gen_sari_vec(vece, t2, b, halfbits);
- tcg_gen_mul_vec(vece, t, t1, t2);
-}
-
-static void gen_vmulwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
-{
- TCGv_i32 t1, t2;
-
- t1 = tcg_temp_new_i32();
- t2 = tcg_temp_new_i32();
- tcg_gen_sari_i32(t1, a, 16);
- tcg_gen_sari_i32(t2, b, 16);
- tcg_gen_mul_i32(t, t1, t2);
-}
-
-static void gen_vmulwod_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
-{
- TCGv_i64 t1, t2;
-
- t1 = tcg_temp_new_i64();
- t2 = tcg_temp_new_i64();
- tcg_gen_sari_i64(t1, a, 32);
- tcg_gen_sari_i64(t2, b, 32);
- tcg_gen_mul_i64(t, t1, t2);
-}
-
-static void do_vmulwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
- uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_sari_vec, INDEX_op_mul_vec, 0
- };
- static const GVecGen3 op[3] = {
- {
- .fniv = gen_vmulwod_s,
- .fno = gen_helper_vmulwod_h_b,
- .opt_opc = vecop_list,
- .vece = MO_16
- },
- {
- .fni4 = gen_vmulwod_w_h,
- .fniv = gen_vmulwod_s,
- .fno = gen_helper_vmulwod_w_h,
- .opt_opc = vecop_list,
- .vece = MO_32
- },
- {
- .fni8 = gen_vmulwod_d_w,
- .fniv = gen_vmulwod_s,
- .fno = gen_helper_vmulwod_d_w,
- .opt_opc = vecop_list,
- .vece = MO_64
- },
- };
-
- tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
-}
-
-TRANS(vmulwod_h_b, LSX, gvec_vvv, MO_8, do_vmulwod_s)
-TRANS(vmulwod_w_h, LSX, gvec_vvv, MO_16, do_vmulwod_s)
-TRANS(vmulwod_d_w, LSX, gvec_vvv, MO_32, do_vmulwod_s)
-TRANS(xvmulwod_h_b, LASX, gvec_xxx, MO_8, do_vmulwod_s)
-TRANS(xvmulwod_w_h, LASX, gvec_xxx, MO_16, do_vmulwod_s)
-TRANS(xvmulwod_d_w, LASX, gvec_xxx, MO_32, do_vmulwod_s)
-
-static void gen_vmulwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
-{
- TCGv_vec t1, t2, mask;
-
- t1 = tcg_temp_new_vec_matching(a);
- t2 = tcg_temp_new_vec_matching(b);
- mask = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
- tcg_gen_and_vec(vece, t1, a, mask);
- tcg_gen_and_vec(vece, t2, b, mask);
- tcg_gen_mul_vec(vece, t, t1, t2);
-}
-
-static void gen_vmulwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
-{
- TCGv_i32 t1, t2;
-
- t1 = tcg_temp_new_i32();
- t2 = tcg_temp_new_i32();
- tcg_gen_ext16u_i32(t1, a);
- tcg_gen_ext16u_i32(t2, b);
- tcg_gen_mul_i32(t, t1, t2);
-}
-
-static void gen_vmulwev_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
-{
- TCGv_i64 t1, t2;
-
- t1 = tcg_temp_new_i64();
- t2 = tcg_temp_new_i64();
- tcg_gen_ext32u_i64(t1, a);
- tcg_gen_ext32u_i64(t2, b);
- tcg_gen_mul_i64(t, t1, t2);
-}
-
-static void do_vmulwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
- uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_mul_vec, 0
- };
- static const GVecGen3 op[3] = {
- {
- .fniv = gen_vmulwev_u,
- .fno = gen_helper_vmulwev_h_bu,
- .opt_opc = vecop_list,
- .vece = MO_16
- },
- {
- .fni4 = gen_vmulwev_w_hu,
- .fniv = gen_vmulwev_u,
- .fno = gen_helper_vmulwev_w_hu,
- .opt_opc = vecop_list,
- .vece = MO_32
- },
- {
- .fni8 = gen_vmulwev_d_wu,
- .fniv = gen_vmulwev_u,
- .fno = gen_helper_vmulwev_d_wu,
- .opt_opc = vecop_list,
- .vece = MO_64
- },
- };
-
- tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
-}
-
-TRANS(vmulwev_h_bu, LSX, gvec_vvv, MO_8, do_vmulwev_u)
-TRANS(vmulwev_w_hu, LSX, gvec_vvv, MO_16, do_vmulwev_u)
-TRANS(vmulwev_d_wu, LSX, gvec_vvv, MO_32, do_vmulwev_u)
-TRANS(xvmulwev_h_bu, LASX, gvec_xxx, MO_8, do_vmulwev_u)
-TRANS(xvmulwev_w_hu, LASX, gvec_xxx, MO_16, do_vmulwev_u)
-TRANS(xvmulwev_d_wu, LASX, gvec_xxx, MO_32, do_vmulwev_u)
-
-static void gen_vmulwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
-{
- TCGv_vec t1, t2;
- int halfbits = 4 << vece;
-
- t1 = tcg_temp_new_vec_matching(a);
- t2 = tcg_temp_new_vec_matching(b);
- tcg_gen_shri_vec(vece, t1, a, halfbits);
- tcg_gen_shri_vec(vece, t2, b, halfbits);
- tcg_gen_mul_vec(vece, t, t1, t2);
-}
-
-static void gen_vmulwod_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
-{
- TCGv_i32 t1, t2;
-
- t1 = tcg_temp_new_i32();
- t2 = tcg_temp_new_i32();
- tcg_gen_shri_i32(t1, a, 16);
- tcg_gen_shri_i32(t2, b, 16);
- tcg_gen_mul_i32(t, t1, t2);
-}
-
-static void gen_vmulwod_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
-{
- TCGv_i64 t1, t2;
-
- t1 = tcg_temp_new_i64();
- t2 = tcg_temp_new_i64();
- tcg_gen_shri_i64(t1, a, 32);
- tcg_gen_shri_i64(t2, b, 32);
- tcg_gen_mul_i64(t, t1, t2);
-}
-
-static void do_vmulwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
- uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_shri_vec, INDEX_op_mul_vec, 0
- };
- static const GVecGen3 op[3] = {
- {
- .fniv = gen_vmulwod_u,
- .fno = gen_helper_vmulwod_h_bu,
- .opt_opc = vecop_list,
- .vece = MO_16
- },
- {
- .fni4 = gen_vmulwod_w_hu,
- .fniv = gen_vmulwod_u,
- .fno = gen_helper_vmulwod_w_hu,
- .opt_opc = vecop_list,
- .vece = MO_32
- },
- {
- .fni8 = gen_vmulwod_d_wu,
- .fniv = gen_vmulwod_u,
- .fno = gen_helper_vmulwod_d_wu,
- .opt_opc = vecop_list,
- .vece = MO_64
- },
- };
-
- tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
-}
-
-TRANS(vmulwod_h_bu, LSX, gvec_vvv, MO_8, do_vmulwod_u)
-TRANS(vmulwod_w_hu, LSX, gvec_vvv, MO_16, do_vmulwod_u)
-TRANS(vmulwod_d_wu, LSX, gvec_vvv, MO_32, do_vmulwod_u)
-TRANS(xvmulwod_h_bu, LASX, gvec_xxx, MO_8, do_vmulwod_u)
-TRANS(xvmulwod_w_hu, LASX, gvec_xxx, MO_16, do_vmulwod_u)
-TRANS(xvmulwod_d_wu, LASX, gvec_xxx, MO_32, do_vmulwod_u)
-
-static void gen_vmulwev_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
-{
- TCGv_vec t1, t2, mask;
- int halfbits = 4 << vece;
-
- t1 = tcg_temp_new_vec_matching(a);
- t2 = tcg_temp_new_vec_matching(b);
- mask = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
- tcg_gen_and_vec(vece, t1, a, mask);
- tcg_gen_shli_vec(vece, t2, b, halfbits);
- tcg_gen_sari_vec(vece, t2, t2, halfbits);
- tcg_gen_mul_vec(vece, t, t1, t2);
-}
-
-static void gen_vmulwev_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
-{
- TCGv_i32 t1, t2;
-
- t1 = tcg_temp_new_i32();
- t2 = tcg_temp_new_i32();
- tcg_gen_ext16u_i32(t1, a);
- tcg_gen_ext16s_i32(t2, b);
- tcg_gen_mul_i32(t, t1, t2);
-}
-
-static void gen_vmulwev_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
-{
- TCGv_i64 t1, t2;
-
- t1 = tcg_temp_new_i64();
- t2 = tcg_temp_new_i64();
- tcg_gen_ext32u_i64(t1, a);
- tcg_gen_ext32s_i64(t2, b);
- tcg_gen_mul_i64(t, t1, t2);
-}
-
-static void do_vmulwev_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
- uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_mul_vec, 0
- };
- static const GVecGen3 op[3] = {
- {
- .fniv = gen_vmulwev_u_s,
- .fno = gen_helper_vmulwev_h_bu_b,
- .opt_opc = vecop_list,
- .vece = MO_16
- },
- {
- .fni4 = gen_vmulwev_w_hu_h,
- .fniv = gen_vmulwev_u_s,
- .fno = gen_helper_vmulwev_w_hu_h,
- .opt_opc = vecop_list,
- .vece = MO_32
- },
- {
- .fni8 = gen_vmulwev_d_wu_w,
- .fniv = gen_vmulwev_u_s,
- .fno = gen_helper_vmulwev_d_wu_w,
- .opt_opc = vecop_list,
- .vece = MO_64
- },
- };
-
- tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
-}
-
-TRANS(vmulwev_h_bu_b, LSX, gvec_vvv, MO_8, do_vmulwev_u_s)
-TRANS(vmulwev_w_hu_h, LSX, gvec_vvv, MO_16, do_vmulwev_u_s)
-TRANS(vmulwev_d_wu_w, LSX, gvec_vvv, MO_32, do_vmulwev_u_s)
-TRANS(xvmulwev_h_bu_b, LASX, gvec_xxx, MO_8, do_vmulwev_u_s)
-TRANS(xvmulwev_w_hu_h, LASX, gvec_xxx, MO_16, do_vmulwev_u_s)
-TRANS(xvmulwev_d_wu_w, LASX, gvec_xxx, MO_32, do_vmulwev_u_s)
-
-static void gen_vmulwod_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
-{
- TCGv_vec t1, t2;
- int halfbits = 4 << vece;
-
- t1 = tcg_temp_new_vec_matching(a);
- t2 = tcg_temp_new_vec_matching(b);
- tcg_gen_shri_vec(vece, t1, a, halfbits);
- tcg_gen_sari_vec(vece, t2, b, halfbits);
- tcg_gen_mul_vec(vece, t, t1, t2);
-}
-
-static void gen_vmulwod_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
-{
- TCGv_i32 t1, t2;
-
- t1 = tcg_temp_new_i32();
- t2 = tcg_temp_new_i32();
- tcg_gen_shri_i32(t1, a, 16);
- tcg_gen_sari_i32(t2, b, 16);
- tcg_gen_mul_i32(t, t1, t2);
-}
-
-static void gen_vmulwod_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
-{
- TCGv_i64 t1, t2;
-
- t1 = tcg_temp_new_i64();
- t2 = tcg_temp_new_i64();
- tcg_gen_shri_i64(t1, a, 32);
- tcg_gen_sari_i64(t2, b, 32);
- tcg_gen_mul_i64(t, t1, t2);
-}
-
-static void do_vmulwod_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
- uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_mul_vec, 0
- };
- static const GVecGen3 op[3] = {
- {
- .fniv = gen_vmulwod_u_s,
- .fno = gen_helper_vmulwod_h_bu_b,
- .opt_opc = vecop_list,
- .vece = MO_16
- },
- {
- .fni4 = gen_vmulwod_w_hu_h,
- .fniv = gen_vmulwod_u_s,
- .fno = gen_helper_vmulwod_w_hu_h,
- .opt_opc = vecop_list,
- .vece = MO_32
- },
- {
- .fni8 = gen_vmulwod_d_wu_w,
- .fniv = gen_vmulwod_u_s,
- .fno = gen_helper_vmulwod_d_wu_w,
- .opt_opc = vecop_list,
- .vece = MO_64
- },
- };
-
- tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
-}
-
-TRANS(vmulwod_h_bu_b, LSX, gvec_vvv, MO_8, do_vmulwod_u_s)
-TRANS(vmulwod_w_hu_h, LSX, gvec_vvv, MO_16, do_vmulwod_u_s)
-TRANS(vmulwod_d_wu_w, LSX, gvec_vvv, MO_32, do_vmulwod_u_s)
-TRANS(xvmulwod_h_bu_b, LASX, gvec_xxx, MO_8, do_vmulwod_u_s)
-TRANS(xvmulwod_w_hu_h, LASX, gvec_xxx, MO_16, do_vmulwod_u_s)
-TRANS(xvmulwod_d_wu_w, LASX, gvec_xxx, MO_32, do_vmulwod_u_s)
-
-static void gen_vmadd(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
-{
- TCGv_vec t1;
-
- t1 = tcg_temp_new_vec_matching(t);
- tcg_gen_mul_vec(vece, t1, a, b);
- tcg_gen_add_vec(vece, t, t, t1);
-}
-
-static void gen_vmadd_w(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
-{
- TCGv_i32 t1;
-
- t1 = tcg_temp_new_i32();
- tcg_gen_mul_i32(t1, a, b);
- tcg_gen_add_i32(t, t, t1);
-}
-
-static void gen_vmadd_d(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
-{
- TCGv_i64 t1;
-
- t1 = tcg_temp_new_i64();
- tcg_gen_mul_i64(t1, a, b);
- tcg_gen_add_i64(t, t, t1);
-}
-
-static void do_vmadd(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
- uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_mul_vec, INDEX_op_add_vec, 0
- };
- static const GVecGen3 op[4] = {
- {
- .fniv = gen_vmadd,
- .fno = gen_helper_vmadd_b,
- .load_dest = true,
- .opt_opc = vecop_list,
- .vece = MO_8
- },
- {
- .fniv = gen_vmadd,
- .fno = gen_helper_vmadd_h,
- .load_dest = true,
- .opt_opc = vecop_list,
- .vece = MO_16
- },
- {
- .fni4 = gen_vmadd_w,
- .fniv = gen_vmadd,
- .fno = gen_helper_vmadd_w,
- .load_dest = true,
- .opt_opc = vecop_list,
- .vece = MO_32
- },
- {
- .fni8 = gen_vmadd_d,
- .fniv = gen_vmadd,
- .fno = gen_helper_vmadd_d,
- .load_dest = true,
- .opt_opc = vecop_list,
- .vece = MO_64
- },
- };
-
- tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
-}
-
-TRANS(vmadd_b, LSX, gvec_vvv, MO_8, do_vmadd)
-TRANS(vmadd_h, LSX, gvec_vvv, MO_16, do_vmadd)
-TRANS(vmadd_w, LSX, gvec_vvv, MO_32, do_vmadd)
-TRANS(vmadd_d, LSX, gvec_vvv, MO_64, do_vmadd)
-TRANS(xvmadd_b, LASX, gvec_xxx, MO_8, do_vmadd)
-TRANS(xvmadd_h, LASX, gvec_xxx, MO_16, do_vmadd)
-TRANS(xvmadd_w, LASX, gvec_xxx, MO_32, do_vmadd)
-TRANS(xvmadd_d, LASX, gvec_xxx, MO_64, do_vmadd)
-
-static void gen_vmsub(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
-{
- TCGv_vec t1;
-
- t1 = tcg_temp_new_vec_matching(t);
- tcg_gen_mul_vec(vece, t1, a, b);
- tcg_gen_sub_vec(vece, t, t, t1);
-}
-
-static void gen_vmsub_w(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
-{
- TCGv_i32 t1;
-
- t1 = tcg_temp_new_i32();
- tcg_gen_mul_i32(t1, a, b);
- tcg_gen_sub_i32(t, t, t1);
-}
-
-static void gen_vmsub_d(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
-{
- TCGv_i64 t1;
-
- t1 = tcg_temp_new_i64();
- tcg_gen_mul_i64(t1, a, b);
- tcg_gen_sub_i64(t, t, t1);
-}
-
-static void do_vmsub(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
- uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_mul_vec, INDEX_op_sub_vec, 0
- };
- static const GVecGen3 op[4] = {
- {
- .fniv = gen_vmsub,
- .fno = gen_helper_vmsub_b,
- .load_dest = true,
- .opt_opc = vecop_list,
- .vece = MO_8
- },
- {
- .fniv = gen_vmsub,
- .fno = gen_helper_vmsub_h,
- .load_dest = true,
- .opt_opc = vecop_list,
- .vece = MO_16
- },
- {
- .fni4 = gen_vmsub_w,
- .fniv = gen_vmsub,
- .fno = gen_helper_vmsub_w,
- .load_dest = true,
- .opt_opc = vecop_list,
- .vece = MO_32
- },
- {
- .fni8 = gen_vmsub_d,
- .fniv = gen_vmsub,
- .fno = gen_helper_vmsub_d,
- .load_dest = true,
- .opt_opc = vecop_list,
- .vece = MO_64
- },
- };
-
- tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
-}
-
-TRANS(vmsub_b, LSX, gvec_vvv, MO_8, do_vmsub)
-TRANS(vmsub_h, LSX, gvec_vvv, MO_16, do_vmsub)
-TRANS(vmsub_w, LSX, gvec_vvv, MO_32, do_vmsub)
-TRANS(vmsub_d, LSX, gvec_vvv, MO_64, do_vmsub)
-TRANS(xvmsub_b, LASX, gvec_xxx, MO_8, do_vmsub)
-TRANS(xvmsub_h, LASX, gvec_xxx, MO_16, do_vmsub)
-TRANS(xvmsub_w, LASX, gvec_xxx, MO_32, do_vmsub)
-TRANS(xvmsub_d, LASX, gvec_xxx, MO_64, do_vmsub)
-
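-/* vmaddwev: widening multiply-add of the sign-extended even-numbered elements */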
-static void gen_vmaddwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
-{
- TCGv_vec t1, t2, t3;
- int halfbits = 4 << vece;
-
- t1 = tcg_temp_new_vec_matching(a);
- t2 = tcg_temp_new_vec_matching(b);
- t3 = tcg_temp_new_vec_matching(t);
- tcg_gen_shli_vec(vece, t1, a, halfbits);
- tcg_gen_sari_vec(vece, t1, t1, halfbits);
- tcg_gen_shli_vec(vece, t2, b, halfbits);
- tcg_gen_sari_vec(vece, t2, t2, halfbits);
- tcg_gen_mul_vec(vece, t3, t1, t2);
- tcg_gen_add_vec(vece, t, t, t3);
-}
-
-static void gen_vmaddwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
-{
- TCGv_i32 t1;
-
- t1 = tcg_temp_new_i32();
- gen_vmulwev_w_h(t1, a, b);
- tcg_gen_add_i32(t, t, t1);
-}
-
-static void gen_vmaddwev_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
-{
- TCGv_i64 t1;
-
- t1 = tcg_temp_new_i64();
- gen_vmulwev_d_w(t1, a, b);
- tcg_gen_add_i64(t, t, t1);
-}
-
-static void do_vmaddwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
- uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_shli_vec, INDEX_op_sari_vec,
- INDEX_op_mul_vec, INDEX_op_add_vec, 0
- };
- static const GVecGen3 op[3] = {
- {
- .fniv = gen_vmaddwev_s,
- .fno = gen_helper_vmaddwev_h_b,
- .load_dest = true,
- .opt_opc = vecop_list,
- .vece = MO_16
- },
- {
- .fni4 = gen_vmaddwev_w_h,
- .fniv = gen_vmaddwev_s,
- .fno = gen_helper_vmaddwev_w_h,
- .load_dest = true,
- .opt_opc = vecop_list,
- .vece = MO_32
- },
- {
- .fni8 = gen_vmaddwev_d_w,
- .fniv = gen_vmaddwev_s,
- .fno = gen_helper_vmaddwev_d_w,
- .load_dest = true,
- .opt_opc = vecop_list,
- .vece = MO_64
- },
- };
-
- tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
-}
-
-TRANS(vmaddwev_h_b, LSX, gvec_vvv, MO_8, do_vmaddwev_s)
-TRANS(vmaddwev_w_h, LSX, gvec_vvv, MO_16, do_vmaddwev_s)
-TRANS(vmaddwev_d_w, LSX, gvec_vvv, MO_32, do_vmaddwev_s)
-TRANS(xvmaddwev_h_b, LASX, gvec_xxx, MO_8, do_vmaddwev_s)
-TRANS(xvmaddwev_w_h, LASX, gvec_xxx, MO_16, do_vmaddwev_s)
-TRANS(xvmaddwev_d_w, LASX, gvec_xxx, MO_32, do_vmaddwev_s)
-
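-/*
- * Like gen_vmul_q_vl, but accumulate: the 128-bit product is added to
- * the existing 128-bit lane of vd with a double-word add.
- */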
-static bool gen_vmadd_q_vl(DisasContext *ctx,
- arg_vvv *a, uint32_t oprsz, int idx1, int idx2,
- void (*func)(TCGv_i64, TCGv_i64,
- TCGv_i64, TCGv_i64))
-{
- TCGv_i64 rh, rl, arg1, arg2, th, tl;
- int i;
-
- if (!check_vec(ctx, oprsz)) {
- return true;
- }
-
- rh = tcg_temp_new_i64();
- rl = tcg_temp_new_i64();
- arg1 = tcg_temp_new_i64();
- arg2 = tcg_temp_new_i64();
- th = tcg_temp_new_i64();
- tl = tcg_temp_new_i64();
-
- for (i = 0; i < oprsz / 16; i++) {
- get_vreg64(arg1, a->vj, 2 * i + idx1);
- get_vreg64(arg2, a->vk, 2 * i + idx2);
- get_vreg64(rh, a->vd, 2 * i + 1);
- get_vreg64(rl, a->vd, 2 * i);
-
- func(tl, th, arg1, arg2);
- tcg_gen_add2_i64(rl, rh, rl, rh, tl, th);
-
- set_vreg64(rh, a->vd, 2 * i + 1);
- set_vreg64(rl, a->vd, 2 * i);
- }
-
- return true;
-}
-
-static bool gen_vmadd_q(DisasContext *ctx, arg_vvv *a, int idx1, int idx2,
- void (*func)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
-{
- return gen_vmadd_q_vl(ctx, a, 16, idx1, idx2, func);
-}
-
-static bool gen_xvmadd_q(DisasContext *ctx, arg_vvv *a, int idx1, int idx2,
- void (*func)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
-{
- return gen_vmadd_q_vl(ctx, a, 32, idx1, idx2, func);
-}
-
-TRANS(vmaddwev_q_d, LSX, gen_vmadd_q, 0, 0, tcg_gen_muls2_i64)
-TRANS(vmaddwod_q_d, LSX, gen_vmadd_q, 1, 1, tcg_gen_muls2_i64)
-TRANS(vmaddwev_q_du, LSX, gen_vmadd_q, 0, 0, tcg_gen_mulu2_i64)
-TRANS(vmaddwod_q_du, LSX, gen_vmadd_q, 1, 1, tcg_gen_mulu2_i64)
-TRANS(vmaddwev_q_du_d, LSX, gen_vmadd_q, 0, 0, tcg_gen_mulus2_i64)
-TRANS(vmaddwod_q_du_d, LSX, gen_vmadd_q, 1, 1, tcg_gen_mulus2_i64)
-TRANS(xvmaddwev_q_d, LASX, gen_xvmadd_q, 0, 0, tcg_gen_muls2_i64)
-TRANS(xvmaddwod_q_d, LASX, gen_xvmadd_q, 1, 1, tcg_gen_muls2_i64)
-TRANS(xvmaddwev_q_du, LASX, gen_xvmadd_q, 0, 0, tcg_gen_mulu2_i64)
-TRANS(xvmaddwod_q_du, LASX, gen_xvmadd_q, 1, 1, tcg_gen_mulu2_i64)
-TRANS(xvmaddwev_q_du_d, LASX, gen_xvmadd_q, 0, 0, tcg_gen_mulus2_i64)
-TRANS(xvmaddwod_q_du_d, LASX, gen_xvmadd_q, 1, 1, tcg_gen_mulus2_i64)
-
-static void gen_vmaddwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
-{
- TCGv_vec t1, t2, t3;
- int halfbits = 4 << vece;
-
- t1 = tcg_temp_new_vec_matching(a);
- t2 = tcg_temp_new_vec_matching(b);
- t3 = tcg_temp_new_vec_matching(t);
- tcg_gen_sari_vec(vece, t1, a, halfbits);
- tcg_gen_sari_vec(vece, t2, b, halfbits);
- tcg_gen_mul_vec(vece, t3, t1, t2);
- tcg_gen_add_vec(vece, t, t, t3);
-}
-
-static void gen_vmaddwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
-{
- TCGv_i32 t1;
-
- t1 = tcg_temp_new_i32();
- gen_vmulwod_w_h(t1, a, b);
- tcg_gen_add_i32(t, t, t1);
-}
-
-static void gen_vmaddwod_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
-{
- TCGv_i64 t1;
-
- t1 = tcg_temp_new_i64();
- gen_vmulwod_d_w(t1, a, b);
- tcg_gen_add_i64(t, t, t1);
-}
-
-static void do_vmaddwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
- uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_sari_vec, INDEX_op_mul_vec, INDEX_op_add_vec, 0
- };
- static const GVecGen3 op[3] = {
- {
- .fniv = gen_vmaddwod_s,
- .fno = gen_helper_vmaddwod_h_b,
- .load_dest = true,
- .opt_opc = vecop_list,
- .vece = MO_16
- },
- {
- .fni4 = gen_vmaddwod_w_h,
- .fniv = gen_vmaddwod_s,
- .fno = gen_helper_vmaddwod_w_h,
- .load_dest = true,
- .opt_opc = vecop_list,
- .vece = MO_32
- },
- {
- .fni8 = gen_vmaddwod_d_w,
- .fniv = gen_vmaddwod_s,
- .fno = gen_helper_vmaddwod_d_w,
- .load_dest = true,
- .opt_opc = vecop_list,
- .vece = MO_64
- },
- };
-
- tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
-}
-
-TRANS(vmaddwod_h_b, LSX, gvec_vvv, MO_8, do_vmaddwod_s)
-TRANS(vmaddwod_w_h, LSX, gvec_vvv, MO_16, do_vmaddwod_s)
-TRANS(vmaddwod_d_w, LSX, gvec_vvv, MO_32, do_vmaddwod_s)
-TRANS(xvmaddwod_h_b, LASX, gvec_xxx, MO_8, do_vmaddwod_s)
-TRANS(xvmaddwod_w_h, LASX, gvec_xxx, MO_16, do_vmaddwod_s)
-TRANS(xvmaddwod_d_w, LASX, gvec_xxx, MO_32, do_vmaddwod_s)
-
-static void gen_vmaddwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
-{
- TCGv_vec t1, t2, mask;
-
- t1 = tcg_temp_new_vec_matching(t);
- t2 = tcg_temp_new_vec_matching(b);
- mask = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
- tcg_gen_and_vec(vece, t1, a, mask);
- tcg_gen_and_vec(vece, t2, b, mask);
- tcg_gen_mul_vec(vece, t1, t1, t2);
- tcg_gen_add_vec(vece, t, t, t1);
-}
-
-static void gen_vmaddwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
-{
- TCGv_i32 t1;
-
- t1 = tcg_temp_new_i32();
- gen_vmulwev_w_hu(t1, a, b);
- tcg_gen_add_i32(t, t, t1);
-}
-
-static void gen_vmaddwev_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
-{
- TCGv_i64 t1;
-
- t1 = tcg_temp_new_i64();
- gen_vmulwev_d_wu(t1, a, b);
- tcg_gen_add_i64(t, t, t1);
-}
-
-static void do_vmaddwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
- uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_mul_vec, INDEX_op_add_vec, 0
- };
- static const GVecGen3 op[3] = {
- {
- .fniv = gen_vmaddwev_u,
- .fno = gen_helper_vmaddwev_h_bu,
- .load_dest = true,
- .opt_opc = vecop_list,
- .vece = MO_16
- },
- {
- .fni4 = gen_vmaddwev_w_hu,
- .fniv = gen_vmaddwev_u,
- .fno = gen_helper_vmaddwev_w_hu,
- .load_dest = true,
- .opt_opc = vecop_list,
- .vece = MO_32
- },
- {
- .fni8 = gen_vmaddwev_d_wu,
- .fniv = gen_vmaddwev_u,
- .fno = gen_helper_vmaddwev_d_wu,
- .load_dest = true,
- .opt_opc = vecop_list,
- .vece = MO_64
- },
- };
-
- tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
-}
-
-TRANS(vmaddwev_h_bu, LSX, gvec_vvv, MO_8, do_vmaddwev_u)
-TRANS(vmaddwev_w_hu, LSX, gvec_vvv, MO_16, do_vmaddwev_u)
-TRANS(vmaddwev_d_wu, LSX, gvec_vvv, MO_32, do_vmaddwev_u)
-TRANS(xvmaddwev_h_bu, LASX, gvec_xxx, MO_8, do_vmaddwev_u)
-TRANS(xvmaddwev_w_hu, LASX, gvec_xxx, MO_16, do_vmaddwev_u)
-TRANS(xvmaddwev_d_wu, LASX, gvec_xxx, MO_32, do_vmaddwev_u)
-
-static void gen_vmaddwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
-{
- TCGv_vec t1, t2, t3;
- int halfbits = 4 << vece;
-
- t1 = tcg_temp_new_vec_matching(a);
- t2 = tcg_temp_new_vec_matching(b);
- t3 = tcg_temp_new_vec_matching(t);
- tcg_gen_shri_vec(vece, t1, a, halfbits);
- tcg_gen_shri_vec(vece, t2, b, halfbits);
- tcg_gen_mul_vec(vece, t3, t1, t2);
- tcg_gen_add_vec(vece, t, t, t3);
-}
-
-static void gen_vmaddwod_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
-{
- TCGv_i32 t1;
-
- t1 = tcg_temp_new_i32();
- gen_vmulwod_w_hu(t1, a, b);
- tcg_gen_add_i32(t, t, t1);
-}
-
-static void gen_vmaddwod_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
-{
- TCGv_i64 t1;
-
- t1 = tcg_temp_new_i64();
- gen_vmulwod_d_wu(t1, a, b);
- tcg_gen_add_i64(t, t, t1);
-}
-
-static void do_vmaddwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
- uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_shri_vec, INDEX_op_mul_vec, INDEX_op_add_vec, 0
- };
- static const GVecGen3 op[3] = {
- {
- .fniv = gen_vmaddwod_u,
- .fno = gen_helper_vmaddwod_h_bu,
- .load_dest = true,
- .opt_opc = vecop_list,
- .vece = MO_16
- },
- {
- .fni4 = gen_vmaddwod_w_hu,
- .fniv = gen_vmaddwod_u,
- .fno = gen_helper_vmaddwod_w_hu,
- .load_dest = true,
- .opt_opc = vecop_list,
- .vece = MO_32
- },
- {
- .fni8 = gen_vmaddwod_d_wu,
- .fniv = gen_vmaddwod_u,
- .fno = gen_helper_vmaddwod_d_wu,
- .load_dest = true,
- .opt_opc = vecop_list,
- .vece = MO_64
- },
- };
-
- tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
-}
-
-TRANS(vmaddwod_h_bu, LSX, gvec_vvv, MO_8, do_vmaddwod_u)
-TRANS(vmaddwod_w_hu, LSX, gvec_vvv, MO_16, do_vmaddwod_u)
-TRANS(vmaddwod_d_wu, LSX, gvec_vvv, MO_32, do_vmaddwod_u)
-TRANS(xvmaddwod_h_bu, LASX, gvec_xxx, MO_8, do_vmaddwod_u)
-TRANS(xvmaddwod_w_hu, LASX, gvec_xxx, MO_16, do_vmaddwod_u)
-TRANS(xvmaddwod_d_wu, LASX, gvec_xxx, MO_32, do_vmaddwod_u)
-
-static void gen_vmaddwev_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
-{
- TCGv_vec t1, t2, mask;
- int halfbits = 4 << vece;
-
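- /* Even halves: zero-extend a with the mask, sign-extend b with shl/sar. */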
- t1 = tcg_temp_new_vec_matching(a);
- t2 = tcg_temp_new_vec_matching(b);
- mask = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
- tcg_gen_and_vec(vece, t1, a, mask);
- tcg_gen_shli_vec(vece, t2, b, halfbits);
- tcg_gen_sari_vec(vece, t2, t2, halfbits);
- tcg_gen_mul_vec(vece, t1, t1, t2);
- tcg_gen_add_vec(vece, t, t, t1);
-}
-
-static void gen_vmaddwev_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
-{
- TCGv_i32 t1;
-
- t1 = tcg_temp_new_i32();
- gen_vmulwev_w_hu_h(t1, a, b);
- tcg_gen_add_i32(t, t, t1);
-}
-
-static void gen_vmaddwev_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
-{
- TCGv_i64 t1;
-
- t1 = tcg_temp_new_i64();
- gen_vmulwev_d_wu_w(t1, a, b);
- tcg_gen_add_i64(t, t, t1);
-}
-
-static void do_vmaddwev_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
- uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_shli_vec, INDEX_op_sari_vec,
- INDEX_op_mul_vec, INDEX_op_add_vec, 0
- };
- static const GVecGen3 op[3] = {
- {
- .fniv = gen_vmaddwev_u_s,
- .fno = gen_helper_vmaddwev_h_bu_b,
- .load_dest = true,
- .opt_opc = vecop_list,
- .vece = MO_16
- },
- {
- .fni4 = gen_vmaddwev_w_hu_h,
- .fniv = gen_vmaddwev_u_s,
- .fno = gen_helper_vmaddwev_w_hu_h,
- .load_dest = true,
- .opt_opc = vecop_list,
- .vece = MO_32
- },
- {
- .fni8 = gen_vmaddwev_d_wu_w,
- .fniv = gen_vmaddwev_u_s,
- .fno = gen_helper_vmaddwev_d_wu_w,
- .load_dest = true,
- .opt_opc = vecop_list,
- .vece = MO_64
- },
- };
-
- tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
-}
-
-TRANS(vmaddwev_h_bu_b, LSX, gvec_vvv, MO_8, do_vmaddwev_u_s)
-TRANS(vmaddwev_w_hu_h, LSX, gvec_vvv, MO_16, do_vmaddwev_u_s)
-TRANS(vmaddwev_d_wu_w, LSX, gvec_vvv, MO_32, do_vmaddwev_u_s)
-TRANS(xvmaddwev_h_bu_b, LASX, gvec_xxx, MO_8, do_vmaddwev_u_s)
-TRANS(xvmaddwev_w_hu_h, LASX, gvec_xxx, MO_16, do_vmaddwev_u_s)
-TRANS(xvmaddwev_d_wu_w, LASX, gvec_xxx, MO_32, do_vmaddwev_u_s)
-
-static void gen_vmaddwod_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
-{
- TCGv_vec t1, t2, t3;
- int halfbits = 4 << vece;
-
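- /* Odd halves: a is zero-extended (shri), b is sign-extended (sari). */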
- t1 = tcg_temp_new_vec_matching(a);
- t2 = tcg_temp_new_vec_matching(b);
- t3 = tcg_temp_new_vec_matching(t);
- tcg_gen_shri_vec(vece, t1, a, halfbits);
- tcg_gen_sari_vec(vece, t2, b, halfbits);
- tcg_gen_mul_vec(vece, t3, t1, t2);
- tcg_gen_add_vec(vece, t, t, t3);
-}
-
-static void gen_vmaddwod_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
-{
- TCGv_i32 t1;
-
- t1 = tcg_temp_new_i32();
- gen_vmulwod_w_hu_h(t1, a, b);
- tcg_gen_add_i32(t, t, t1);
-}
-
-static void gen_vmaddwod_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
-{
- TCGv_i64 t1;
-
- t1 = tcg_temp_new_i64();
- gen_vmulwod_d_wu_w(t1, a, b);
- tcg_gen_add_i64(t, t, t1);
-}
-
-static void do_vmaddwod_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
- uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_shri_vec, INDEX_op_sari_vec,
- INDEX_op_mul_vec, INDEX_op_add_vec, 0
- };
- static const GVecGen3 op[3] = {
- {
- .fniv = gen_vmaddwod_u_s,
- .fno = gen_helper_vmaddwod_h_bu_b,
- .load_dest = true,
- .opt_opc = vecop_list,
- .vece = MO_16
- },
- {
- .fni4 = gen_vmaddwod_w_hu_h,
- .fniv = gen_vmaddwod_u_s,
- .fno = gen_helper_vmaddwod_w_hu_h,
- .load_dest = true,
- .opt_opc = vecop_list,
- .vece = MO_32
- },
- {
- .fni8 = gen_vmaddwod_d_wu_w,
- .fniv = gen_vmaddwod_u_s,
- .fno = gen_helper_vmaddwod_d_wu_w,
- .load_dest = true,
- .opt_opc = vecop_list,
- .vece = MO_64
- },
- };
-
- tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
-}
-
-TRANS(vmaddwod_h_bu_b, LSX, gvec_vvv, MO_8, do_vmaddwod_u_s)
-TRANS(vmaddwod_w_hu_h, LSX, gvec_vvv, MO_16, do_vmaddwod_u_s)
-TRANS(vmaddwod_d_wu_w, LSX, gvec_vvv, MO_32, do_vmaddwod_u_s)
-TRANS(xvmaddwod_h_bu_b, LASX, gvec_xxx, MO_8, do_vmaddwod_u_s)
-TRANS(xvmaddwod_w_hu_h, LASX, gvec_xxx, MO_16, do_vmaddwod_u_s)
-TRANS(xvmaddwod_d_wu_w, LASX, gvec_xxx, MO_32, do_vmaddwod_u_s)
-
-TRANS(vdiv_b, LSX, gen_vvv, gen_helper_vdiv_b)
-TRANS(vdiv_h, LSX, gen_vvv, gen_helper_vdiv_h)
-TRANS(vdiv_w, LSX, gen_vvv, gen_helper_vdiv_w)
-TRANS(vdiv_d, LSX, gen_vvv, gen_helper_vdiv_d)
-TRANS(vdiv_bu, LSX, gen_vvv, gen_helper_vdiv_bu)
-TRANS(vdiv_hu, LSX, gen_vvv, gen_helper_vdiv_hu)
-TRANS(vdiv_wu, LSX, gen_vvv, gen_helper_vdiv_wu)
-TRANS(vdiv_du, LSX, gen_vvv, gen_helper_vdiv_du)
-TRANS(vmod_b, LSX, gen_vvv, gen_helper_vmod_b)
-TRANS(vmod_h, LSX, gen_vvv, gen_helper_vmod_h)
-TRANS(vmod_w, LSX, gen_vvv, gen_helper_vmod_w)
-TRANS(vmod_d, LSX, gen_vvv, gen_helper_vmod_d)
-TRANS(vmod_bu, LSX, gen_vvv, gen_helper_vmod_bu)
-TRANS(vmod_hu, LSX, gen_vvv, gen_helper_vmod_hu)
-TRANS(vmod_wu, LSX, gen_vvv, gen_helper_vmod_wu)
-TRANS(vmod_du, LSX, gen_vvv, gen_helper_vmod_du)
-TRANS(xvdiv_b, LASX, gen_xxx, gen_helper_vdiv_b)
-TRANS(xvdiv_h, LASX, gen_xxx, gen_helper_vdiv_h)
-TRANS(xvdiv_w, LASX, gen_xxx, gen_helper_vdiv_w)
-TRANS(xvdiv_d, LASX, gen_xxx, gen_helper_vdiv_d)
-TRANS(xvdiv_bu, LASX, gen_xxx, gen_helper_vdiv_bu)
-TRANS(xvdiv_hu, LASX, gen_xxx, gen_helper_vdiv_hu)
-TRANS(xvdiv_wu, LASX, gen_xxx, gen_helper_vdiv_wu)
-TRANS(xvdiv_du, LASX, gen_xxx, gen_helper_vdiv_du)
-TRANS(xvmod_b, LASX, gen_xxx, gen_helper_vmod_b)
-TRANS(xvmod_h, LASX, gen_xxx, gen_helper_vmod_h)
-TRANS(xvmod_w, LASX, gen_xxx, gen_helper_vmod_w)
-TRANS(xvmod_d, LASX, gen_xxx, gen_helper_vmod_d)
-TRANS(xvmod_bu, LASX, gen_xxx, gen_helper_vmod_bu)
-TRANS(xvmod_hu, LASX, gen_xxx, gen_helper_vmod_hu)
-TRANS(xvmod_wu, LASX, gen_xxx, gen_helper_vmod_wu)
-TRANS(xvmod_du, LASX, gen_xxx, gen_helper_vmod_du)
-
-static void gen_vsat_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec max)
-{
- TCGv_vec min;
-
- min = tcg_temp_new_vec_matching(t);
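- /* min = ~max = -(max + 1), the symmetric lower clamp bound. */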
- tcg_gen_not_vec(vece, min, max);
- tcg_gen_smax_vec(vece, t, a, min);
- tcg_gen_smin_vec(vece, t, t, max);
-}
-
-static void do_vsat_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
- int64_t imm, uint32_t oprsz, uint32_t maxsz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_smax_vec, INDEX_op_smin_vec, 0
- };
- static const GVecGen2s op[4] = {
- {
- .fniv = gen_vsat_s,
- .fno = gen_helper_vsat_b,
- .opt_opc = vecop_list,
- .vece = MO_8
- },
- {
- .fniv = gen_vsat_s,
- .fno = gen_helper_vsat_h,
- .opt_opc = vecop_list,
- .vece = MO_16
- },
- {
- .fniv = gen_vsat_s,
- .fno = gen_helper_vsat_w,
- .opt_opc = vecop_list,
- .vece = MO_32
- },
- {
- .fniv = gen_vsat_s,
- .fno = gen_helper_vsat_d,
- .opt_opc = vecop_list,
- .vece = MO_64
- },
- };
-
- tcg_gen_gvec_2s(vd_ofs, vj_ofs, oprsz, maxsz,
- tcg_constant_i64((1ll << imm) - 1), &op[vece]);
-}
-
-TRANS(vsat_b, LSX, gvec_vv_i, MO_8, do_vsat_s)
-TRANS(vsat_h, LSX, gvec_vv_i, MO_16, do_vsat_s)
-TRANS(vsat_w, LSX, gvec_vv_i, MO_32, do_vsat_s)
-TRANS(vsat_d, LSX, gvec_vv_i, MO_64, do_vsat_s)
-TRANS(xvsat_b, LASX, gvec_xx_i, MO_8, do_vsat_s)
-TRANS(xvsat_h, LASX, gvec_xx_i, MO_16, do_vsat_s)
-TRANS(xvsat_w, LASX, gvec_xx_i, MO_32, do_vsat_s)
-TRANS(xvsat_d, LASX, gvec_xx_i, MO_64, do_vsat_s)
-
-static void gen_vsat_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec max)
-{
- tcg_gen_umin_vec(vece, t, a, max);
-}
-
-static void do_vsat_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
- int64_t imm, uint32_t oprsz, uint32_t maxsz)
-{
- uint64_t max;
- static const TCGOpcode vecop_list[] = {
- INDEX_op_umin_vec, 0
- };
- static const GVecGen2s op[4] = {
- {
- .fniv = gen_vsat_u,
- .fno = gen_helper_vsat_bu,
- .opt_opc = vecop_list,
- .vece = MO_8
- },
- {
- .fniv = gen_vsat_u,
- .fno = gen_helper_vsat_hu,
- .opt_opc = vecop_list,
- .vece = MO_16
- },
- {
- .fniv = gen_vsat_u,
- .fno = gen_helper_vsat_wu,
- .opt_opc = vecop_list,
- .vece = MO_32
- },
- {
- .fniv = gen_vsat_u,
- .fno = gen_helper_vsat_du,
- .opt_opc = vecop_list,
- .vece = MO_64
- },
- };
-
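- /* Special-case imm == 63 to avoid the undefined shift 1ull << 64. */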
- max = (imm == 0x3f) ? UINT64_MAX : (1ull << (imm + 1)) - 1;
- tcg_gen_gvec_2s(vd_ofs, vj_ofs, oprsz, maxsz,
- tcg_constant_i64(max), &op[vece]);
-}
-
-TRANS(vsat_bu, LSX, gvec_vv_i, MO_8, do_vsat_u)
-TRANS(vsat_hu, LSX, gvec_vv_i, MO_16, do_vsat_u)
-TRANS(vsat_wu, LSX, gvec_vv_i, MO_32, do_vsat_u)
-TRANS(vsat_du, LSX, gvec_vv_i, MO_64, do_vsat_u)
-TRANS(xvsat_bu, LASX, gvec_xx_i, MO_8, do_vsat_u)
-TRANS(xvsat_hu, LASX, gvec_xx_i, MO_16, do_vsat_u)
-TRANS(xvsat_wu, LASX, gvec_xx_i, MO_32, do_vsat_u)
-TRANS(xvsat_du, LASX, gvec_xx_i, MO_64, do_vsat_u)
-
-TRANS(vexth_h_b, LSX, gen_vv, gen_helper_vexth_h_b)
-TRANS(vexth_w_h, LSX, gen_vv, gen_helper_vexth_w_h)
-TRANS(vexth_d_w, LSX, gen_vv, gen_helper_vexth_d_w)
-TRANS(vexth_q_d, LSX, gen_vv, gen_helper_vexth_q_d)
-TRANS(vexth_hu_bu, LSX, gen_vv, gen_helper_vexth_hu_bu)
-TRANS(vexth_wu_hu, LSX, gen_vv, gen_helper_vexth_wu_hu)
-TRANS(vexth_du_wu, LSX, gen_vv, gen_helper_vexth_du_wu)
-TRANS(vexth_qu_du, LSX, gen_vv, gen_helper_vexth_qu_du)
-TRANS(xvexth_h_b, LASX, gen_xx, gen_helper_vexth_h_b)
-TRANS(xvexth_w_h, LASX, gen_xx, gen_helper_vexth_w_h)
-TRANS(xvexth_d_w, LASX, gen_xx, gen_helper_vexth_d_w)
-TRANS(xvexth_q_d, LASX, gen_xx, gen_helper_vexth_q_d)
-TRANS(xvexth_hu_bu, LASX, gen_xx, gen_helper_vexth_hu_bu)
-TRANS(xvexth_wu_hu, LASX, gen_xx, gen_helper_vexth_wu_hu)
-TRANS(xvexth_du_wu, LASX, gen_xx, gen_helper_vexth_du_wu)
-TRANS(xvexth_qu_du, LASX, gen_xx, gen_helper_vexth_qu_du)
-
-TRANS(vext2xv_h_b, LASX, gen_xx, gen_helper_vext2xv_h_b)
-TRANS(vext2xv_w_b, LASX, gen_xx, gen_helper_vext2xv_w_b)
-TRANS(vext2xv_d_b, LASX, gen_xx, gen_helper_vext2xv_d_b)
-TRANS(vext2xv_w_h, LASX, gen_xx, gen_helper_vext2xv_w_h)
-TRANS(vext2xv_d_h, LASX, gen_xx, gen_helper_vext2xv_d_h)
-TRANS(vext2xv_d_w, LASX, gen_xx, gen_helper_vext2xv_d_w)
-TRANS(vext2xv_hu_bu, LASX, gen_xx, gen_helper_vext2xv_hu_bu)
-TRANS(vext2xv_wu_bu, LASX, gen_xx, gen_helper_vext2xv_wu_bu)
-TRANS(vext2xv_du_bu, LASX, gen_xx, gen_helper_vext2xv_du_bu)
-TRANS(vext2xv_wu_hu, LASX, gen_xx, gen_helper_vext2xv_wu_hu)
-TRANS(vext2xv_du_hu, LASX, gen_xx, gen_helper_vext2xv_du_hu)
-TRANS(vext2xv_du_wu, LASX, gen_xx, gen_helper_vext2xv_du_wu)
-
-static void gen_vsigncov(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
-{
- TCGv_vec t1, zero;
-
- t1 = tcg_temp_new_vec_matching(t);
- zero = tcg_constant_vec_matching(t, vece, 0);
-
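- /* t = (a < 0) ? -b : b, then force t to zero where a == 0. */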
- tcg_gen_neg_vec(vece, t1, b);
- tcg_gen_cmpsel_vec(TCG_COND_LT, vece, t, a, zero, t1, b);
- tcg_gen_cmpsel_vec(TCG_COND_EQ, vece, t, a, zero, zero, t);
-}
-
-static void do_vsigncov(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
- uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_neg_vec, INDEX_op_cmpsel_vec, 0
- };
- static const GVecGen3 op[4] = {
- {
- .fniv = gen_vsigncov,
- .fno = gen_helper_vsigncov_b,
- .opt_opc = vecop_list,
- .vece = MO_8
- },
- {
- .fniv = gen_vsigncov,
- .fno = gen_helper_vsigncov_h,
- .opt_opc = vecop_list,
- .vece = MO_16
- },
- {
- .fniv = gen_vsigncov,
- .fno = gen_helper_vsigncov_w,
- .opt_opc = vecop_list,
- .vece = MO_32
- },
- {
- .fniv = gen_vsigncov,
- .fno = gen_helper_vsigncov_d,
- .opt_opc = vecop_list,
- .vece = MO_64
- },
- };
-
- tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
-}
-
-TRANS(vsigncov_b, LSX, gvec_vvv, MO_8, do_vsigncov)
-TRANS(vsigncov_h, LSX, gvec_vvv, MO_16, do_vsigncov)
-TRANS(vsigncov_w, LSX, gvec_vvv, MO_32, do_vsigncov)
-TRANS(vsigncov_d, LSX, gvec_vvv, MO_64, do_vsigncov)
-TRANS(xvsigncov_b, LASX, gvec_xxx, MO_8, do_vsigncov)
-TRANS(xvsigncov_h, LASX, gvec_xxx, MO_16, do_vsigncov)
-TRANS(xvsigncov_w, LASX, gvec_xxx, MO_32, do_vsigncov)
-TRANS(xvsigncov_d, LASX, gvec_xxx, MO_64, do_vsigncov)
-
-TRANS(vmskltz_b, LSX, gen_vv, gen_helper_vmskltz_b)
-TRANS(vmskltz_h, LSX, gen_vv, gen_helper_vmskltz_h)
-TRANS(vmskltz_w, LSX, gen_vv, gen_helper_vmskltz_w)
-TRANS(vmskltz_d, LSX, gen_vv, gen_helper_vmskltz_d)
-TRANS(vmskgez_b, LSX, gen_vv, gen_helper_vmskgez_b)
-TRANS(vmsknz_b, LSX, gen_vv, gen_helper_vmsknz_b)
-TRANS(xvmskltz_b, LASX, gen_xx, gen_helper_vmskltz_b)
-TRANS(xvmskltz_h, LASX, gen_xx, gen_helper_vmskltz_h)
-TRANS(xvmskltz_w, LASX, gen_xx, gen_helper_vmskltz_w)
-TRANS(xvmskltz_d, LASX, gen_xx, gen_helper_vmskltz_d)
-TRANS(xvmskgez_b, LASX, gen_xx, gen_helper_vmskgez_b)
-TRANS(xvmsknz_b, LASX, gen_xx, gen_helper_vmsknz_b)
-
-#define EXPAND_BYTE(bit) ((uint64_t)(bit ? 0xff : 0))
-
-static uint64_t vldi_get_value(DisasContext *ctx, uint32_t imm)
-{
- int mode;
- uint64_t data, t;
-
- /*
- * imm bits [11:8] select the mode; valid mode values are 0-12,
- * other values are invalid.
- */
- mode = (imm >> 8) & 0xf;
- t = imm & 0xff;
- switch (mode) {
- case 0:
- /* data: {2{24'0, imm[7:0]}} */
- data = (t << 32) | t;
- break;
- case 1:
- /* data: {2{16'0, imm[7:0], 8'0}} */
- data = (t << 40) | (t << 8);
- break;
- case 2:
- /* data: {2{8'0, imm[7:0], 16'0}} */
- data = (t << 48) | (t << 16);
- break;
- case 3:
- /* data: {2{imm[7:0], 24'0}} */
- data = (t << 56) | (t << 24);
- break;
- case 4:
- /* data: {4{8'0, imm[7:0]}} */
- data = (t << 48) | (t << 32) | (t << 16) | t;
- break;
- case 5:
- /* data: {4{imm[7:0], 8'0}} */
- data = (t << 56) | (t << 40) | (t << 24) | (t << 8);
- break;
- case 6:
- /* data: {2{16'0, imm[7:0], 8'1}} */
- data = (t << 40) | ((uint64_t)0xff << 32) | (t << 8) | 0xff;
- break;
- case 7:
- /* data: {2{8'0, imm[7:0], 16'1}} */
- data = (t << 48) | ((uint64_t)0xffff << 32) | (t << 16) | 0xffff;
- break;
- case 8:
- /* data: {8{imm[7:0]}} */
- data = (t << 56) | (t << 48) | (t << 40) | (t << 32) |
- (t << 24) | (t << 16) | (t << 8) | t;
- break;
- case 9:
- /* data: {{8{imm[7]}, ..., 8{imm[0]}}} */
- {
- uint64_t b0, b1, b2, b3, b4, b5, b6, b7;
- b0 = t & 0x1;
- b1 = (t & 0x2) >> 1;
- b2 = (t & 0x4) >> 2;
- b3 = (t & 0x8) >> 3;
- b4 = (t & 0x10) >> 4;
- b5 = (t & 0x20) >> 5;
- b6 = (t & 0x40) >> 6;
- b7 = (t & 0x80) >> 7;
- data = (EXPAND_BYTE(b7) << 56) |
- (EXPAND_BYTE(b6) << 48) |
- (EXPAND_BYTE(b5) << 40) |
- (EXPAND_BYTE(b4) << 32) |
- (EXPAND_BYTE(b3) << 24) |
- (EXPAND_BYTE(b2) << 16) |
- (EXPAND_BYTE(b1) << 8) |
- EXPAND_BYTE(b0);
- }
- break;
- case 10:
- /* data: {2{imm[7], ~imm[6], {5{imm[6]}}, imm[5:0], 19'0}} */
- {
- uint64_t b6, b7;
- uint64_t t0, t1;
- b6 = (imm & 0x40) >> 6;
- b7 = (imm & 0x80) >> 7;
- t0 = (imm & 0x3f);
- t1 = (b7 << 6) | ((1 - b6) << 5) | (uint64_t)(b6 ? 0x1f : 0);
- data = (t1 << 57) | (t0 << 51) | (t1 << 25) | (t0 << 19);
- }
- break;
- case 11:
- /* data: {32'0, imm[7], ~{imm[6]}, 5{imm[6]}, imm[5:0], 19'0} */
- {
- uint64_t b6, b7;
- uint64_t t0, t1;
- b6 = (imm & 0x40) >> 6;
- b7 = (imm & 0x80) >> 7;
- t0 = (imm & 0x3f);
- t1 = (b7 << 6) | ((1 - b6) << 5) | (b6 ? 0x1f : 0);
- data = (t1 << 25) | (t0 << 19);
- }
- break;
- case 12:
- /* data: {imm[7], ~imm[6], 8{imm[6]}, imm[5:0], 48'0} */
- {
- uint64_t b6, b7;
- uint64_t t0, t1;
- b6 = (imm & 0x40) >> 6;
- b7 = (imm & 0x80) >> 7;
- t0 = (imm & 0x3f);
- t1 = (b7 << 9) | ((1 - b6) << 8) | (b6 ? 0xff : 0);
- data = (t1 << 54) | (t0 << 48);
- }
- break;
- default:
- generate_exception(ctx, EXCCODE_INE);
- g_assert_not_reached();
- }
- return data;
-}
-
-static bool gen_vldi(DisasContext *ctx, arg_vldi *a, uint32_t oprsz)
-{
- int sel, vece;
- uint64_t value;
-
- if (!check_vec(ctx, oprsz)) {
- return true;
- }
-
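- /*
- * imm bit 12 selects the form: 1 builds a 64-bit pattern via the
- * mode table above, 0 replicates a sign-extended 10-bit immediate
- * at the element size given by imm[11:10].
- */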
- sel = (a->imm >> 12) & 0x1;
-
- if (sel) {
- value = vldi_get_value(ctx, a->imm);
- vece = MO_64;
- } else {
- value = ((int32_t)(a->imm << 22)) >> 22;
- vece = (a->imm >> 10) & 0x3;
- }
-
- tcg_gen_gvec_dup_i64(vece, vec_full_offset(a->vd), oprsz, ctx->vl / 8,
- tcg_constant_i64(value));
- return true;
-}
-
-TRANS(vldi, LSX, gen_vldi, 16)
-TRANS(xvldi, LASX, gen_vldi, 32)
-
-static bool gen_vandn_v(DisasContext *ctx, arg_vvv *a, uint32_t oprsz)
-{
- uint32_t vd_ofs, vj_ofs, vk_ofs;
-
- if (!check_vec(ctx, oprsz)) {
- return true;
- }
-
- vd_ofs = vec_full_offset(a->vd);
- vj_ofs = vec_full_offset(a->vj);
- vk_ofs = vec_full_offset(a->vk);
-
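- /* vandn.v is vd = ~vj & vk, so the operands are swapped for andc. */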
- tcg_gen_gvec_andc(MO_64, vd_ofs, vk_ofs, vj_ofs, oprsz, ctx->vl / 8);
- return true;
-}
-
-static void gen_vnori(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
-{
- TCGv_vec t1;
-
- t1 = tcg_constant_vec_matching(t, vece, imm);
- tcg_gen_nor_vec(vece, t, a, t1);
-}
-
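- /* 64-bit integer fallback: replicate imm into each byte, t = ~(a | imm). */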
-static void gen_vnori_b(TCGv_i64 t, TCGv_i64 a, int64_t imm)
-{
- tcg_gen_movi_i64(t, dup_const(MO_8, imm));
- tcg_gen_nor_i64(t, a, t);
-}
-
-static void do_vnori_b(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
- int64_t imm, uint32_t oprsz, uint32_t maxsz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_nor_vec, 0
- };
- static const GVecGen2i op = {
- .fni8 = gen_vnori_b,
- .fniv = gen_vnori,
- .fnoi = gen_helper_vnori_b,
- .opt_opc = vecop_list,
- .vece = MO_8
- };
-
- tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op);
-}
-
-TRANS(vand_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_and)
-TRANS(vor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_or)
-TRANS(vxor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_xor)
-TRANS(vnor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_nor)
-TRANS(vandn_v, LSX, gen_vandn_v, 16)
-TRANS(vorn_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_orc)
-TRANS(vandi_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_andi)
-TRANS(vori_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_ori)
-TRANS(vxori_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_xori)
-TRANS(vnori_b, LSX, gvec_vv_i, MO_8, do_vnori_b)
-TRANS(xvand_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_and)
-TRANS(xvor_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_or)
-TRANS(xvxor_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_xor)
-TRANS(xvnor_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_nor)
-TRANS(xvandn_v, LASX, gen_vandn_v, 32)
-TRANS(xvorn_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_orc)
-TRANS(xvandi_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_andi)
-TRANS(xvori_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_ori)
-TRANS(xvxori_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_xori)
-TRANS(xvnori_b, LASX, gvec_xx_i, MO_8, do_vnori_b)
-
-TRANS(vsll_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_shlv)
-TRANS(vsll_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_shlv)
-TRANS(vsll_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_shlv)
-TRANS(vsll_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_shlv)
-TRANS(vslli_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_shli)
-TRANS(vslli_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_shli)
-TRANS(vslli_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_shli)
-TRANS(vslli_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_shli)
-TRANS(xvsll_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_shlv)
-TRANS(xvsll_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_shlv)
-TRANS(xvsll_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_shlv)
-TRANS(xvsll_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_shlv)
-TRANS(xvslli_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_shli)
-TRANS(xvslli_h, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_shli)
-TRANS(xvslli_w, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_shli)
-TRANS(xvslli_d, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_shli)
-
-TRANS(vsrl_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_shrv)
-TRANS(vsrl_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_shrv)
-TRANS(vsrl_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_shrv)
-TRANS(vsrl_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_shrv)
-TRANS(vsrli_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_shri)
-TRANS(vsrli_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_shri)
-TRANS(vsrli_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_shri)
-TRANS(vsrli_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_shri)
-TRANS(xvsrl_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_shrv)
-TRANS(xvsrl_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_shrv)
-TRANS(xvsrl_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_shrv)
-TRANS(xvsrl_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_shrv)
-TRANS(xvsrli_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_shri)
-TRANS(xvsrli_h, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_shri)
-TRANS(xvsrli_w, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_shri)
-TRANS(xvsrli_d, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_shri)
-
-TRANS(vsra_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_sarv)
-TRANS(vsra_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_sarv)
-TRANS(vsra_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_sarv)
-TRANS(vsra_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_sarv)
-TRANS(vsrai_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_sari)
-TRANS(vsrai_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_sari)
-TRANS(vsrai_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_sari)
-TRANS(vsrai_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_sari)
-TRANS(xvsra_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_sarv)
-TRANS(xvsra_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_sarv)
-TRANS(xvsra_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_sarv)
-TRANS(xvsra_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_sarv)
-TRANS(xvsrai_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_sari)
-TRANS(xvsrai_h, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_sari)
-TRANS(xvsrai_w, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_sari)
-TRANS(xvsrai_d, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_sari)
-
-TRANS(vrotr_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_rotrv)
-TRANS(vrotr_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_rotrv)
-TRANS(vrotr_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_rotrv)
-TRANS(vrotr_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_rotrv)
-TRANS(vrotri_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_rotri)
-TRANS(vrotri_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_rotri)
-TRANS(vrotri_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_rotri)
-TRANS(vrotri_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_rotri)
-TRANS(xvrotr_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_rotrv)
-TRANS(xvrotr_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_rotrv)
-TRANS(xvrotr_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_rotrv)
-TRANS(xvrotr_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_rotrv)
-TRANS(xvrotri_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_rotri)
-TRANS(xvrotri_h, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_rotri)
-TRANS(xvrotri_w, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_rotri)
-TRANS(xvrotri_d, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_rotri)
-
-TRANS(vsllwil_h_b, LSX, gen_vv_i, gen_helper_vsllwil_h_b)
-TRANS(vsllwil_w_h, LSX, gen_vv_i, gen_helper_vsllwil_w_h)
-TRANS(vsllwil_d_w, LSX, gen_vv_i, gen_helper_vsllwil_d_w)
-TRANS(vextl_q_d, LSX, gen_vv, gen_helper_vextl_q_d)
-TRANS(vsllwil_hu_bu, LSX, gen_vv_i, gen_helper_vsllwil_hu_bu)
-TRANS(vsllwil_wu_hu, LSX, gen_vv_i, gen_helper_vsllwil_wu_hu)
-TRANS(vsllwil_du_wu, LSX, gen_vv_i, gen_helper_vsllwil_du_wu)
-TRANS(vextl_qu_du, LSX, gen_vv, gen_helper_vextl_qu_du)
-TRANS(xvsllwil_h_b, LASX, gen_xx_i, gen_helper_vsllwil_h_b)
-TRANS(xvsllwil_w_h, LASX, gen_xx_i, gen_helper_vsllwil_w_h)
-TRANS(xvsllwil_d_w, LASX, gen_xx_i, gen_helper_vsllwil_d_w)
-TRANS(xvextl_q_d, LASX, gen_xx, gen_helper_vextl_q_d)
-TRANS(xvsllwil_hu_bu, LASX, gen_xx_i, gen_helper_vsllwil_hu_bu)
-TRANS(xvsllwil_wu_hu, LASX, gen_xx_i, gen_helper_vsllwil_wu_hu)
-TRANS(xvsllwil_du_wu, LASX, gen_xx_i, gen_helper_vsllwil_du_wu)
-TRANS(xvextl_qu_du, LASX, gen_xx, gen_helper_vextl_qu_du)
-
-TRANS(vsrlr_b, LSX, gen_vvv, gen_helper_vsrlr_b)
-TRANS(vsrlr_h, LSX, gen_vvv, gen_helper_vsrlr_h)
-TRANS(vsrlr_w, LSX, gen_vvv, gen_helper_vsrlr_w)
-TRANS(vsrlr_d, LSX, gen_vvv, gen_helper_vsrlr_d)
-TRANS(vsrlri_b, LSX, gen_vv_i, gen_helper_vsrlri_b)
-TRANS(vsrlri_h, LSX, gen_vv_i, gen_helper_vsrlri_h)
-TRANS(vsrlri_w, LSX, gen_vv_i, gen_helper_vsrlri_w)
-TRANS(vsrlri_d, LSX, gen_vv_i, gen_helper_vsrlri_d)
-TRANS(xvsrlr_b, LASX, gen_xxx, gen_helper_vsrlr_b)
-TRANS(xvsrlr_h, LASX, gen_xxx, gen_helper_vsrlr_h)
-TRANS(xvsrlr_w, LASX, gen_xxx, gen_helper_vsrlr_w)
-TRANS(xvsrlr_d, LASX, gen_xxx, gen_helper_vsrlr_d)
-TRANS(xvsrlri_b, LASX, gen_xx_i, gen_helper_vsrlri_b)
-TRANS(xvsrlri_h, LASX, gen_xx_i, gen_helper_vsrlri_h)
-TRANS(xvsrlri_w, LASX, gen_xx_i, gen_helper_vsrlri_w)
-TRANS(xvsrlri_d, LASX, gen_xx_i, gen_helper_vsrlri_d)
-
-TRANS(vsrar_b, LSX, gen_vvv, gen_helper_vsrar_b)
-TRANS(vsrar_h, LSX, gen_vvv, gen_helper_vsrar_h)
-TRANS(vsrar_w, LSX, gen_vvv, gen_helper_vsrar_w)
-TRANS(vsrar_d, LSX, gen_vvv, gen_helper_vsrar_d)
-TRANS(vsrari_b, LSX, gen_vv_i, gen_helper_vsrari_b)
-TRANS(vsrari_h, LSX, gen_vv_i, gen_helper_vsrari_h)
-TRANS(vsrari_w, LSX, gen_vv_i, gen_helper_vsrari_w)
-TRANS(vsrari_d, LSX, gen_vv_i, gen_helper_vsrari_d)
-TRANS(xvsrar_b, LASX, gen_xxx, gen_helper_vsrar_b)
-TRANS(xvsrar_h, LASX, gen_xxx, gen_helper_vsrar_h)
-TRANS(xvsrar_w, LASX, gen_xxx, gen_helper_vsrar_w)
-TRANS(xvsrar_d, LASX, gen_xxx, gen_helper_vsrar_d)
-TRANS(xvsrari_b, LASX, gen_xx_i, gen_helper_vsrari_b)
-TRANS(xvsrari_h, LASX, gen_xx_i, gen_helper_vsrari_h)
-TRANS(xvsrari_w, LASX, gen_xx_i, gen_helper_vsrari_w)
-TRANS(xvsrari_d, LASX, gen_xx_i, gen_helper_vsrari_d)
-
-TRANS(vsrln_b_h, LSX, gen_vvv, gen_helper_vsrln_b_h)
-TRANS(vsrln_h_w, LSX, gen_vvv, gen_helper_vsrln_h_w)
-TRANS(vsrln_w_d, LSX, gen_vvv, gen_helper_vsrln_w_d)
-TRANS(vsran_b_h, LSX, gen_vvv, gen_helper_vsran_b_h)
-TRANS(vsran_h_w, LSX, gen_vvv, gen_helper_vsran_h_w)
-TRANS(vsran_w_d, LSX, gen_vvv, gen_helper_vsran_w_d)
-TRANS(xvsrln_b_h, LASX, gen_xxx, gen_helper_vsrln_b_h)
-TRANS(xvsrln_h_w, LASX, gen_xxx, gen_helper_vsrln_h_w)
-TRANS(xvsrln_w_d, LASX, gen_xxx, gen_helper_vsrln_w_d)
-TRANS(xvsran_b_h, LASX, gen_xxx, gen_helper_vsran_b_h)
-TRANS(xvsran_h_w, LASX, gen_xxx, gen_helper_vsran_h_w)
-TRANS(xvsran_w_d, LASX, gen_xxx, gen_helper_vsran_w_d)
-
-TRANS(vsrlni_b_h, LSX, gen_vv_i, gen_helper_vsrlni_b_h)
-TRANS(vsrlni_h_w, LSX, gen_vv_i, gen_helper_vsrlni_h_w)
-TRANS(vsrlni_w_d, LSX, gen_vv_i, gen_helper_vsrlni_w_d)
-TRANS(vsrlni_d_q, LSX, gen_vv_i, gen_helper_vsrlni_d_q)
-TRANS(vsrani_b_h, LSX, gen_vv_i, gen_helper_vsrani_b_h)
-TRANS(vsrani_h_w, LSX, gen_vv_i, gen_helper_vsrani_h_w)
-TRANS(vsrani_w_d, LSX, gen_vv_i, gen_helper_vsrani_w_d)
-TRANS(vsrani_d_q, LSX, gen_vv_i, gen_helper_vsrani_d_q)
-TRANS(xvsrlni_b_h, LASX, gen_xx_i, gen_helper_vsrlni_b_h)
-TRANS(xvsrlni_h_w, LASX, gen_xx_i, gen_helper_vsrlni_h_w)
-TRANS(xvsrlni_w_d, LASX, gen_xx_i, gen_helper_vsrlni_w_d)
-TRANS(xvsrlni_d_q, LASX, gen_xx_i, gen_helper_vsrlni_d_q)
-TRANS(xvsrani_b_h, LASX, gen_xx_i, gen_helper_vsrani_b_h)
-TRANS(xvsrani_h_w, LASX, gen_xx_i, gen_helper_vsrani_h_w)
-TRANS(xvsrani_w_d, LASX, gen_xx_i, gen_helper_vsrani_w_d)
-TRANS(xvsrani_d_q, LASX, gen_xx_i, gen_helper_vsrani_d_q)
-
-TRANS(vsrlrn_b_h, LSX, gen_vvv, gen_helper_vsrlrn_b_h)
-TRANS(vsrlrn_h_w, LSX, gen_vvv, gen_helper_vsrlrn_h_w)
-TRANS(vsrlrn_w_d, LSX, gen_vvv, gen_helper_vsrlrn_w_d)
-TRANS(vsrarn_b_h, LSX, gen_vvv, gen_helper_vsrarn_b_h)
-TRANS(vsrarn_h_w, LSX, gen_vvv, gen_helper_vsrarn_h_w)
-TRANS(vsrarn_w_d, LSX, gen_vvv, gen_helper_vsrarn_w_d)
-TRANS(xvsrlrn_b_h, LASX, gen_xxx, gen_helper_vsrlrn_b_h)
-TRANS(xvsrlrn_h_w, LASX, gen_xxx, gen_helper_vsrlrn_h_w)
-TRANS(xvsrlrn_w_d, LASX, gen_xxx, gen_helper_vsrlrn_w_d)
-TRANS(xvsrarn_b_h, LASX, gen_xxx, gen_helper_vsrarn_b_h)
-TRANS(xvsrarn_h_w, LASX, gen_xxx, gen_helper_vsrarn_h_w)
-TRANS(xvsrarn_w_d, LASX, gen_xxx, gen_helper_vsrarn_w_d)
-
-TRANS(vsrlrni_b_h, LSX, gen_vv_i, gen_helper_vsrlrni_b_h)
-TRANS(vsrlrni_h_w, LSX, gen_vv_i, gen_helper_vsrlrni_h_w)
-TRANS(vsrlrni_w_d, LSX, gen_vv_i, gen_helper_vsrlrni_w_d)
-TRANS(vsrlrni_d_q, LSX, gen_vv_i, gen_helper_vsrlrni_d_q)
-TRANS(vsrarni_b_h, LSX, gen_vv_i, gen_helper_vsrarni_b_h)
-TRANS(vsrarni_h_w, LSX, gen_vv_i, gen_helper_vsrarni_h_w)
-TRANS(vsrarni_w_d, LSX, gen_vv_i, gen_helper_vsrarni_w_d)
-TRANS(vsrarni_d_q, LSX, gen_vv_i, gen_helper_vsrarni_d_q)
-TRANS(xvsrlrni_b_h, LASX, gen_xx_i, gen_helper_vsrlrni_b_h)
-TRANS(xvsrlrni_h_w, LASX, gen_xx_i, gen_helper_vsrlrni_h_w)
-TRANS(xvsrlrni_w_d, LASX, gen_xx_i, gen_helper_vsrlrni_w_d)
-TRANS(xvsrlrni_d_q, LASX, gen_xx_i, gen_helper_vsrlrni_d_q)
-TRANS(xvsrarni_b_h, LASX, gen_xx_i, gen_helper_vsrarni_b_h)
-TRANS(xvsrarni_h_w, LASX, gen_xx_i, gen_helper_vsrarni_h_w)
-TRANS(xvsrarni_w_d, LASX, gen_xx_i, gen_helper_vsrarni_w_d)
-TRANS(xvsrarni_d_q, LASX, gen_xx_i, gen_helper_vsrarni_d_q)
-
-TRANS(vssrln_b_h, LSX, gen_vvv, gen_helper_vssrln_b_h)
-TRANS(vssrln_h_w, LSX, gen_vvv, gen_helper_vssrln_h_w)
-TRANS(vssrln_w_d, LSX, gen_vvv, gen_helper_vssrln_w_d)
-TRANS(vssran_b_h, LSX, gen_vvv, gen_helper_vssran_b_h)
-TRANS(vssran_h_w, LSX, gen_vvv, gen_helper_vssran_h_w)
-TRANS(vssran_w_d, LSX, gen_vvv, gen_helper_vssran_w_d)
-TRANS(vssrln_bu_h, LSX, gen_vvv, gen_helper_vssrln_bu_h)
-TRANS(vssrln_hu_w, LSX, gen_vvv, gen_helper_vssrln_hu_w)
-TRANS(vssrln_wu_d, LSX, gen_vvv, gen_helper_vssrln_wu_d)
-TRANS(vssran_bu_h, LSX, gen_vvv, gen_helper_vssran_bu_h)
-TRANS(vssran_hu_w, LSX, gen_vvv, gen_helper_vssran_hu_w)
-TRANS(vssran_wu_d, LSX, gen_vvv, gen_helper_vssran_wu_d)
-TRANS(xvssrln_b_h, LASX, gen_xxx, gen_helper_vssrln_b_h)
-TRANS(xvssrln_h_w, LASX, gen_xxx, gen_helper_vssrln_h_w)
-TRANS(xvssrln_w_d, LASX, gen_xxx, gen_helper_vssrln_w_d)
-TRANS(xvssran_b_h, LASX, gen_xxx, gen_helper_vssran_b_h)
-TRANS(xvssran_h_w, LASX, gen_xxx, gen_helper_vssran_h_w)
-TRANS(xvssran_w_d, LASX, gen_xxx, gen_helper_vssran_w_d)
-TRANS(xvssrln_bu_h, LASX, gen_xxx, gen_helper_vssrln_bu_h)
-TRANS(xvssrln_hu_w, LASX, gen_xxx, gen_helper_vssrln_hu_w)
-TRANS(xvssrln_wu_d, LASX, gen_xxx, gen_helper_vssrln_wu_d)
-TRANS(xvssran_bu_h, LASX, gen_xxx, gen_helper_vssran_bu_h)
-TRANS(xvssran_hu_w, LASX, gen_xxx, gen_helper_vssran_hu_w)
-TRANS(xvssran_wu_d, LASX, gen_xxx, gen_helper_vssran_wu_d)
-
-TRANS(vssrlni_b_h, LSX, gen_vv_i, gen_helper_vssrlni_b_h)
-TRANS(vssrlni_h_w, LSX, gen_vv_i, gen_helper_vssrlni_h_w)
-TRANS(vssrlni_w_d, LSX, gen_vv_i, gen_helper_vssrlni_w_d)
-TRANS(vssrlni_d_q, LSX, gen_vv_i, gen_helper_vssrlni_d_q)
-TRANS(vssrani_b_h, LSX, gen_vv_i, gen_helper_vssrani_b_h)
-TRANS(vssrani_h_w, LSX, gen_vv_i, gen_helper_vssrani_h_w)
-TRANS(vssrani_w_d, LSX, gen_vv_i, gen_helper_vssrani_w_d)
-TRANS(vssrani_d_q, LSX, gen_vv_i, gen_helper_vssrani_d_q)
-TRANS(vssrlni_bu_h, LSX, gen_vv_i, gen_helper_vssrlni_bu_h)
-TRANS(vssrlni_hu_w, LSX, gen_vv_i, gen_helper_vssrlni_hu_w)
-TRANS(vssrlni_wu_d, LSX, gen_vv_i, gen_helper_vssrlni_wu_d)
-TRANS(vssrlni_du_q, LSX, gen_vv_i, gen_helper_vssrlni_du_q)
-TRANS(vssrani_bu_h, LSX, gen_vv_i, gen_helper_vssrani_bu_h)
-TRANS(vssrani_hu_w, LSX, gen_vv_i, gen_helper_vssrani_hu_w)
-TRANS(vssrani_wu_d, LSX, gen_vv_i, gen_helper_vssrani_wu_d)
-TRANS(vssrani_du_q, LSX, gen_vv_i, gen_helper_vssrani_du_q)
-TRANS(xvssrlni_b_h, LASX, gen_xx_i, gen_helper_vssrlni_b_h)
-TRANS(xvssrlni_h_w, LASX, gen_xx_i, gen_helper_vssrlni_h_w)
-TRANS(xvssrlni_w_d, LASX, gen_xx_i, gen_helper_vssrlni_w_d)
-TRANS(xvssrlni_d_q, LASX, gen_xx_i, gen_helper_vssrlni_d_q)
-TRANS(xvssrani_b_h, LASX, gen_xx_i, gen_helper_vssrani_b_h)
-TRANS(xvssrani_h_w, LASX, gen_xx_i, gen_helper_vssrani_h_w)
-TRANS(xvssrani_w_d, LASX, gen_xx_i, gen_helper_vssrani_w_d)
-TRANS(xvssrani_d_q, LASX, gen_xx_i, gen_helper_vssrani_d_q)
-TRANS(xvssrlni_bu_h, LASX, gen_xx_i, gen_helper_vssrlni_bu_h)
-TRANS(xvssrlni_hu_w, LASX, gen_xx_i, gen_helper_vssrlni_hu_w)
-TRANS(xvssrlni_wu_d, LASX, gen_xx_i, gen_helper_vssrlni_wu_d)
-TRANS(xvssrlni_du_q, LASX, gen_xx_i, gen_helper_vssrlni_du_q)
-TRANS(xvssrani_bu_h, LASX, gen_xx_i, gen_helper_vssrani_bu_h)
-TRANS(xvssrani_hu_w, LASX, gen_xx_i, gen_helper_vssrani_hu_w)
-TRANS(xvssrani_wu_d, LASX, gen_xx_i, gen_helper_vssrani_wu_d)
-TRANS(xvssrani_du_q, LASX, gen_xx_i, gen_helper_vssrani_du_q)
-
-TRANS(vssrlrn_b_h, LSX, gen_vvv, gen_helper_vssrlrn_b_h)
-TRANS(vssrlrn_h_w, LSX, gen_vvv, gen_helper_vssrlrn_h_w)
-TRANS(vssrlrn_w_d, LSX, gen_vvv, gen_helper_vssrlrn_w_d)
-TRANS(vssrarn_b_h, LSX, gen_vvv, gen_helper_vssrarn_b_h)
-TRANS(vssrarn_h_w, LSX, gen_vvv, gen_helper_vssrarn_h_w)
-TRANS(vssrarn_w_d, LSX, gen_vvv, gen_helper_vssrarn_w_d)
-TRANS(vssrlrn_bu_h, LSX, gen_vvv, gen_helper_vssrlrn_bu_h)
-TRANS(vssrlrn_hu_w, LSX, gen_vvv, gen_helper_vssrlrn_hu_w)
-TRANS(vssrlrn_wu_d, LSX, gen_vvv, gen_helper_vssrlrn_wu_d)
-TRANS(vssrarn_bu_h, LSX, gen_vvv, gen_helper_vssrarn_bu_h)
-TRANS(vssrarn_hu_w, LSX, gen_vvv, gen_helper_vssrarn_hu_w)
-TRANS(vssrarn_wu_d, LSX, gen_vvv, gen_helper_vssrarn_wu_d)
-TRANS(xvssrlrn_b_h, LASX, gen_xxx, gen_helper_vssrlrn_b_h)
-TRANS(xvssrlrn_h_w, LASX, gen_xxx, gen_helper_vssrlrn_h_w)
-TRANS(xvssrlrn_w_d, LASX, gen_xxx, gen_helper_vssrlrn_w_d)
-TRANS(xvssrarn_b_h, LASX, gen_xxx, gen_helper_vssrarn_b_h)
-TRANS(xvssrarn_h_w, LASX, gen_xxx, gen_helper_vssrarn_h_w)
-TRANS(xvssrarn_w_d, LASX, gen_xxx, gen_helper_vssrarn_w_d)
-TRANS(xvssrlrn_bu_h, LASX, gen_xxx, gen_helper_vssrlrn_bu_h)
-TRANS(xvssrlrn_hu_w, LASX, gen_xxx, gen_helper_vssrlrn_hu_w)
-TRANS(xvssrlrn_wu_d, LASX, gen_xxx, gen_helper_vssrlrn_wu_d)
-TRANS(xvssrarn_bu_h, LASX, gen_xxx, gen_helper_vssrarn_bu_h)
-TRANS(xvssrarn_hu_w, LASX, gen_xxx, gen_helper_vssrarn_hu_w)
-TRANS(xvssrarn_wu_d, LASX, gen_xxx, gen_helper_vssrarn_wu_d)
-
-TRANS(vssrlrni_b_h, LSX, gen_vv_i, gen_helper_vssrlrni_b_h)
-TRANS(vssrlrni_h_w, LSX, gen_vv_i, gen_helper_vssrlrni_h_w)
-TRANS(vssrlrni_w_d, LSX, gen_vv_i, gen_helper_vssrlrni_w_d)
-TRANS(vssrlrni_d_q, LSX, gen_vv_i, gen_helper_vssrlrni_d_q)
-TRANS(vssrarni_b_h, LSX, gen_vv_i, gen_helper_vssrarni_b_h)
-TRANS(vssrarni_h_w, LSX, gen_vv_i, gen_helper_vssrarni_h_w)
-TRANS(vssrarni_w_d, LSX, gen_vv_i, gen_helper_vssrarni_w_d)
-TRANS(vssrarni_d_q, LSX, gen_vv_i, gen_helper_vssrarni_d_q)
-TRANS(vssrlrni_bu_h, LSX, gen_vv_i, gen_helper_vssrlrni_bu_h)
-TRANS(vssrlrni_hu_w, LSX, gen_vv_i, gen_helper_vssrlrni_hu_w)
-TRANS(vssrlrni_wu_d, LSX, gen_vv_i, gen_helper_vssrlrni_wu_d)
-TRANS(vssrlrni_du_q, LSX, gen_vv_i, gen_helper_vssrlrni_du_q)
-TRANS(vssrarni_bu_h, LSX, gen_vv_i, gen_helper_vssrarni_bu_h)
-TRANS(vssrarni_hu_w, LSX, gen_vv_i, gen_helper_vssrarni_hu_w)
-TRANS(vssrarni_wu_d, LSX, gen_vv_i, gen_helper_vssrarni_wu_d)
-TRANS(vssrarni_du_q, LSX, gen_vv_i, gen_helper_vssrarni_du_q)
-TRANS(xvssrlrni_b_h, LASX, gen_xx_i, gen_helper_vssrlrni_b_h)
-TRANS(xvssrlrni_h_w, LASX, gen_xx_i, gen_helper_vssrlrni_h_w)
-TRANS(xvssrlrni_w_d, LASX, gen_xx_i, gen_helper_vssrlrni_w_d)
-TRANS(xvssrlrni_d_q, LASX, gen_xx_i, gen_helper_vssrlrni_d_q)
-TRANS(xvssrarni_b_h, LASX, gen_xx_i, gen_helper_vssrarni_b_h)
-TRANS(xvssrarni_h_w, LASX, gen_xx_i, gen_helper_vssrarni_h_w)
-TRANS(xvssrarni_w_d, LASX, gen_xx_i, gen_helper_vssrarni_w_d)
-TRANS(xvssrarni_d_q, LASX, gen_xx_i, gen_helper_vssrarni_d_q)
-TRANS(xvssrlrni_bu_h, LASX, gen_xx_i, gen_helper_vssrlrni_bu_h)
-TRANS(xvssrlrni_hu_w, LASX, gen_xx_i, gen_helper_vssrlrni_hu_w)
-TRANS(xvssrlrni_wu_d, LASX, gen_xx_i, gen_helper_vssrlrni_wu_d)
-TRANS(xvssrlrni_du_q, LASX, gen_xx_i, gen_helper_vssrlrni_du_q)
-TRANS(xvssrarni_bu_h, LASX, gen_xx_i, gen_helper_vssrarni_bu_h)
-TRANS(xvssrarni_hu_w, LASX, gen_xx_i, gen_helper_vssrarni_hu_w)
-TRANS(xvssrarni_wu_d, LASX, gen_xx_i, gen_helper_vssrarni_wu_d)
-TRANS(xvssrarni_du_q, LASX, gen_xx_i, gen_helper_vssrarni_du_q)
-
-TRANS(vclo_b, LSX, gen_vv, gen_helper_vclo_b)
-TRANS(vclo_h, LSX, gen_vv, gen_helper_vclo_h)
-TRANS(vclo_w, LSX, gen_vv, gen_helper_vclo_w)
-TRANS(vclo_d, LSX, gen_vv, gen_helper_vclo_d)
-TRANS(vclz_b, LSX, gen_vv, gen_helper_vclz_b)
-TRANS(vclz_h, LSX, gen_vv, gen_helper_vclz_h)
-TRANS(vclz_w, LSX, gen_vv, gen_helper_vclz_w)
-TRANS(vclz_d, LSX, gen_vv, gen_helper_vclz_d)
-TRANS(xvclo_b, LASX, gen_xx, gen_helper_vclo_b)
-TRANS(xvclo_h, LASX, gen_xx, gen_helper_vclo_h)
-TRANS(xvclo_w, LASX, gen_xx, gen_helper_vclo_w)
-TRANS(xvclo_d, LASX, gen_xx, gen_helper_vclo_d)
-TRANS(xvclz_b, LASX, gen_xx, gen_helper_vclz_b)
-TRANS(xvclz_h, LASX, gen_xx, gen_helper_vclz_h)
-TRANS(xvclz_w, LASX, gen_xx, gen_helper_vclz_w)
-TRANS(xvclz_d, LASX, gen_xx, gen_helper_vclz_d)
-
-TRANS(vpcnt_b, LSX, gen_vv, gen_helper_vpcnt_b)
-TRANS(vpcnt_h, LSX, gen_vv, gen_helper_vpcnt_h)
-TRANS(vpcnt_w, LSX, gen_vv, gen_helper_vpcnt_w)
-TRANS(vpcnt_d, LSX, gen_vv, gen_helper_vpcnt_d)
-TRANS(xvpcnt_b, LASX, gen_xx, gen_helper_vpcnt_b)
-TRANS(xvpcnt_h, LASX, gen_xx, gen_helper_vpcnt_h)
-TRANS(xvpcnt_w, LASX, gen_xx, gen_helper_vpcnt_w)
-TRANS(xvpcnt_d, LASX, gen_xx, gen_helper_vpcnt_d)
-
-static void do_vbit(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b,
- void (*func)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec))
-{
- TCGv_vec mask, lsh, t1, one;
-
- lsh = tcg_temp_new_vec_matching(t);
- t1 = tcg_temp_new_vec_matching(t);
- mask = tcg_constant_vec_matching(t, vece, (8 << vece) - 1);
- one = tcg_constant_vec_matching(t, vece, 1);
-
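- /* t1 = 1 << (b % elem_bits); func clears, sets or flips that bit of a. */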
- tcg_gen_and_vec(vece, lsh, b, mask);
- tcg_gen_shlv_vec(vece, t1, one, lsh);
- func(vece, t, a, t1);
-}
-
-static void gen_vbitclr(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
-{
- do_vbit(vece, t, a, b, tcg_gen_andc_vec);
-}
-
-static void gen_vbitset(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
-{
- do_vbit(vece, t, a, b, tcg_gen_or_vec);
-}
-
-static void gen_vbitrev(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
-{
- do_vbit(vece, t, a, b, tcg_gen_xor_vec);
-}
-
-static void do_vbitclr(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
- uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_shlv_vec, INDEX_op_andc_vec, 0
- };
- static const GVecGen3 op[4] = {
- {
- .fniv = gen_vbitclr,
- .fno = gen_helper_vbitclr_b,
- .opt_opc = vecop_list,
- .vece = MO_8
- },
- {
- .fniv = gen_vbitclr,
- .fno = gen_helper_vbitclr_h,
- .opt_opc = vecop_list,
- .vece = MO_16
- },
- {
- .fniv = gen_vbitclr,
- .fno = gen_helper_vbitclr_w,
- .opt_opc = vecop_list,
- .vece = MO_32
- },
- {
- .fniv = gen_vbitclr,
- .fno = gen_helper_vbitclr_d,
- .opt_opc = vecop_list,
- .vece = MO_64
- },
- };
-
- tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
-}
-
-TRANS(vbitclr_b, LSX, gvec_vvv, MO_8, do_vbitclr)
-TRANS(vbitclr_h, LSX, gvec_vvv, MO_16, do_vbitclr)
-TRANS(vbitclr_w, LSX, gvec_vvv, MO_32, do_vbitclr)
-TRANS(vbitclr_d, LSX, gvec_vvv, MO_64, do_vbitclr)
-TRANS(xvbitclr_b, LASX, gvec_xxx, MO_8, do_vbitclr)
-TRANS(xvbitclr_h, LASX, gvec_xxx, MO_16, do_vbitclr)
-TRANS(xvbitclr_w, LASX, gvec_xxx, MO_32, do_vbitclr)
-TRANS(xvbitclr_d, LASX, gvec_xxx, MO_64, do_vbitclr)
-
-static void do_vbiti(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm,
- void (*func)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec))
-{
- int lsh;
- TCGv_vec t1, one;
-
- lsh = imm & ((8 << vece) - 1);
- t1 = tcg_temp_new_vec_matching(t);
- one = tcg_constant_vec_matching(t, vece, 1);
-
- tcg_gen_shli_vec(vece, t1, one, lsh);
- func(vece, t, a, t1);
-}
-
-static void gen_vbitclri(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
-{
- do_vbiti(vece, t, a, imm, tcg_gen_andc_vec);
-}
-
-static void gen_vbitseti(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
-{
- do_vbiti(vece, t, a, imm, tcg_gen_or_vec);
-}
-
-static void gen_vbitrevi(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
-{
- do_vbiti(vece, t, a, imm, tcg_gen_xor_vec);
-}
-
-static void do_vbitclri(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
- int64_t imm, uint32_t oprsz, uint32_t maxsz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_shli_vec, INDEX_op_andc_vec, 0
- };
- static const GVecGen2i op[4] = {
- {
- .fniv = gen_vbitclri,
- .fnoi = gen_helper_vbitclri_b,
- .opt_opc = vecop_list,
- .vece = MO_8
- },
- {
- .fniv = gen_vbitclri,
- .fnoi = gen_helper_vbitclri_h,
- .opt_opc = vecop_list,
- .vece = MO_16
- },
- {
- .fniv = gen_vbitclri,
- .fnoi = gen_helper_vbitclri_w,
- .opt_opc = vecop_list,
- .vece = MO_32
- },
- {
- .fniv = gen_vbitclri,
- .fnoi = gen_helper_vbitclri_d,
- .opt_opc = vecop_list,
- .vece = MO_64
- },
- };
-
- tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
-}
-
-TRANS(vbitclri_b, LSX, gvec_vv_i, MO_8, do_vbitclri)
-TRANS(vbitclri_h, LSX, gvec_vv_i, MO_16, do_vbitclri)
-TRANS(vbitclri_w, LSX, gvec_vv_i, MO_32, do_vbitclri)
-TRANS(vbitclri_d, LSX, gvec_vv_i, MO_64, do_vbitclri)
-TRANS(xvbitclri_b, LASX, gvec_xx_i, MO_8, do_vbitclri)
-TRANS(xvbitclri_h, LASX, gvec_xx_i, MO_16, do_vbitclri)
-TRANS(xvbitclri_w, LASX, gvec_xx_i, MO_32, do_vbitclri)
-TRANS(xvbitclri_d, LASX, gvec_xx_i, MO_64, do_vbitclri)
-
-static void do_vbitset(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
- uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_shlv_vec, 0
- };
- static const GVecGen3 op[4] = {
- {
- .fniv = gen_vbitset,
- .fno = gen_helper_vbitset_b,
- .opt_opc = vecop_list,
- .vece = MO_8
- },
- {
- .fniv = gen_vbitset,
- .fno = gen_helper_vbitset_h,
- .opt_opc = vecop_list,
- .vece = MO_16
- },
- {
- .fniv = gen_vbitset,
- .fno = gen_helper_vbitset_w,
- .opt_opc = vecop_list,
- .vece = MO_32
- },
- {
- .fniv = gen_vbitset,
- .fno = gen_helper_vbitset_d,
- .opt_opc = vecop_list,
- .vece = MO_64
- },
- };
-
- tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
-}
-
-TRANS(vbitset_b, LSX, gvec_vvv, MO_8, do_vbitset)
-TRANS(vbitset_h, LSX, gvec_vvv, MO_16, do_vbitset)
-TRANS(vbitset_w, LSX, gvec_vvv, MO_32, do_vbitset)
-TRANS(vbitset_d, LSX, gvec_vvv, MO_64, do_vbitset)
-TRANS(xvbitset_b, LASX, gvec_xxx, MO_8, do_vbitset)
-TRANS(xvbitset_h, LASX, gvec_xxx, MO_16, do_vbitset)
-TRANS(xvbitset_w, LASX, gvec_xxx, MO_32, do_vbitset)
-TRANS(xvbitset_d, LASX, gvec_xxx, MO_64, do_vbitset)
-
-static void do_vbitseti(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
- int64_t imm, uint32_t oprsz, uint32_t maxsz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_shli_vec, 0
- };
- static const GVecGen2i op[4] = {
- {
- .fniv = gen_vbitseti,
- .fnoi = gen_helper_vbitseti_b,
- .opt_opc = vecop_list,
- .vece = MO_8
- },
- {
- .fniv = gen_vbitseti,
- .fnoi = gen_helper_vbitseti_h,
- .opt_opc = vecop_list,
- .vece = MO_16
- },
- {
- .fniv = gen_vbitseti,
- .fnoi = gen_helper_vbitseti_w,
- .opt_opc = vecop_list,
- .vece = MO_32
- },
- {
- .fniv = gen_vbitseti,
- .fnoi = gen_helper_vbitseti_d,
- .opt_opc = vecop_list,
- .vece = MO_64
- },
- };
-
- tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
-}
-
-TRANS(vbitseti_b, LSX, gvec_vv_i, MO_8, do_vbitseti)
-TRANS(vbitseti_h, LSX, gvec_vv_i, MO_16, do_vbitseti)
-TRANS(vbitseti_w, LSX, gvec_vv_i, MO_32, do_vbitseti)
-TRANS(vbitseti_d, LSX, gvec_vv_i, MO_64, do_vbitseti)
-TRANS(xvbitseti_b, LASX, gvec_xx_i, MO_8, do_vbitseti)
-TRANS(xvbitseti_h, LASX, gvec_xx_i, MO_16, do_vbitseti)
-TRANS(xvbitseti_w, LASX, gvec_xx_i, MO_32, do_vbitseti)
-TRANS(xvbitseti_d, LASX, gvec_xx_i, MO_64, do_vbitseti)
-
-static void do_vbitrev(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
- uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_shlv_vec, 0
- };
- static const GVecGen3 op[4] = {
- {
- .fniv = gen_vbitrev,
- .fno = gen_helper_vbitrev_b,
- .opt_opc = vecop_list,
- .vece = MO_8
- },
- {
- .fniv = gen_vbitrev,
- .fno = gen_helper_vbitrev_h,
- .opt_opc = vecop_list,
- .vece = MO_16
- },
- {
- .fniv = gen_vbitrev,
- .fno = gen_helper_vbitrev_w,
- .opt_opc = vecop_list,
- .vece = MO_32
- },
- {
- .fniv = gen_vbitrev,
- .fno = gen_helper_vbitrev_d,
- .opt_opc = vecop_list,
- .vece = MO_64
- },
- };
-
- tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
-}
-
-TRANS(vbitrev_b, LSX, gvec_vvv, MO_8, do_vbitrev)
-TRANS(vbitrev_h, LSX, gvec_vvv, MO_16, do_vbitrev)
-TRANS(vbitrev_w, LSX, gvec_vvv, MO_32, do_vbitrev)
-TRANS(vbitrev_d, LSX, gvec_vvv, MO_64, do_vbitrev)
-TRANS(xvbitrev_b, LASX, gvec_xxx, MO_8, do_vbitrev)
-TRANS(xvbitrev_h, LASX, gvec_xxx, MO_16, do_vbitrev)
-TRANS(xvbitrev_w, LASX, gvec_xxx, MO_32, do_vbitrev)
-TRANS(xvbitrev_d, LASX, gvec_xxx, MO_64, do_vbitrev)
-
-static void do_vbitrevi(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
- int64_t imm, uint32_t oprsz, uint32_t maxsz)
-{
- static const TCGOpcode vecop_list[] = {
- INDEX_op_shli_vec, 0
- };
- static const GVecGen2i op[4] = {
- {
- .fniv = gen_vbitrevi,
- .fnoi = gen_helper_vbitrevi_b,
- .opt_opc = vecop_list,
- .vece = MO_8
- },
- {
- .fniv = gen_vbitrevi,
- .fnoi = gen_helper_vbitrevi_h,
- .opt_opc = vecop_list,
- .vece = MO_16
- },
- {
- .fniv = gen_vbitrevi,
- .fnoi = gen_helper_vbitrevi_w,
- .opt_opc = vecop_list,
- .vece = MO_32
- },
- {
- .fniv = gen_vbitrevi,
- .fnoi = gen_helper_vbitrevi_d,
- .opt_opc = vecop_list,
- .vece = MO_64
- },
- };
-
- tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
-}
-
-TRANS(vbitrevi_b, LSX, gvec_vv_i, MO_8, do_vbitrevi)
-TRANS(vbitrevi_h, LSX, gvec_vv_i, MO_16, do_vbitrevi)
-TRANS(vbitrevi_w, LSX, gvec_vv_i, MO_32, do_vbitrevi)
-TRANS(vbitrevi_d, LSX, gvec_vv_i, MO_64, do_vbitrevi)
-TRANS(xvbitrevi_b, LASX, gvec_xx_i, MO_8, do_vbitrevi)
-TRANS(xvbitrevi_h, LASX, gvec_xx_i, MO_16, do_vbitrevi)
-TRANS(xvbitrevi_w, LASX, gvec_xx_i, MO_32, do_vbitrevi)
-TRANS(xvbitrevi_d, LASX, gvec_xx_i, MO_64, do_vbitrevi)
-
-TRANS(vfrstp_b, LSX, gen_vvv, gen_helper_vfrstp_b)
-TRANS(vfrstp_h, LSX, gen_vvv, gen_helper_vfrstp_h)
-TRANS(vfrstpi_b, LSX, gen_vv_i, gen_helper_vfrstpi_b)
-TRANS(vfrstpi_h, LSX, gen_vv_i, gen_helper_vfrstpi_h)
-TRANS(xvfrstp_b, LASX, gen_xxx, gen_helper_vfrstp_b)
-TRANS(xvfrstp_h, LASX, gen_xxx, gen_helper_vfrstp_h)
-TRANS(xvfrstpi_b, LASX, gen_xx_i, gen_helper_vfrstpi_b)
-TRANS(xvfrstpi_h, LASX, gen_xx_i, gen_helper_vfrstpi_h)
-
-TRANS(vfadd_s, LSX, gen_vvv_ptr, gen_helper_vfadd_s)
-TRANS(vfadd_d, LSX, gen_vvv_ptr, gen_helper_vfadd_d)
-TRANS(vfsub_s, LSX, gen_vvv_ptr, gen_helper_vfsub_s)
-TRANS(vfsub_d, LSX, gen_vvv_ptr, gen_helper_vfsub_d)
-TRANS(vfmul_s, LSX, gen_vvv_ptr, gen_helper_vfmul_s)
-TRANS(vfmul_d, LSX, gen_vvv_ptr, gen_helper_vfmul_d)
-TRANS(vfdiv_s, LSX, gen_vvv_ptr, gen_helper_vfdiv_s)
-TRANS(vfdiv_d, LSX, gen_vvv_ptr, gen_helper_vfdiv_d)
-TRANS(xvfadd_s, LASX, gen_xxx_ptr, gen_helper_vfadd_s)
-TRANS(xvfadd_d, LASX, gen_xxx_ptr, gen_helper_vfadd_d)
-TRANS(xvfsub_s, LASX, gen_xxx_ptr, gen_helper_vfsub_s)
-TRANS(xvfsub_d, LASX, gen_xxx_ptr, gen_helper_vfsub_d)
-TRANS(xvfmul_s, LASX, gen_xxx_ptr, gen_helper_vfmul_s)
-TRANS(xvfmul_d, LASX, gen_xxx_ptr, gen_helper_vfmul_d)
-TRANS(xvfdiv_s, LASX, gen_xxx_ptr, gen_helper_vfdiv_s)
-TRANS(xvfdiv_d, LASX, gen_xxx_ptr, gen_helper_vfdiv_d)
-
-TRANS(vfmadd_s, LSX, gen_vvvv_ptr, gen_helper_vfmadd_s)
-TRANS(vfmadd_d, LSX, gen_vvvv_ptr, gen_helper_vfmadd_d)
-TRANS(vfmsub_s, LSX, gen_vvvv_ptr, gen_helper_vfmsub_s)
-TRANS(vfmsub_d, LSX, gen_vvvv_ptr, gen_helper_vfmsub_d)
-TRANS(vfnmadd_s, LSX, gen_vvvv_ptr, gen_helper_vfnmadd_s)
-TRANS(vfnmadd_d, LSX, gen_vvvv_ptr, gen_helper_vfnmadd_d)
-TRANS(vfnmsub_s, LSX, gen_vvvv_ptr, gen_helper_vfnmsub_s)
-TRANS(vfnmsub_d, LSX, gen_vvvv_ptr, gen_helper_vfnmsub_d)
-TRANS(xvfmadd_s, LASX, gen_xxxx_ptr, gen_helper_vfmadd_s)
-TRANS(xvfmadd_d, LASX, gen_xxxx_ptr, gen_helper_vfmadd_d)
-TRANS(xvfmsub_s, LASX, gen_xxxx_ptr, gen_helper_vfmsub_s)
-TRANS(xvfmsub_d, LASX, gen_xxxx_ptr, gen_helper_vfmsub_d)
-TRANS(xvfnmadd_s, LASX, gen_xxxx_ptr, gen_helper_vfnmadd_s)
-TRANS(xvfnmadd_d, LASX, gen_xxxx_ptr, gen_helper_vfnmadd_d)
-TRANS(xvfnmsub_s, LASX, gen_xxxx_ptr, gen_helper_vfnmsub_s)
-TRANS(xvfnmsub_d, LASX, gen_xxxx_ptr, gen_helper_vfnmsub_d)
-
-TRANS(vfmax_s, LSX, gen_vvv_ptr, gen_helper_vfmax_s)
-TRANS(vfmax_d, LSX, gen_vvv_ptr, gen_helper_vfmax_d)
-TRANS(vfmin_s, LSX, gen_vvv_ptr, gen_helper_vfmin_s)
-TRANS(vfmin_d, LSX, gen_vvv_ptr, gen_helper_vfmin_d)
-TRANS(xvfmax_s, LASX, gen_xxx_ptr, gen_helper_vfmax_s)
-TRANS(xvfmax_d, LASX, gen_xxx_ptr, gen_helper_vfmax_d)
-TRANS(xvfmin_s, LASX, gen_xxx_ptr, gen_helper_vfmin_s)
-TRANS(xvfmin_d, LASX, gen_xxx_ptr, gen_helper_vfmin_d)
-
-TRANS(vfmaxa_s, LSX, gen_vvv_ptr, gen_helper_vfmaxa_s)
-TRANS(vfmaxa_d, LSX, gen_vvv_ptr, gen_helper_vfmaxa_d)
-TRANS(vfmina_s, LSX, gen_vvv_ptr, gen_helper_vfmina_s)
-TRANS(vfmina_d, LSX, gen_vvv_ptr, gen_helper_vfmina_d)
-TRANS(xvfmaxa_s, LASX, gen_xxx_ptr, gen_helper_vfmaxa_s)
-TRANS(xvfmaxa_d, LASX, gen_xxx_ptr, gen_helper_vfmaxa_d)
-TRANS(xvfmina_s, LASX, gen_xxx_ptr, gen_helper_vfmina_s)
-TRANS(xvfmina_d, LASX, gen_xxx_ptr, gen_helper_vfmina_d)
-
-TRANS(vflogb_s, LSX, gen_vv_ptr, gen_helper_vflogb_s)
-TRANS(vflogb_d, LSX, gen_vv_ptr, gen_helper_vflogb_d)
-TRANS(xvflogb_s, LASX, gen_xx_ptr, gen_helper_vflogb_s)
-TRANS(xvflogb_d, LASX, gen_xx_ptr, gen_helper_vflogb_d)
-
-TRANS(vfclass_s, LSX, gen_vv_ptr, gen_helper_vfclass_s)
-TRANS(vfclass_d, LSX, gen_vv_ptr, gen_helper_vfclass_d)
-TRANS(xvfclass_s, LASX, gen_xx_ptr, gen_helper_vfclass_s)
-TRANS(xvfclass_d, LASX, gen_xx_ptr, gen_helper_vfclass_d)
-
-TRANS(vfsqrt_s, LSX, gen_vv_ptr, gen_helper_vfsqrt_s)
-TRANS(vfsqrt_d, LSX, gen_vv_ptr, gen_helper_vfsqrt_d)
-TRANS(vfrecip_s, LSX, gen_vv_ptr, gen_helper_vfrecip_s)
-TRANS(vfrecip_d, LSX, gen_vv_ptr, gen_helper_vfrecip_d)
-TRANS(vfrsqrt_s, LSX, gen_vv_ptr, gen_helper_vfrsqrt_s)
-TRANS(vfrsqrt_d, LSX, gen_vv_ptr, gen_helper_vfrsqrt_d)
-TRANS(xvfsqrt_s, LASX, gen_xx_ptr, gen_helper_vfsqrt_s)
-TRANS(xvfsqrt_d, LASX, gen_xx_ptr, gen_helper_vfsqrt_d)
-TRANS(xvfrecip_s, LASX, gen_xx_ptr, gen_helper_vfrecip_s)
-TRANS(xvfrecip_d, LASX, gen_xx_ptr, gen_helper_vfrecip_d)
-TRANS(xvfrsqrt_s, LASX, gen_xx_ptr, gen_helper_vfrsqrt_s)
-TRANS(xvfrsqrt_d, LASX, gen_xx_ptr, gen_helper_vfrsqrt_d)
-
-TRANS(vfcvtl_s_h, LSX, gen_vv_ptr, gen_helper_vfcvtl_s_h)
-TRANS(vfcvth_s_h, LSX, gen_vv_ptr, gen_helper_vfcvth_s_h)
-TRANS(vfcvtl_d_s, LSX, gen_vv_ptr, gen_helper_vfcvtl_d_s)
-TRANS(vfcvth_d_s, LSX, gen_vv_ptr, gen_helper_vfcvth_d_s)
-TRANS(vfcvt_h_s, LSX, gen_vvv_ptr, gen_helper_vfcvt_h_s)
-TRANS(vfcvt_s_d, LSX, gen_vvv_ptr, gen_helper_vfcvt_s_d)
-TRANS(xvfcvtl_s_h, LASX, gen_xx_ptr, gen_helper_vfcvtl_s_h)
-TRANS(xvfcvth_s_h, LASX, gen_xx_ptr, gen_helper_vfcvth_s_h)
-TRANS(xvfcvtl_d_s, LASX, gen_xx_ptr, gen_helper_vfcvtl_d_s)
-TRANS(xvfcvth_d_s, LASX, gen_xx_ptr, gen_helper_vfcvth_d_s)
-TRANS(xvfcvt_h_s, LASX, gen_xxx_ptr, gen_helper_vfcvt_h_s)
-TRANS(xvfcvt_s_d, LASX, gen_xxx_ptr, gen_helper_vfcvt_s_d)
-
-TRANS(vfrintrne_s, LSX, gen_vv_ptr, gen_helper_vfrintrne_s)
-TRANS(vfrintrne_d, LSX, gen_vv_ptr, gen_helper_vfrintrne_d)
-TRANS(vfrintrz_s, LSX, gen_vv_ptr, gen_helper_vfrintrz_s)
-TRANS(vfrintrz_d, LSX, gen_vv_ptr, gen_helper_vfrintrz_d)
-TRANS(vfrintrp_s, LSX, gen_vv_ptr, gen_helper_vfrintrp_s)
-TRANS(vfrintrp_d, LSX, gen_vv_ptr, gen_helper_vfrintrp_d)
-TRANS(vfrintrm_s, LSX, gen_vv_ptr, gen_helper_vfrintrm_s)
-TRANS(vfrintrm_d, LSX, gen_vv_ptr, gen_helper_vfrintrm_d)
-TRANS(vfrint_s, LSX, gen_vv_ptr, gen_helper_vfrint_s)
-TRANS(vfrint_d, LSX, gen_vv_ptr, gen_helper_vfrint_d)
-TRANS(xvfrintrne_s, LASX, gen_xx_ptr, gen_helper_vfrintrne_s)
-TRANS(xvfrintrne_d, LASX, gen_xx_ptr, gen_helper_vfrintrne_d)
-TRANS(xvfrintrz_s, LASX, gen_xx_ptr, gen_helper_vfrintrz_s)
-TRANS(xvfrintrz_d, LASX, gen_xx_ptr, gen_helper_vfrintrz_d)
-TRANS(xvfrintrp_s, LASX, gen_xx_ptr, gen_helper_vfrintrp_s)
-TRANS(xvfrintrp_d, LASX, gen_xx_ptr, gen_helper_vfrintrp_d)
-TRANS(xvfrintrm_s, LASX, gen_xx_ptr, gen_helper_vfrintrm_s)
-TRANS(xvfrintrm_d, LASX, gen_xx_ptr, gen_helper_vfrintrm_d)
-TRANS(xvfrint_s, LASX, gen_xx_ptr, gen_helper_vfrint_s)
-TRANS(xvfrint_d, LASX, gen_xx_ptr, gen_helper_vfrint_d)
-
-TRANS(vftintrne_w_s, LSX, gen_vv_ptr, gen_helper_vftintrne_w_s)
-TRANS(vftintrne_l_d, LSX, gen_vv_ptr, gen_helper_vftintrne_l_d)
-TRANS(vftintrz_w_s, LSX, gen_vv_ptr, gen_helper_vftintrz_w_s)
-TRANS(vftintrz_l_d, LSX, gen_vv_ptr, gen_helper_vftintrz_l_d)
-TRANS(vftintrp_w_s, LSX, gen_vv_ptr, gen_helper_vftintrp_w_s)
-TRANS(vftintrp_l_d, LSX, gen_vv_ptr, gen_helper_vftintrp_l_d)
-TRANS(vftintrm_w_s, LSX, gen_vv_ptr, gen_helper_vftintrm_w_s)
-TRANS(vftintrm_l_d, LSX, gen_vv_ptr, gen_helper_vftintrm_l_d)
-TRANS(vftint_w_s, LSX, gen_vv_ptr, gen_helper_vftint_w_s)
-TRANS(vftint_l_d, LSX, gen_vv_ptr, gen_helper_vftint_l_d)
-TRANS(vftintrz_wu_s, LSX, gen_vv_ptr, gen_helper_vftintrz_wu_s)
-TRANS(vftintrz_lu_d, LSX, gen_vv_ptr, gen_helper_vftintrz_lu_d)
-TRANS(vftint_wu_s, LSX, gen_vv_ptr, gen_helper_vftint_wu_s)
-TRANS(vftint_lu_d, LSX, gen_vv_ptr, gen_helper_vftint_lu_d)
-TRANS(vftintrne_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrne_w_d)
-TRANS(vftintrz_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrz_w_d)
-TRANS(vftintrp_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrp_w_d)
-TRANS(vftintrm_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrm_w_d)
-TRANS(vftint_w_d, LSX, gen_vvv_ptr, gen_helper_vftint_w_d)
-TRANS(vftintrnel_l_s, LSX, gen_vv_ptr, gen_helper_vftintrnel_l_s)
-TRANS(vftintrneh_l_s, LSX, gen_vv_ptr, gen_helper_vftintrneh_l_s)
-TRANS(vftintrzl_l_s, LSX, gen_vv_ptr, gen_helper_vftintrzl_l_s)
-TRANS(vftintrzh_l_s, LSX, gen_vv_ptr, gen_helper_vftintrzh_l_s)
-TRANS(vftintrpl_l_s, LSX, gen_vv_ptr, gen_helper_vftintrpl_l_s)
-TRANS(vftintrph_l_s, LSX, gen_vv_ptr, gen_helper_vftintrph_l_s)
-TRANS(vftintrml_l_s, LSX, gen_vv_ptr, gen_helper_vftintrml_l_s)
-TRANS(vftintrmh_l_s, LSX, gen_vv_ptr, gen_helper_vftintrmh_l_s)
-TRANS(vftintl_l_s, LSX, gen_vv_ptr, gen_helper_vftintl_l_s)
-TRANS(vftinth_l_s, LSX, gen_vv_ptr, gen_helper_vftinth_l_s)
-TRANS(xvftintrne_w_s, LASX, gen_xx_ptr, gen_helper_vftintrne_w_s)
-TRANS(xvftintrne_l_d, LASX, gen_xx_ptr, gen_helper_vftintrne_l_d)
-TRANS(xvftintrz_w_s, LASX, gen_xx_ptr, gen_helper_vftintrz_w_s)
-TRANS(xvftintrz_l_d, LASX, gen_xx_ptr, gen_helper_vftintrz_l_d)
-TRANS(xvftintrp_w_s, LASX, gen_xx_ptr, gen_helper_vftintrp_w_s)
-TRANS(xvftintrp_l_d, LASX, gen_xx_ptr, gen_helper_vftintrp_l_d)
-TRANS(xvftintrm_w_s, LASX, gen_xx_ptr, gen_helper_vftintrm_w_s)
-TRANS(xvftintrm_l_d, LASX, gen_xx_ptr, gen_helper_vftintrm_l_d)
-TRANS(xvftint_w_s, LASX, gen_xx_ptr, gen_helper_vftint_w_s)
-TRANS(xvftint_l_d, LASX, gen_xx_ptr, gen_helper_vftint_l_d)
-TRANS(xvftintrz_wu_s, LASX, gen_xx_ptr, gen_helper_vftintrz_wu_s)
-TRANS(xvftintrz_lu_d, LASX, gen_xx_ptr, gen_helper_vftintrz_lu_d)
-TRANS(xvftint_wu_s, LASX, gen_xx_ptr, gen_helper_vftint_wu_s)
-TRANS(xvftint_lu_d, LASX, gen_xx_ptr, gen_helper_vftint_lu_d)
-TRANS(xvftintrne_w_d, LASX, gen_xxx_ptr, gen_helper_vftintrne_w_d)
-TRANS(xvftintrz_w_d, LASX, gen_xxx_ptr, gen_helper_vftintrz_w_d)
-TRANS(xvftintrp_w_d, LASX, gen_xxx_ptr, gen_helper_vftintrp_w_d)
-TRANS(xvftintrm_w_d, LASX, gen_xxx_ptr, gen_helper_vftintrm_w_d)
-TRANS(xvftint_w_d, LASX, gen_xxx_ptr, gen_helper_vftint_w_d)
-TRANS(xvftintrnel_l_s, LASX, gen_xx_ptr, gen_helper_vftintrnel_l_s)
-TRANS(xvftintrneh_l_s, LASX, gen_xx_ptr, gen_helper_vftintrneh_l_s)
-TRANS(xvftintrzl_l_s, LASX, gen_xx_ptr, gen_helper_vftintrzl_l_s)
-TRANS(xvftintrzh_l_s, LASX, gen_xx_ptr, gen_helper_vftintrzh_l_s)
-TRANS(xvftintrpl_l_s, LASX, gen_xx_ptr, gen_helper_vftintrpl_l_s)
-TRANS(xvftintrph_l_s, LASX, gen_xx_ptr, gen_helper_vftintrph_l_s)
-TRANS(xvftintrml_l_s, LASX, gen_xx_ptr, gen_helper_vftintrml_l_s)
-TRANS(xvftintrmh_l_s, LASX, gen_xx_ptr, gen_helper_vftintrmh_l_s)
-TRANS(xvftintl_l_s, LASX, gen_xx_ptr, gen_helper_vftintl_l_s)
-TRANS(xvftinth_l_s, LASX, gen_xx_ptr, gen_helper_vftinth_l_s)
-
-TRANS(vffint_s_w, LSX, gen_vv_ptr, gen_helper_vffint_s_w)
-TRANS(vffint_d_l, LSX, gen_vv_ptr, gen_helper_vffint_d_l)
-TRANS(vffint_s_wu, LSX, gen_vv_ptr, gen_helper_vffint_s_wu)
-TRANS(vffint_d_lu, LSX, gen_vv_ptr, gen_helper_vffint_d_lu)
-TRANS(vffintl_d_w, LSX, gen_vv_ptr, gen_helper_vffintl_d_w)
-TRANS(vffinth_d_w, LSX, gen_vv_ptr, gen_helper_vffinth_d_w)
-TRANS(vffint_s_l, LSX, gen_vvv_ptr, gen_helper_vffint_s_l)
-TRANS(xvffint_s_w, LASX, gen_xx_ptr, gen_helper_vffint_s_w)
-TRANS(xvffint_d_l, LASX, gen_xx_ptr, gen_helper_vffint_d_l)
-TRANS(xvffint_s_wu, LASX, gen_xx_ptr, gen_helper_vffint_s_wu)
-TRANS(xvffint_d_lu, LASX, gen_xx_ptr, gen_helper_vffint_d_lu)
-TRANS(xvffintl_d_w, LASX, gen_xx_ptr, gen_helper_vffintl_d_w)
-TRANS(xvffinth_d_w, LASX, gen_xx_ptr, gen_helper_vffinth_d_w)
-TRANS(xvffint_s_l, LASX, gen_xxx_ptr, gen_helper_vffint_s_l)
-
-static bool do_cmp_vl(DisasContext *ctx, arg_vvv *a,
- uint32_t oprsz, MemOp mop, TCGCond cond)
-{
- uint32_t vd_ofs, vj_ofs, vk_ofs;
-
- if (!check_vec(ctx, oprsz)) {
- return true;
- }
-
- vd_ofs = vec_full_offset(a->vd);
- vj_ofs = vec_full_offset(a->vj);
- vk_ofs = vec_full_offset(a->vk);
-
- tcg_gen_gvec_cmp(cond, mop, vd_ofs, vj_ofs, vk_ofs, oprsz, ctx->vl / 8);
- return true;
-}
-
-static bool do_cmp(DisasContext *ctx, arg_vvv *a,
- MemOp mop, TCGCond cond)
-{
- return do_cmp_vl(ctx, a, 16, mop, cond);
-}
-
-static bool do_xcmp(DisasContext *ctx, arg_vvv *a,
- MemOp mop, TCGCond cond)
-{
- return do_cmp_vl(ctx, a, 32, mop, cond);
-}
-
-static bool do_cmpi_vl(DisasContext *ctx, arg_vv_i *a,
- uint32_t oprsz, MemOp mop, TCGCond cond)
-{
- uint32_t vd_ofs, vj_ofs;
-
- if (!check_vec(ctx, oprsz)) {
- return true;
- }
-
- vd_ofs = vec_full_offset(a->vd);
- vj_ofs = vec_full_offset(a->vj);
-
- tcg_gen_gvec_cmpi(cond, mop, vd_ofs, vj_ofs, a->imm, oprsz, ctx->vl / 8);
- return true;
-}
-
-static bool do_cmpi(DisasContext *ctx, arg_vv_i *a,
- MemOp mop, TCGCond cond)
-{
- return do_cmpi_vl(ctx, a, 16, mop, cond);
-}
-
-static bool do_xcmpi(DisasContext *ctx, arg_vv_i *a,
- MemOp mop, TCGCond cond)
-{
- return do_cmpi_vl(ctx, a, 32, mop, cond);
-}
-
-TRANS(vseq_b, LSX, do_cmp, MO_8, TCG_COND_EQ)
-TRANS(vseq_h, LSX, do_cmp, MO_16, TCG_COND_EQ)
-TRANS(vseq_w, LSX, do_cmp, MO_32, TCG_COND_EQ)
-TRANS(vseq_d, LSX, do_cmp, MO_64, TCG_COND_EQ)
-TRANS(vseqi_b, LSX, do_cmpi, MO_8, TCG_COND_EQ)
-TRANS(vseqi_h, LSX, do_cmpi, MO_16, TCG_COND_EQ)
-TRANS(vseqi_w, LSX, do_cmpi, MO_32, TCG_COND_EQ)
-TRANS(vseqi_d, LSX, do_cmpi, MO_64, TCG_COND_EQ)
-TRANS(xvseq_b, LASX, do_xcmp, MO_8, TCG_COND_EQ)
-TRANS(xvseq_h, LASX, do_xcmp, MO_16, TCG_COND_EQ)
-TRANS(xvseq_w, LASX, do_xcmp, MO_32, TCG_COND_EQ)
-TRANS(xvseq_d, LASX, do_xcmp, MO_64, TCG_COND_EQ)
-TRANS(xvseqi_b, LASX, do_xcmpi, MO_8, TCG_COND_EQ)
-TRANS(xvseqi_h, LASX, do_xcmpi, MO_16, TCG_COND_EQ)
-TRANS(xvseqi_w, LASX, do_xcmpi, MO_32, TCG_COND_EQ)
-TRANS(xvseqi_d, LASX, do_xcmpi, MO_64, TCG_COND_EQ)
-
-TRANS(vsle_b, LSX, do_cmp, MO_8, TCG_COND_LE)
-TRANS(vsle_h, LSX, do_cmp, MO_16, TCG_COND_LE)
-TRANS(vsle_w, LSX, do_cmp, MO_32, TCG_COND_LE)
-TRANS(vsle_d, LSX, do_cmp, MO_64, TCG_COND_LE)
-TRANS(vslei_b, LSX, do_cmpi, MO_8, TCG_COND_LE)
-TRANS(vslei_h, LSX, do_cmpi, MO_16, TCG_COND_LE)
-TRANS(vslei_w, LSX, do_cmpi, MO_32, TCG_COND_LE)
-TRANS(vslei_d, LSX, do_cmpi, MO_64, TCG_COND_LE)
-TRANS(vsle_bu, LSX, do_cmp, MO_8, TCG_COND_LEU)
-TRANS(vsle_hu, LSX, do_cmp, MO_16, TCG_COND_LEU)
-TRANS(vsle_wu, LSX, do_cmp, MO_32, TCG_COND_LEU)
-TRANS(vsle_du, LSX, do_cmp, MO_64, TCG_COND_LEU)
-TRANS(vslei_bu, LSX, do_cmpi, MO_8, TCG_COND_LEU)
-TRANS(vslei_hu, LSX, do_cmpi, MO_16, TCG_COND_LEU)
-TRANS(vslei_wu, LSX, do_cmpi, MO_32, TCG_COND_LEU)
-TRANS(vslei_du, LSX, do_cmpi, MO_64, TCG_COND_LEU)
-TRANS(xvsle_b, LASX, do_xcmp, MO_8, TCG_COND_LE)
-TRANS(xvsle_h, LASX, do_xcmp, MO_16, TCG_COND_LE)
-TRANS(xvsle_w, LASX, do_xcmp, MO_32, TCG_COND_LE)
-TRANS(xvsle_d, LASX, do_xcmp, MO_64, TCG_COND_LE)
-TRANS(xvslei_b, LASX, do_xcmpi, MO_8, TCG_COND_LE)
-TRANS(xvslei_h, LASX, do_xcmpi, MO_16, TCG_COND_LE)
-TRANS(xvslei_w, LASX, do_xcmpi, MO_32, TCG_COND_LE)
-TRANS(xvslei_d, LASX, do_xcmpi, MO_64, TCG_COND_LE)
-TRANS(xvsle_bu, LASX, do_xcmp, MO_8, TCG_COND_LEU)
-TRANS(xvsle_hu, LASX, do_xcmp, MO_16, TCG_COND_LEU)
-TRANS(xvsle_wu, LASX, do_xcmp, MO_32, TCG_COND_LEU)
-TRANS(xvsle_du, LASX, do_xcmp, MO_64, TCG_COND_LEU)
-TRANS(xvslei_bu, LASX, do_xcmpi, MO_8, TCG_COND_LEU)
-TRANS(xvslei_hu, LASX, do_xcmpi, MO_16, TCG_COND_LEU)
-TRANS(xvslei_wu, LASX, do_xcmpi, MO_32, TCG_COND_LEU)
-TRANS(xvslei_du, LASX, do_xcmpi, MO_64, TCG_COND_LEU)
-
-TRANS(vslt_b, LSX, do_cmp, MO_8, TCG_COND_LT)
-TRANS(vslt_h, LSX, do_cmp, MO_16, TCG_COND_LT)
-TRANS(vslt_w, LSX, do_cmp, MO_32, TCG_COND_LT)
-TRANS(vslt_d, LSX, do_cmp, MO_64, TCG_COND_LT)
-TRANS(vslti_b, LSX, do_cmpi, MO_8, TCG_COND_LT)
-TRANS(vslti_h, LSX, do_cmpi, MO_16, TCG_COND_LT)
-TRANS(vslti_w, LSX, do_cmpi, MO_32, TCG_COND_LT)
-TRANS(vslti_d, LSX, do_cmpi, MO_64, TCG_COND_LT)
-TRANS(vslt_bu, LSX, do_cmp, MO_8, TCG_COND_LTU)
-TRANS(vslt_hu, LSX, do_cmp, MO_16, TCG_COND_LTU)
-TRANS(vslt_wu, LSX, do_cmp, MO_32, TCG_COND_LTU)
-TRANS(vslt_du, LSX, do_cmp, MO_64, TCG_COND_LTU)
-TRANS(vslti_bu, LSX, do_cmpi, MO_8, TCG_COND_LTU)
-TRANS(vslti_hu, LSX, do_cmpi, MO_16, TCG_COND_LTU)
-TRANS(vslti_wu, LSX, do_cmpi, MO_32, TCG_COND_LTU)
-TRANS(vslti_du, LSX, do_cmpi, MO_64, TCG_COND_LTU)
-TRANS(xvslt_b, LASX, do_xcmp, MO_8, TCG_COND_LT)
-TRANS(xvslt_h, LASX, do_xcmp, MO_16, TCG_COND_LT)
-TRANS(xvslt_w, LASX, do_xcmp, MO_32, TCG_COND_LT)
-TRANS(xvslt_d, LASX, do_xcmp, MO_64, TCG_COND_LT)
-TRANS(xvslti_b, LASX, do_xcmpi, MO_8, TCG_COND_LT)
-TRANS(xvslti_h, LASX, do_xcmpi, MO_16, TCG_COND_LT)
-TRANS(xvslti_w, LASX, do_xcmpi, MO_32, TCG_COND_LT)
-TRANS(xvslti_d, LASX, do_xcmpi, MO_64, TCG_COND_LT)
-TRANS(xvslt_bu, LASX, do_xcmp, MO_8, TCG_COND_LTU)
-TRANS(xvslt_hu, LASX, do_xcmp, MO_16, TCG_COND_LTU)
-TRANS(xvslt_wu, LASX, do_xcmp, MO_32, TCG_COND_LTU)
-TRANS(xvslt_du, LASX, do_xcmp, MO_64, TCG_COND_LTU)
-TRANS(xvslti_bu, LASX, do_xcmpi, MO_8, TCG_COND_LTU)
-TRANS(xvslti_hu, LASX, do_xcmpi, MO_16, TCG_COND_LTU)
-TRANS(xvslti_wu, LASX, do_xcmpi, MO_32, TCG_COND_LTU)
-TRANS(xvslti_du, LASX, do_xcmpi, MO_64, TCG_COND_LTU)
-
-static bool do_vfcmp_cond_s(DisasContext *ctx, arg_vvv_fcond *a, uint32_t sz)
-{
- uint32_t flags;
- void (*fn)(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32);
- TCGv_i32 vd = tcg_constant_i32(a->vd);
- TCGv_i32 vj = tcg_constant_i32(a->vj);
- TCGv_i32 vk = tcg_constant_i32(a->vk);
- TCGv_i32 oprsz = tcg_constant_i32(sz);
-
- if (!check_vec(ctx, sz)) {
- return true;
- }
-
- fn = (a->fcond & 1 ? gen_helper_vfcmp_s_s : gen_helper_vfcmp_c_s);
- flags = get_fcmp_flags(a->fcond >> 1);
- fn(tcg_env, oprsz, vd, vj, vk, tcg_constant_i32(flags));
-
- return true;
-}
-
-static bool do_vfcmp_cond_d(DisasContext *ctx, arg_vvv_fcond *a, uint32_t sz)
-{
- uint32_t flags;
- void (*fn)(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32);
- TCGv_i32 vd = tcg_constant_i32(a->vd);
- TCGv_i32 vj = tcg_constant_i32(a->vj);
- TCGv_i32 vk = tcg_constant_i32(a->vk);
- TCGv_i32 oprsz = tcg_constant_i32(sz);
-
- if (!check_vec(ctx, sz)) {
- return true;
- }
-
- fn = (a->fcond & 1 ? gen_helper_vfcmp_s_d : gen_helper_vfcmp_c_d);
- flags = get_fcmp_flags(a->fcond >> 1);
- fn(tcg_env, oprsz, vd, vj, vk, tcg_constant_i32(flags));
-
- return true;
-}
-
-TRANS(vfcmp_cond_s, LSX, do_vfcmp_cond_s, 16)
-TRANS(vfcmp_cond_d, LSX, do_vfcmp_cond_d, 16)
-TRANS(xvfcmp_cond_s, LASX, do_vfcmp_cond_s, 32)
-TRANS(xvfcmp_cond_d, LASX, do_vfcmp_cond_d, 32)
-
-static bool do_vbitsel_v(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz)
-{
- if (!check_vec(ctx, oprsz)) {
- return true;
- }
-
- tcg_gen_gvec_bitsel(MO_64, vec_full_offset(a->vd), vec_full_offset(a->va),
- vec_full_offset(a->vk), vec_full_offset(a->vj),
- oprsz, ctx->vl / 8);
- return true;
-}
-
-TRANS(vbitsel_v, LSX, do_vbitsel_v, 16)
-TRANS(xvbitsel_v, LASX, do_vbitsel_v, 32)
-
-static void gen_vbitseli(unsigned vece, TCGv_vec a, TCGv_vec b, int64_t imm)
-{
- tcg_gen_bitsel_vec(vece, a, a, tcg_constant_vec_matching(a, vece, imm), b);
-}
-
-static bool do_vbitseli_b(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz)
-{
- static const GVecGen2i op = {
- .fniv = gen_vbitseli,
- .fnoi = gen_helper_vbitseli_b,
- .vece = MO_8,
- .load_dest = true
- };
-
- if (!check_vec(ctx, oprsz)) {
- return true;
- }
-
- tcg_gen_gvec_2i(vec_full_offset(a->vd), vec_full_offset(a->vj),
- oprsz, ctx->vl / 8, a->imm , &op);
- return true;
-}
-
-TRANS(vbitseli_b, LSX, do_vbitseli_b, 16)
-TRANS(xvbitseli_b, LASX, do_vbitseli_b, 32)
-
-#define VSET(NAME, COND) \
-static bool trans_## NAME (DisasContext *ctx, arg_cv *a) \
-{ \
- TCGv_i64 t1, al, ah; \
- \
- al = tcg_temp_new_i64(); \
- ah = tcg_temp_new_i64(); \
- t1 = tcg_temp_new_i64(); \
- \
- get_vreg64(ah, a->vj, 1); \
- get_vreg64(al, a->vj, 0); \
- \
- if (!avail_LSX(ctx)) { \
- return false; \
- } \
- \
- if (!check_vec(ctx, 16)) { \
- return true; \
- } \
- \
- tcg_gen_or_i64(t1, al, ah); \
- tcg_gen_setcondi_i64(COND, t1, t1, 0); \
- tcg_gen_st8_tl(t1, tcg_env, offsetof(CPULoongArchState, cf[a->cd & 0x7])); \
- \
- return true; \
-}
-
-VSET(vseteqz_v, TCG_COND_EQ)
-VSET(vsetnez_v, TCG_COND_NE)
-
-TRANS(vsetanyeqz_b, LSX, gen_cv, gen_helper_vsetanyeqz_b)
-TRANS(vsetanyeqz_h, LSX, gen_cv, gen_helper_vsetanyeqz_h)
-TRANS(vsetanyeqz_w, LSX, gen_cv, gen_helper_vsetanyeqz_w)
-TRANS(vsetanyeqz_d, LSX, gen_cv, gen_helper_vsetanyeqz_d)
-TRANS(vsetallnez_b, LSX, gen_cv, gen_helper_vsetallnez_b)
-TRANS(vsetallnez_h, LSX, gen_cv, gen_helper_vsetallnez_h)
-TRANS(vsetallnez_w, LSX, gen_cv, gen_helper_vsetallnez_w)
-TRANS(vsetallnez_d, LSX, gen_cv, gen_helper_vsetallnez_d)
-
-#define XVSET(NAME, COND) \
-static bool trans_## NAME(DisasContext *ctx, arg_cv * a) \
-{ \
- TCGv_i64 t1, t2, d[4]; \
- \
- d[0] = tcg_temp_new_i64(); \
- d[1] = tcg_temp_new_i64(); \
- d[2] = tcg_temp_new_i64(); \
- d[3] = tcg_temp_new_i64(); \
- t1 = tcg_temp_new_i64(); \
- t2 = tcg_temp_new_i64(); \
- \
- get_vreg64(d[0], a->vj, 0); \
- get_vreg64(d[1], a->vj, 1); \
- get_vreg64(d[2], a->vj, 2); \
- get_vreg64(d[3], a->vj, 3); \
- \
- if (!avail_LASX(ctx)) { \
- return false; \
- } \
- \
- if (!check_vec(ctx, 32)) { \
- return true; \
- } \
- \
- tcg_gen_or_i64(t1, d[0], d[1]); \
- tcg_gen_or_i64(t2, d[2], d[3]); \
- tcg_gen_or_i64(t1, t2, t1); \
- tcg_gen_setcondi_i64(COND, t1, t1, 0); \
- tcg_gen_st8_tl(t1, tcg_env, offsetof(CPULoongArchState, cf[a->cd & 0x7])); \
- \
- return true; \
-}
-
-XVSET(xvseteqz_v, TCG_COND_EQ)
-XVSET(xvsetnez_v, TCG_COND_NE)
-
-TRANS(xvsetanyeqz_b, LASX, gen_cx, gen_helper_vsetanyeqz_b)
-TRANS(xvsetanyeqz_h, LASX, gen_cx, gen_helper_vsetanyeqz_h)
-TRANS(xvsetanyeqz_w, LASX, gen_cx, gen_helper_vsetanyeqz_w)
-TRANS(xvsetanyeqz_d, LASX, gen_cx, gen_helper_vsetanyeqz_d)
-TRANS(xvsetallnez_b, LASX, gen_cx, gen_helper_vsetallnez_b)
-TRANS(xvsetallnez_h, LASX, gen_cx, gen_helper_vsetallnez_h)
-TRANS(xvsetallnez_w, LASX, gen_cx, gen_helper_vsetallnez_w)
-TRANS(xvsetallnez_d, LASX, gen_cx, gen_helper_vsetallnez_d)
-
-static bool gen_g2v_vl(DisasContext *ctx, arg_vr_i *a, uint32_t oprsz, MemOp mop,
- void (*func)(TCGv, TCGv_ptr, tcg_target_long))
-{
- TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
-
- if (!check_vec(ctx, oprsz)) {
- return true;
- }
-
- func(src, tcg_env, vec_reg_offset(a->vd, a->imm, mop));
-
- return true;
-}
-
-static bool gen_g2v(DisasContext *ctx, arg_vr_i *a, MemOp mop,
- void (*func)(TCGv, TCGv_ptr, tcg_target_long))
-{
- return gen_g2v_vl(ctx, a, 16, mop, func);
-}
-
-static bool gen_g2x(DisasContext *ctx, arg_vr_i *a, MemOp mop,
- void (*func)(TCGv, TCGv_ptr, tcg_target_long))
-{
- return gen_g2v_vl(ctx, a, 32, mop, func);
-}
-
-TRANS(vinsgr2vr_b, LSX, gen_g2v, MO_8, tcg_gen_st8_i64)
-TRANS(vinsgr2vr_h, LSX, gen_g2v, MO_16, tcg_gen_st16_i64)
-TRANS(vinsgr2vr_w, LSX, gen_g2v, MO_32, tcg_gen_st32_i64)
-TRANS(vinsgr2vr_d, LSX, gen_g2v, MO_64, tcg_gen_st_i64)
-TRANS(xvinsgr2vr_w, LASX, gen_g2x, MO_32, tcg_gen_st32_i64)
-TRANS(xvinsgr2vr_d, LASX, gen_g2x, MO_64, tcg_gen_st_i64)
-
-static bool gen_v2g_vl(DisasContext *ctx, arg_rv_i *a, uint32_t oprsz, MemOp mop,
- void (*func)(TCGv, TCGv_ptr, tcg_target_long))
-{
- TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE);
-
- if (!check_vec(ctx, oprsz)) {
- return true;
- }
-
- func(dst, tcg_env, vec_reg_offset(a->vj, a->imm, mop));
-
- return true;
-}
-
-static bool gen_v2g(DisasContext *ctx, arg_rv_i *a, MemOp mop,
- void (*func)(TCGv, TCGv_ptr, tcg_target_long))
-{
- return gen_v2g_vl(ctx, a, 16, mop, func);
-}
-
-static bool gen_x2g(DisasContext *ctx, arg_rv_i *a, MemOp mop,
- void (*func)(TCGv, TCGv_ptr, tcg_target_long))
-{
- return gen_v2g_vl(ctx, a, 32, mop, func);
-}
-
-TRANS(vpickve2gr_b, LSX, gen_v2g, MO_8, tcg_gen_ld8s_i64)
-TRANS(vpickve2gr_h, LSX, gen_v2g, MO_16, tcg_gen_ld16s_i64)
-TRANS(vpickve2gr_w, LSX, gen_v2g, MO_32, tcg_gen_ld32s_i64)
-TRANS(vpickve2gr_d, LSX, gen_v2g, MO_64, tcg_gen_ld_i64)
-TRANS(vpickve2gr_bu, LSX, gen_v2g, MO_8, tcg_gen_ld8u_i64)
-TRANS(vpickve2gr_hu, LSX, gen_v2g, MO_16, tcg_gen_ld16u_i64)
-TRANS(vpickve2gr_wu, LSX, gen_v2g, MO_32, tcg_gen_ld32u_i64)
-TRANS(vpickve2gr_du, LSX, gen_v2g, MO_64, tcg_gen_ld_i64)
-TRANS(xvpickve2gr_w, LASX, gen_x2g, MO_32, tcg_gen_ld32s_i64)
-TRANS(xvpickve2gr_d, LASX, gen_x2g, MO_64, tcg_gen_ld_i64)
-TRANS(xvpickve2gr_wu, LASX, gen_x2g, MO_32, tcg_gen_ld32u_i64)
-TRANS(xvpickve2gr_du, LASX, gen_x2g, MO_64, tcg_gen_ld_i64)
-
-static bool gvec_dup_vl(DisasContext *ctx, arg_vr *a,
- uint32_t oprsz, MemOp mop)
-{
- TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
-
- if (!check_vec(ctx, oprsz)) {
- return true;
- }
-
- tcg_gen_gvec_dup_i64(mop, vec_full_offset(a->vd),
- oprsz, ctx->vl/8, src);
- return true;
-}
-
-static bool gvec_dup(DisasContext *ctx, arg_vr *a, MemOp mop)
-{
- return gvec_dup_vl(ctx, a, 16, mop);
-}
-
-static bool gvec_dupx(DisasContext *ctx, arg_vr *a, MemOp mop)
-{
- return gvec_dup_vl(ctx, a, 32, mop);
-}
-
-TRANS(vreplgr2vr_b, LSX, gvec_dup, MO_8)
-TRANS(vreplgr2vr_h, LSX, gvec_dup, MO_16)
-TRANS(vreplgr2vr_w, LSX, gvec_dup, MO_32)
-TRANS(vreplgr2vr_d, LSX, gvec_dup, MO_64)
-TRANS(xvreplgr2vr_b, LASX, gvec_dupx, MO_8)
-TRANS(xvreplgr2vr_h, LASX, gvec_dupx, MO_16)
-TRANS(xvreplgr2vr_w, LASX, gvec_dupx, MO_32)
-TRANS(xvreplgr2vr_d, LASX, gvec_dupx, MO_64)
-
-static bool trans_vreplvei_b(DisasContext *ctx, arg_vv_i *a)
-{
- if (!avail_LSX(ctx)) {
- return false;
- }
-
- if (!check_vec(ctx, 16)) {
- return true;
- }
-
- tcg_gen_gvec_dup_mem(MO_8,vec_full_offset(a->vd),
- offsetof(CPULoongArchState,
- fpr[a->vj].vreg.B((a->imm))),
- 16, ctx->vl/8);
- return true;
-}
-
-static bool trans_vreplvei_h(DisasContext *ctx, arg_vv_i *a)
-{
- if (!avail_LSX(ctx)) {
- return false;
- }
-
- if (!check_vec(ctx, 16)) {
- return true;
- }
-
- tcg_gen_gvec_dup_mem(MO_16, vec_full_offset(a->vd),
- offsetof(CPULoongArchState,
- fpr[a->vj].vreg.H((a->imm))),
- 16, ctx->vl/8);
- return true;
-}
-static bool trans_vreplvei_w(DisasContext *ctx, arg_vv_i *a)
-{
- if (!avail_LSX(ctx)) {
- return false;
- }
-
- if (!check_vec(ctx, 16)) {
- return true;
- }
-
- tcg_gen_gvec_dup_mem(MO_32, vec_full_offset(a->vd),
- offsetof(CPULoongArchState,
- fpr[a->vj].vreg.W((a->imm))),
- 16, ctx->vl/8);
- return true;
-}
-static bool trans_vreplvei_d(DisasContext *ctx, arg_vv_i *a)
-{
- if (!avail_LSX(ctx)) {
- return false;
- }
-
- if (!check_vec(ctx, 16)) {
- return true;
- }
-
- tcg_gen_gvec_dup_mem(MO_64, vec_full_offset(a->vd),
- offsetof(CPULoongArchState,
- fpr[a->vj].vreg.D((a->imm))),
- 16, ctx->vl/8);
- return true;
-}
-
-static bool gen_vreplve_vl(DisasContext *ctx, arg_vvr *a,
- uint32_t oprsz, int vece, int bit,
- void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long))
-{
- int i;
- TCGv_i64 t0 = tcg_temp_new_i64();
- TCGv_ptr t1 = tcg_temp_new_ptr();
- TCGv_i64 t2 = tcg_temp_new_i64();
-
- if (!check_vec(ctx, oprsz)) {
- return true;
- }
-
- tcg_gen_andi_i64(t0, gpr_src(ctx, a->rk, EXT_NONE), (LSX_LEN / bit) - 1);
- tcg_gen_shli_i64(t0, t0, vece);
- if (HOST_BIG_ENDIAN) {
- tcg_gen_xori_i64(t0, t0, vece << ((LSX_LEN / bit) - 1));
- }
-
- tcg_gen_trunc_i64_ptr(t1, t0);
- tcg_gen_add_ptr(t1, t1, tcg_env);
-
- for (i = 0; i < oprsz; i += 16) {
- func(t2, t1, vec_full_offset(a->vj) + i);
- tcg_gen_gvec_dup_i64(vece, vec_full_offset(a->vd) + i, 16, 16, t2);
- }
-
- return true;
-}
-
-static bool gen_vreplve(DisasContext *ctx, arg_vvr *a, int vece, int bit,
- void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long))
-{
- return gen_vreplve_vl(ctx, a, 16, vece, bit, func);
-}
-
-static bool gen_xvreplve(DisasContext *ctx, arg_vvr *a, int vece, int bit,
- void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long))
-{
- return gen_vreplve_vl(ctx, a, 32, vece, bit, func);
-}
-
-TRANS(vreplve_b, LSX, gen_vreplve, MO_8, 8, tcg_gen_ld8u_i64)
-TRANS(vreplve_h, LSX, gen_vreplve, MO_16, 16, tcg_gen_ld16u_i64)
-TRANS(vreplve_w, LSX, gen_vreplve, MO_32, 32, tcg_gen_ld32u_i64)
-TRANS(vreplve_d, LSX, gen_vreplve, MO_64, 64, tcg_gen_ld_i64)
-TRANS(xvreplve_b, LASX, gen_xvreplve, MO_8, 8, tcg_gen_ld8u_i64)
-TRANS(xvreplve_h, LASX, gen_xvreplve, MO_16, 16, tcg_gen_ld16u_i64)
-TRANS(xvreplve_w, LASX, gen_xvreplve, MO_32, 32, tcg_gen_ld32u_i64)
-TRANS(xvreplve_d, LASX, gen_xvreplve, MO_64, 64, tcg_gen_ld_i64)
-
-static bool gen_xvrepl128(DisasContext *ctx, arg_vv_i *a, MemOp mop)
-{
- int i;
-
- if (!check_vec(ctx, 32)) {
- return true;
- }
-
- for (i = 0; i < 32; i += 16) {
- tcg_gen_gvec_dup_mem(mop, vec_full_offset(a->vd) + i,
- vec_reg_offset(a->vj, a->imm, mop) + i, 16, 16);
-
- }
- return true;
-}
-
-TRANS(xvrepl128vei_b, LASX, gen_xvrepl128, MO_8)
-TRANS(xvrepl128vei_h, LASX, gen_xvrepl128, MO_16)
-TRANS(xvrepl128vei_w, LASX, gen_xvrepl128, MO_32)
-TRANS(xvrepl128vei_d, LASX, gen_xvrepl128, MO_64)
-
-static bool gen_xvreplve0(DisasContext *ctx, arg_vv *a, MemOp mop)
-{
- if (!check_vec(ctx, 32)) {
- return true;
- }
-
- tcg_gen_gvec_dup_mem(mop, vec_full_offset(a->vd),
- vec_full_offset(a->vj), 32, 32);
- return true;
-}
-
-TRANS(xvreplve0_b, LASX, gen_xvreplve0, MO_8)
-TRANS(xvreplve0_h, LASX, gen_xvreplve0, MO_16)
-TRANS(xvreplve0_w, LASX, gen_xvreplve0, MO_32)
-TRANS(xvreplve0_d, LASX, gen_xvreplve0, MO_64)
-TRANS(xvreplve0_q, LASX, gen_xvreplve0, MO_128)
-
-TRANS(xvinsve0_w, LASX, gen_xx_i, gen_helper_xvinsve0_w)
-TRANS(xvinsve0_d, LASX, gen_xx_i, gen_helper_xvinsve0_d)
-
-TRANS(xvpickve_w, LASX, gen_xx_i, gen_helper_xvpickve_w)
-TRANS(xvpickve_d, LASX, gen_xx_i, gen_helper_xvpickve_d)
-
-static bool do_vbsll_v(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz)
-{
- int i, ofs;
-
- if (!check_vec(ctx, oprsz)) {
- return true;
- }
-
- for (i = 0; i < oprsz / 16; i++) {
- TCGv desthigh = tcg_temp_new_i64();
- TCGv destlow = tcg_temp_new_i64();
- TCGv high = tcg_temp_new_i64();
- TCGv low = tcg_temp_new_i64();
-
- get_vreg64(low, a->vj, 2 * i);
-
- ofs = ((a->imm) & 0xf) * 8;
- if (ofs < 64) {
- get_vreg64(high, a->vj, 2 * i + 1);
- tcg_gen_extract2_i64(desthigh, low, high, 64 - ofs);
- tcg_gen_shli_i64(destlow, low, ofs);
- } else {
- tcg_gen_shli_i64(desthigh, low, ofs - 64);
- destlow = tcg_constant_i64(0);
- }
- set_vreg64(desthigh, a->vd, 2 * i + 1);
- set_vreg64(destlow, a->vd, 2 * i);
- }
-
- return true;
-}
-
-static bool do_vbsrl_v(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz)
-{
- int i, ofs;
-
- if (!check_vec(ctx, 32)) {
- return true;
- }
-
- for (i = 0; i < oprsz / 16; i++) {
- TCGv desthigh = tcg_temp_new_i64();
- TCGv destlow = tcg_temp_new_i64();
- TCGv high = tcg_temp_new_i64();
- TCGv low = tcg_temp_new_i64();
- get_vreg64(high, a->vj, 2 * i + 1);
-
- ofs = ((a->imm) & 0xf) * 8;
- if (ofs < 64) {
- get_vreg64(low, a->vj, 2 * i);
- tcg_gen_extract2_i64(destlow, low, high, ofs);
- tcg_gen_shri_i64(desthigh, high, ofs);
- } else {
- tcg_gen_shri_i64(destlow, high, ofs - 64);
- desthigh = tcg_constant_i64(0);
- }
- set_vreg64(desthigh, a->vd, 2 * i + 1);
- set_vreg64(destlow, a->vd, 2 * i);
- }
-
- return true;
-}
-
-TRANS(vbsll_v, LSX, do_vbsll_v, 16)
-TRANS(vbsrl_v, LSX, do_vbsrl_v, 16)
-TRANS(xvbsll_v, LASX, do_vbsll_v, 32)
-TRANS(xvbsrl_v, LASX, do_vbsrl_v, 32)
-
-TRANS(vpackev_b, LSX, gen_vvv, gen_helper_vpackev_b)
-TRANS(vpackev_h, LSX, gen_vvv, gen_helper_vpackev_h)
-TRANS(vpackev_w, LSX, gen_vvv, gen_helper_vpackev_w)
-TRANS(vpackev_d, LSX, gen_vvv, gen_helper_vpackev_d)
-TRANS(vpackod_b, LSX, gen_vvv, gen_helper_vpackod_b)
-TRANS(vpackod_h, LSX, gen_vvv, gen_helper_vpackod_h)
-TRANS(vpackod_w, LSX, gen_vvv, gen_helper_vpackod_w)
-TRANS(vpackod_d, LSX, gen_vvv, gen_helper_vpackod_d)
-TRANS(xvpackev_b, LASX, gen_xxx, gen_helper_vpackev_b)
-TRANS(xvpackev_h, LASX, gen_xxx, gen_helper_vpackev_h)
-TRANS(xvpackev_w, LASX, gen_xxx, gen_helper_vpackev_w)
-TRANS(xvpackev_d, LASX, gen_xxx, gen_helper_vpackev_d)
-TRANS(xvpackod_b, LASX, gen_xxx, gen_helper_vpackod_b)
-TRANS(xvpackod_h, LASX, gen_xxx, gen_helper_vpackod_h)
-TRANS(xvpackod_w, LASX, gen_xxx, gen_helper_vpackod_w)
-TRANS(xvpackod_d, LASX, gen_xxx, gen_helper_vpackod_d)
-
-TRANS(vpickev_b, LSX, gen_vvv, gen_helper_vpickev_b)
-TRANS(vpickev_h, LSX, gen_vvv, gen_helper_vpickev_h)
-TRANS(vpickev_w, LSX, gen_vvv, gen_helper_vpickev_w)
-TRANS(vpickev_d, LSX, gen_vvv, gen_helper_vpickev_d)
-TRANS(vpickod_b, LSX, gen_vvv, gen_helper_vpickod_b)
-TRANS(vpickod_h, LSX, gen_vvv, gen_helper_vpickod_h)
-TRANS(vpickod_w, LSX, gen_vvv, gen_helper_vpickod_w)
-TRANS(vpickod_d, LSX, gen_vvv, gen_helper_vpickod_d)
-TRANS(xvpickev_b, LASX, gen_xxx, gen_helper_vpickev_b)
-TRANS(xvpickev_h, LASX, gen_xxx, gen_helper_vpickev_h)
-TRANS(xvpickev_w, LASX, gen_xxx, gen_helper_vpickev_w)
-TRANS(xvpickev_d, LASX, gen_xxx, gen_helper_vpickev_d)
-TRANS(xvpickod_b, LASX, gen_xxx, gen_helper_vpickod_b)
-TRANS(xvpickod_h, LASX, gen_xxx, gen_helper_vpickod_h)
-TRANS(xvpickod_w, LASX, gen_xxx, gen_helper_vpickod_w)
-TRANS(xvpickod_d, LASX, gen_xxx, gen_helper_vpickod_d)
-
-TRANS(vilvl_b, LSX, gen_vvv, gen_helper_vilvl_b)
-TRANS(vilvl_h, LSX, gen_vvv, gen_helper_vilvl_h)
-TRANS(vilvl_w, LSX, gen_vvv, gen_helper_vilvl_w)
-TRANS(vilvl_d, LSX, gen_vvv, gen_helper_vilvl_d)
-TRANS(vilvh_b, LSX, gen_vvv, gen_helper_vilvh_b)
-TRANS(vilvh_h, LSX, gen_vvv, gen_helper_vilvh_h)
-TRANS(vilvh_w, LSX, gen_vvv, gen_helper_vilvh_w)
-TRANS(vilvh_d, LSX, gen_vvv, gen_helper_vilvh_d)
-TRANS(xvilvl_b, LASX, gen_xxx, gen_helper_vilvl_b)
-TRANS(xvilvl_h, LASX, gen_xxx, gen_helper_vilvl_h)
-TRANS(xvilvl_w, LASX, gen_xxx, gen_helper_vilvl_w)
-TRANS(xvilvl_d, LASX, gen_xxx, gen_helper_vilvl_d)
-TRANS(xvilvh_b, LASX, gen_xxx, gen_helper_vilvh_b)
-TRANS(xvilvh_h, LASX, gen_xxx, gen_helper_vilvh_h)
-TRANS(xvilvh_w, LASX, gen_xxx, gen_helper_vilvh_w)
-TRANS(xvilvh_d, LASX, gen_xxx, gen_helper_vilvh_d)
-
-TRANS(vshuf_b, LSX, gen_vvvv, gen_helper_vshuf_b)
-TRANS(vshuf_h, LSX, gen_vvv, gen_helper_vshuf_h)
-TRANS(vshuf_w, LSX, gen_vvv, gen_helper_vshuf_w)
-TRANS(vshuf_d, LSX, gen_vvv, gen_helper_vshuf_d)
-TRANS(xvshuf_b, LASX, gen_xxxx, gen_helper_vshuf_b)
-TRANS(xvshuf_h, LASX, gen_xxx, gen_helper_vshuf_h)
-TRANS(xvshuf_w, LASX, gen_xxx, gen_helper_vshuf_w)
-TRANS(xvshuf_d, LASX, gen_xxx, gen_helper_vshuf_d)
-TRANS(vshuf4i_b, LSX, gen_vv_i, gen_helper_vshuf4i_b)
-TRANS(vshuf4i_h, LSX, gen_vv_i, gen_helper_vshuf4i_h)
-TRANS(vshuf4i_w, LSX, gen_vv_i, gen_helper_vshuf4i_w)
-TRANS(vshuf4i_d, LSX, gen_vv_i, gen_helper_vshuf4i_d)
-TRANS(xvshuf4i_b, LASX, gen_xx_i, gen_helper_vshuf4i_b)
-TRANS(xvshuf4i_h, LASX, gen_xx_i, gen_helper_vshuf4i_h)
-TRANS(xvshuf4i_w, LASX, gen_xx_i, gen_helper_vshuf4i_w)
-TRANS(xvshuf4i_d, LASX, gen_xx_i, gen_helper_vshuf4i_d)
-
-TRANS(xvperm_w, LASX, gen_xxx, gen_helper_vperm_w)
-TRANS(vpermi_w, LSX, gen_vv_i, gen_helper_vpermi_w)
-TRANS(xvpermi_w, LASX, gen_xx_i, gen_helper_vpermi_w)
-TRANS(xvpermi_d, LASX, gen_xx_i, gen_helper_vpermi_d)
-TRANS(xvpermi_q, LASX, gen_xx_i, gen_helper_vpermi_q)
-
-TRANS(vextrins_b, LSX, gen_vv_i, gen_helper_vextrins_b)
-TRANS(vextrins_h, LSX, gen_vv_i, gen_helper_vextrins_h)
-TRANS(vextrins_w, LSX, gen_vv_i, gen_helper_vextrins_w)
-TRANS(vextrins_d, LSX, gen_vv_i, gen_helper_vextrins_d)
-TRANS(xvextrins_b, LASX, gen_xx_i, gen_helper_vextrins_b)
-TRANS(xvextrins_h, LASX, gen_xx_i, gen_helper_vextrins_h)
-TRANS(xvextrins_w, LASX, gen_xx_i, gen_helper_vextrins_w)
-TRANS(xvextrins_d, LASX, gen_xx_i, gen_helper_vextrins_d)
-
-static bool trans_vld(DisasContext *ctx, arg_vr_i *a)
-{
- TCGv addr;
- TCGv_i64 rl, rh;
- TCGv_i128 val;
-
- if (!avail_LSX(ctx)) {
- return false;
- }
-
- if (!check_vec(ctx, 16)) {
- return true;
- }
-
- addr = gpr_src(ctx, a->rj, EXT_NONE);
- val = tcg_temp_new_i128();
- rl = tcg_temp_new_i64();
- rh = tcg_temp_new_i64();
-
- addr = make_address_i(ctx, addr, a->imm);
-
- tcg_gen_qemu_ld_i128(val, addr, ctx->mem_idx, MO_128 | MO_TE);
- tcg_gen_extr_i128_i64(rl, rh, val);
- set_vreg64(rh, a->vd, 1);
- set_vreg64(rl, a->vd, 0);
-
- return true;
-}
-
-static bool trans_vst(DisasContext *ctx, arg_vr_i *a)
-{
- TCGv addr;
- TCGv_i128 val;
- TCGv_i64 ah, al;
-
- if (!avail_LSX(ctx)) {
- return false;
- }
-
- if (!check_vec(ctx, 16)) {
- return true;
- }
-
- addr = gpr_src(ctx, a->rj, EXT_NONE);
- val = tcg_temp_new_i128();
- ah = tcg_temp_new_i64();
- al = tcg_temp_new_i64();
-
- addr = make_address_i(ctx, addr, a->imm);
-
- get_vreg64(ah, a->vd, 1);
- get_vreg64(al, a->vd, 0);
- tcg_gen_concat_i64_i128(val, al, ah);
- tcg_gen_qemu_st_i128(val, addr, ctx->mem_idx, MO_128 | MO_TE);
-
- return true;
-}
-
-static bool trans_vldx(DisasContext *ctx, arg_vrr *a)
-{
- TCGv addr, src1, src2;
- TCGv_i64 rl, rh;
- TCGv_i128 val;
-
- if (!avail_LSX(ctx)) {
- return false;
- }
-
- if (!check_vec(ctx, 16)) {
- return true;
- }
-
- src1 = gpr_src(ctx, a->rj, EXT_NONE);
- src2 = gpr_src(ctx, a->rk, EXT_NONE);
- val = tcg_temp_new_i128();
- rl = tcg_temp_new_i64();
- rh = tcg_temp_new_i64();
-
- addr = make_address_x(ctx, src1, src2);
- tcg_gen_qemu_ld_i128(val, addr, ctx->mem_idx, MO_128 | MO_TE);
- tcg_gen_extr_i128_i64(rl, rh, val);
- set_vreg64(rh, a->vd, 1);
- set_vreg64(rl, a->vd, 0);
-
- return true;
-}
-
-static bool trans_vstx(DisasContext *ctx, arg_vrr *a)
-{
- TCGv addr, src1, src2;
- TCGv_i64 ah, al;
- TCGv_i128 val;
-
- if (!avail_LSX(ctx)) {
- return false;
- }
-
- if (!check_vec(ctx, 16)) {
- return true;
- }
-
- src1 = gpr_src(ctx, a->rj, EXT_NONE);
- src2 = gpr_src(ctx, a->rk, EXT_NONE);
- val = tcg_temp_new_i128();
- ah = tcg_temp_new_i64();
- al = tcg_temp_new_i64();
-
- addr = make_address_x(ctx, src1, src2);
- get_vreg64(ah, a->vd, 1);
- get_vreg64(al, a->vd, 0);
- tcg_gen_concat_i64_i128(val, al, ah);
- tcg_gen_qemu_st_i128(val, addr, ctx->mem_idx, MO_128 | MO_TE);
-
- return true;
-}
-
-static bool do_vldrepl_vl(DisasContext *ctx, arg_vr_i *a,
- uint32_t oprsz, MemOp mop)
-{
- TCGv addr;
- TCGv_i64 val;
-
- if (!check_vec(ctx, oprsz)) {
- return true;
- }
-
- addr = gpr_src(ctx, a->rj, EXT_NONE);
- val = tcg_temp_new_i64();
-
- addr = make_address_i(ctx, addr, a->imm);
-
- tcg_gen_qemu_ld_i64(val, addr, ctx->mem_idx, mop);
- tcg_gen_gvec_dup_i64(mop, vec_full_offset(a->vd), oprsz, ctx->vl / 8, val);
-
- return true;
-}
-
-static bool do_vldrepl(DisasContext *ctx, arg_vr_i *a, MemOp mop)
-{
- return do_vldrepl_vl(ctx, a, 16, mop);
-}
-
-static bool do_xvldrepl(DisasContext *ctx, arg_vr_i *a, MemOp mop)
-{
- return do_vldrepl_vl(ctx, a, 32, mop);
-}
-
-TRANS(vldrepl_b, LSX, do_vldrepl, MO_8)
-TRANS(vldrepl_h, LSX, do_vldrepl, MO_16)
-TRANS(vldrepl_w, LSX, do_vldrepl, MO_32)
-TRANS(vldrepl_d, LSX, do_vldrepl, MO_64)
-TRANS(xvldrepl_b, LASX, do_xvldrepl, MO_8)
-TRANS(xvldrepl_h, LASX, do_xvldrepl, MO_16)
-TRANS(xvldrepl_w, LASX, do_xvldrepl, MO_32)
-TRANS(xvldrepl_d, LASX, do_xvldrepl, MO_64)
-
-static bool do_vstelm_vl(DisasContext *ctx,
- arg_vr_ii *a, uint32_t oprsz, MemOp mop)
-{
- TCGv addr;
- TCGv_i64 val;
-
- if (!check_vec(ctx, oprsz)) {
- return true;
- }
-
- addr = gpr_src(ctx, a->rj, EXT_NONE);
- val = tcg_temp_new_i64();
-
- addr = make_address_i(ctx, addr, a->imm);
- tcg_gen_ld_i64(val, tcg_env, vec_reg_offset(a->vd, a->imm2, mop));
- tcg_gen_qemu_st_i64(val, addr, ctx->mem_idx, mop);
- return true;
-}
-
-static bool do_vstelm(DisasContext *ctx, arg_vr_ii *a, MemOp mop)
-{
- return do_vstelm_vl(ctx, a, 16, mop);
-}
-
-static bool do_xvstelm(DisasContext *ctx, arg_vr_ii *a, MemOp mop)
-{
- return do_vstelm_vl(ctx, a, 32, mop);
-}
-
-TRANS(vstelm_b, LSX, do_vstelm, MO_8)
-TRANS(vstelm_h, LSX, do_vstelm, MO_16)
-TRANS(vstelm_w, LSX, do_vstelm, MO_32)
-TRANS(vstelm_d, LSX, do_vstelm, MO_64)
-TRANS(xvstelm_b, LASX, do_xvstelm, MO_8)
-TRANS(xvstelm_h, LASX, do_xvstelm, MO_16)
-TRANS(xvstelm_w, LASX, do_xvstelm, MO_32)
-TRANS(xvstelm_d, LASX, do_xvstelm, MO_64)
-
-static bool gen_lasx_memory(DisasContext *ctx, arg_vr_i *a,
- void (*func)(DisasContext *, int, TCGv))
-{
- TCGv addr = gpr_src(ctx, a->rj, EXT_NONE);
- TCGv temp = NULL;
-
- if (!check_vec(ctx, 32)) {
- return true;
- }
-
- if (a->imm) {
- temp = tcg_temp_new();
- tcg_gen_addi_tl(temp, addr, a->imm);
- addr = temp;
- }
-
- func(ctx, a->vd, addr);
- return true;
-}
-
-static void gen_xvld(DisasContext *ctx, int vreg, TCGv addr)
-{
- int i;
- TCGv temp = tcg_temp_new();
- TCGv dest = tcg_temp_new();
-
- tcg_gen_qemu_ld_i64(dest, addr, ctx->mem_idx, MO_TEUQ);
- set_vreg64(dest, vreg, 0);
-
- for (i = 1; i < 4; i++) {
- tcg_gen_addi_tl(temp, addr, 8 * i);
- tcg_gen_qemu_ld_i64(dest, temp, ctx->mem_idx, MO_TEUQ);
- set_vreg64(dest, vreg, i);
- }
-}
-
-static void gen_xvst(DisasContext * ctx, int vreg, TCGv addr)
-{
- int i;
- TCGv temp = tcg_temp_new();
- TCGv dest = tcg_temp_new();
-
- get_vreg64(dest, vreg, 0);
- tcg_gen_qemu_st_i64(dest, addr, ctx->mem_idx, MO_TEUQ);
-
- for (i = 1; i < 4; i++) {
- tcg_gen_addi_tl(temp, addr, 8 * i);
- get_vreg64(dest, vreg, i);
- tcg_gen_qemu_st_i64(dest, temp, ctx->mem_idx, MO_TEUQ);
- }
-}
-
-TRANS(xvld, LASX, gen_lasx_memory, gen_xvld)
-TRANS(xvst, LASX, gen_lasx_memory, gen_xvst)
-
-static bool gen_lasx_memoryx(DisasContext *ctx, arg_vrr *a,
- void (*func)(DisasContext*, int, TCGv))
-{
- TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
- TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE);
- TCGv addr = tcg_temp_new();
-
- if (!check_vec(ctx, 32)) {
- return true;
- }
-
- tcg_gen_add_tl(addr, src1, src2);
- func(ctx, a->vd, addr);
-
- return true;
-}
-
-TRANS(xvldx, LASX, gen_lasx_memoryx, gen_xvld)
-TRANS(xvstx, LASX, gen_lasx_memoryx, gen_xvst)
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (c) 2021 Loongson Technology Corporation Limited
- *
- * Helpers for IOCSR reads/writes
- */
-
-#include "qemu/osdep.h"
-#include "cpu.h"
-#include "qemu/host-utils.h"
-#include "exec/helper-proto.h"
-#include "exec/exec-all.h"
-#include "exec/cpu_ldst.h"
-
-#define GET_MEMTXATTRS(cas) \
- ((MemTxAttrs){.requester_id = env_cpu(cas)->cpu_index})
-
-uint64_t helper_iocsrrd_b(CPULoongArchState *env, target_ulong r_addr)
-{
- return address_space_ldub(&env->address_space_iocsr, r_addr,
- GET_MEMTXATTRS(env), NULL);
-}
-
-uint64_t helper_iocsrrd_h(CPULoongArchState *env, target_ulong r_addr)
-{
- return address_space_lduw(&env->address_space_iocsr, r_addr,
- GET_MEMTXATTRS(env), NULL);
-}
-
-uint64_t helper_iocsrrd_w(CPULoongArchState *env, target_ulong r_addr)
-{
- return address_space_ldl(&env->address_space_iocsr, r_addr,
- GET_MEMTXATTRS(env), NULL);
-}
-
-uint64_t helper_iocsrrd_d(CPULoongArchState *env, target_ulong r_addr)
-{
- return address_space_ldq(&env->address_space_iocsr, r_addr,
- GET_MEMTXATTRS(env), NULL);
-}
-
-void helper_iocsrwr_b(CPULoongArchState *env, target_ulong w_addr,
- target_ulong val)
-{
- address_space_stb(&env->address_space_iocsr, w_addr,
- val, GET_MEMTXATTRS(env), NULL);
-}
-
-void helper_iocsrwr_h(CPULoongArchState *env, target_ulong w_addr,
- target_ulong val)
-{
- address_space_stw(&env->address_space_iocsr, w_addr,
- val, GET_MEMTXATTRS(env), NULL);
-}
-
-void helper_iocsrwr_w(CPULoongArchState *env, target_ulong w_addr,
- target_ulong val)
-{
- address_space_stl(&env->address_space_iocsr, w_addr,
- val, GET_MEMTXATTRS(env), NULL);
-}
-
-void helper_iocsrwr_d(CPULoongArchState *env, target_ulong w_addr,
- target_ulong val)
-{
- address_space_stq(&env->address_space_iocsr, w_addr,
- val, GET_MEMTXATTRS(env), NULL);
-}
'cpu.c',
'gdbstub.c',
))
-loongarch_tcg_ss = ss.source_set()
-loongarch_tcg_ss.add(gen)
-loongarch_tcg_ss.add(files(
- 'fpu_helper.c',
- 'op_helper.c',
- 'translate.c',
- 'vec_helper.c',
-))
-loongarch_tcg_ss.add(zlib)
loongarch_system_ss = ss.source_set()
loongarch_system_ss.add(files(
'loongarch-qmp-cmds.c',
'machine.c',
- 'tlb_helper.c',
- 'constant_timer.c',
- 'csr_helper.c',
- 'iocsr_helper.c',
))
common_ss.add(when: 'CONFIG_LOONGARCH_DIS', if_true: [files('disas.c'), gen])
-loongarch_ss.add_all(when: 'CONFIG_TCG', if_true: [loongarch_tcg_ss])
+subdir('tcg')
target_arch += {'loongarch': loongarch_ss}
target_system_arch += {'loongarch': loongarch_system_ss}
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * LoongArch emulation helpers for QEMU.
- *
- * Copyright (c) 2021 Loongson Technology Corporation Limited
- */
-
-#include "qemu/osdep.h"
-#include "qemu/log.h"
-#include "cpu.h"
-#include "qemu/host-utils.h"
-#include "exec/helper-proto.h"
-#include "exec/exec-all.h"
-#include "exec/cpu_ldst.h"
-#include "internals.h"
-#include "qemu/crc32c.h"
-#include <zlib.h>
-#include "cpu-csr.h"
-
-/* Exceptions helpers */
-void helper_raise_exception(CPULoongArchState *env, uint32_t exception)
-{
- do_raise_exception(env, exception, GETPC());
-}
-
-target_ulong helper_bitrev_w(target_ulong rj)
-{
- return (int32_t)revbit32(rj);
-}
-
-target_ulong helper_bitrev_d(target_ulong rj)
-{
- return revbit64(rj);
-}
-
-target_ulong helper_bitswap(target_ulong v)
-{
- v = ((v >> 1) & (target_ulong)0x5555555555555555ULL) |
- ((v & (target_ulong)0x5555555555555555ULL) << 1);
- v = ((v >> 2) & (target_ulong)0x3333333333333333ULL) |
- ((v & (target_ulong)0x3333333333333333ULL) << 2);
- v = ((v >> 4) & (target_ulong)0x0F0F0F0F0F0F0F0FULL) |
- ((v & (target_ulong)0x0F0F0F0F0F0F0F0FULL) << 4);
- return v;
-}
-
-/* loongarch assert op */
-void helper_asrtle_d(CPULoongArchState *env, target_ulong rj, target_ulong rk)
-{
- if (rj > rk) {
- env->CSR_BADV = rj;
- do_raise_exception(env, EXCCODE_BCE, GETPC());
- }
-}
-
-void helper_asrtgt_d(CPULoongArchState *env, target_ulong rj, target_ulong rk)
-{
- if (rj <= rk) {
- env->CSR_BADV = rj;
- do_raise_exception(env, EXCCODE_BCE, GETPC());
- }
-}
-
-target_ulong helper_crc32(target_ulong val, target_ulong m, uint64_t sz)
-{
- uint8_t buf[8];
- target_ulong mask = ((sz * 8) == 64) ? -1ULL : ((1ULL << (sz * 8)) - 1);
-
- m &= mask;
- stq_le_p(buf, m);
- return (int32_t) (crc32(val ^ 0xffffffff, buf, sz) ^ 0xffffffff);
-}
-
-target_ulong helper_crc32c(target_ulong val, target_ulong m, uint64_t sz)
-{
- uint8_t buf[8];
- target_ulong mask = ((sz * 8) == 64) ? -1ULL : ((1ULL << (sz * 8)) - 1);
- m &= mask;
- stq_le_p(buf, m);
- return (int32_t) (crc32c(val, buf, sz) ^ 0xffffffff);
-}
-
-target_ulong helper_cpucfg(CPULoongArchState *env, target_ulong rj)
-{
- return rj >= ARRAY_SIZE(env->cpucfg) ? 0 : env->cpucfg[rj];
-}
-
-uint64_t helper_rdtime_d(CPULoongArchState *env)
-{
-#ifdef CONFIG_USER_ONLY
- return cpu_get_host_ticks();
-#else
- uint64_t plv;
- LoongArchCPU *cpu = env_archcpu(env);
-
- plv = FIELD_EX64(env->CSR_CRMD, CSR_CRMD, PLV);
- if (extract64(env->CSR_MISC, R_CSR_MISC_DRDTL_SHIFT + plv, 1)) {
- do_raise_exception(env, EXCCODE_IPE, GETPC());
- }
-
- return cpu_loongarch_get_constant_timer_counter(cpu);
-#endif
-}
-
-#ifndef CONFIG_USER_ONLY
-void helper_ertn(CPULoongArchState *env)
-{
- uint64_t csr_pplv, csr_pie;
- if (FIELD_EX64(env->CSR_TLBRERA, CSR_TLBRERA, ISTLBR)) {
- csr_pplv = FIELD_EX64(env->CSR_TLBRPRMD, CSR_TLBRPRMD, PPLV);
- csr_pie = FIELD_EX64(env->CSR_TLBRPRMD, CSR_TLBRPRMD, PIE);
-
- env->CSR_TLBRERA = FIELD_DP64(env->CSR_TLBRERA, CSR_TLBRERA, ISTLBR, 0);
- env->CSR_CRMD = FIELD_DP64(env->CSR_CRMD, CSR_CRMD, DA, 0);
- env->CSR_CRMD = FIELD_DP64(env->CSR_CRMD, CSR_CRMD, PG, 1);
- set_pc(env, env->CSR_TLBRERA);
- qemu_log_mask(CPU_LOG_INT, "%s: TLBRERA " TARGET_FMT_lx "\n",
- __func__, env->CSR_TLBRERA);
- } else {
- csr_pplv = FIELD_EX64(env->CSR_PRMD, CSR_PRMD, PPLV);
- csr_pie = FIELD_EX64(env->CSR_PRMD, CSR_PRMD, PIE);
-
- set_pc(env, env->CSR_ERA);
- qemu_log_mask(CPU_LOG_INT, "%s: ERA " TARGET_FMT_lx "\n",
- __func__, env->CSR_ERA);
- }
- env->CSR_CRMD = FIELD_DP64(env->CSR_CRMD, CSR_CRMD, PLV, csr_pplv);
- env->CSR_CRMD = FIELD_DP64(env->CSR_CRMD, CSR_CRMD, IE, csr_pie);
-
- env->lladdr = 1;
-}
-
-void helper_idle(CPULoongArchState *env)
-{
- CPUState *cs = env_cpu(env);
-
- cs->halted = 1;
- do_raise_exception(env, EXCP_HLT, 0);
-}
-#endif
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * QEMU LoongArch constant timer support
+ *
+ * Copyright (c) 2021 Loongson Technology Corporation Limited
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/timer.h"
+#include "cpu.h"
+#include "internals.h"
+#include "cpu-csr.h"
+
+#define TIMER_PERIOD 10 /* 10 ns period for 100 MHz frequency */
+#define CONSTANT_TIMER_TICK_MASK 0xfffffffffffcUL
+#define CONSTANT_TIMER_ENABLE 0x1UL
+
+uint64_t cpu_loongarch_get_constant_timer_counter(LoongArchCPU *cpu)
+{
+ return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) / TIMER_PERIOD;
+}
+
+uint64_t cpu_loongarch_get_constant_timer_ticks(LoongArchCPU *cpu)
+{
+ uint64_t now, expire;
+
+ now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+ expire = timer_expire_time_ns(&cpu->timer);
+
+ return (expire - now) / TIMER_PERIOD;
+}
+
+void cpu_loongarch_store_constant_timer_config(LoongArchCPU *cpu,
+ uint64_t value)
+{
+ CPULoongArchState *env = &cpu->env;
+ uint64_t now, next;
+
+ env->CSR_TCFG = value;
+ if (value & CONSTANT_TIMER_ENABLE) {
+ now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+ next = now + (value & CONSTANT_TIMER_TICK_MASK) * TIMER_PERIOD;
+ timer_mod(&cpu->timer, next);
+ } else {
+ timer_del(&cpu->timer);
+ }
+}
+
+void loongarch_constant_timer_cb(void *opaque)
+{
+ LoongArchCPU *cpu = opaque;
+ CPULoongArchState *env = &cpu->env;
+ uint64_t now, next;
+
+ if (FIELD_EX64(env->CSR_TCFG, CSR_TCFG, PERIODIC)) {
+ now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+ next = now + (env->CSR_TCFG & CONSTANT_TIMER_TICK_MASK) * TIMER_PERIOD;
+ timer_mod(&cpu->timer, next);
+ } else {
+ env->CSR_TCFG = FIELD_DP64(env->CSR_TCFG, CSR_TCFG, EN, 0);
+ }
+
+ loongarch_cpu_set_irq(opaque, IRQ_TIMER, 1);
+}
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * LoongArch emulation helpers for CSRs
+ *
+ * Copyright (c) 2021 Loongson Technology Corporation Limited
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/main-loop.h"
+#include "cpu.h"
+#include "internals.h"
+#include "qemu/host-utils.h"
+#include "exec/helper-proto.h"
+#include "exec/exec-all.h"
+#include "exec/cpu_ldst.h"
+#include "hw/irq.h"
+#include "cpu-csr.h"
+
+target_ulong helper_csrrd_pgd(CPULoongArchState *env)
+{
+ int64_t v;
+
+ if (env->CSR_TLBRERA & 0x1) {
+ v = env->CSR_TLBRBADV;
+ } else {
+ v = env->CSR_BADV;
+ }
+
+ if ((v >> 63) & 0x1) {
+ v = env->CSR_PGDH;
+ } else {
+ v = env->CSR_PGDL;
+ }
+
+ return v;
+}
+
+target_ulong helper_csrrd_cpuid(CPULoongArchState *env)
+{
+ LoongArchCPU *lac = env_archcpu(env);
+
+ env->CSR_CPUID = CPU(lac)->cpu_index;
+
+ return env->CSR_CPUID;
+}
+
+target_ulong helper_csrrd_tval(CPULoongArchState *env)
+{
+ LoongArchCPU *cpu = env_archcpu(env);
+
+ return cpu_loongarch_get_constant_timer_ticks(cpu);
+}
+
+target_ulong helper_csrwr_estat(CPULoongArchState *env, target_ulong val)
+{
+ int64_t old_v = env->CSR_ESTAT;
+
+ /* Only IS[1:0] can be written */
+ env->CSR_ESTAT = deposit64(env->CSR_ESTAT, 0, 2, val);
+
+ return old_v;
+}
+
+target_ulong helper_csrwr_asid(CPULoongArchState *env, target_ulong val)
+{
+ int64_t old_v = env->CSR_ASID;
+
+ /* Only the ASID field of CSR_ASID can be written */
+ env->CSR_ASID = deposit64(env->CSR_ASID, 0, 10, val);
+ if (old_v != env->CSR_ASID) {
+ tlb_flush(env_cpu(env));
+ }
+ return old_v;
+}
+
+target_ulong helper_csrwr_tcfg(CPULoongArchState *env, target_ulong val)
+{
+ LoongArchCPU *cpu = env_archcpu(env);
+ int64_t old_v = env->CSR_TCFG;
+
+ cpu_loongarch_store_constant_timer_config(cpu, val);
+
+ return old_v;
+}
+
+target_ulong helper_csrwr_ticlr(CPULoongArchState *env, target_ulong val)
+{
+ LoongArchCPU *cpu = env_archcpu(env);
+ int64_t old_v = 0;
+
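+ /* Writing 1 to bit 0 (CLR) acknowledges and clears the pending timer interrupt. */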
+ if (val & 0x1) {
+ qemu_mutex_lock_iothread();
+ loongarch_cpu_set_irq(cpu, IRQ_TIMER, 0);
+ qemu_mutex_unlock_iothread();
+ }
+ return old_v;
+}
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * LoongArch floating point emulation helpers for QEMU
+ *
+ * Copyright (c) 2021 Loongson Technology Corporation Limited
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "exec/helper-proto.h"
+#include "exec/exec-all.h"
+#include "exec/cpu_ldst.h"
+#include "fpu/softfloat.h"
+#include "internals.h"
+
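+/*
+ * Single-precision values are NaN-boxed in the 64-bit FPR: the value sits in
+ * the low 32 bits and the high 32 bits are set to all ones.
+ */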
+static inline uint64_t nanbox_s(float32 fp)
+{
+ return fp | MAKE_64BIT_MASK(32, 32);
+}
+
+/* Convert the LoongArch rounding mode in fcsr0 to the IEEE softfloat rounding mode */
+static const FloatRoundMode ieee_rm[4] = {
+ float_round_nearest_even,
+ float_round_to_zero,
+ float_round_up,
+ float_round_down
+};
+
+void restore_fp_status(CPULoongArchState *env)
+{
+ set_float_rounding_mode(ieee_rm[(env->fcsr0 >> FCSR0_RM) & 0x3],
+ &env->fp_status);
+ set_flush_to_zero(0, &env->fp_status);
+}
+
+int ieee_ex_to_loongarch(int xcpt)
+{
+ int ret = 0;
+ if (xcpt & float_flag_invalid) {
+ ret |= FP_INVALID;
+ }
+ if (xcpt & float_flag_overflow) {
+ ret |= FP_OVERFLOW;
+ }
+ if (xcpt & float_flag_underflow) {
+ ret |= FP_UNDERFLOW;
+ }
+ if (xcpt & float_flag_divbyzero) {
+ ret |= FP_DIV0;
+ }
+ if (xcpt & float_flag_inexact) {
+ ret |= FP_INEXACT;
+ }
+ return ret;
+}
+
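+/*
+ * Fold the softfloat exception flags accumulated in fp_status (minus the
+ * masked bits) into FCSR0: record them as the cause bits, raise EXCCODE_FPE
+ * if a corresponding enable bit is set, otherwise accumulate the sticky flags.
+ */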
+static void update_fcsr0_mask(CPULoongArchState *env, uintptr_t pc, int mask)
+{
+ int flags = get_float_exception_flags(&env->fp_status);
+
+ set_float_exception_flags(0, &env->fp_status);
+
+ flags &= ~mask;
+
+ if (!flags) {
+ SET_FP_CAUSE(env->fcsr0, flags);
+ return;
+ } else {
+ flags = ieee_ex_to_loongarch(flags);
+ SET_FP_CAUSE(env->fcsr0, flags);
+ }
+
+ if (GET_FP_ENABLES(env->fcsr0) & flags) {
+ do_raise_exception(env, EXCCODE_FPE, pc);
+ } else {
+ UPDATE_FP_FLAGS(env->fcsr0, flags);
+ }
+}
+
+static void update_fcsr0(CPULoongArchState *env, uintptr_t pc)
+{
+ update_fcsr0_mask(env, pc, 0);
+}
+
+uint64_t helper_fadd_s(CPULoongArchState *env, uint64_t fj, uint64_t fk)
+{
+ uint64_t fd;
+
+ fd = nanbox_s(float32_add((uint32_t)fj, (uint32_t)fk, &env->fp_status));
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
+uint64_t helper_fadd_d(CPULoongArchState *env, uint64_t fj, uint64_t fk)
+{
+ uint64_t fd;
+
+ fd = float64_add(fj, fk, &env->fp_status);
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
+uint64_t helper_fsub_s(CPULoongArchState *env, uint64_t fj, uint64_t fk)
+{
+ uint64_t fd;
+
+ fd = nanbox_s(float32_sub((uint32_t)fj, (uint32_t)fk, &env->fp_status));
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
+uint64_t helper_fsub_d(CPULoongArchState *env, uint64_t fj, uint64_t fk)
+{
+ uint64_t fd;
+
+ fd = float64_sub(fj, fk, &env->fp_status);
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
+uint64_t helper_fmul_s(CPULoongArchState *env, uint64_t fj, uint64_t fk)
+{
+ uint64_t fd;
+
+ fd = nanbox_s(float32_mul((uint32_t)fj, (uint32_t)fk, &env->fp_status));
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
+uint64_t helper_fmul_d(CPULoongArchState *env, uint64_t fj, uint64_t fk)
+{
+ uint64_t fd;
+
+ fd = float64_mul(fj, fk, &env->fp_status);
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
+uint64_t helper_fdiv_s(CPULoongArchState *env, uint64_t fj, uint64_t fk)
+{
+ uint64_t fd;
+
+ fd = nanbox_s(float32_div((uint32_t)fj, (uint32_t)fk, &env->fp_status));
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
+uint64_t helper_fdiv_d(CPULoongArchState *env, uint64_t fj, uint64_t fk)
+{
+ uint64_t fd;
+
+ fd = float64_div(fj, fk, &env->fp_status);
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
+uint64_t helper_fmax_s(CPULoongArchState *env, uint64_t fj, uint64_t fk)
+{
+ uint64_t fd;
+
+ fd = nanbox_s(float32_maxnum((uint32_t)fj, (uint32_t)fk, &env->fp_status));
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
+uint64_t helper_fmax_d(CPULoongArchState *env, uint64_t fj, uint64_t fk)
+{
+ uint64_t fd;
+
+ fd = float64_maxnum(fj, fk, &env->fp_status);
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
+uint64_t helper_fmin_s(CPULoongArchState *env, uint64_t fj, uint64_t fk)
+{
+ uint64_t fd;
+
+ fd = nanbox_s(float32_minnum((uint32_t)fj, (uint32_t)fk, &env->fp_status));
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
+uint64_t helper_fmin_d(CPULoongArchState *env, uint64_t fj, uint64_t fk)
+{
+ uint64_t fd;
+
+ fd = float64_minnum(fj, fk, &env->fp_status);
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
+uint64_t helper_fmaxa_s(CPULoongArchState *env, uint64_t fj, uint64_t fk)
+{
+ uint64_t fd;
+
+ fd = nanbox_s(float32_maxnummag((uint32_t)fj,
+ (uint32_t)fk, &env->fp_status));
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
+uint64_t helper_fmaxa_d(CPULoongArchState *env, uint64_t fj, uint64_t fk)
+{
+ uint64_t fd;
+
+ fd = float64_maxnummag(fj, fk, &env->fp_status);
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
+uint64_t helper_fmina_s(CPULoongArchState *env, uint64_t fj, uint64_t fk)
+{
+ uint64_t fd;
+
+ fd = nanbox_s(float32_minnummag((uint32_t)fj,
+ (uint32_t)fk, &env->fp_status));
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
+uint64_t helper_fmina_d(CPULoongArchState *env, uint64_t fj, uint64_t fk)
+{
+ uint64_t fd;
+
+ fd = float64_minnummag(fj, fk, &env->fp_status);
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
+uint64_t helper_fscaleb_s(CPULoongArchState *env, uint64_t fj, uint64_t fk)
+{
+ uint64_t fd;
+ int32_t n = (int32_t)fk;
+
+ fd = nanbox_s(float32_scalbn((uint32_t)fj,
+ n > 0x200 ? 0x200 :
+ n < -0x200 ? -0x200 : n,
+ &env->fp_status));
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
+uint64_t helper_fscaleb_d(CPULoongArchState *env, uint64_t fj, uint64_t fk)
+{
+ uint64_t fd;
+ int64_t n = (int64_t)fk;
+
+ fd = float64_scalbn(fj,
+ n > 0x1000 ? 0x1000 :
+ n < -0x1000 ? -0x1000 : n,
+ &env->fp_status);
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
+uint64_t helper_fsqrt_s(CPULoongArchState *env, uint64_t fj)
+{
+ uint64_t fd;
+
+ fd = nanbox_s(float32_sqrt((uint32_t)fj, &env->fp_status));
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
+uint64_t helper_fsqrt_d(CPULoongArchState *env, uint64_t fj)
+{
+ uint64_t fd;
+
+ fd = float64_sqrt(fj, &env->fp_status);
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
+uint64_t helper_frecip_s(CPULoongArchState *env, uint64_t fj)
+{
+ uint64_t fd;
+
+ fd = nanbox_s(float32_div(float32_one, (uint32_t)fj, &env->fp_status));
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
+uint64_t helper_frecip_d(CPULoongArchState *env, uint64_t fj)
+{
+ uint64_t fd;
+
+ fd = float64_div(float64_one, fj, &env->fp_status);
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
+uint64_t helper_frsqrt_s(CPULoongArchState *env, uint64_t fj)
+{
+ uint64_t fd;
+ uint32_t fp;
+
+ fp = float32_sqrt((uint32_t)fj, &env->fp_status);
+ fd = nanbox_s(float32_div(float32_one, fp, &env->fp_status));
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
+uint64_t helper_frsqrt_d(CPULoongArchState *env, uint64_t fj)
+{
+ uint64_t fp, fd;
+
+ fp = float64_sqrt(fj, &env->fp_status);
+ fd = float64_div(float64_one, fp, &env->fp_status);
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
+uint64_t helper_flogb_s(CPULoongArchState *env, uint64_t fj)
+{
+ uint64_t fd;
+ uint32_t fp;
+ float_status *status = &env->fp_status;
+ FloatRoundMode old_mode = get_float_rounding_mode(status);
+
+ set_float_rounding_mode(float_round_down, status);
+ fp = float32_log2((uint32_t)fj, status);
+ fd = nanbox_s(float32_round_to_int(fp, status));
+ set_float_rounding_mode(old_mode, status);
+ update_fcsr0_mask(env, GETPC(), float_flag_inexact);
+ return fd;
+}
+
+uint64_t helper_flogb_d(CPULoongArchState *env, uint64_t fj)
+{
+ uint64_t fd;
+ float_status *status = &env->fp_status;
+ FloatRoundMode old_mode = get_float_rounding_mode(status);
+
+ set_float_rounding_mode(float_round_down, status);
+ fd = float64_log2(fj, status);
+ fd = float64_round_to_int(fd, status);
+ set_float_rounding_mode(old_mode, status);
+ update_fcsr0_mask(env, GETPC(), float_flag_inexact);
+ return fd;
+}
+
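+/*
+ * FCLASS result bits: 0 = sNaN, 1 = qNaN, 2 = -inf, 3 = -normal,
+ * 4 = -subnormal, 5 = -zero, 6 = +inf, 7 = +normal, 8 = +subnormal, 9 = +zero.
+ */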
+uint64_t helper_fclass_s(CPULoongArchState *env, uint64_t fj)
+{
+ float32 f = fj;
+ bool sign = float32_is_neg(f);
+
+ if (float32_is_infinity(f)) {
+ return sign ? 1 << 2 : 1 << 6;
+ } else if (float32_is_zero(f)) {
+ return sign ? 1 << 5 : 1 << 9;
+ } else if (float32_is_zero_or_denormal(f)) {
+ return sign ? 1 << 4 : 1 << 8;
+ } else if (float32_is_any_nan(f)) {
+ float_status s = { }; /* for snan_bit_is_one */
+ return float32_is_quiet_nan(f, &s) ? 1 << 1 : 1 << 0;
+ } else {
+ return sign ? 1 << 3 : 1 << 7;
+ }
+}
+
+uint64_t helper_fclass_d(CPULoongArchState *env, uint64_t fj)
+{
+ float64 f = fj;
+ bool sign = float64_is_neg(f);
+
+ if (float64_is_infinity(f)) {
+ return sign ? 1 << 2 : 1 << 6;
+ } else if (float64_is_zero(f)) {
+ return sign ? 1 << 5 : 1 << 9;
+ } else if (float64_is_zero_or_denormal(f)) {
+ return sign ? 1 << 4 : 1 << 8;
+ } else if (float64_is_any_nan(f)) {
+ float_status s = { }; /* for snan_bit_is_one */
+ return float64_is_quiet_nan(f, &s) ? 1 << 1 : 1 << 0;
+ } else {
+ return sign ? 1 << 3 : 1 << 7;
+ }
+}
+
+uint64_t helper_fmuladd_s(CPULoongArchState *env, uint64_t fj,
+ uint64_t fk, uint64_t fa, uint32_t flag)
+{
+ uint64_t fd;
+
+ fd = nanbox_s(float32_muladd((uint32_t)fj, (uint32_t)fk,
+ (uint32_t)fa, flag, &env->fp_status));
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
+uint64_t helper_fmuladd_d(CPULoongArchState *env, uint64_t fj,
+ uint64_t fk, uint64_t fa, uint32_t flag)
+{
+ uint64_t fd;
+
+ fd = float64_muladd(fj, fk, fa, flag, &env->fp_status);
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
+static uint64_t fcmp_common(CPULoongArchState *env, FloatRelation cmp,
+ uint32_t flags)
+{
+ bool ret;
+
+ switch (cmp) {
+ case float_relation_less:
+ ret = (flags & FCMP_LT);
+ break;
+ case float_relation_equal:
+ ret = (flags & FCMP_EQ);
+ break;
+ case float_relation_greater:
+ ret = (flags & FCMP_GT);
+ break;
+ case float_relation_unordered:
+ ret = (flags & FCMP_UN);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ update_fcsr0(env, GETPC());
+
+ return ret;
+}
+
+/* fcmp_cXXX_s */
+uint64_t helper_fcmp_c_s(CPULoongArchState *env, uint64_t fj,
+ uint64_t fk, uint32_t flags)
+{
+ FloatRelation cmp = float32_compare_quiet((uint32_t)fj,
+ (uint32_t)fk, &env->fp_status);
+ return fcmp_common(env, cmp, flags);
+}
+
+/* fcmp_sXXX_s */
+uint64_t helper_fcmp_s_s(CPULoongArchState *env, uint64_t fj,
+ uint64_t fk, uint32_t flags)
+{
+ FloatRelation cmp = float32_compare((uint32_t)fj,
+ (uint32_t)fk, &env->fp_status);
+ return fcmp_common(env, cmp, flags);
+}
+
+/* fcmp_cXXX_d */
+uint64_t helper_fcmp_c_d(CPULoongArchState *env, uint64_t fj,
+ uint64_t fk, uint32_t flags)
+{
+ FloatRelation cmp = float64_compare_quiet(fj, fk, &env->fp_status);
+ return fcmp_common(env, cmp, flags);
+}
+
+/* fcmp_sXXX_d */
+uint64_t helper_fcmp_s_d(CPULoongArchState *env, uint64_t fj,
+ uint64_t fk, uint32_t flags)
+{
+ FloatRelation cmp = float64_compare(fj, fk, &env->fp_status);
+ return fcmp_common(env, cmp, flags);
+}
+
+/* floating point conversion */
+uint64_t helper_fcvt_s_d(CPULoongArchState *env, uint64_t fj)
+{
+ uint64_t fd;
+
+ fd = nanbox_s(float64_to_float32(fj, &env->fp_status));
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
+uint64_t helper_fcvt_d_s(CPULoongArchState *env, uint64_t fj)
+{
+ uint64_t fd;
+
+ fd = float32_to_float64((uint32_t)fj, &env->fp_status);
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
+uint64_t helper_ffint_s_w(CPULoongArchState *env, uint64_t fj)
+{
+ uint64_t fd;
+
+ fd = nanbox_s(int32_to_float32((int32_t)fj, &env->fp_status));
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
+uint64_t helper_ffint_s_l(CPULoongArchState *env, uint64_t fj)
+{
+ uint64_t fd;
+
+ fd = nanbox_s(int64_to_float32(fj, &env->fp_status));
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
+uint64_t helper_ffint_d_w(CPULoongArchState *env, uint64_t fj)
+{
+ uint64_t fd;
+
+ fd = int32_to_float64((int32_t)fj, &env->fp_status);
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
+uint64_t helper_ffint_d_l(CPULoongArchState *env, uint64_t fj)
+{
+ uint64_t fd;
+
+ fd = int64_to_float64(fj, &env->fp_status);
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
+uint64_t helper_frint_s(CPULoongArchState *env, uint64_t fj)
+{
+ uint64_t fd;
+
+ fd = (uint64_t)(float32_round_to_int((uint32_t)fj, &env->fp_status));
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
+uint64_t helper_frint_d(CPULoongArchState *env, uint64_t fj)
+{
+ uint64_t fd;
+
+ fd = float64_round_to_int(fj, &env->fp_status);
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
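+/* ftintrm.*: float to integer, rounding toward minus infinity; NaN inputs yield 0. */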
+uint64_t helper_ftintrm_l_d(CPULoongArchState *env, uint64_t fj)
+{
+ uint64_t fd;
+ FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status);
+
+ set_float_rounding_mode(float_round_down, &env->fp_status);
+ fd = float64_to_int64(fj, &env->fp_status);
+ set_float_rounding_mode(old_mode, &env->fp_status);
+
+ if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) {
+ if (float64_is_any_nan(fj)) {
+ fd = 0;
+ }
+ }
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
+uint64_t helper_ftintrm_l_s(CPULoongArchState *env, uint64_t fj)
+{
+ uint64_t fd;
+ FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status);
+
+ set_float_rounding_mode(float_round_down, &env->fp_status);
+ fd = float32_to_int64((uint32_t)fj, &env->fp_status);
+ set_float_rounding_mode(old_mode, &env->fp_status);
+
+ if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) {
+ if (float32_is_any_nan((uint32_t)fj)) {
+ fd = 0;
+ }
+ }
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
+uint64_t helper_ftintrm_w_d(CPULoongArchState *env, uint64_t fj)
+{
+ uint64_t fd;
+ FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status);
+
+ set_float_rounding_mode(float_round_down, &env->fp_status);
+ fd = (uint64_t)float64_to_int32(fj, &env->fp_status);
+ set_float_rounding_mode(old_mode, &env->fp_status);
+
+ if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) {
+ if (float64_is_any_nan(fj)) {
+ fd = 0;
+ }
+ }
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
+uint64_t helper_ftintrm_w_s(CPULoongArchState *env, uint64_t fj)
+{
+ uint64_t fd;
+ FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status);
+
+ set_float_rounding_mode(float_round_down, &env->fp_status);
+ fd = (uint64_t)float32_to_int32((uint32_t)fj, &env->fp_status);
+ set_float_rounding_mode(old_mode, &env->fp_status);
+
+ if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) {
+ if (float32_is_any_nan((uint32_t)fj)) {
+ fd = 0;
+ }
+ }
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
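+/* ftintrp.*: float to integer, rounding toward plus infinity; NaN inputs yield 0. */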
+uint64_t helper_ftintrp_l_d(CPULoongArchState *env, uint64_t fj)
+{
+ uint64_t fd;
+ FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status);
+
+ set_float_rounding_mode(float_round_up, &env->fp_status);
+ fd = float64_to_int64(fj, &env->fp_status);
+ set_float_rounding_mode(old_mode, &env->fp_status);
+
+ if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) {
+ if (float64_is_any_nan(fj)) {
+ fd = 0;
+ }
+ }
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
+uint64_t helper_ftintrp_l_s(CPULoongArchState *env, uint64_t fj)
+{
+ uint64_t fd;
+ FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status);
+
+ set_float_rounding_mode(float_round_up, &env->fp_status);
+ fd = float32_to_int64((uint32_t)fj, &env->fp_status);
+ set_float_rounding_mode(old_mode, &env->fp_status);
+
+ if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) {
+ if (float32_is_any_nan((uint32_t)fj)) {
+ fd = 0;
+ }
+ }
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
+uint64_t helper_ftintrp_w_d(CPULoongArchState *env, uint64_t fj)
+{
+ uint64_t fd;
+ FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status);
+
+ set_float_rounding_mode(float_round_up, &env->fp_status);
+ fd = (uint64_t)float64_to_int32(fj, &env->fp_status);
+ set_float_rounding_mode(old_mode, &env->fp_status);
+
+ if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) {
+ if (float64_is_any_nan(fj)) {
+ fd = 0;
+ }
+ }
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
+uint64_t helper_ftintrp_w_s(CPULoongArchState *env, uint64_t fj)
+{
+ uint64_t fd;
+ FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status);
+
+ set_float_rounding_mode(float_round_up, &env->fp_status);
+ fd = (uint64_t)float32_to_int32((uint32_t)fj, &env->fp_status);
+ set_float_rounding_mode(old_mode, &env->fp_status);
+
+ if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) {
+ if (float32_is_any_nan((uint32_t)fj)) {
+ fd = 0;
+ }
+ }
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
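+/* ftintrz.*: float to integer, rounding toward zero (truncation); NaN inputs yield 0. */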
+uint64_t helper_ftintrz_l_d(CPULoongArchState *env, uint64_t fj)
+{
+ uint64_t fd;
+ FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status);
+
+ fd = float64_to_int64_round_to_zero(fj, &env->fp_status);
+ set_float_rounding_mode(old_mode, &env->fp_status);
+
+ if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) {
+ if (float64_is_any_nan(fj)) {
+ fd = 0;
+ }
+ }
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
+uint64_t helper_ftintrz_l_s(CPULoongArchState *env, uint64_t fj)
+{
+ uint64_t fd;
+ FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status);
+
+ fd = float32_to_int64_round_to_zero((uint32_t)fj, &env->fp_status);
+ set_float_rounding_mode(old_mode, &env->fp_status);
+
+ if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) {
+ if (float32_is_any_nan((uint32_t)fj)) {
+ fd = 0;
+ }
+ }
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
+uint64_t helper_ftintrz_w_d(CPULoongArchState *env, uint64_t fj)
+{
+ uint64_t fd;
+ FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status);
+
+ fd = (uint64_t)float64_to_int32_round_to_zero(fj, &env->fp_status);
+ set_float_rounding_mode(old_mode, &env->fp_status);
+
+ if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) {
+ if (float64_is_any_nan(fj)) {
+ fd = 0;
+ }
+ }
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
+uint64_t helper_ftintrz_w_s(CPULoongArchState *env, uint64_t fj)
+{
+ uint32_t fd;
+ FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status);
+
+ fd = float32_to_int32_round_to_zero((uint32_t)fj, &env->fp_status);
+ set_float_rounding_mode(old_mode, &env->fp_status);
+
+ if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) {
+ if (float32_is_any_nan((uint32_t)fj)) {
+ fd = 0;
+ }
+ }
+ update_fcsr0(env, GETPC());
+ return (uint64_t)fd;
+}
+
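+/* ftintrne.*: float to integer, rounding to nearest, ties to even; NaN inputs yield 0. */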
+uint64_t helper_ftintrne_l_d(CPULoongArchState *env, uint64_t fj)
+{
+ uint64_t fd;
+ FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status);
+
+ set_float_rounding_mode(float_round_nearest_even, &env->fp_status);
+ fd = float64_to_int64(fj, &env->fp_status);
+ set_float_rounding_mode(old_mode, &env->fp_status);
+
+ if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) {
+ if (float64_is_any_nan(fj)) {
+ fd = 0;
+ }
+ }
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
+uint64_t helper_ftintrne_l_s(CPULoongArchState *env, uint64_t fj)
+{
+ uint64_t fd;
+ FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status);
+
+ set_float_rounding_mode(float_round_nearest_even, &env->fp_status);
+ fd = float32_to_int64((uint32_t)fj, &env->fp_status);
+ set_float_rounding_mode(old_mode, &env->fp_status);
+
+ if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) {
+ if (float32_is_any_nan((uint32_t)fj)) {
+ fd = 0;
+ }
+ }
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
+uint64_t helper_ftintrne_w_d(CPULoongArchState *env, uint64_t fj)
+{
+ uint64_t fd;
+ FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status);
+
+ set_float_rounding_mode(float_round_nearest_even, &env->fp_status);
+ fd = (uint64_t)float64_to_int32(fj, &env->fp_status);
+ set_float_rounding_mode(old_mode, &env->fp_status);
+
+ if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) {
+ if (float64_is_any_nan(fj)) {
+ fd = 0;
+ }
+ }
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
+uint64_t helper_ftintrne_w_s(CPULoongArchState *env, uint64_t fj)
+{
+ uint32_t fd;
+ FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status);
+
+ set_float_rounding_mode(float_round_nearest_even, &env->fp_status);
+ fd = float32_to_int32((uint32_t)fj, &env->fp_status);
+ set_float_rounding_mode(old_mode, &env->fp_status);
+
+ if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) {
+ if (float32_is_any_nan((uint32_t)fj)) {
+ fd = 0;
+ }
+ }
+ update_fcsr0(env, GETPC());
+ return (uint64_t)fd;
+}
+
+uint64_t helper_ftint_l_d(CPULoongArchState *env, uint64_t fj)
+{
+ uint64_t fd;
+
+ fd = float64_to_int64(fj, &env->fp_status);
+ if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) {
+ if (float64_is_any_nan(fj)) {
+ fd = 0;
+ }
+ }
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
+uint64_t helper_ftint_l_s(CPULoongArchState *env, uint64_t fj)
+{
+ uint64_t fd;
+
+ fd = float32_to_int64((uint32_t)fj, &env->fp_status);
+ if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) {
+ if (float32_is_any_nan((uint32_t)fj)) {
+ fd = 0;
+ }
+ }
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
+uint64_t helper_ftint_w_s(CPULoongArchState *env, uint64_t fj)
+{
+ uint64_t fd;
+
+ fd = (uint64_t)float32_to_int32((uint32_t)fj, &env->fp_status);
+ if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) {
+ if (float32_is_any_nan((uint32_t)fj)) {
+ fd = 0;
+ }
+ }
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
+uint64_t helper_ftint_w_d(CPULoongArchState *env, uint64_t fj)
+{
+ uint64_t fd;
+
+ fd = (uint64_t)float64_to_int32(fj, &env->fp_status);
+ if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) {
+ if (float64_is_any_nan(fj)) {
+ fd = 0;
+ }
+ }
+ update_fcsr0(env, GETPC());
+ return fd;
+}
+
+void helper_set_rounding_mode(CPULoongArchState *env)
+{
+ set_float_rounding_mode(ieee_rm[(env->fcsr0 >> FCSR0_RM) & 0x3],
+ &env->fp_status);
+}
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2021 Loongson Technology Corporation Limited
+ */
+
+static bool gen_rrr(DisasContext *ctx, arg_rrr *a,
+ DisasExtend src1_ext, DisasExtend src2_ext,
+ DisasExtend dst_ext, void (*func)(TCGv, TCGv, TCGv))
+{
+ TCGv dest = gpr_dst(ctx, a->rd, dst_ext);
+ TCGv src1 = gpr_src(ctx, a->rj, src1_ext);
+ TCGv src2 = gpr_src(ctx, a->rk, src2_ext);
+
+ func(dest, src1, src2);
+ gen_set_gpr(a->rd, dest, dst_ext);
+
+ return true;
+}
+
+static bool gen_rri_v(DisasContext *ctx, arg_rr_i *a,
+ DisasExtend src_ext, DisasExtend dst_ext,
+ void (*func)(TCGv, TCGv, TCGv))
+{
+ TCGv dest = gpr_dst(ctx, a->rd, dst_ext);
+ TCGv src1 = gpr_src(ctx, a->rj, src_ext);
+ TCGv src2 = tcg_constant_tl(a->imm);
+
+ func(dest, src1, src2);
+ gen_set_gpr(a->rd, dest, dst_ext);
+
+ return true;
+}
+
+static bool gen_rri_c(DisasContext *ctx, arg_rr_i *a,
+ DisasExtend src_ext, DisasExtend dst_ext,
+ void (*func)(TCGv, TCGv, target_long))
+{
+ TCGv dest = gpr_dst(ctx, a->rd, dst_ext);
+ TCGv src1 = gpr_src(ctx, a->rj, src_ext);
+
+ func(dest, src1, a->imm);
+ gen_set_gpr(a->rd, dest, dst_ext);
+
+ return true;
+}
+
+static bool gen_rrr_sa(DisasContext *ctx, arg_rrr_sa *a,
+ DisasExtend src_ext, DisasExtend dst_ext,
+ void (*func)(TCGv, TCGv, TCGv, target_long))
+{
+ TCGv dest = gpr_dst(ctx, a->rd, dst_ext);
+ TCGv src1 = gpr_src(ctx, a->rj, src_ext);
+ TCGv src2 = gpr_src(ctx, a->rk, src_ext);
+
+ func(dest, src1, src2, a->sa);
+ gen_set_gpr(a->rd, dest, dst_ext);
+
+ return true;
+}
+
+static bool trans_lu12i_w(DisasContext *ctx, arg_lu12i_w *a)
+{
+ TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
+
+ tcg_gen_movi_tl(dest, a->imm << 12);
+ gen_set_gpr(a->rd, dest, EXT_NONE);
+
+ return true;
+}
+
+static bool gen_pc(DisasContext *ctx, arg_r_i *a,
+ target_ulong (*func)(target_ulong, int))
+{
+ TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
+ target_ulong addr = make_address_pc(ctx, func(ctx->base.pc_next, a->imm));
+
+ tcg_gen_movi_tl(dest, addr);
+ gen_set_gpr(a->rd, dest, EXT_NONE);
+
+ return true;
+}
+
+static void gen_slt(TCGv dest, TCGv src1, TCGv src2)
+{
+ tcg_gen_setcond_tl(TCG_COND_LT, dest, src1, src2);
+}
+
+static void gen_sltu(TCGv dest, TCGv src1, TCGv src2)
+{
+ tcg_gen_setcond_tl(TCG_COND_LTU, dest, src1, src2);
+}
+
+static void gen_mulh_w(TCGv dest, TCGv src1, TCGv src2)
+{
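+ /*
+ * Both sources are 32-bit values sign- or zero-extended to 64 bits
+ * (mulh_w vs mulh_wu), so the 64-bit product is exact; shifting it
+ * arithmetically right by 32 leaves the high word, sign-extended.
+ */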
+ tcg_gen_mul_i64(dest, src1, src2);
+ tcg_gen_sari_i64(dest, dest, 32);
+}
+
+static void gen_mulh_d(TCGv dest, TCGv src1, TCGv src2)
+{
+ TCGv discard = tcg_temp_new();
+ tcg_gen_muls2_tl(discard, dest, src1, src2);
+}
+
+static void gen_mulh_du(TCGv dest, TCGv src1, TCGv src2)
+{
+ TCGv discard = tcg_temp_new();
+ tcg_gen_mulu2_tl(discard, dest, src1, src2);
+}
+
+static void prep_divisor_d(TCGv ret, TCGv src1, TCGv src2)
+{
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv zero = tcg_constant_tl(0);
+
+ /*
+ * If min / -1, set the divisor to 1.
+ * This avoids potential host overflow trap and produces min.
+ * If x / 0, set the divisor to 1.
+ * This avoids potential host overflow trap;
+ * the required result is undefined.
+ */
+ tcg_gen_setcondi_tl(TCG_COND_EQ, ret, src1, INT64_MIN);
+ tcg_gen_setcondi_tl(TCG_COND_EQ, t0, src2, -1);
+ tcg_gen_setcondi_tl(TCG_COND_EQ, t1, src2, 0);
+ tcg_gen_and_tl(ret, ret, t0);
+ tcg_gen_or_tl(ret, ret, t1);
+ tcg_gen_movcond_tl(TCG_COND_NE, ret, ret, zero, ret, src2);
+}
+
+static void prep_divisor_du(TCGv ret, TCGv src2)
+{
+ TCGv zero = tcg_constant_tl(0);
+ TCGv one = tcg_constant_tl(1);
+
+ /*
+ * If x / 0, set the divisor to 1.
+ * This avoids potential host overflow trap;
+ * the required result is undefined.
+ */
+ tcg_gen_movcond_tl(TCG_COND_EQ, ret, src2, zero, one, src2);
+}
+
+static void gen_div_d(TCGv dest, TCGv src1, TCGv src2)
+{
+ TCGv t0 = tcg_temp_new();
+ prep_divisor_d(t0, src1, src2);
+ tcg_gen_div_tl(dest, src1, t0);
+}
+
+static void gen_rem_d(TCGv dest, TCGv src1, TCGv src2)
+{
+ TCGv t0 = tcg_temp_new();
+ prep_divisor_d(t0, src1, src2);
+ tcg_gen_rem_tl(dest, src1, t0);
+}
+
+static void gen_div_du(TCGv dest, TCGv src1, TCGv src2)
+{
+ TCGv t0 = tcg_temp_new();
+ prep_divisor_du(t0, src2);
+ tcg_gen_divu_tl(dest, src1, t0);
+}
+
+static void gen_rem_du(TCGv dest, TCGv src1, TCGv src2)
+{
+ TCGv t0 = tcg_temp_new();
+ prep_divisor_du(t0, src2);
+ tcg_gen_remu_tl(dest, src1, t0);
+}
+
+static void gen_div_w(TCGv dest, TCGv src1, TCGv src2)
+{
+ TCGv t0 = tcg_temp_new();
+ /* We need not check for integer overflow for div_w. */
+ prep_divisor_du(t0, src2);
+ tcg_gen_div_tl(dest, src1, t0);
+}
+
+static void gen_rem_w(TCGv dest, TCGv src1, TCGv src2)
+{
+ TCGv t0 = tcg_temp_new();
+ /* We need not check for integer overflow for rem_w. */
+ prep_divisor_du(t0, src2);
+ tcg_gen_rem_tl(dest, src1, t0);
+}
+
+static void gen_alsl(TCGv dest, TCGv src1, TCGv src2, target_long sa)
+{
+ TCGv t0 = tcg_temp_new();
+ tcg_gen_shli_tl(t0, src1, sa);
+ tcg_gen_add_tl(dest, t0, src2);
+}
+
+static bool trans_lu32i_d(DisasContext *ctx, arg_lu32i_d *a)
+{
+ TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
+ TCGv src1 = gpr_src(ctx, a->rd, EXT_NONE);
+ TCGv src2 = tcg_constant_tl(a->imm);
+
+ if (!avail_64(ctx)) {
+ return false;
+ }
+
+ tcg_gen_deposit_tl(dest, src1, src2, 32, 32);
+ gen_set_gpr(a->rd, dest, EXT_NONE);
+
+ return true;
+}
+
+static bool trans_lu52i_d(DisasContext *ctx, arg_lu52i_d *a)
+{
+ TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
+ TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
+ TCGv src2 = tcg_constant_tl(a->imm);
+
+ if (!avail_64(ctx)) {
+ return false;
+ }
+
+ tcg_gen_deposit_tl(dest, src1, src2, 52, 12);
+ gen_set_gpr(a->rd, dest, EXT_NONE);
+
+ return true;
+}
+
+static target_ulong gen_pcaddi(target_ulong pc, int imm)
+{
+ return pc + (imm << 2);
+}
+
+static target_ulong gen_pcalau12i(target_ulong pc, int imm)
+{
+ return (pc + (imm << 12)) & ~0xfff;
+}
+
+static target_ulong gen_pcaddu12i(target_ulong pc, int imm)
+{
+ return pc + (imm << 12);
+}
+
+static target_ulong gen_pcaddu18i(target_ulong pc, int imm)
+{
+ return pc + ((target_ulong)(imm) << 18);
+}
+
+static bool trans_addu16i_d(DisasContext *ctx, arg_addu16i_d *a)
+{
+ TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
+ TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
+
+ if (!avail_64(ctx)) {
+ return false;
+ }
+
+ tcg_gen_addi_tl(dest, src1, a->imm << 16);
+ gen_set_gpr(a->rd, dest, EXT_NONE);
+
+ return true;
+}
+
+TRANS(add_w, ALL, gen_rrr, EXT_NONE, EXT_NONE, EXT_SIGN, tcg_gen_add_tl)
+TRANS(add_d, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, tcg_gen_add_tl)
+TRANS(sub_w, ALL, gen_rrr, EXT_NONE, EXT_NONE, EXT_SIGN, tcg_gen_sub_tl)
+TRANS(sub_d, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, tcg_gen_sub_tl)
+TRANS(and, ALL, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, tcg_gen_and_tl)
+TRANS(or, ALL, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, tcg_gen_or_tl)
+TRANS(xor, ALL, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, tcg_gen_xor_tl)
+TRANS(nor, ALL, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, tcg_gen_nor_tl)
+TRANS(andn, ALL, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, tcg_gen_andc_tl)
+TRANS(orn, ALL, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, tcg_gen_orc_tl)
+TRANS(slt, ALL, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_slt)
+TRANS(sltu, ALL, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_sltu)
+TRANS(mul_w, ALL, gen_rrr, EXT_SIGN, EXT_SIGN, EXT_SIGN, tcg_gen_mul_tl)
+TRANS(mul_d, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, tcg_gen_mul_tl)
+TRANS(mulh_w, ALL, gen_rrr, EXT_SIGN, EXT_SIGN, EXT_NONE, gen_mulh_w)
+TRANS(mulh_wu, ALL, gen_rrr, EXT_ZERO, EXT_ZERO, EXT_NONE, gen_mulh_w)
+TRANS(mulh_d, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_mulh_d)
+TRANS(mulh_du, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_mulh_du)
+TRANS(mulw_d_w, 64, gen_rrr, EXT_SIGN, EXT_SIGN, EXT_NONE, tcg_gen_mul_tl)
+TRANS(mulw_d_wu, 64, gen_rrr, EXT_ZERO, EXT_ZERO, EXT_NONE, tcg_gen_mul_tl)
+TRANS(div_w, ALL, gen_rrr, EXT_SIGN, EXT_SIGN, EXT_SIGN, gen_div_w)
+TRANS(mod_w, ALL, gen_rrr, EXT_SIGN, EXT_SIGN, EXT_SIGN, gen_rem_w)
+TRANS(div_wu, ALL, gen_rrr, EXT_ZERO, EXT_ZERO, EXT_SIGN, gen_div_du)
+TRANS(mod_wu, ALL, gen_rrr, EXT_ZERO, EXT_ZERO, EXT_SIGN, gen_rem_du)
+TRANS(div_d, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_div_d)
+TRANS(mod_d, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_rem_d)
+TRANS(div_du, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_div_du)
+TRANS(mod_du, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_rem_du)
+TRANS(slti, ALL, gen_rri_v, EXT_NONE, EXT_NONE, gen_slt)
+TRANS(sltui, ALL, gen_rri_v, EXT_NONE, EXT_NONE, gen_sltu)
+TRANS(addi_w, ALL, gen_rri_c, EXT_NONE, EXT_SIGN, tcg_gen_addi_tl)
+TRANS(addi_d, 64, gen_rri_c, EXT_NONE, EXT_NONE, tcg_gen_addi_tl)
+TRANS(alsl_w, ALL, gen_rrr_sa, EXT_NONE, EXT_SIGN, gen_alsl)
+TRANS(alsl_wu, 64, gen_rrr_sa, EXT_NONE, EXT_ZERO, gen_alsl)
+TRANS(alsl_d, 64, gen_rrr_sa, EXT_NONE, EXT_NONE, gen_alsl)
+TRANS(pcaddi, ALL, gen_pc, gen_pcaddi)
+TRANS(pcalau12i, ALL, gen_pc, gen_pcalau12i)
+TRANS(pcaddu12i, ALL, gen_pc, gen_pcaddu12i)
+TRANS(pcaddu18i, 64, gen_pc, gen_pcaddu18i)
+TRANS(andi, ALL, gen_rri_c, EXT_NONE, EXT_NONE, tcg_gen_andi_tl)
+TRANS(ori, ALL, gen_rri_c, EXT_NONE, EXT_NONE, tcg_gen_ori_tl)
+TRANS(xori, ALL, gen_rri_c, EXT_NONE, EXT_NONE, tcg_gen_xori_tl)
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2021 Loongson Technology Corporation Limited
+ */
+
+static bool gen_ll(DisasContext *ctx, arg_rr_i *a, MemOp mop)
+{
+ TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
+ TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
+ TCGv t0 = make_address_i(ctx, src1, a->imm);
+
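+ /* Load the value and remember the address/data pair for a later SC. */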
+ tcg_gen_qemu_ld_i64(dest, t0, ctx->mem_idx, mop);
+ tcg_gen_st_tl(t0, tcg_env, offsetof(CPULoongArchState, lladdr));
+ tcg_gen_st_tl(dest, tcg_env, offsetof(CPULoongArchState, llval));
+ gen_set_gpr(a->rd, dest, EXT_NONE);
+
+ return true;
+}
+
+static bool gen_sc(DisasContext *ctx, arg_rr_i *a, MemOp mop)
+{
+ TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
+ TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
+ TCGv src2 = gpr_src(ctx, a->rd, EXT_NONE);
+ TCGv t0 = tcg_temp_new();
+ TCGv val = tcg_temp_new();
+
+ TCGLabel *l1 = gen_new_label();
+ TCGLabel *done = gen_new_label();
+
+ tcg_gen_addi_tl(t0, src1, a->imm);
+ tcg_gen_brcond_tl(TCG_COND_EQ, t0, cpu_lladdr, l1);
+ tcg_gen_movi_tl(dest, 0);
+ tcg_gen_br(done);
+
+ gen_set_label(l1);
+ tcg_gen_mov_tl(val, src2);
+ /*
+ * Compare-and-swap against the value observed by the preceding LL:
+ * the store succeeds only if memory still holds cpu_llval.
+ */
+ tcg_gen_atomic_cmpxchg_tl(t0, cpu_lladdr, cpu_llval,
+ val, ctx->mem_idx, mop);
+ tcg_gen_setcond_tl(TCG_COND_EQ, dest, t0, cpu_llval);
+ gen_set_label(done);
+ gen_set_gpr(a->rd, dest, EXT_NONE);
+
+ return true;
+}
+
+static bool gen_am(DisasContext *ctx, arg_rrr *a,
+ void (*func)(TCGv, TCGv, TCGv, TCGArg, MemOp),
+ MemOp mop)
+{
+ TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
+ TCGv addr = gpr_src(ctx, a->rj, EXT_NONE);
+ TCGv val = gpr_src(ctx, a->rk, EXT_NONE);
+
+ if (a->rd != 0 && (a->rj == a->rd || a->rk == a->rd)) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "Warning: source register overlaps destination register"
+ "in atomic insn at pc=0x" TARGET_FMT_lx "\n",
+ ctx->base.pc_next - 4);
+ return false;
+ }
+
+ addr = make_address_i(ctx, addr, 0);
+
+ func(dest, addr, val, ctx->mem_idx, mop);
+ gen_set_gpr(a->rd, dest, EXT_NONE);
+
+ return true;
+}
+
+TRANS(ll_w, ALL, gen_ll, MO_TESL)
+TRANS(sc_w, ALL, gen_sc, MO_TESL)
+TRANS(ll_d, 64, gen_ll, MO_TEUQ)
+TRANS(sc_d, 64, gen_sc, MO_TEUQ)
+TRANS(amswap_w, LAM, gen_am, tcg_gen_atomic_xchg_tl, MO_TESL)
+TRANS(amswap_d, LAM, gen_am, tcg_gen_atomic_xchg_tl, MO_TEUQ)
+TRANS(amadd_w, LAM, gen_am, tcg_gen_atomic_fetch_add_tl, MO_TESL)
+TRANS(amadd_d, LAM, gen_am, tcg_gen_atomic_fetch_add_tl, MO_TEUQ)
+TRANS(amand_w, LAM, gen_am, tcg_gen_atomic_fetch_and_tl, MO_TESL)
+TRANS(amand_d, LAM, gen_am, tcg_gen_atomic_fetch_and_tl, MO_TEUQ)
+TRANS(amor_w, LAM, gen_am, tcg_gen_atomic_fetch_or_tl, MO_TESL)
+TRANS(amor_d, LAM, gen_am, tcg_gen_atomic_fetch_or_tl, MO_TEUQ)
+TRANS(amxor_w, LAM, gen_am, tcg_gen_atomic_fetch_xor_tl, MO_TESL)
+TRANS(amxor_d, LAM, gen_am, tcg_gen_atomic_fetch_xor_tl, MO_TEUQ)
+TRANS(ammax_w, LAM, gen_am, tcg_gen_atomic_fetch_smax_tl, MO_TESL)
+TRANS(ammax_d, LAM, gen_am, tcg_gen_atomic_fetch_smax_tl, MO_TEUQ)
+TRANS(ammin_w, LAM, gen_am, tcg_gen_atomic_fetch_smin_tl, MO_TESL)
+TRANS(ammin_d, LAM, gen_am, tcg_gen_atomic_fetch_smin_tl, MO_TEUQ)
+TRANS(ammax_wu, LAM, gen_am, tcg_gen_atomic_fetch_umax_tl, MO_TESL)
+TRANS(ammax_du, LAM, gen_am, tcg_gen_atomic_fetch_umax_tl, MO_TEUQ)
+TRANS(ammin_wu, LAM, gen_am, tcg_gen_atomic_fetch_umin_tl, MO_TESL)
+TRANS(ammin_du, LAM, gen_am, tcg_gen_atomic_fetch_umin_tl, MO_TEUQ)
+TRANS(amswap_db_w, LAM, gen_am, tcg_gen_atomic_xchg_tl, MO_TESL)
+TRANS(amswap_db_d, LAM, gen_am, tcg_gen_atomic_xchg_tl, MO_TEUQ)
+TRANS(amadd_db_w, LAM, gen_am, tcg_gen_atomic_fetch_add_tl, MO_TESL)
+TRANS(amadd_db_d, LAM, gen_am, tcg_gen_atomic_fetch_add_tl, MO_TEUQ)
+TRANS(amand_db_w, LAM, gen_am, tcg_gen_atomic_fetch_and_tl, MO_TESL)
+TRANS(amand_db_d, LAM, gen_am, tcg_gen_atomic_fetch_and_tl, MO_TEUQ)
+TRANS(amor_db_w, LAM, gen_am, tcg_gen_atomic_fetch_or_tl, MO_TESL)
+TRANS(amor_db_d, LAM, gen_am, tcg_gen_atomic_fetch_or_tl, MO_TEUQ)
+TRANS(amxor_db_w, LAM, gen_am, tcg_gen_atomic_fetch_xor_tl, MO_TESL)
+TRANS(amxor_db_d, LAM, gen_am, tcg_gen_atomic_fetch_xor_tl, MO_TEUQ)
+TRANS(ammax_db_w, LAM, gen_am, tcg_gen_atomic_fetch_smax_tl, MO_TESL)
+TRANS(ammax_db_d, LAM, gen_am, tcg_gen_atomic_fetch_smax_tl, MO_TEUQ)
+TRANS(ammin_db_w, LAM, gen_am, tcg_gen_atomic_fetch_smin_tl, MO_TESL)
+TRANS(ammin_db_d, LAM, gen_am, tcg_gen_atomic_fetch_smin_tl, MO_TEUQ)
+TRANS(ammax_db_wu, LAM, gen_am, tcg_gen_atomic_fetch_umax_tl, MO_TESL)
+TRANS(ammax_db_du, LAM, gen_am, tcg_gen_atomic_fetch_umax_tl, MO_TEUQ)
+TRANS(ammin_db_wu, LAM, gen_am, tcg_gen_atomic_fetch_umin_tl, MO_TESL)
+TRANS(ammin_db_du, LAM, gen_am, tcg_gen_atomic_fetch_umin_tl, MO_TEUQ)
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2021 Loongson Technology Corporation Limited
+ */
+
+static bool gen_rr(DisasContext *ctx, arg_rr *a,
+ DisasExtend src_ext, DisasExtend dst_ext,
+ void (*func)(TCGv, TCGv))
+{
+ TCGv dest = gpr_dst(ctx, a->rd, dst_ext);
+ TCGv src1 = gpr_src(ctx, a->rj, src_ext);
+
+ func(dest, src1);
+ gen_set_gpr(a->rd, dest, dst_ext);
+
+ return true;
+}
+
+static void gen_bytepick_w(TCGv dest, TCGv src1, TCGv src2, target_long sa)
+{
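+ /*
+ * Pick a 32-bit window from the concatenation {rk, rj}: the top sa
+ * bytes of rj become the low bytes of the result and the low (4 - sa)
+ * bytes of rk fill the remainder.
+ */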
+ tcg_gen_concat_tl_i64(dest, src1, src2);
+ tcg_gen_sextract_i64(dest, dest, (32 - sa * 8), 32);
+}
+
+static void gen_bytepick_d(TCGv dest, TCGv src1, TCGv src2, target_long sa)
+{
+ tcg_gen_extract2_i64(dest, src1, src2, (64 - sa * 8));
+}
+
+static bool gen_bstrins(DisasContext *ctx, arg_rr_ms_ls *a,
+ DisasExtend dst_ext)
+{
+ TCGv src1 = gpr_src(ctx, a->rd, EXT_NONE);
+ TCGv src2 = gpr_src(ctx, a->rj, EXT_NONE);
+ TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
+
+ if (a->ls > a->ms) {
+ return false;
+ }
+
+ tcg_gen_deposit_tl(dest, src1, src2, a->ls, a->ms - a->ls + 1);
+ gen_set_gpr(a->rd, dest, dst_ext);
+ return true;
+}
+
+static bool gen_bstrpick(DisasContext *ctx, arg_rr_ms_ls *a,
+ DisasExtend dst_ext)
+{
+ TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
+ TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
+
+ if (a->ls > a->ms) {
+ return false;
+ }
+
+ tcg_gen_extract_tl(dest, src1, a->ls, a->ms - a->ls + 1);
+ gen_set_gpr(a->rd, dest, dst_ext);
+ return true;
+}
+
+static void gen_clz_w(TCGv dest, TCGv src1)
+{
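+ /*
+ * The operand has been zero-extended to 64 bits, so a 64-bit clz
+ * over-counts by 32; subtract that to get the 32-bit count.
+ */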
+ tcg_gen_clzi_tl(dest, src1, TARGET_LONG_BITS);
+ tcg_gen_subi_tl(dest, dest, TARGET_LONG_BITS - 32);
+}
+
+static void gen_clo_w(TCGv dest, TCGv src1)
+{
+ tcg_gen_not_tl(dest, src1);
+ tcg_gen_ext32u_tl(dest, dest);
+ gen_clz_w(dest, dest);
+}
+
+static void gen_ctz_w(TCGv dest, TCGv src1)
+{
+ tcg_gen_ori_tl(dest, src1, (target_ulong)MAKE_64BIT_MASK(32, 32));
+ tcg_gen_ctzi_tl(dest, dest, TARGET_LONG_BITS);
+}
+
+static void gen_cto_w(TCGv dest, TCGv src1)
+{
+ tcg_gen_not_tl(dest, src1);
+ gen_ctz_w(dest, dest);
+}
+
+static void gen_clz_d(TCGv dest, TCGv src1)
+{
+ tcg_gen_clzi_i64(dest, src1, TARGET_LONG_BITS);
+}
+
+static void gen_clo_d(TCGv dest, TCGv src1)
+{
+ tcg_gen_not_tl(dest, src1);
+ gen_clz_d(dest, dest);
+}
+
+static void gen_ctz_d(TCGv dest, TCGv src1)
+{
+ tcg_gen_ctzi_tl(dest, src1, TARGET_LONG_BITS);
+}
+
+static void gen_cto_d(TCGv dest, TCGv src1)
+{
+ tcg_gen_not_tl(dest, src1);
+ gen_ctz_d(dest, dest);
+}
+
+static void gen_revb_2w(TCGv dest, TCGv src1)
+{
+ tcg_gen_bswap64_i64(dest, src1);
+ tcg_gen_rotri_i64(dest, dest, 32);
+}
+
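+/* Swap the bytes within each 16-bit halfword using a mask-and-shift. */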
+static void gen_revb_2h(TCGv dest, TCGv src1)
+{
+ TCGv mask = tcg_constant_tl(0x00FF00FF);
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+
+ tcg_gen_shri_tl(t0, src1, 8);
+ tcg_gen_and_tl(t0, t0, mask);
+ tcg_gen_and_tl(t1, src1, mask);
+ tcg_gen_shli_tl(t1, t1, 8);
+ tcg_gen_or_tl(dest, t0, t1);
+}
+
+static void gen_revb_4h(TCGv dest, TCGv src1)
+{
+ TCGv mask = tcg_constant_tl(0x00FF00FF00FF00FFULL);
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+
+ tcg_gen_shri_tl(t0, src1, 8);
+ tcg_gen_and_tl(t0, t0, mask);
+ tcg_gen_and_tl(t1, src1, mask);
+ tcg_gen_shli_tl(t1, t1, 8);
+ tcg_gen_or_tl(dest, t0, t1);
+}
+
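+/*
+ * revh.2w swaps the two halfwords within each 32-bit word; revh.d then
+ * rotates by 32 as well, reversing all four halfwords of the doubleword.
+ */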
+static void gen_revh_2w(TCGv dest, TCGv src1)
+{
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ TCGv_i64 mask = tcg_constant_i64(0x0000ffff0000ffffull);
+
+ tcg_gen_shri_i64(t0, src1, 16);
+ tcg_gen_and_i64(t1, src1, mask);
+ tcg_gen_and_i64(t0, t0, mask);
+ tcg_gen_shli_i64(t1, t1, 16);
+ tcg_gen_or_i64(dest, t1, t0);
+}
+
+static void gen_revh_d(TCGv dest, TCGv src1)
+{
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ TCGv mask = tcg_constant_tl(0x0000FFFF0000FFFFULL);
+
+ tcg_gen_shri_tl(t1, src1, 16);
+ tcg_gen_and_tl(t1, t1, mask);
+ tcg_gen_and_tl(t0, src1, mask);
+ tcg_gen_shli_tl(t0, t0, 16);
+ tcg_gen_or_tl(t0, t0, t1);
+ tcg_gen_rotri_tl(dest, t0, 32);
+}
+
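+/* maskeqz: rd = (rk == 0) ? 0 : rj;  masknez: rd = (rk != 0) ? 0 : rj. */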
+static void gen_maskeqz(TCGv dest, TCGv src1, TCGv src2)
+{
+ TCGv zero = tcg_constant_tl(0);
+
+ tcg_gen_movcond_tl(TCG_COND_EQ, dest, src2, zero, zero, src1);
+}
+
+static void gen_masknez(TCGv dest, TCGv src1, TCGv src2)
+{
+ TCGv zero = tcg_constant_tl(0);
+
+ tcg_gen_movcond_tl(TCG_COND_NE, dest, src2, zero, zero, src1);
+}
+
+TRANS(ext_w_h, ALL, gen_rr, EXT_NONE, EXT_NONE, tcg_gen_ext16s_tl)
+TRANS(ext_w_b, ALL, gen_rr, EXT_NONE, EXT_NONE, tcg_gen_ext8s_tl)
+TRANS(clo_w, ALL, gen_rr, EXT_NONE, EXT_NONE, gen_clo_w)
+TRANS(clz_w, ALL, gen_rr, EXT_ZERO, EXT_NONE, gen_clz_w)
+TRANS(cto_w, ALL, gen_rr, EXT_NONE, EXT_NONE, gen_cto_w)
+TRANS(ctz_w, ALL, gen_rr, EXT_NONE, EXT_NONE, gen_ctz_w)
+TRANS(clo_d, 64, gen_rr, EXT_NONE, EXT_NONE, gen_clo_d)
+TRANS(clz_d, 64, gen_rr, EXT_NONE, EXT_NONE, gen_clz_d)
+TRANS(cto_d, 64, gen_rr, EXT_NONE, EXT_NONE, gen_cto_d)
+TRANS(ctz_d, 64, gen_rr, EXT_NONE, EXT_NONE, gen_ctz_d)
+TRANS(revb_2h, ALL, gen_rr, EXT_NONE, EXT_SIGN, gen_revb_2h)
+TRANS(revb_4h, 64, gen_rr, EXT_NONE, EXT_NONE, gen_revb_4h)
+TRANS(revb_2w, 64, gen_rr, EXT_NONE, EXT_NONE, gen_revb_2w)
+TRANS(revb_d, 64, gen_rr, EXT_NONE, EXT_NONE, tcg_gen_bswap64_i64)
+TRANS(revh_2w, 64, gen_rr, EXT_NONE, EXT_NONE, gen_revh_2w)
+TRANS(revh_d, 64, gen_rr, EXT_NONE, EXT_NONE, gen_revh_d)
+TRANS(bitrev_4b, ALL, gen_rr, EXT_ZERO, EXT_SIGN, gen_helper_bitswap)
+TRANS(bitrev_8b, 64, gen_rr, EXT_NONE, EXT_NONE, gen_helper_bitswap)
+TRANS(bitrev_w, ALL, gen_rr, EXT_NONE, EXT_SIGN, gen_helper_bitrev_w)
+TRANS(bitrev_d, 64, gen_rr, EXT_NONE, EXT_NONE, gen_helper_bitrev_d)
+TRANS(maskeqz, ALL, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_maskeqz)
+TRANS(masknez, ALL, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_masknez)
+TRANS(bytepick_w, ALL, gen_rrr_sa, EXT_NONE, EXT_NONE, gen_bytepick_w)
+TRANS(bytepick_d, 64, gen_rrr_sa, EXT_NONE, EXT_NONE, gen_bytepick_d)
+TRANS(bstrins_w, ALL, gen_bstrins, EXT_SIGN)
+TRANS(bstrins_d, 64, gen_bstrins, EXT_NONE)
+TRANS(bstrpick_w, ALL, gen_bstrpick, EXT_SIGN)
+TRANS(bstrpick_d, 64, gen_bstrpick, EXT_NONE)
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2021 Loongson Technology Corporation Limited
+ */
+
+static bool trans_b(DisasContext *ctx, arg_b *a)
+{
+ gen_goto_tb(ctx, 0, ctx->base.pc_next + a->offs);
+ ctx->base.is_jmp = DISAS_NORETURN;
+ return true;
+}
+
+static bool trans_bl(DisasContext *ctx, arg_bl *a)
+{
+ tcg_gen_movi_tl(cpu_gpr[1], make_address_pc(ctx, ctx->base.pc_next + 4));
+ gen_goto_tb(ctx, 0, ctx->base.pc_next + a->offs);
+ ctx->base.is_jmp = DISAS_NORETURN;
+ return true;
+}
+
+static bool trans_jirl(DisasContext *ctx, arg_jirl *a)
+{
+ TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
+ TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
+
+ TCGv addr = make_address_i(ctx, src1, a->imm);
+ tcg_gen_mov_tl(cpu_pc, addr);
+ tcg_gen_movi_tl(dest, make_address_pc(ctx, ctx->base.pc_next + 4));
+ gen_set_gpr(a->rd, dest, EXT_NONE);
+ tcg_gen_lookup_and_goto_ptr();
+ ctx->base.is_jmp = DISAS_NORETURN;
+ return true;
+}
+
+static void gen_bc(DisasContext *ctx, TCGv src1, TCGv src2,
+ target_long offs, TCGCond cond)
+{
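+ /* Fall through to pc + 4 when the condition fails, else take the branch. */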
+ TCGLabel *l = gen_new_label();
+ tcg_gen_brcond_tl(cond, src1, src2, l);
+ gen_goto_tb(ctx, 1, ctx->base.pc_next + 4);
+ gen_set_label(l);
+ gen_goto_tb(ctx, 0, ctx->base.pc_next + offs);
+ ctx->base.is_jmp = DISAS_NORETURN;
+}
+
+static bool gen_rr_bc(DisasContext *ctx, arg_rr_offs *a, TCGCond cond)
+{
+ TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
+ TCGv src2 = gpr_src(ctx, a->rd, EXT_NONE);
+
+ gen_bc(ctx, src1, src2, a->offs, cond);
+ return true;
+}
+
+static bool gen_rz_bc(DisasContext *ctx, arg_r_offs *a, TCGCond cond)
+{
+ TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
+ TCGv src2 = tcg_constant_tl(0);
+
+ gen_bc(ctx, src1, src2, a->offs, cond);
+ return true;
+}
+
+static bool gen_cz_bc(DisasContext *ctx, arg_c_offs *a, TCGCond cond)
+{
+ TCGv src1 = tcg_temp_new();
+ TCGv src2 = tcg_constant_tl(0);
+
+ tcg_gen_ld8u_tl(src1, tcg_env,
+ offsetof(CPULoongArchState, cf[a->cj]));
+ gen_bc(ctx, src1, src2, a->offs, cond);
+ return true;
+}
+
+TRANS(beq, ALL, gen_rr_bc, TCG_COND_EQ)
+TRANS(bne, ALL, gen_rr_bc, TCG_COND_NE)
+TRANS(blt, ALL, gen_rr_bc, TCG_COND_LT)
+TRANS(bge, ALL, gen_rr_bc, TCG_COND_GE)
+TRANS(bltu, ALL, gen_rr_bc, TCG_COND_LTU)
+TRANS(bgeu, ALL, gen_rr_bc, TCG_COND_GEU)
+TRANS(beqz, ALL, gen_rz_bc, TCG_COND_EQ)
+TRANS(bnez, ALL, gen_rz_bc, TCG_COND_NE)
+TRANS(bceqz, 64, gen_cz_bc, TCG_COND_EQ)
+TRANS(bcnez, 64, gen_cz_bc, TCG_COND_NE)
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2021 Loongson Technology Corporation Limited
+ */
+
+static bool trans_break(DisasContext *ctx, arg_break *a)
+{
+ generate_exception(ctx, EXCCODE_BRK);
+ return true;
+}
+
+static bool trans_syscall(DisasContext *ctx, arg_syscall *a)
+{
+ generate_exception(ctx, EXCCODE_SYS);
+ return true;
+}
+
+static bool trans_asrtle_d(DisasContext *ctx, arg_asrtle_d *a)
+{
+ TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
+ TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE);
+
+ if (!avail_64(ctx)) {
+ return false;
+ }
+
+ gen_helper_asrtle_d(tcg_env, src1, src2);
+ return true;
+}
+
+static bool trans_asrtgt_d(DisasContext *ctx, arg_asrtgt_d *a)
+{
+ TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
+ TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE);
+
+ if (!avail_64(ctx)) {
+ return false;
+ }
+
+ gen_helper_asrtgt_d(tcg_env, src1, src2);
+ return true;
+}
+
+static bool gen_rdtime(DisasContext *ctx, arg_rr *a,
+ bool word, bool high)
+{
+ TCGv dst1 = gpr_dst(ctx, a->rd, EXT_NONE);
+ TCGv dst2 = gpr_dst(ctx, a->rj, EXT_NONE);
+
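+ /* rd receives the counter value (or one 32-bit half); rj receives CSR_TID. */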
+ translator_io_start(&ctx->base);
+ gen_helper_rdtime_d(dst1, tcg_env);
+ if (word) {
+ tcg_gen_sextract_tl(dst1, dst1, high ? 32 : 0, 32);
+ }
+ tcg_gen_ld_i64(dst2, tcg_env, offsetof(CPULoongArchState, CSR_TID));
+
+ return true;
+}
+
+static bool trans_rdtimel_w(DisasContext *ctx, arg_rdtimel_w *a)
+{
+ return gen_rdtime(ctx, a, 1, 0);
+}
+
+static bool trans_rdtimeh_w(DisasContext *ctx, arg_rdtimeh_w *a)
+{
+ return gen_rdtime(ctx, a, 1, 1);
+}
+
+static bool trans_rdtime_d(DisasContext *ctx, arg_rdtime_d *a)
+{
+ return gen_rdtime(ctx, a, 0, 0);
+}
+
+static bool trans_cpucfg(DisasContext *ctx, arg_cpucfg *a)
+{
+ TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
+ TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
+
+ gen_helper_cpucfg(dest, tcg_env, src1);
+ gen_set_gpr(a->rd, dest, EXT_NONE);
+
+ return true;
+}
+
+static bool gen_crc(DisasContext *ctx, arg_rrr *a,
+ void (*func)(TCGv, TCGv, TCGv, TCGv),
+ TCGv tsz)
+{
+ TCGv dest = gpr_dst(ctx, a->rd, EXT_SIGN);
+ TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
+ TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE);
+
+ func(dest, src2, src1, tsz);
+ gen_set_gpr(a->rd, dest, EXT_SIGN);
+
+ return true;
+}
+
+TRANS(crc_w_b_w, 64, gen_crc, gen_helper_crc32, tcg_constant_tl(1))
+TRANS(crc_w_h_w, 64, gen_crc, gen_helper_crc32, tcg_constant_tl(2))
+TRANS(crc_w_w_w, 64, gen_crc, gen_helper_crc32, tcg_constant_tl(4))
+TRANS(crc_w_d_w, 64, gen_crc, gen_helper_crc32, tcg_constant_tl(8))
+TRANS(crcc_w_b_w, 64, gen_crc, gen_helper_crc32c, tcg_constant_tl(1))
+TRANS(crcc_w_h_w, 64, gen_crc, gen_helper_crc32c, tcg_constant_tl(2))
+TRANS(crcc_w_w_w, 64, gen_crc, gen_helper_crc32c, tcg_constant_tl(4))
+TRANS(crcc_w_d_w, 64, gen_crc, gen_helper_crc32c, tcg_constant_tl(8))
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2021 Loongson Technology Corporation Limited
+ */
+
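+/*
+ * Raise the Floating Point Disabled exception when EUEN.FPE is clear.
+ * In user mode the FPU is always enabled, so the check is a no-op.
+ */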
+#ifndef CONFIG_USER_ONLY
+#define CHECK_FPE do { \
+ if ((ctx->base.tb->flags & HW_FLAGS_EUEN_FPE) == 0) { \
+ generate_exception(ctx, EXCCODE_FPD); \
+ return true; \
+ } \
+} while (0)
+#else
+#define CHECK_FPE
+#endif
+
+static bool gen_fff(DisasContext *ctx, arg_fff *a,
+ void (*func)(TCGv, TCGv_env, TCGv, TCGv))
+{
+ TCGv dest = get_fpr(ctx, a->fd);
+ TCGv src1 = get_fpr(ctx, a->fj);
+ TCGv src2 = get_fpr(ctx, a->fk);
+
+ CHECK_FPE;
+
+ func(dest, tcg_env, src1, src2);
+ set_fpr(a->fd, dest);
+
+ return true;
+}
+
+static bool gen_ff(DisasContext *ctx, arg_ff *a,
+ void (*func)(TCGv, TCGv_env, TCGv))
+{
+ TCGv dest = get_fpr(ctx, a->fd);
+ TCGv src = get_fpr(ctx, a->fj);
+
+ CHECK_FPE;
+
+ func(dest, tcg_env, src);
+ set_fpr(a->fd, dest);
+
+ return true;
+}
+
+static bool gen_muladd(DisasContext *ctx, arg_ffff *a,
+ void (*func)(TCGv, TCGv_env, TCGv, TCGv, TCGv, TCGv_i32),
+ int flag)
+{
+ TCGv_i32 tflag = tcg_constant_i32(flag);
+ TCGv dest = get_fpr(ctx, a->fd);
+ TCGv src1 = get_fpr(ctx, a->fj);
+ TCGv src2 = get_fpr(ctx, a->fk);
+ TCGv src3 = get_fpr(ctx, a->fa);
+
+ CHECK_FPE;
+
+ func(dest, tcg_env, src1, src2, src3, tflag);
+ set_fpr(a->fd, dest);
+
+ return true;
+}
+
+static bool trans_fcopysign_s(DisasContext *ctx, arg_fcopysign_s *a)
+{
+ TCGv dest = get_fpr(ctx, a->fd);
+ TCGv src1 = get_fpr(ctx, a->fk);
+ TCGv src2 = get_fpr(ctx, a->fj);
+
+ if (!avail_FP_SP(ctx)) {
+ return false;
+ }
+
+ CHECK_FPE;
+
+ tcg_gen_deposit_i64(dest, src1, src2, 0, 31);
+ set_fpr(a->fd, dest);
+
+ return true;
+}
+
+static bool trans_fcopysign_d(DisasContext *ctx, arg_fcopysign_d *a)
+{
+ TCGv dest = get_fpr(ctx, a->fd);
+ TCGv src1 = get_fpr(ctx, a->fk);
+ TCGv src2 = get_fpr(ctx, a->fj);
+
+ if (!avail_FP_DP(ctx)) {
+ return false;
+ }
+
+ CHECK_FPE;
+
+ tcg_gen_deposit_i64(dest, src1, src2, 0, 63);
+ set_fpr(a->fd, dest);
+
+ return true;
+}
+
+static bool trans_fabs_s(DisasContext *ctx, arg_fabs_s *a)
+{
+ TCGv dest = get_fpr(ctx, a->fd);
+ TCGv src = get_fpr(ctx, a->fj);
+
+ if (!avail_FP_SP(ctx)) {
+ return false;
+ }
+
+ CHECK_FPE;
+
+ tcg_gen_andi_i64(dest, src, MAKE_64BIT_MASK(0, 31));
+ gen_nanbox_s(dest, dest);
+ set_fpr(a->fd, dest);
+
+ return true;
+}
+
+static bool trans_fabs_d(DisasContext *ctx, arg_fabs_d *a)
+{
+ TCGv dest = get_fpr(ctx, a->fd);
+ TCGv src = get_fpr(ctx, a->fj);
+
+ if (!avail_FP_DP(ctx)) {
+ return false;
+ }
+
+ CHECK_FPE;
+
+ tcg_gen_andi_i64(dest, src, MAKE_64BIT_MASK(0, 63));
+ set_fpr(a->fd, dest);
+
+ return true;
+}
+
+static bool trans_fneg_s(DisasContext *ctx, arg_fneg_s *a)
+{
+ TCGv dest = get_fpr(ctx, a->fd);
+ TCGv src = get_fpr(ctx, a->fj);
+
+ if (!avail_FP_SP(ctx)) {
+ return false;
+ }
+
+ CHECK_FPE;
+
+ tcg_gen_xori_i64(dest, src, 0x80000000);
+ gen_nanbox_s(dest, dest);
+ set_fpr(a->fd, dest);
+
+ return true;
+}
+
+static bool trans_fneg_d(DisasContext *ctx, arg_fneg_d *a)
+{
+ TCGv dest = get_fpr(ctx, a->fd);
+ TCGv src = get_fpr(ctx, a->fj);
+
+ if (!avail_FP_DP(ctx)) {
+ return false;
+ }
+
+ CHECK_FPE;
+
+ tcg_gen_xori_i64(dest, src, 0x8000000000000000LL);
+ set_fpr(a->fd, dest);
+
+ return true;
+}
+
+TRANS(fadd_s, FP_SP, gen_fff, gen_helper_fadd_s)
+TRANS(fadd_d, FP_DP, gen_fff, gen_helper_fadd_d)
+TRANS(fsub_s, FP_SP, gen_fff, gen_helper_fsub_s)
+TRANS(fsub_d, FP_DP, gen_fff, gen_helper_fsub_d)
+TRANS(fmul_s, FP_SP, gen_fff, gen_helper_fmul_s)
+TRANS(fmul_d, FP_DP, gen_fff, gen_helper_fmul_d)
+TRANS(fdiv_s, FP_SP, gen_fff, gen_helper_fdiv_s)
+TRANS(fdiv_d, FP_DP, gen_fff, gen_helper_fdiv_d)
+TRANS(fmax_s, FP_SP, gen_fff, gen_helper_fmax_s)
+TRANS(fmax_d, FP_DP, gen_fff, gen_helper_fmax_d)
+TRANS(fmin_s, FP_SP, gen_fff, gen_helper_fmin_s)
+TRANS(fmin_d, FP_DP, gen_fff, gen_helper_fmin_d)
+TRANS(fmaxa_s, FP_SP, gen_fff, gen_helper_fmaxa_s)
+TRANS(fmaxa_d, FP_DP, gen_fff, gen_helper_fmaxa_d)
+TRANS(fmina_s, FP_SP, gen_fff, gen_helper_fmina_s)
+TRANS(fmina_d, FP_DP, gen_fff, gen_helper_fmina_d)
+TRANS(fscaleb_s, FP_SP, gen_fff, gen_helper_fscaleb_s)
+TRANS(fscaleb_d, FP_DP, gen_fff, gen_helper_fscaleb_d)
+TRANS(fsqrt_s, FP_SP, gen_ff, gen_helper_fsqrt_s)
+TRANS(fsqrt_d, FP_DP, gen_ff, gen_helper_fsqrt_d)
+TRANS(frecip_s, FP_SP, gen_ff, gen_helper_frecip_s)
+TRANS(frecip_d, FP_DP, gen_ff, gen_helper_frecip_d)
+TRANS(frsqrt_s, FP_SP, gen_ff, gen_helper_frsqrt_s)
+TRANS(frsqrt_d, FP_DP, gen_ff, gen_helper_frsqrt_d)
+TRANS(flogb_s, FP_SP, gen_ff, gen_helper_flogb_s)
+TRANS(flogb_d, FP_DP, gen_ff, gen_helper_flogb_d)
+TRANS(fclass_s, FP_SP, gen_ff, gen_helper_fclass_s)
+TRANS(fclass_d, FP_DP, gen_ff, gen_helper_fclass_d)
+TRANS(fmadd_s, FP_SP, gen_muladd, gen_helper_fmuladd_s, 0)
+TRANS(fmadd_d, FP_DP, gen_muladd, gen_helper_fmuladd_d, 0)
+TRANS(fmsub_s, FP_SP, gen_muladd, gen_helper_fmuladd_s, float_muladd_negate_c)
+TRANS(fmsub_d, FP_DP, gen_muladd, gen_helper_fmuladd_d, float_muladd_negate_c)
+TRANS(fnmadd_s, FP_SP, gen_muladd, gen_helper_fmuladd_s, float_muladd_negate_result)
+TRANS(fnmadd_d, FP_DP, gen_muladd, gen_helper_fmuladd_d, float_muladd_negate_result)
+TRANS(fnmsub_s, FP_SP, gen_muladd, gen_helper_fmuladd_s,
+ float_muladd_negate_c | float_muladd_negate_result)
+TRANS(fnmsub_d, FP_DP, gen_muladd, gen_helper_fmuladd_d,
+ float_muladd_negate_c | float_muladd_negate_result)
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2021 Loongson Technology Corporation Limited
+ */
+
+/*
+ * fcond encoding: bit 0 selects a signaling (vs quiet) compare; bits 1-4
+ * request LT, EQ, UN and NEQ respectively. get_fcmp_flags() receives
+ * fcond >> 1 and maps those bits onto the FCMP_* flags (NEQ = LT | GT).
+ */
+static uint32_t get_fcmp_flags(int cond)
+{
+ uint32_t flags = 0;
+
+ if (cond & 0x1) {
+ flags |= FCMP_LT;
+ }
+ if (cond & 0x2) {
+ flags |= FCMP_EQ;
+ }
+ if (cond & 0x4) {
+ flags |= FCMP_UN;
+ }
+ if (cond & 0x8) {
+ flags |= FCMP_GT | FCMP_LT;
+ }
+ return flags;
+}
+
+static bool trans_fcmp_cond_s(DisasContext *ctx, arg_fcmp_cond_s *a)
+{
+ TCGv var, src1, src2;
+ uint32_t flags;
+ void (*fn)(TCGv, TCGv_env, TCGv, TCGv, TCGv_i32);
+
+ if (!avail_FP_SP(ctx)) {
+ return false;
+ }
+
+ CHECK_FPE;
+
+ var = tcg_temp_new();
+ src1 = get_fpr(ctx, a->fj);
+ src2 = get_fpr(ctx, a->fk);
+ fn = (a->fcond & 1 ? gen_helper_fcmp_s_s : gen_helper_fcmp_c_s);
+ flags = get_fcmp_flags(a->fcond >> 1);
+
+ fn(var, tcg_env, src1, src2, tcg_constant_i32(flags));
+
+ tcg_gen_st8_tl(var, tcg_env, offsetof(CPULoongArchState, cf[a->cd]));
+ return true;
+}
+
+static bool trans_fcmp_cond_d(DisasContext *ctx, arg_fcmp_cond_d *a)
+{
+ TCGv var, src1, src2;
+ uint32_t flags;
+ void (*fn)(TCGv, TCGv_env, TCGv, TCGv, TCGv_i32);
+
+ if (!avail_FP_DP(ctx)) {
+ return false;
+ }
+
+ CHECK_FPE;
+
+ var = tcg_temp_new();
+ src1 = get_fpr(ctx, a->fj);
+ src2 = get_fpr(ctx, a->fk);
+ fn = (a->fcond & 1 ? gen_helper_fcmp_s_d : gen_helper_fcmp_c_d);
+ flags = get_fcmp_flags(a->fcond >> 1);
+
+ fn(var, tcg_env, src1, src2, tcg_constant_i32(flags));
+
+ tcg_gen_st8_tl(var, tcg_env, offsetof(CPULoongArchState, cf[a->cd]));
+ return true;
+}
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2021 Loongson Technology Corporation Limited
+ */
+
+TRANS(fcvt_s_d, FP_DP, gen_ff, gen_helper_fcvt_s_d)
+TRANS(fcvt_d_s, FP_DP, gen_ff, gen_helper_fcvt_d_s)
+TRANS(ftintrm_w_s, FP_SP, gen_ff, gen_helper_ftintrm_w_s)
+TRANS(ftintrm_w_d, FP_DP, gen_ff, gen_helper_ftintrm_w_d)
+TRANS(ftintrm_l_s, FP_SP, gen_ff, gen_helper_ftintrm_l_s)
+TRANS(ftintrm_l_d, FP_DP, gen_ff, gen_helper_ftintrm_l_d)
+TRANS(ftintrp_w_s, FP_SP, gen_ff, gen_helper_ftintrp_w_s)
+TRANS(ftintrp_w_d, FP_DP, gen_ff, gen_helper_ftintrp_w_d)
+TRANS(ftintrp_l_s, FP_SP, gen_ff, gen_helper_ftintrp_l_s)
+TRANS(ftintrp_l_d, FP_DP, gen_ff, gen_helper_ftintrp_l_d)
+TRANS(ftintrz_w_s, FP_SP, gen_ff, gen_helper_ftintrz_w_s)
+TRANS(ftintrz_w_d, FP_DP, gen_ff, gen_helper_ftintrz_w_d)
+TRANS(ftintrz_l_s, FP_SP, gen_ff, gen_helper_ftintrz_l_s)
+TRANS(ftintrz_l_d, FP_DP, gen_ff, gen_helper_ftintrz_l_d)
+TRANS(ftintrne_w_s, FP_SP, gen_ff, gen_helper_ftintrne_w_s)
+TRANS(ftintrne_w_d, FP_DP, gen_ff, gen_helper_ftintrne_w_d)
+TRANS(ftintrne_l_s, FP_SP, gen_ff, gen_helper_ftintrne_l_s)
+TRANS(ftintrne_l_d, FP_DP, gen_ff, gen_helper_ftintrne_l_d)
+TRANS(ftint_w_s, FP_SP, gen_ff, gen_helper_ftint_w_s)
+TRANS(ftint_w_d, FP_DP, gen_ff, gen_helper_ftint_w_d)
+TRANS(ftint_l_s, FP_SP, gen_ff, gen_helper_ftint_l_s)
+TRANS(ftint_l_d, FP_DP, gen_ff, gen_helper_ftint_l_d)
+TRANS(ffint_s_w, FP_SP, gen_ff, gen_helper_ffint_s_w)
+TRANS(ffint_s_l, FP_SP, gen_ff, gen_helper_ffint_s_l)
+TRANS(ffint_d_w, FP_DP, gen_ff, gen_helper_ffint_d_w)
+TRANS(ffint_d_l, FP_DP, gen_ff, gen_helper_ffint_d_l)
+TRANS(frint_s, FP_SP, gen_ff, gen_helper_frint_s)
+TRANS(frint_d, FP_DP, gen_ff, gen_helper_frint_d)
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2021 Loongson Technology Corporation Limited
+ */
+
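+/* Single-precision values are kept NaN-boxed in the 64-bit FP registers. */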
+static void maybe_nanbox_load(TCGv freg, MemOp mop)
+{
+ if ((mop & MO_SIZE) == MO_32) {
+ gen_nanbox_s(freg, freg);
+ }
+}
+
+static bool gen_fload_i(DisasContext *ctx, arg_fr_i *a, MemOp mop)
+{
+ TCGv addr = gpr_src(ctx, a->rj, EXT_NONE);
+ TCGv dest = get_fpr(ctx, a->fd);
+
+ CHECK_FPE;
+
+ addr = make_address_i(ctx, addr, a->imm);
+
+ tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, mop);
+ maybe_nanbox_load(dest, mop);
+ set_fpr(a->fd, dest);
+
+ return true;
+}
+
+static bool gen_fstore_i(DisasContext *ctx, arg_fr_i *a, MemOp mop)
+{
+ TCGv addr = gpr_src(ctx, a->rj, EXT_NONE);
+ TCGv src = get_fpr(ctx, a->fd);
+
+ CHECK_FPE;
+
+ addr = make_address_i(ctx, addr, a->imm);
+
+ tcg_gen_qemu_st_tl(src, addr, ctx->mem_idx, mop);
+
+ return true;
+}
+
+static bool gen_floadx(DisasContext *ctx, arg_frr *a, MemOp mop)
+{
+ TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
+ TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE);
+ TCGv dest = get_fpr(ctx, a->fd);
+ TCGv addr;
+
+ CHECK_FPE;
+
+ addr = make_address_x(ctx, src1, src2);
+ tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, mop);
+ maybe_nanbox_load(dest, mop);
+ set_fpr(a->fd, dest);
+
+ return true;
+}
+
+static bool gen_fstorex(DisasContext *ctx, arg_frr *a, MemOp mop)
+{
+ TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
+ TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE);
+ TCGv src3 = get_fpr(ctx, a->fd);
+ TCGv addr;
+
+ CHECK_FPE;
+
+ addr = make_address_x(ctx, src1, src2);
+ tcg_gen_qemu_st_tl(src3, addr, ctx->mem_idx, mop);
+
+ return true;
+}
+
+static bool gen_fload_gt(DisasContext *ctx, arg_frr *a, MemOp mop)
+{
+ TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
+ TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE);
+ TCGv dest = get_fpr(ctx, a->fd);
+ TCGv addr;
+
+ CHECK_FPE;
+
+ gen_helper_asrtgt_d(tcg_env, src1, src2);
+ addr = make_address_x(ctx, src1, src2);
+ tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, mop);
+ maybe_nanbox_load(dest, mop);
+ set_fpr(a->fd, dest);
+
+ return true;
+}
+
+static bool gen_fstore_gt(DisasContext *ctx, arg_frr *a, MemOp mop)
+{
+ TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
+ TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE);
+ TCGv src3 = get_fpr(ctx, a->fd);
+ TCGv addr;
+
+ CHECK_FPE;
+
+ gen_helper_asrtgt_d(tcg_env, src1, src2);
+ addr = make_address_x(ctx, src1, src2);
+ tcg_gen_qemu_st_tl(src3, addr, ctx->mem_idx, mop);
+
+ return true;
+}
+
+static bool gen_fload_le(DisasContext *ctx, arg_frr *a, MemOp mop)
+{
+ TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
+ TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE);
+ TCGv dest = get_fpr(ctx, a->fd);
+ TCGv addr;
+
+ CHECK_FPE;
+
+ gen_helper_asrtle_d(tcg_env, src1, src2);
+ addr = make_address_x(ctx, src1, src2);
+ tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, mop);
+ maybe_nanbox_load(dest, mop);
+ set_fpr(a->fd, dest);
+
+ return true;
+}
+
+static bool gen_fstore_le(DisasContext *ctx, arg_frr *a, MemOp mop)
+{
+ TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
+ TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE);
+ TCGv src3 = get_fpr(ctx, a->fd);
+ TCGv addr;
+
+ CHECK_FPE;
+
+ gen_helper_asrtle_d(tcg_env, src1, src2);
+ addr = make_address_x(ctx, src1, src2);
+ tcg_gen_qemu_st_tl(src3, addr, ctx->mem_idx, mop);
+
+ return true;
+}
+
+TRANS(fld_s, FP_SP, gen_fload_i, MO_TEUL)
+TRANS(fst_s, FP_SP, gen_fstore_i, MO_TEUL)
+TRANS(fld_d, FP_DP, gen_fload_i, MO_TEUQ)
+TRANS(fst_d, FP_DP, gen_fstore_i, MO_TEUQ)
+TRANS(fldx_s, FP_SP, gen_floadx, MO_TEUL)
+TRANS(fldx_d, FP_DP, gen_floadx, MO_TEUQ)
+TRANS(fstx_s, FP_SP, gen_fstorex, MO_TEUL)
+TRANS(fstx_d, FP_DP, gen_fstorex, MO_TEUQ)
+TRANS(fldgt_s, FP_SP, gen_fload_gt, MO_TEUL)
+TRANS(fldgt_d, FP_DP, gen_fload_gt, MO_TEUQ)
+TRANS(fldle_s, FP_SP, gen_fload_le, MO_TEUL)
+TRANS(fldle_d, FP_DP, gen_fload_le, MO_TEUQ)
+TRANS(fstgt_s, FP_SP, gen_fstore_gt, MO_TEUL)
+TRANS(fstgt_d, FP_DP, gen_fstore_gt, MO_TEUQ)
+TRANS(fstle_s, FP_SP, gen_fstore_le, MO_TEUL)
+TRANS(fstle_d, FP_DP, gen_fstore_le, MO_TEUQ)
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2021 Loongson Technology Corporation Limited
+ */
+
+static const uint32_t fcsr_mask[4] = {
+ UINT32_MAX, FCSR0_M1, FCSR0_M2, FCSR0_M3
+};
+
+static bool trans_fsel(DisasContext *ctx, arg_fsel *a)
+{
+ TCGv zero = tcg_constant_tl(0);
+ TCGv dest = get_fpr(ctx, a->fd);
+ TCGv src1 = get_fpr(ctx, a->fj);
+ TCGv src2 = get_fpr(ctx, a->fk);
+ TCGv cond;
+
+ if (!avail_FP(ctx)) {
+ return false;
+ }
+
+ CHECK_FPE;
+
+ cond = tcg_temp_new();
+ tcg_gen_ld8u_tl(cond, tcg_env, offsetof(CPULoongArchState, cf[a->ca]));
+ tcg_gen_movcond_tl(TCG_COND_EQ, dest, cond, zero, src1, src2);
+ set_fpr(a->fd, dest);
+
+ return true;
+}
+
+static bool gen_f2f(DisasContext *ctx, arg_ff *a,
+ void (*func)(TCGv, TCGv), bool nanbox)
+{
+ TCGv dest = get_fpr(ctx, a->fd);
+ TCGv src = get_fpr(ctx, a->fj);
+
+ CHECK_FPE;
+
+ func(dest, src);
+ if (nanbox) {
+ gen_nanbox_s(dest, dest);
+ }
+ set_fpr(a->fd, dest);
+
+ return true;
+}
+
+static bool gen_r2f(DisasContext *ctx, arg_fr *a,
+ void (*func)(TCGv, TCGv))
+{
+ TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
+ TCGv dest = get_fpr(ctx, a->fd);
+
+ if (!avail_FP(ctx)) {
+ return false;
+ }
+
+ CHECK_FPE;
+
+ func(dest, src);
+ set_fpr(a->fd, dest);
+
+ return true;
+}
+
+static bool gen_f2r(DisasContext *ctx, arg_rf *a,
+ void (*func)(TCGv, TCGv))
+{
+ TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
+ TCGv src = get_fpr(ctx, a->fj);
+
+ if (!avail_FP(ctx)) {
+ return false;
+ }
+
+ CHECK_FPE;
+
+ func(dest, src);
+ gen_set_gpr(a->rd, dest, EXT_NONE);
+
+ return true;
+}
+
+static bool trans_movgr2fcsr(DisasContext *ctx, arg_movgr2fcsr *a)
+{
+ uint32_t mask = fcsr_mask[a->fcsrd];
+ TCGv Rj = gpr_src(ctx, a->rj, EXT_NONE);
+
+ if (!avail_FP(ctx)) {
+ return false;
+ }
+
+ CHECK_FPE;
+
+ if (mask == UINT32_MAX) {
+ tcg_gen_st32_i64(Rj, tcg_env, offsetof(CPULoongArchState, fcsr0));
+ } else {
+ TCGv_i32 fcsr0 = tcg_temp_new_i32();
+ TCGv_i32 temp = tcg_temp_new_i32();
+
+ tcg_gen_ld_i32(fcsr0, tcg_env, offsetof(CPULoongArchState, fcsr0));
+ tcg_gen_extrl_i64_i32(temp, Rj);
+ tcg_gen_andi_i32(temp, temp, mask);
+ tcg_gen_andi_i32(fcsr0, fcsr0, ~mask);
+ tcg_gen_or_i32(fcsr0, fcsr0, temp);
+ tcg_gen_st_i32(fcsr0, tcg_env, offsetof(CPULoongArchState, fcsr0));
+ }
+
+ /*
+ * Install the new rounding mode into fp_status, if it changed.
+ * Note that FCSR3 is exactly the rounding mode field.
+ */
+ if (mask & FCSR0_M3) {
+ gen_helper_set_rounding_mode(tcg_env);
+ }
+ return true;
+}
+
+static bool trans_movfcsr2gr(DisasContext *ctx, arg_movfcsr2gr *a)
+{
+ TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
+
+ if (!avail_FP(ctx)) {
+ return false;
+ }
+
+ CHECK_FPE;
+
+ tcg_gen_ld32u_i64(dest, tcg_env, offsetof(CPULoongArchState, fcsr0));
+ tcg_gen_andi_i64(dest, dest, fcsr_mask[a->fcsrs]);
+ gen_set_gpr(a->rd, dest, EXT_NONE);
+
+ return true;
+}
+
+static void gen_movgr2fr_w(TCGv dest, TCGv src)
+{
+ tcg_gen_deposit_i64(dest, dest, src, 0, 32);
+}
+
+static void gen_movgr2frh_w(TCGv dest, TCGv src)
+{
+ tcg_gen_deposit_i64(dest, dest, src, 32, 32);
+}
+
+static void gen_movfrh2gr_s(TCGv dest, TCGv src)
+{
+ tcg_gen_sextract_tl(dest, src, 32, 32);
+}
+
+static bool trans_movfr2cf(DisasContext *ctx, arg_movfr2cf *a)
+{
+ TCGv t0;
+ TCGv src = get_fpr(ctx, a->fj);
+
+ if (!avail_FP(ctx)) {
+ return false;
+ }
+
+ CHECK_FPE;
+
+ t0 = tcg_temp_new();
+ tcg_gen_andi_tl(t0, src, 0x1);
+ tcg_gen_st8_tl(t0, tcg_env, offsetof(CPULoongArchState, cf[a->cd & 0x7]));
+
+ return true;
+}
+
+static bool trans_movcf2fr(DisasContext *ctx, arg_movcf2fr *a)
+{
+ TCGv dest = get_fpr(ctx, a->fd);
+
+ if (!avail_FP(ctx)) {
+ return false;
+ }
+
+ CHECK_FPE;
+
+ tcg_gen_ld8u_tl(dest, tcg_env,
+ offsetof(CPULoongArchState, cf[a->cj & 0x7]));
+ set_fpr(a->fd, dest);
+
+ return true;
+}
+
+static bool trans_movgr2cf(DisasContext *ctx, arg_movgr2cf *a)
+{
+ TCGv t0;
+
+ if (!avail_FP(ctx)) {
+ return false;
+ }
+
+ CHECK_FPE;
+
+ t0 = tcg_temp_new();
+ tcg_gen_andi_tl(t0, gpr_src(ctx, a->rj, EXT_NONE), 0x1);
+ tcg_gen_st8_tl(t0, tcg_env, offsetof(CPULoongArchState, cf[a->cd & 0x7]));
+
+ return true;
+}
+
+static bool trans_movcf2gr(DisasContext *ctx, arg_movcf2gr *a)
+{
+ if (!avail_FP(ctx)) {
+ return false;
+ }
+
+ CHECK_FPE;
+
+ tcg_gen_ld8u_tl(gpr_dst(ctx, a->rd, EXT_NONE), tcg_env,
+ offsetof(CPULoongArchState, cf[a->cj & 0x7]));
+ return true;
+}
+
+TRANS(fmov_s, FP_SP, gen_f2f, tcg_gen_mov_tl, true)
+TRANS(fmov_d, FP_DP, gen_f2f, tcg_gen_mov_tl, false)
+TRANS(movgr2fr_w, FP_SP, gen_r2f, gen_movgr2fr_w)
+TRANS(movgr2fr_d, 64, gen_r2f, tcg_gen_mov_tl)
+TRANS(movgr2frh_w, FP_DP, gen_r2f, gen_movgr2frh_w)
+TRANS(movfr2gr_s, FP_SP, gen_f2r, tcg_gen_ext32s_tl)
+TRANS(movfr2gr_d, 64, gen_f2r, tcg_gen_mov_tl)
+TRANS(movfrh2gr_s, FP_DP, gen_f2r, gen_movfrh2gr_s)
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2021 Loongson Technology Corporation Limited
+ */
+
+static bool gen_load(DisasContext *ctx, arg_rr_i *a, MemOp mop)
+{
+ TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
+ TCGv addr = gpr_src(ctx, a->rj, EXT_NONE);
+
+ addr = make_address_i(ctx, addr, a->imm);
+
+ tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, mop);
+ gen_set_gpr(a->rd, dest, EXT_NONE);
+ return true;
+}
+
+static bool gen_store(DisasContext *ctx, arg_rr_i *a, MemOp mop)
+{
+ TCGv data = gpr_src(ctx, a->rd, EXT_NONE);
+ TCGv addr = gpr_src(ctx, a->rj, EXT_NONE);
+
+ addr = make_address_i(ctx, addr, a->imm);
+
+ tcg_gen_qemu_st_tl(data, addr, ctx->mem_idx, mop);
+ return true;
+}
+
+static bool gen_loadx(DisasContext *ctx, arg_rrr *a, MemOp mop)
+{
+ TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
+ TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
+ TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE);
+ TCGv addr = make_address_x(ctx, src1, src2);
+
+ tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, mop);
+ gen_set_gpr(a->rd, dest, EXT_NONE);
+
+ return true;
+}
+
+static bool gen_storex(DisasContext *ctx, arg_rrr *a, MemOp mop)
+{
+ TCGv data = gpr_src(ctx, a->rd, EXT_NONE);
+ TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
+ TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE);
+ TCGv addr = make_address_x(ctx, src1, src2);
+
+ tcg_gen_qemu_st_tl(data, addr, ctx->mem_idx, mop);
+
+ return true;
+}
+
+static bool gen_load_gt(DisasContext *ctx, arg_rrr *a, MemOp mop)
+{
+ TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
+ TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
+ TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE);
+
+ gen_helper_asrtgt_d(tcg_env, src1, src2);
+ src1 = make_address_i(ctx, src1, 0);
+ tcg_gen_qemu_ld_tl(dest, src1, ctx->mem_idx, mop);
+ gen_set_gpr(a->rd, dest, EXT_NONE);
+
+ return true;
+}
+
+static bool gen_load_le(DisasContext *ctx, arg_rrr *a, MemOp mop)
+{
+ TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
+ TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
+ TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE);
+
+ gen_helper_asrtle_d(tcg_env, src1, src2);
+ src1 = make_address_i(ctx, src1, 0);
+ tcg_gen_qemu_ld_tl(dest, src1, ctx->mem_idx, mop);
+ gen_set_gpr(a->rd, dest, EXT_NONE);
+
+ return true;
+}
+
+static bool gen_store_gt(DisasContext *ctx, arg_rrr *a, MemOp mop)
+{
+ TCGv data = gpr_src(ctx, a->rd, EXT_NONE);
+ TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
+ TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE);
+
+ gen_helper_asrtgt_d(tcg_env, src1, src2);
+ src1 = make_address_i(ctx, src1, 0);
+ tcg_gen_qemu_st_tl(data, src1, ctx->mem_idx, mop);
+
+ return true;
+}
+
+static bool gen_store_le(DisasContext *ctx, arg_rrr *a, MemOp mop)
+{
+ TCGv data = gpr_src(ctx, a->rd, EXT_NONE);
+ TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
+ TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE);
+
+ gen_helper_asrtle_d(tcg_env, src1, src2);
+ src1 = make_address_i(ctx, src1, 0);
+ tcg_gen_qemu_st_tl(data, src1, ctx->mem_idx, mop);
+
+ return true;
+}
+
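+/* The prefetch hint instructions are implemented as no-ops. */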
+static bool trans_preld(DisasContext *ctx, arg_preld *a)
+{
+ return true;
+}
+
+static bool trans_preldx(DisasContext *ctx, arg_preldx *a)
+{
+ return true;
+}
+
+static bool trans_dbar(DisasContext *ctx, arg_dbar *a)
+{
+ tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL);
+ return true;
+}
+
+static bool trans_ibar(DisasContext *ctx, arg_ibar *a)
+{
+ ctx->base.is_jmp = DISAS_STOP;
+ return true;
+}
+
+static bool gen_ldptr(DisasContext *ctx, arg_rr_i *a, MemOp mop)
+{
+ TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
+ TCGv addr = gpr_src(ctx, a->rj, EXT_NONE);
+
+ addr = make_address_i(ctx, addr, a->imm);
+
+ tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, mop);
+ gen_set_gpr(a->rd, dest, EXT_NONE);
+ return true;
+}
+
+static bool gen_stptr(DisasContext *ctx, arg_rr_i *a, MemOp mop)
+{
+ TCGv data = gpr_src(ctx, a->rd, EXT_NONE);
+ TCGv addr = gpr_src(ctx, a->rj, EXT_NONE);
+
+ addr = make_address_i(ctx, addr, a->imm);
+
+ tcg_gen_qemu_st_tl(data, addr, ctx->mem_idx, mop);
+ return true;
+}
+
+TRANS(ld_b, ALL, gen_load, MO_SB)
+TRANS(ld_h, ALL, gen_load, MO_TESW)
+TRANS(ld_w, ALL, gen_load, MO_TESL)
+TRANS(ld_d, 64, gen_load, MO_TEUQ)
+TRANS(st_b, ALL, gen_store, MO_UB)
+TRANS(st_h, ALL, gen_store, MO_TEUW)
+TRANS(st_w, ALL, gen_store, MO_TEUL)
+TRANS(st_d, 64, gen_store, MO_TEUQ)
+TRANS(ld_bu, ALL, gen_load, MO_UB)
+TRANS(ld_hu, ALL, gen_load, MO_TEUW)
+TRANS(ld_wu, 64, gen_load, MO_TEUL)
+TRANS(ldx_b, 64, gen_loadx, MO_SB)
+TRANS(ldx_h, 64, gen_loadx, MO_TESW)
+TRANS(ldx_w, 64, gen_loadx, MO_TESL)
+TRANS(ldx_d, 64, gen_loadx, MO_TEUQ)
+TRANS(stx_b, 64, gen_storex, MO_UB)
+TRANS(stx_h, 64, gen_storex, MO_TEUW)
+TRANS(stx_w, 64, gen_storex, MO_TEUL)
+TRANS(stx_d, 64, gen_storex, MO_TEUQ)
+TRANS(ldx_bu, 64, gen_loadx, MO_UB)
+TRANS(ldx_hu, 64, gen_loadx, MO_TEUW)
+TRANS(ldx_wu, 64, gen_loadx, MO_TEUL)
+TRANS(ldptr_w, 64, gen_ldptr, MO_TESL)
+TRANS(stptr_w, 64, gen_stptr, MO_TEUL)
+TRANS(ldptr_d, 64, gen_ldptr, MO_TEUQ)
+TRANS(stptr_d, 64, gen_stptr, MO_TEUQ)
+TRANS(ldgt_b, 64, gen_load_gt, MO_SB)
+TRANS(ldgt_h, 64, gen_load_gt, MO_TESW)
+TRANS(ldgt_w, 64, gen_load_gt, MO_TESL)
+TRANS(ldgt_d, 64, gen_load_gt, MO_TEUQ)
+TRANS(ldle_b, 64, gen_load_le, MO_SB)
+TRANS(ldle_h, 64, gen_load_le, MO_TESW)
+TRANS(ldle_w, 64, gen_load_le, MO_TESL)
+TRANS(ldle_d, 64, gen_load_le, MO_TEUQ)
+TRANS(stgt_b, 64, gen_store_gt, MO_UB)
+TRANS(stgt_h, 64, gen_store_gt, MO_TEUW)
+TRANS(stgt_w, 64, gen_store_gt, MO_TEUL)
+TRANS(stgt_d, 64, gen_store_gt, MO_TEUQ)
+TRANS(stle_b, 64, gen_store_le, MO_UB)
+TRANS(stle_h, 64, gen_store_le, MO_TEUW)
+TRANS(stle_w, 64, gen_store_le, MO_TEUL)
+TRANS(stle_d, 64, gen_store_le, MO_TEUQ)
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2021 Loongson Technology Corporation Limited
+ *
+ * LoongArch translation routines for the privileged instructions.
+ */
+
+#include "cpu-csr.h"
+
+#ifdef CONFIG_USER_ONLY
+
+#define GEN_FALSE_TRANS(name) \
+static bool trans_##name(DisasContext *ctx, arg_##name * a) \
+{ \
+ return false; \
+}
+
+GEN_FALSE_TRANS(csrrd)
+GEN_FALSE_TRANS(csrwr)
+GEN_FALSE_TRANS(csrxchg)
+GEN_FALSE_TRANS(iocsrrd_b)
+GEN_FALSE_TRANS(iocsrrd_h)
+GEN_FALSE_TRANS(iocsrrd_w)
+GEN_FALSE_TRANS(iocsrrd_d)
+GEN_FALSE_TRANS(iocsrwr_b)
+GEN_FALSE_TRANS(iocsrwr_h)
+GEN_FALSE_TRANS(iocsrwr_w)
+GEN_FALSE_TRANS(iocsrwr_d)
+GEN_FALSE_TRANS(tlbsrch)
+GEN_FALSE_TRANS(tlbrd)
+GEN_FALSE_TRANS(tlbwr)
+GEN_FALSE_TRANS(tlbfill)
+GEN_FALSE_TRANS(tlbclr)
+GEN_FALSE_TRANS(tlbflush)
+GEN_FALSE_TRANS(invtlb)
+GEN_FALSE_TRANS(cacop)
+GEN_FALSE_TRANS(ldpte)
+GEN_FALSE_TRANS(lddir)
+GEN_FALSE_TRANS(ertn)
+GEN_FALSE_TRANS(dbcl)
+GEN_FALSE_TRANS(idle)
+
+#else
+
+typedef void (*GenCSRRead)(TCGv dest, TCGv_ptr env);
+typedef void (*GenCSRWrite)(TCGv dest, TCGv_ptr env, TCGv src);
+
+typedef struct {
+ int offset;
+ int flags;
+ GenCSRRead readfn;
+ GenCSRWrite writefn;
+} CSRInfo;
+
+enum {
+ CSRFL_READONLY = (1 << 0),
+ CSRFL_EXITTB = (1 << 1),
+ CSRFL_IO = (1 << 2),
+};
+
+#define CSR_OFF_FUNCS(NAME, FL, RD, WR) \
+ [LOONGARCH_CSR_##NAME] = { \
+ .offset = offsetof(CPULoongArchState, CSR_##NAME), \
+ .flags = FL, .readfn = RD, .writefn = WR \
+ }
+
+#define CSR_OFF_ARRAY(NAME, N) \
+ [LOONGARCH_CSR_##NAME(N)] = { \
+ .offset = offsetof(CPULoongArchState, CSR_##NAME[N]), \
+ .flags = 0, .readfn = NULL, .writefn = NULL \
+ }
+
+#define CSR_OFF_FLAGS(NAME, FL) \
+ CSR_OFF_FUNCS(NAME, FL, NULL, NULL)
+
+#define CSR_OFF(NAME) \
+ CSR_OFF_FLAGS(NAME, 0)
+
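+/*
+ * CSR numbers without an entry here (offset 0) are undefined:
+ * reads return 0 and writes are ignored.
+ */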
+static const CSRInfo csr_info[] = {
+ CSR_OFF_FLAGS(CRMD, CSRFL_EXITTB),
+ CSR_OFF(PRMD),
+ CSR_OFF_FLAGS(EUEN, CSRFL_EXITTB),
+ CSR_OFF_FLAGS(MISC, CSRFL_READONLY),
+ CSR_OFF(ECFG),
+ CSR_OFF_FUNCS(ESTAT, CSRFL_EXITTB, NULL, gen_helper_csrwr_estat),
+ CSR_OFF(ERA),
+ CSR_OFF(BADV),
+ CSR_OFF_FLAGS(BADI, CSRFL_READONLY),
+ CSR_OFF(EENTRY),
+ CSR_OFF(TLBIDX),
+ CSR_OFF(TLBEHI),
+ CSR_OFF(TLBELO0),
+ CSR_OFF(TLBELO1),
+ CSR_OFF_FUNCS(ASID, CSRFL_EXITTB, NULL, gen_helper_csrwr_asid),
+ CSR_OFF(PGDL),
+ CSR_OFF(PGDH),
+ CSR_OFF_FUNCS(PGD, CSRFL_READONLY, gen_helper_csrrd_pgd, NULL),
+ CSR_OFF(PWCL),
+ CSR_OFF(PWCH),
+ CSR_OFF(STLBPS),
+ CSR_OFF(RVACFG),
+ CSR_OFF_FUNCS(CPUID, CSRFL_READONLY, gen_helper_csrrd_cpuid, NULL),
+ CSR_OFF_FLAGS(PRCFG1, CSRFL_READONLY),
+ CSR_OFF_FLAGS(PRCFG2, CSRFL_READONLY),
+ CSR_OFF_FLAGS(PRCFG3, CSRFL_READONLY),
+ CSR_OFF_ARRAY(SAVE, 0),
+ CSR_OFF_ARRAY(SAVE, 1),
+ CSR_OFF_ARRAY(SAVE, 2),
+ CSR_OFF_ARRAY(SAVE, 3),
+ CSR_OFF_ARRAY(SAVE, 4),
+ CSR_OFF_ARRAY(SAVE, 5),
+ CSR_OFF_ARRAY(SAVE, 6),
+ CSR_OFF_ARRAY(SAVE, 7),
+ CSR_OFF_ARRAY(SAVE, 8),
+ CSR_OFF_ARRAY(SAVE, 9),
+ CSR_OFF_ARRAY(SAVE, 10),
+ CSR_OFF_ARRAY(SAVE, 11),
+ CSR_OFF_ARRAY(SAVE, 12),
+ CSR_OFF_ARRAY(SAVE, 13),
+ CSR_OFF_ARRAY(SAVE, 14),
+ CSR_OFF_ARRAY(SAVE, 15),
+ CSR_OFF(TID),
+ CSR_OFF_FUNCS(TCFG, CSRFL_IO, NULL, gen_helper_csrwr_tcfg),
+ CSR_OFF_FUNCS(TVAL, CSRFL_READONLY | CSRFL_IO, gen_helper_csrrd_tval, NULL),
+ CSR_OFF(CNTC),
+ CSR_OFF_FUNCS(TICLR, CSRFL_IO, NULL, gen_helper_csrwr_ticlr),
+ CSR_OFF(LLBCTL),
+ CSR_OFF(IMPCTL1),
+ CSR_OFF(IMPCTL2),
+ CSR_OFF(TLBRENTRY),
+ CSR_OFF(TLBRBADV),
+ CSR_OFF(TLBRERA),
+ CSR_OFF(TLBRSAVE),
+ CSR_OFF(TLBRELO0),
+ CSR_OFF(TLBRELO1),
+ CSR_OFF(TLBREHI),
+ CSR_OFF(TLBRPRMD),
+ CSR_OFF(MERRCTL),
+ CSR_OFF(MERRINFO1),
+ CSR_OFF(MERRINFO2),
+ CSR_OFF(MERRENTRY),
+ CSR_OFF(MERRERA),
+ CSR_OFF(MERRSAVE),
+ CSR_OFF(CTAG),
+ CSR_OFF_ARRAY(DMW, 0),
+ CSR_OFF_ARRAY(DMW, 1),
+ CSR_OFF_ARRAY(DMW, 2),
+ CSR_OFF_ARRAY(DMW, 3),
+ CSR_OFF(DBG),
+ CSR_OFF(DERA),
+ CSR_OFF(DSAVE),
+};
+
+static bool check_plv(DisasContext *ctx)
+{
+ if (ctx->plv == MMU_PLV_USER) {
+ generate_exception(ctx, EXCCODE_IPE);
+ return true;
+ }
+ return false;
+}
+
+static const CSRInfo *get_csr(unsigned csr_num)
+{
+ const CSRInfo *csr;
+
+ if (csr_num >= ARRAY_SIZE(csr_info)) {
+ return NULL;
+ }
+ csr = &csr_info[csr_num];
+ if (csr->offset == 0) {
+ return NULL;
+ }
+ return csr;
+}
+
+static bool check_csr_flags(DisasContext *ctx, const CSRInfo *csr, bool write)
+{
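+ /*
+ * Refuse writes to read-only CSRs; for CSRs that touch I/O state or
+ * invalidate the current translation, force an exit from the TB.
+ */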
+ if ((csr->flags & CSRFL_READONLY) && write) {
+ return false;
+ }
+ if ((csr->flags & CSRFL_IO) && translator_io_start(&ctx->base)) {
+ ctx->base.is_jmp = DISAS_EXIT_UPDATE;
+ } else if ((csr->flags & CSRFL_EXITTB) && write) {
+ ctx->base.is_jmp = DISAS_EXIT_UPDATE;
+ }
+ return true;
+}
+
+static bool trans_csrrd(DisasContext *ctx, arg_csrrd *a)
+{
+ TCGv dest;
+ const CSRInfo *csr;
+
+ if (check_plv(ctx)) {
+ return false;
+ }
+ csr = get_csr(a->csr);
+ if (csr == NULL) {
+ /* CSR is undefined: read as 0. */
+ dest = tcg_constant_tl(0);
+ } else {
+ check_csr_flags(ctx, csr, false);
+ dest = gpr_dst(ctx, a->rd, EXT_NONE);
+ if (csr->readfn) {
+ csr->readfn(dest, tcg_env);
+ } else {
+ tcg_gen_ld_tl(dest, tcg_env, csr->offset);
+ }
+ }
+ gen_set_gpr(a->rd, dest, EXT_NONE);
+ return true;
+}
+
+static bool trans_csrwr(DisasContext *ctx, arg_csrwr *a)
+{
+ TCGv dest, src1;
+ const CSRInfo *csr;
+
+ if (check_plv(ctx)) {
+ return false;
+ }
+ csr = get_csr(a->csr);
+ if (csr == NULL) {
+ /* CSR is undefined: write ignored, read old_value as 0. */
+ gen_set_gpr(a->rd, tcg_constant_tl(0), EXT_NONE);
+ return true;
+ }
+ if (!check_csr_flags(ctx, csr, true)) {
+ /* CSR is readonly: trap. */
+ return false;
+ }
+ src1 = gpr_src(ctx, a->rd, EXT_NONE);
+ if (csr->writefn) {
+ dest = gpr_dst(ctx, a->rd, EXT_NONE);
+ csr->writefn(dest, tcg_env, src1);
+ } else {
+ dest = tcg_temp_new();
+ tcg_gen_ld_tl(dest, tcg_env, csr->offset);
+ tcg_gen_st_tl(src1, tcg_env, csr->offset);
+ }
+ gen_set_gpr(a->rd, dest, EXT_NONE);
+ return true;
+}
+
+static bool trans_csrxchg(DisasContext *ctx, arg_csrxchg *a)
+{
+ TCGv src1, mask, oldv, newv, temp;
+ const CSRInfo *csr;
+
+ if (check_plv(ctx)) {
+ return false;
+ }
+ csr = get_csr(a->csr);
+ if (csr == NULL) {
+ /* CSR is undefined: write ignored, read old_value as 0. */
+ gen_set_gpr(a->rd, tcg_constant_tl(0), EXT_NONE);
+ return true;
+ }
+
+ if (!check_csr_flags(ctx, csr, true)) {
+ /* CSR is readonly: trap. */
+ return false;
+ }
+
+ /* So far only readonly csrs have readfn. */
+ assert(csr->readfn == NULL);
+
+ src1 = gpr_src(ctx, a->rd, EXT_NONE);
+ mask = gpr_src(ctx, a->rj, EXT_NONE);
+ oldv = tcg_temp_new();
+ newv = tcg_temp_new();
+ temp = tcg_temp_new();
+
+ tcg_gen_ld_tl(oldv, tcg_env, csr->offset);
+ tcg_gen_and_tl(newv, src1, mask);
+ tcg_gen_andc_tl(temp, oldv, mask);
+ tcg_gen_or_tl(newv, newv, temp);
+
+ if (csr->writefn) {
+ csr->writefn(oldv, tcg_env, newv);
+ } else {
+ tcg_gen_st_tl(newv, tcg_env, csr->offset);
+ }
+ gen_set_gpr(a->rd, oldv, EXT_NONE);
+ return true;
+}
+
+static bool gen_iocsrrd(DisasContext *ctx, arg_rr *a,
+ void (*func)(TCGv, TCGv_ptr, TCGv))
+{
+ TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
+ TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
+
+ if (check_plv(ctx)) {
+ return false;
+ }
+ func(dest, tcg_env, src1);
+ return true;
+}
+
+static bool gen_iocsrwr(DisasContext *ctx, arg_rr *a,
+ void (*func)(TCGv_ptr, TCGv, TCGv))
+{
+ TCGv val = gpr_src(ctx, a->rd, EXT_NONE);
+ TCGv addr = gpr_src(ctx, a->rj, EXT_NONE);
+
+ if (check_plv(ctx)) {
+ return false;
+ }
+ func(tcg_env, addr, val);
+ return true;
+}
+
+TRANS(iocsrrd_b, IOCSR, gen_iocsrrd, gen_helper_iocsrrd_b)
+TRANS(iocsrrd_h, IOCSR, gen_iocsrrd, gen_helper_iocsrrd_h)
+TRANS(iocsrrd_w, IOCSR, gen_iocsrrd, gen_helper_iocsrrd_w)
+TRANS(iocsrrd_d, IOCSR, gen_iocsrrd, gen_helper_iocsrrd_d)
+TRANS(iocsrwr_b, IOCSR, gen_iocsrwr, gen_helper_iocsrwr_b)
+TRANS(iocsrwr_h, IOCSR, gen_iocsrwr, gen_helper_iocsrwr_h)
+TRANS(iocsrwr_w, IOCSR, gen_iocsrwr, gen_helper_iocsrwr_w)
+TRANS(iocsrwr_d, IOCSR, gen_iocsrwr, gen_helper_iocsrwr_d)
+
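+/*
+ * TLB maintenance can change the mapping in use. Outside direct address
+ * translation mode, end the TB so execution resumes at the next instruction
+ * with the updated MMU state.
+ */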
+static void check_mmu_idx(DisasContext *ctx)
+{
+ if (ctx->mem_idx != MMU_IDX_DA) {
+ tcg_gen_movi_tl(cpu_pc, ctx->base.pc_next + 4);
+ ctx->base.is_jmp = DISAS_EXIT;
+ }
+}
+
+static bool trans_tlbsrch(DisasContext *ctx, arg_tlbsrch *a)
+{
+ if (check_plv(ctx)) {
+ return false;
+ }
+ gen_helper_tlbsrch(tcg_env);
+ return true;
+}
+
+static bool trans_tlbrd(DisasContext *ctx, arg_tlbrd *a)
+{
+ if (check_plv(ctx)) {
+ return false;
+ }
+ gen_helper_tlbrd(tcg_env);
+ return true;
+}
+
+static bool trans_tlbwr(DisasContext *ctx, arg_tlbwr *a)
+{
+ if (check_plv(ctx)) {
+ return false;
+ }
+ gen_helper_tlbwr(tcg_env);
+ check_mmu_idx(ctx);
+ return true;
+}
+
+static bool trans_tlbfill(DisasContext *ctx, arg_tlbfill *a)
+{
+ if (check_plv(ctx)) {
+ return false;
+ }
+ gen_helper_tlbfill(tcg_env);
+ check_mmu_idx(ctx);
+ return true;
+}
+
+static bool trans_tlbclr(DisasContext *ctx, arg_tlbclr *a)
+{
+ if (check_plv(ctx)) {
+ return false;
+ }
+ gen_helper_tlbclr(tcg_env);
+ check_mmu_idx(ctx);
+ return true;
+}
+
+static bool trans_tlbflush(DisasContext *ctx, arg_tlbflush *a)
+{
+ if (check_plv(ctx)) {
+ return false;
+ }
+ gen_helper_tlbflush(tcg_env);
+ check_mmu_idx(ctx);
+ return true;
+}
+
+static bool trans_invtlb(DisasContext *ctx, arg_invtlb *a)
+{
+ TCGv rj = gpr_src(ctx, a->rj, EXT_NONE);
+ TCGv rk = gpr_src(ctx, a->rk, EXT_NONE);
+
+ if (check_plv(ctx)) {
+ return false;
+ }
+
+ switch (a->imm) {
+ case 0:
+ case 1:
+ gen_helper_invtlb_all(tcg_env);
+ break;
+ case 2:
+ gen_helper_invtlb_all_g(tcg_env, tcg_constant_i32(1));
+ break;
+ case 3:
+ gen_helper_invtlb_all_g(tcg_env, tcg_constant_i32(0));
+ break;
+ case 4:
+ gen_helper_invtlb_all_asid(tcg_env, rj);
+ break;
+ case 5:
+ gen_helper_invtlb_page_asid(tcg_env, rj, rk);
+ break;
+ case 6:
+ gen_helper_invtlb_page_asid_or_g(tcg_env, rj, rk);
+ break;
+ default:
+ return false;
+ }
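+ /* End the TB: the invalidation may affect the current address mapping. */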
+ ctx->base.is_jmp = DISAS_STOP;
+ return true;
+}
+
+static bool trans_cacop(DisasContext *ctx, arg_cacop *a)
+{
+ /* Treat the cacop as a nop */
+ if (check_plv(ctx)) {
+ return false;
+ }
+ return true;
+}
+
+static bool trans_ldpte(DisasContext *ctx, arg_ldpte *a)
+{
+ TCGv_i32 mem_idx = tcg_constant_i32(ctx->mem_idx);
+ TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
+
+ if (!avail_LSPW(ctx)) {
+ return true;
+ }
+
+ if (check_plv(ctx)) {
+ return false;
+ }
+ gen_helper_ldpte(tcg_env, src1, tcg_constant_tl(a->imm), mem_idx);
+ return true;
+}
+
+static bool trans_lddir(DisasContext *ctx, arg_lddir *a)
+{
+ TCGv_i32 mem_idx = tcg_constant_i32(ctx->mem_idx);
+ TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
+ TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
+
+ if (!avail_LSPW(ctx)) {
+ return true;
+ }
+
+ if (check_plv(ctx)) {
+ return false;
+ }
+ gen_helper_lddir(dest, tcg_env, src, tcg_constant_tl(a->imm), mem_idx);
+ return true;
+}
+
+static bool trans_ertn(DisasContext *ctx, arg_ertn *a)
+{
+ if (check_plv(ctx)) {
+ return false;
+ }
+ gen_helper_ertn(tcg_env);
+ ctx->base.is_jmp = DISAS_EXIT;
+ return true;
+}
+
+static bool trans_dbcl(DisasContext *ctx, arg_dbcl *a)
+{
+ if (check_plv(ctx)) {
+ return false;
+ }
+ generate_exception(ctx, EXCCODE_DBP);
+ return true;
+}
+
+static bool trans_idle(DisasContext *ctx, arg_idle *a)
+{
+ if (check_plv(ctx)) {
+ return false;
+ }
+
+ tcg_gen_movi_tl(cpu_pc, ctx->base.pc_next + 4);
+ gen_helper_idle(tcg_env);
+ ctx->base.is_jmp = DISAS_NORETURN;
+ return true;
+}
+#endif
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2021 Loongson Technology Corporation Limited
+ */
+
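+/* Shift and rotate amounts are masked: low 5 bits for 32-bit ops, low 6 bits for 64-bit ops. */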
+static void gen_sll_w(TCGv dest, TCGv src1, TCGv src2)
+{
+ TCGv t0 = tcg_temp_new();
+ tcg_gen_andi_tl(t0, src2, 0x1f);
+ tcg_gen_shl_tl(dest, src1, t0);
+}
+
+static void gen_srl_w(TCGv dest, TCGv src1, TCGv src2)
+{
+ TCGv t0 = tcg_temp_new();
+ tcg_gen_andi_tl(t0, src2, 0x1f);
+ tcg_gen_shr_tl(dest, src1, t0);
+}
+
+static void gen_sra_w(TCGv dest, TCGv src1, TCGv src2)
+{
+ TCGv t0 = tcg_temp_new();
+ tcg_gen_andi_tl(t0, src2, 0x1f);
+ tcg_gen_sar_tl(dest, src1, t0);
+}
+
+static void gen_sll_d(TCGv dest, TCGv src1, TCGv src2)
+{
+ TCGv t0 = tcg_temp_new();
+ tcg_gen_andi_tl(t0, src2, 0x3f);
+ tcg_gen_shl_tl(dest, src1, t0);
+}
+
+static void gen_srl_d(TCGv dest, TCGv src1, TCGv src2)
+{
+ TCGv t0 = tcg_temp_new();
+ tcg_gen_andi_tl(t0, src2, 0x3f);
+ tcg_gen_shr_tl(dest, src1, t0);
+}
+
+static void gen_sra_d(TCGv dest, TCGv src1, TCGv src2)
+{
+ TCGv t0 = tcg_temp_new();
+ tcg_gen_andi_tl(t0, src2, 0x3f);
+ tcg_gen_sar_tl(dest, src1, t0);
+}
+
+static void gen_rotr_w(TCGv dest, TCGv src1, TCGv src2)
+{
+ TCGv_i32 t1 = tcg_temp_new_i32();
+ TCGv_i32 t2 = tcg_temp_new_i32();
+ TCGv t0 = tcg_temp_new();
+
+ tcg_gen_andi_tl(t0, src2, 0x1f);
+
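+ /* Rotate at 32-bit width, then sign-extend the result to target_ulong. */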
+ tcg_gen_trunc_tl_i32(t1, src1);
+ tcg_gen_trunc_tl_i32(t2, t0);
+
+ tcg_gen_rotr_i32(t1, t1, t2);
+ tcg_gen_ext_i32_tl(dest, t1);
+}
+
+static void gen_rotr_d(TCGv dest, TCGv src1, TCGv src2)
+{
+ TCGv t0 = tcg_temp_new();
+ tcg_gen_andi_tl(t0, src2, 0x3f);
+ tcg_gen_rotr_tl(dest, src1, t0);
+}
+
+static bool trans_srai_w(DisasContext *ctx, arg_srai_w *a)
+{
+ TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE);
+ TCGv src1 = gpr_src(ctx, a->rj, EXT_ZERO);
+
+ if (!avail_64(ctx)) {
+ return false;
+ }
+
+ tcg_gen_sextract_tl(dest, src1, a->imm, 32 - a->imm);
+ gen_set_gpr(a->rd, dest, EXT_NONE);
+
+ return true;
+}
+
+TRANS(sll_w, ALL, gen_rrr, EXT_ZERO, EXT_NONE, EXT_SIGN, gen_sll_w)
+TRANS(srl_w, ALL, gen_rrr, EXT_ZERO, EXT_NONE, EXT_SIGN, gen_srl_w)
+TRANS(sra_w, ALL, gen_rrr, EXT_SIGN, EXT_NONE, EXT_SIGN, gen_sra_w)
+TRANS(sll_d, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_sll_d)
+TRANS(srl_d, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_srl_d)
+TRANS(sra_d, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_sra_d)
+TRANS(rotr_w, 64, gen_rrr, EXT_ZERO, EXT_NONE, EXT_SIGN, gen_rotr_w)
+TRANS(rotr_d, 64, gen_rrr, EXT_NONE, EXT_NONE, EXT_NONE, gen_rotr_d)
+TRANS(slli_w, ALL, gen_rri_c, EXT_NONE, EXT_SIGN, tcg_gen_shli_tl)
+TRANS(slli_d, 64, gen_rri_c, EXT_NONE, EXT_NONE, tcg_gen_shli_tl)
+TRANS(srli_w, ALL, gen_rri_c, EXT_ZERO, EXT_SIGN, tcg_gen_shri_tl)
+TRANS(srli_d, 64, gen_rri_c, EXT_NONE, EXT_NONE, tcg_gen_shri_tl)
+TRANS(srai_d, 64, gen_rri_c, EXT_NONE, EXT_NONE, tcg_gen_sari_tl)
+TRANS(rotri_w, 64, gen_rri_v, EXT_NONE, EXT_NONE, gen_rotr_w)
+TRANS(rotri_d, 64, gen_rri_c, EXT_NONE, EXT_NONE, tcg_gen_rotri_tl)
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * LoongArch vector translate functions
+ * Copyright (c) 2022-2023 Loongson Technology Corporation Limited
+ */
+
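+/*
+ * 128-bit (LSX) and 256-bit (LASX) operations are only legal when the
+ * corresponding EUEN.SXE / EUEN.ASXE enable bit is set; otherwise raise
+ * the SXD / ASXD exception.
+ */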
+static bool check_vec(DisasContext *ctx, uint32_t oprsz)
+{
+ if ((oprsz == 16) && ((ctx->base.tb->flags & HW_FLAGS_EUEN_SXE) == 0)) {
+ generate_exception(ctx, EXCCODE_SXD);
+ return false;
+ }
+
+ if ((oprsz == 32) && ((ctx->base.tb->flags & HW_FLAGS_EUEN_ASXE) == 0)) {
+ generate_exception(ctx, EXCCODE_ASXD);
+ return false;
+ }
+
+ return true;
+}
+
+static bool gen_vvvv_ptr_vl(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz,
+ gen_helper_gvec_4_ptr *fn)
+{
+ if (!check_vec(ctx, oprsz)) {
+ return true;
+ }
+
+ tcg_gen_gvec_4_ptr(vec_full_offset(a->vd),
+ vec_full_offset(a->vj),
+ vec_full_offset(a->vk),
+ vec_full_offset(a->va),
+ tcg_env,
+ oprsz, ctx->vl / 8, 0, fn);
+ return true;
+}
+
+static bool gen_vvvv_ptr(DisasContext *ctx, arg_vvvv *a,
+ gen_helper_gvec_4_ptr *fn)
+{
+ return gen_vvvv_ptr_vl(ctx, a, 16, fn);
+}
+
+static bool gen_xxxx_ptr(DisasContext *ctx, arg_vvvv *a,
+ gen_helper_gvec_4_ptr *fn)
+{
+ return gen_vvvv_ptr_vl(ctx, a, 32, fn);
+}
+
+static bool gen_vvvv_vl(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz,
+ gen_helper_gvec_4 *fn)
+{
+ if (!check_vec(ctx, oprsz)) {
+ return true;
+ }
+
+ tcg_gen_gvec_4_ool(vec_full_offset(a->vd),
+ vec_full_offset(a->vj),
+ vec_full_offset(a->vk),
+ vec_full_offset(a->va),
+ oprsz, ctx->vl / 8, 0, fn);
+ return true;
+}
+
+static bool gen_vvvv(DisasContext *ctx, arg_vvvv *a,
+ gen_helper_gvec_4 *fn)
+{
+ return gen_vvvv_vl(ctx, a, 16, fn);
+}
+
+static bool gen_xxxx(DisasContext *ctx, arg_vvvv *a,
+ gen_helper_gvec_4 *fn)
+{
+ return gen_vvvv_vl(ctx, a, 32, fn);
+}
+
+static bool gen_vvv_ptr_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz,
+ gen_helper_gvec_3_ptr *fn)
+{
+ if (!check_vec(ctx, oprsz)) {
+ return true;
+ }
+ tcg_gen_gvec_3_ptr(vec_full_offset(a->vd),
+ vec_full_offset(a->vj),
+ vec_full_offset(a->vk),
+ tcg_env,
+ oprsz, ctx->vl / 8, 0, fn);
+ return true;
+}
+
+static bool gen_vvv_ptr(DisasContext *ctx, arg_vvv *a,
+ gen_helper_gvec_3_ptr *fn)
+{
+ return gen_vvv_ptr_vl(ctx, a, 16, fn);
+}
+
+static bool gen_xxx_ptr(DisasContext *ctx, arg_vvv *a,
+ gen_helper_gvec_3_ptr *fn)
+{
+ return gen_vvv_ptr_vl(ctx, a, 32, fn);
+}
+
+static bool gen_vvv_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz,
+ gen_helper_gvec_3 *fn)
+{
+ if (!check_vec(ctx, oprsz)) {
+ return true;
+ }
+
+ tcg_gen_gvec_3_ool(vec_full_offset(a->vd),
+ vec_full_offset(a->vj),
+ vec_full_offset(a->vk),
+ oprsz, ctx->vl / 8, 0, fn);
+ return true;
+}
+
+static bool gen_vvv(DisasContext *ctx, arg_vvv *a, gen_helper_gvec_3 *fn)
+{
+ return gen_vvv_vl(ctx, a, 16, fn);
+}
+
+static bool gen_xxx(DisasContext *ctx, arg_vvv *a, gen_helper_gvec_3 *fn)
+{
+ return gen_vvv_vl(ctx, a, 32, fn);
+}
+
+static bool gen_vv_ptr_vl(DisasContext *ctx, arg_vv *a, uint32_t oprsz,
+ gen_helper_gvec_2_ptr *fn)
+{
+ if (!check_vec(ctx, oprsz)) {
+ return true;
+ }
+
+ tcg_gen_gvec_2_ptr(vec_full_offset(a->vd),
+ vec_full_offset(a->vj),
+ tcg_env,
+ oprsz, ctx->vl / 8, 0, fn);
+ return true;
+}
+
+static bool gen_vv_ptr(DisasContext *ctx, arg_vv *a,
+ gen_helper_gvec_2_ptr *fn)
+{
+ return gen_vv_ptr_vl(ctx, a, 16, fn);
+}
+
+static bool gen_xx_ptr(DisasContext *ctx, arg_vv *a,
+ gen_helper_gvec_2_ptr *fn)
+{
+ return gen_vv_ptr_vl(ctx, a, 32, fn);
+}
+
+static bool gen_vv_vl(DisasContext *ctx, arg_vv *a, uint32_t oprsz,
+ gen_helper_gvec_2 *fn)
+{
+ if (!check_vec(ctx, oprsz)) {
+ return true;
+ }
+
+ tcg_gen_gvec_2_ool(vec_full_offset(a->vd),
+ vec_full_offset(a->vj),
+ oprsz, ctx->vl / 8, 0, fn);
+ return true;
+}
+
+static bool gen_vv(DisasContext *ctx, arg_vv *a, gen_helper_gvec_2 *fn)
+{
+ return gen_vv_vl(ctx, a, 16, fn);
+}
+
+static bool gen_xx(DisasContext *ctx, arg_vv *a, gen_helper_gvec_2 *fn)
+{
+ return gen_vv_vl(ctx, a, 32, fn);
+}
+
+static bool gen_vv_i_vl(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz,
+ gen_helper_gvec_2i *fn)
+{
+ if (!check_vec(ctx, oprsz)) {
+ return true;
+ }
+
+ tcg_gen_gvec_2i_ool(vec_full_offset(a->vd),
+ vec_full_offset(a->vj),
+ tcg_constant_i64(a->imm),
+ oprsz, ctx->vl / 8, 0, fn);
+ return true;
+}
+
+static bool gen_vv_i(DisasContext *ctx, arg_vv_i *a, gen_helper_gvec_2i *fn)
+{
+ return gen_vv_i_vl(ctx, a, 16, fn);
+}
+
+static bool gen_xx_i(DisasContext *ctx, arg_vv_i *a, gen_helper_gvec_2i *fn)
+{
+ return gen_vv_i_vl(ctx, a, 32, fn);
+}
+
+static bool gen_cv_vl(DisasContext *ctx, arg_cv *a, uint32_t sz,
+ void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32))
+{
+ if (!check_vec(ctx, sz)) {
+ return true;
+ }
+
+ TCGv_i32 vj = tcg_constant_i32(a->vj);
+ TCGv_i32 cd = tcg_constant_i32(a->cd);
+ TCGv_i32 oprsz = tcg_constant_i32(sz);
+
+ func(tcg_env, oprsz, cd, vj);
+ return true;
+}
+
+static bool gen_cv(DisasContext *ctx, arg_cv *a,
+ void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32))
+{
+ return gen_cv_vl(ctx, a, 16, func);
+}
+
+static bool gen_cx(DisasContext *ctx, arg_cv *a,
+ void (*func)(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32))
+{
+ return gen_cv_vl(ctx, a, 32, func);
+}
+
+static bool gvec_vvv_vl(DisasContext *ctx, arg_vvv *a,
+ uint32_t oprsz, MemOp mop,
+ void (*func)(unsigned, uint32_t, uint32_t,
+ uint32_t, uint32_t, uint32_t))
+{
+ uint32_t vd_ofs = vec_full_offset(a->vd);
+ uint32_t vj_ofs = vec_full_offset(a->vj);
+ uint32_t vk_ofs = vec_full_offset(a->vk);
+
+ if (!check_vec(ctx, oprsz)) {
+ return true;
+ }
+
+ func(mop, vd_ofs, vj_ofs, vk_ofs, oprsz, ctx->vl / 8);
+ return true;
+}
+
+static bool gvec_vvv(DisasContext *ctx, arg_vvv *a, MemOp mop,
+ void (*func)(unsigned, uint32_t, uint32_t,
+ uint32_t, uint32_t, uint32_t))
+{
+ return gvec_vvv_vl(ctx, a, 16, mop, func);
+}
+
+static bool gvec_xxx(DisasContext *ctx, arg_vvv *a, MemOp mop,
+ void (*func)(unsigned, uint32_t, uint32_t,
+ uint32_t, uint32_t, uint32_t))
+{
+ return gvec_vvv_vl(ctx, a, 32, mop, func);
+}
+
+static bool gvec_vv_vl(DisasContext *ctx, arg_vv *a,
+ uint32_t oprsz, MemOp mop,
+ void (*func)(unsigned, uint32_t, uint32_t,
+ uint32_t, uint32_t))
+{
+ uint32_t vd_ofs = vec_full_offset(a->vd);
+ uint32_t vj_ofs = vec_full_offset(a->vj);
+
+ if (!check_vec(ctx, oprsz)) {
+ return true;
+ }
+
+ func(mop, vd_ofs, vj_ofs, oprsz, ctx->vl / 8);
+ return true;
+}
+
+static bool gvec_vv(DisasContext *ctx, arg_vv *a, MemOp mop,
+ void (*func)(unsigned, uint32_t, uint32_t,
+ uint32_t, uint32_t))
+{
+ return gvec_vv_vl(ctx, a, 16, mop, func);
+}
+
+static bool gvec_xx(DisasContext *ctx, arg_vv *a, MemOp mop,
+ void (*func)(unsigned, uint32_t, uint32_t,
+ uint32_t, uint32_t))
+{
+ return gvec_vv_vl(ctx, a, 32, mop, func);
+}
+
+static bool gvec_vv_i_vl(DisasContext *ctx, arg_vv_i *a,
+ uint32_t oprsz, MemOp mop,
+ void (*func)(unsigned, uint32_t, uint32_t,
+ int64_t, uint32_t, uint32_t))
+{
+ uint32_t vd_ofs = vec_full_offset(a->vd);
+ uint32_t vj_ofs = vec_full_offset(a->vj);
+
+ if (!check_vec(ctx, oprsz)) {
+ return true;
+ }
+
+ func(mop, vd_ofs, vj_ofs, a->imm, oprsz, ctx->vl / 8);
+ return true;
+}
+
+static bool gvec_vv_i(DisasContext *ctx, arg_vv_i *a, MemOp mop,
+ void (*func)(unsigned, uint32_t, uint32_t,
+ int64_t, uint32_t, uint32_t))
+{
+ return gvec_vv_i_vl(ctx, a, 16, mop, func);
+}
+
+static bool gvec_xx_i(DisasContext *ctx, arg_vv_i *a, MemOp mop,
+ void (*func)(unsigned, uint32_t, uint32_t,
+ int64_t, uint32_t, uint32_t))
+{
+ return gvec_vv_i_vl(ctx, a, 32, mop, func);
+}
+
+static bool gvec_subi_vl(DisasContext *ctx, arg_vv_i *a,
+ uint32_t oprsz, MemOp mop)
+{
+ uint32_t vd_ofs = vec_full_offset(a->vd);
+ uint32_t vj_ofs = vec_full_offset(a->vj);
+
+ if (!check_vec(ctx, oprsz)) {
+ return true;
+ }
+
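+ /* Subtract the immediate by adding its negation. */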
+ tcg_gen_gvec_addi(mop, vd_ofs, vj_ofs, -a->imm, oprsz, ctx->vl / 8);
+ return true;
+}
+
+static bool gvec_subi(DisasContext *ctx, arg_vv_i *a, MemOp mop)
+{
+ return gvec_subi_vl(ctx, a, 16, mop);
+}
+
+static bool gvec_xsubi(DisasContext *ctx, arg_vv_i *a, MemOp mop)
+{
+ return gvec_subi_vl(ctx, a, 32, mop);
+}
+
+TRANS(vadd_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_add)
+TRANS(vadd_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_add)
+TRANS(vadd_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_add)
+TRANS(vadd_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_add)
+TRANS(xvadd_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_add)
+TRANS(xvadd_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_add)
+TRANS(xvadd_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_add)
+TRANS(xvadd_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_add)
+
+static bool gen_vaddsub_q_vl(DisasContext *ctx, arg_vvv *a, uint32_t oprsz,
+ void (*func)(TCGv_i64, TCGv_i64, TCGv_i64,
+ TCGv_i64, TCGv_i64, TCGv_i64))
+{
+ int i;
+ TCGv_i64 rh, rl, ah, al, bh, bl;
+
+ if (!check_vec(ctx, oprsz)) {
+ return true;
+ }
+
+ rh = tcg_temp_new_i64();
+ rl = tcg_temp_new_i64();
+ ah = tcg_temp_new_i64();
+ al = tcg_temp_new_i64();
+ bh = tcg_temp_new_i64();
+ bl = tcg_temp_new_i64();
+
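+ /* Each 128-bit element is processed as a low/high pair of 64-bit halves. */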
+ for (i = 0; i < oprsz / 16; i++) {
+ get_vreg64(ah, a->vj, 1 + i * 2);
+ get_vreg64(al, a->vj, i * 2);
+ get_vreg64(bh, a->vk, 1 + i * 2);
+ get_vreg64(bl, a->vk, i * 2);
+
+ func(rl, rh, al, ah, bl, bh);
+
+ set_vreg64(rh, a->vd, 1 + i * 2);
+ set_vreg64(rl, a->vd, i * 2);
+ }
+ return true;
+}
+
+static bool gen_vaddsub_q(DisasContext *ctx, arg_vvv *a,
+ void (*func)(TCGv_i64, TCGv_i64, TCGv_i64,
+ TCGv_i64, TCGv_i64, TCGv_i64))
+{
+ return gen_vaddsub_q_vl(ctx, a, 16, func);
+}
+
+static bool gen_xvaddsub_q(DisasContext *ctx, arg_vvv *a,
+ void (*func)(TCGv_i64, TCGv_i64, TCGv_i64,
+ TCGv_i64, TCGv_i64, TCGv_i64))
+{
+ return gen_vaddsub_q_vl(ctx, a, 32, func);
+}
+
+TRANS(vsub_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_sub)
+TRANS(vsub_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_sub)
+TRANS(vsub_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_sub)
+TRANS(vsub_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_sub)
+TRANS(xvsub_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_sub)
+TRANS(xvsub_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_sub)
+TRANS(xvsub_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_sub)
+TRANS(xvsub_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_sub)
+
+TRANS(vadd_q, LSX, gen_vaddsub_q, tcg_gen_add2_i64)
+TRANS(vsub_q, LSX, gen_vaddsub_q, tcg_gen_sub2_i64)
+TRANS(xvadd_q, LASX, gen_xvaddsub_q, tcg_gen_add2_i64)
+TRANS(xvsub_q, LASX, gen_xvaddsub_q, tcg_gen_sub2_i64)
+
+TRANS(vaddi_bu, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_addi)
+TRANS(vaddi_hu, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_addi)
+TRANS(vaddi_wu, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_addi)
+TRANS(vaddi_du, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_addi)
+TRANS(vsubi_bu, LSX, gvec_subi, MO_8)
+TRANS(vsubi_hu, LSX, gvec_subi, MO_16)
+TRANS(vsubi_wu, LSX, gvec_subi, MO_32)
+TRANS(vsubi_du, LSX, gvec_subi, MO_64)
+TRANS(xvaddi_bu, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_addi)
+TRANS(xvaddi_hu, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_addi)
+TRANS(xvaddi_wu, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_addi)
+TRANS(xvaddi_du, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_addi)
+TRANS(xvsubi_bu, LASX, gvec_xsubi, MO_8)
+TRANS(xvsubi_hu, LASX, gvec_xsubi, MO_16)
+TRANS(xvsubi_wu, LASX, gvec_xsubi, MO_32)
+TRANS(xvsubi_du, LASX, gvec_xsubi, MO_64)
+
+TRANS(vneg_b, LSX, gvec_vv, MO_8, tcg_gen_gvec_neg)
+TRANS(vneg_h, LSX, gvec_vv, MO_16, tcg_gen_gvec_neg)
+TRANS(vneg_w, LSX, gvec_vv, MO_32, tcg_gen_gvec_neg)
+TRANS(vneg_d, LSX, gvec_vv, MO_64, tcg_gen_gvec_neg)
+TRANS(xvneg_b, LASX, gvec_xx, MO_8, tcg_gen_gvec_neg)
+TRANS(xvneg_h, LASX, gvec_xx, MO_16, tcg_gen_gvec_neg)
+TRANS(xvneg_w, LASX, gvec_xx, MO_32, tcg_gen_gvec_neg)
+TRANS(xvneg_d, LASX, gvec_xx, MO_64, tcg_gen_gvec_neg)
+
+TRANS(vsadd_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_ssadd)
+TRANS(vsadd_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_ssadd)
+TRANS(vsadd_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_ssadd)
+TRANS(vsadd_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_ssadd)
+TRANS(vsadd_bu, LSX, gvec_vvv, MO_8, tcg_gen_gvec_usadd)
+TRANS(vsadd_hu, LSX, gvec_vvv, MO_16, tcg_gen_gvec_usadd)
+TRANS(vsadd_wu, LSX, gvec_vvv, MO_32, tcg_gen_gvec_usadd)
+TRANS(vsadd_du, LSX, gvec_vvv, MO_64, tcg_gen_gvec_usadd)
+TRANS(vssub_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_sssub)
+TRANS(vssub_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_sssub)
+TRANS(vssub_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_sssub)
+TRANS(vssub_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_sssub)
+TRANS(vssub_bu, LSX, gvec_vvv, MO_8, tcg_gen_gvec_ussub)
+TRANS(vssub_hu, LSX, gvec_vvv, MO_16, tcg_gen_gvec_ussub)
+TRANS(vssub_wu, LSX, gvec_vvv, MO_32, tcg_gen_gvec_ussub)
+TRANS(vssub_du, LSX, gvec_vvv, MO_64, tcg_gen_gvec_ussub)
+
+TRANS(xvsadd_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_ssadd)
+TRANS(xvsadd_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_ssadd)
+TRANS(xvsadd_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_ssadd)
+TRANS(xvsadd_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_ssadd)
+TRANS(xvsadd_bu, LASX, gvec_xxx, MO_8, tcg_gen_gvec_usadd)
+TRANS(xvsadd_hu, LASX, gvec_xxx, MO_16, tcg_gen_gvec_usadd)
+TRANS(xvsadd_wu, LASX, gvec_xxx, MO_32, tcg_gen_gvec_usadd)
+TRANS(xvsadd_du, LASX, gvec_xxx, MO_64, tcg_gen_gvec_usadd)
+TRANS(xvssub_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_sssub)
+TRANS(xvssub_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_sssub)
+TRANS(xvssub_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_sssub)
+TRANS(xvssub_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_sssub)
+TRANS(xvssub_bu, LASX, gvec_xxx, MO_8, tcg_gen_gvec_ussub)
+TRANS(xvssub_hu, LASX, gvec_xxx, MO_16, tcg_gen_gvec_ussub)
+TRANS(xvssub_wu, LASX, gvec_xxx, MO_32, tcg_gen_gvec_ussub)
+TRANS(xvssub_du, LASX, gvec_xxx, MO_64, tcg_gen_gvec_ussub)
+
+TRANS(vhaddw_h_b, LSX, gen_vvv, gen_helper_vhaddw_h_b)
+TRANS(vhaddw_w_h, LSX, gen_vvv, gen_helper_vhaddw_w_h)
+TRANS(vhaddw_d_w, LSX, gen_vvv, gen_helper_vhaddw_d_w)
+TRANS(vhaddw_q_d, LSX, gen_vvv, gen_helper_vhaddw_q_d)
+TRANS(vhaddw_hu_bu, LSX, gen_vvv, gen_helper_vhaddw_hu_bu)
+TRANS(vhaddw_wu_hu, LSX, gen_vvv, gen_helper_vhaddw_wu_hu)
+TRANS(vhaddw_du_wu, LSX, gen_vvv, gen_helper_vhaddw_du_wu)
+TRANS(vhaddw_qu_du, LSX, gen_vvv, gen_helper_vhaddw_qu_du)
+TRANS(vhsubw_h_b, LSX, gen_vvv, gen_helper_vhsubw_h_b)
+TRANS(vhsubw_w_h, LSX, gen_vvv, gen_helper_vhsubw_w_h)
+TRANS(vhsubw_d_w, LSX, gen_vvv, gen_helper_vhsubw_d_w)
+TRANS(vhsubw_q_d, LSX, gen_vvv, gen_helper_vhsubw_q_d)
+TRANS(vhsubw_hu_bu, LSX, gen_vvv, gen_helper_vhsubw_hu_bu)
+TRANS(vhsubw_wu_hu, LSX, gen_vvv, gen_helper_vhsubw_wu_hu)
+TRANS(vhsubw_du_wu, LSX, gen_vvv, gen_helper_vhsubw_du_wu)
+TRANS(vhsubw_qu_du, LSX, gen_vvv, gen_helper_vhsubw_qu_du)
+
+TRANS(xvhaddw_h_b, LASX, gen_xxx, gen_helper_vhaddw_h_b)
+TRANS(xvhaddw_w_h, LASX, gen_xxx, gen_helper_vhaddw_w_h)
+TRANS(xvhaddw_d_w, LASX, gen_xxx, gen_helper_vhaddw_d_w)
+TRANS(xvhaddw_q_d, LASX, gen_xxx, gen_helper_vhaddw_q_d)
+TRANS(xvhaddw_hu_bu, LASX, gen_xxx, gen_helper_vhaddw_hu_bu)
+TRANS(xvhaddw_wu_hu, LASX, gen_xxx, gen_helper_vhaddw_wu_hu)
+TRANS(xvhaddw_du_wu, LASX, gen_xxx, gen_helper_vhaddw_du_wu)
+TRANS(xvhaddw_qu_du, LASX, gen_xxx, gen_helper_vhaddw_qu_du)
+TRANS(xvhsubw_h_b, LASX, gen_xxx, gen_helper_vhsubw_h_b)
+TRANS(xvhsubw_w_h, LASX, gen_xxx, gen_helper_vhsubw_w_h)
+TRANS(xvhsubw_d_w, LASX, gen_xxx, gen_helper_vhsubw_d_w)
+TRANS(xvhsubw_q_d, LASX, gen_xxx, gen_helper_vhsubw_q_d)
+TRANS(xvhsubw_hu_bu, LASX, gen_xxx, gen_helper_vhsubw_hu_bu)
+TRANS(xvhsubw_wu_hu, LASX, gen_xxx, gen_helper_vhsubw_wu_hu)
+TRANS(xvhsubw_du_wu, LASX, gen_xxx, gen_helper_vhsubw_du_wu)
+TRANS(xvhsubw_qu_du, LASX, gen_xxx, gen_helper_vhsubw_qu_du)
+
+static void gen_vaddwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t1, t2;
+
+ int halfbits = 4 << vece;
+
+ t1 = tcg_temp_new_vec_matching(a);
+ t2 = tcg_temp_new_vec_matching(b);
+
+ /* Sign-extend the even elements from a */
+ tcg_gen_shli_vec(vece, t1, a, halfbits);
+ tcg_gen_sari_vec(vece, t1, t1, halfbits);
+
+ /* Sign-extend the even elements from b */
+ tcg_gen_shli_vec(vece, t2, b, halfbits);
+ tcg_gen_sari_vec(vece, t2, t2, halfbits);
+
+ tcg_gen_add_vec(vece, t, t1, t2);
+}
+
+static void gen_vaddwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t1, t2;
+
+ t1 = tcg_temp_new_i32();
+ t2 = tcg_temp_new_i32();
+ tcg_gen_ext16s_i32(t1, a);
+ tcg_gen_ext16s_i32(t2, b);
+ tcg_gen_add_i32(t, t1, t2);
+}
+
+static void gen_vaddwev_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t1, t2;
+
+ t1 = tcg_temp_new_i64();
+ t2 = tcg_temp_new_i64();
+ tcg_gen_ext32s_i64(t1, a);
+ tcg_gen_ext32s_i64(t2, b);
+ tcg_gen_add_i64(t, t1, t2);
+}
+
+static void do_vaddwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen3 op[4] = {
+ {
+ .fniv = gen_vaddwev_s,
+ .fno = gen_helper_vaddwev_h_b,
+ .opt_opc = vecop_list,
+ .vece = MO_16
+ },
+ {
+ .fni4 = gen_vaddwev_w_h,
+ .fniv = gen_vaddwev_s,
+ .fno = gen_helper_vaddwev_w_h,
+ .opt_opc = vecop_list,
+ .vece = MO_32
+ },
+ {
+ .fni8 = gen_vaddwev_d_w,
+ .fniv = gen_vaddwev_s,
+ .fno = gen_helper_vaddwev_d_w,
+ .opt_opc = vecop_list,
+ .vece = MO_64
+ },
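+ /* 128-bit destination elements have no inline expansion; always use the helper. */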
+ {
+ .fno = gen_helper_vaddwev_q_d,
+ .vece = MO_128
+ },
+ };
+
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
+}
+
+TRANS(vaddwev_h_b, LSX, gvec_vvv, MO_8, do_vaddwev_s)
+TRANS(vaddwev_w_h, LSX, gvec_vvv, MO_16, do_vaddwev_s)
+TRANS(vaddwev_d_w, LSX, gvec_vvv, MO_32, do_vaddwev_s)
+TRANS(vaddwev_q_d, LSX, gvec_vvv, MO_64, do_vaddwev_s)
+TRANS(xvaddwev_h_b, LASX, gvec_xxx, MO_8, do_vaddwev_s)
+TRANS(xvaddwev_w_h, LASX, gvec_xxx, MO_16, do_vaddwev_s)
+TRANS(xvaddwev_d_w, LASX, gvec_xxx, MO_32, do_vaddwev_s)
+TRANS(xvaddwev_q_d, LASX, gvec_xxx, MO_64, do_vaddwev_s)
+
+static void gen_vaddwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t1, t2;
+
+ t1 = tcg_temp_new_i32();
+ t2 = tcg_temp_new_i32();
+ tcg_gen_sari_i32(t1, a, 16);
+ tcg_gen_sari_i32(t2, b, 16);
+ tcg_gen_add_i32(t, t1, t2);
+}
+
+static void gen_vaddwod_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t1, t2;
+
+ t1 = tcg_temp_new_i64();
+ t2 = tcg_temp_new_i64();
+ tcg_gen_sari_i64(t1, a, 32);
+ tcg_gen_sari_i64(t2, b, 32);
+ tcg_gen_add_i64(t, t1, t2);
+}
+
+static void gen_vaddwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t1, t2;
+
+ int halfbits = 4 << vece;
+
+ t1 = tcg_temp_new_vec_matching(a);
+ t2 = tcg_temp_new_vec_matching(b);
+
+ /* Sign-extend the odd elements from a and b */
+ tcg_gen_sari_vec(vece, t1, a, halfbits);
+ tcg_gen_sari_vec(vece, t2, b, halfbits);
+
+ tcg_gen_add_vec(vece, t, t1, t2);
+}
+
+static void do_vaddwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_sari_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen3 op[4] = {
+ {
+ .fniv = gen_vaddwod_s,
+ .fno = gen_helper_vaddwod_h_b,
+ .opt_opc = vecop_list,
+ .vece = MO_16
+ },
+ {
+ .fni4 = gen_vaddwod_w_h,
+ .fniv = gen_vaddwod_s,
+ .fno = gen_helper_vaddwod_w_h,
+ .opt_opc = vecop_list,
+ .vece = MO_32
+ },
+ {
+ .fni8 = gen_vaddwod_d_w,
+ .fniv = gen_vaddwod_s,
+ .fno = gen_helper_vaddwod_d_w,
+ .opt_opc = vecop_list,
+ .vece = MO_64
+ },
+ {
+ .fno = gen_helper_vaddwod_q_d,
+ .vece = MO_128
+ },
+ };
+
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
+}
+
+TRANS(vaddwod_h_b, LSX, gvec_vvv, MO_8, do_vaddwod_s)
+TRANS(vaddwod_w_h, LSX, gvec_vvv, MO_16, do_vaddwod_s)
+TRANS(vaddwod_d_w, LSX, gvec_vvv, MO_32, do_vaddwod_s)
+TRANS(vaddwod_q_d, LSX, gvec_vvv, MO_64, do_vaddwod_s)
+TRANS(xvaddwod_h_b, LASX, gvec_xxx, MO_8, do_vaddwod_s)
+TRANS(xvaddwod_w_h, LASX, gvec_xxx, MO_16, do_vaddwod_s)
+TRANS(xvaddwod_d_w, LASX, gvec_xxx, MO_32, do_vaddwod_s)
+TRANS(xvaddwod_q_d, LASX, gvec_xxx, MO_64, do_vaddwod_s)
+
+static void gen_vsubwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t1, t2;
+
+ int halfbits = 4 << vece;
+
+ t1 = tcg_temp_new_vec_matching(a);
+ t2 = tcg_temp_new_vec_matching(b);
+
+ /* Sign-extend the even elements from a */
+ tcg_gen_shli_vec(vece, t1, a, halfbits);
+ tcg_gen_sari_vec(vece, t1, t1, halfbits);
+
+ /* Sign-extend the even elements from b */
+ tcg_gen_shli_vec(vece, t2, b, halfbits);
+ tcg_gen_sari_vec(vece, t2, t2, halfbits);
+
+ tcg_gen_sub_vec(vece, t, t1, t2);
+}
+
+static void gen_vsubwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t1, t2;
+
+ t1 = tcg_temp_new_i32();
+ t2 = tcg_temp_new_i32();
+ tcg_gen_ext16s_i32(t1, a);
+ tcg_gen_ext16s_i32(t2, b);
+ tcg_gen_sub_i32(t, t1, t2);
+}
+
+static void gen_vsubwev_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t1, t2;
+
+ t1 = tcg_temp_new_i64();
+ t2 = tcg_temp_new_i64();
+ tcg_gen_ext32s_i64(t1, a);
+ tcg_gen_ext32s_i64(t2, b);
+ tcg_gen_sub_i64(t, t1, t2);
+}
+
+static void do_vsubwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_sub_vec, 0
+ };
+ static const GVecGen3 op[4] = {
+ {
+ .fniv = gen_vsubwev_s,
+ .fno = gen_helper_vsubwev_h_b,
+ .opt_opc = vecop_list,
+ .vece = MO_16
+ },
+ {
+ .fni4 = gen_vsubwev_w_h,
+ .fniv = gen_vsubwev_s,
+ .fno = gen_helper_vsubwev_w_h,
+ .opt_opc = vecop_list,
+ .vece = MO_32
+ },
+ {
+ .fni8 = gen_vsubwev_d_w,
+ .fniv = gen_vsubwev_s,
+ .fno = gen_helper_vsubwev_d_w,
+ .opt_opc = vecop_list,
+ .vece = MO_64
+ },
+ {
+ .fno = gen_helper_vsubwev_q_d,
+ .vece = MO_128
+ },
+ };
+
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
+}
+
+TRANS(vsubwev_h_b, LSX, gvec_vvv, MO_8, do_vsubwev_s)
+TRANS(vsubwev_w_h, LSX, gvec_vvv, MO_16, do_vsubwev_s)
+TRANS(vsubwev_d_w, LSX, gvec_vvv, MO_32, do_vsubwev_s)
+TRANS(vsubwev_q_d, LSX, gvec_vvv, MO_64, do_vsubwev_s)
+TRANS(xvsubwev_h_b, LASX, gvec_xxx, MO_8, do_vsubwev_s)
+TRANS(xvsubwev_w_h, LASX, gvec_xxx, MO_16, do_vsubwev_s)
+TRANS(xvsubwev_d_w, LASX, gvec_xxx, MO_32, do_vsubwev_s)
+TRANS(xvsubwev_q_d, LASX, gvec_xxx, MO_64, do_vsubwev_s)
+
+static void gen_vsubwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t1, t2;
+
+ int halfbits = 4 << vece;
+
+ t1 = tcg_temp_new_vec_matching(a);
+ t2 = tcg_temp_new_vec_matching(b);
+
+ /* Sign-extend the odd elements from a and b */
+ tcg_gen_sari_vec(vece, t1, a, halfbits);
+ tcg_gen_sari_vec(vece, t2, b, halfbits);
+
+ tcg_gen_sub_vec(vece, t, t1, t2);
+}
+
+static void gen_vsubwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t1, t2;
+
+ t1 = tcg_temp_new_i32();
+ t2 = tcg_temp_new_i32();
+ tcg_gen_sari_i32(t1, a, 16);
+ tcg_gen_sari_i32(t2, b, 16);
+ tcg_gen_sub_i32(t, t1, t2);
+}
+
+static void gen_vsubwod_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t1, t2;
+
+ t1 = tcg_temp_new_i64();
+ t2 = tcg_temp_new_i64();
+ tcg_gen_sari_i64(t1, a, 32);
+ tcg_gen_sari_i64(t2, b, 32);
+ tcg_gen_sub_i64(t, t1, t2);
+}
+
+static void do_vsubwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_sari_vec, INDEX_op_sub_vec, 0
+ };
+ static const GVecGen3 op[4] = {
+ {
+ .fniv = gen_vsubwod_s,
+ .fno = gen_helper_vsubwod_h_b,
+ .opt_opc = vecop_list,
+ .vece = MO_16
+ },
+ {
+ .fni4 = gen_vsubwod_w_h,
+ .fniv = gen_vsubwod_s,
+ .fno = gen_helper_vsubwod_w_h,
+ .opt_opc = vecop_list,
+ .vece = MO_32
+ },
+ {
+ .fni8 = gen_vsubwod_d_w,
+ .fniv = gen_vsubwod_s,
+ .fno = gen_helper_vsubwod_d_w,
+ .opt_opc = vecop_list,
+ .vece = MO_64
+ },
+ {
+ .fno = gen_helper_vsubwod_q_d,
+ .vece = MO_128
+ },
+ };
+
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
+}
+
+TRANS(vsubwod_h_b, LSX, gvec_vvv, MO_8, do_vsubwod_s)
+TRANS(vsubwod_w_h, LSX, gvec_vvv, MO_16, do_vsubwod_s)
+TRANS(vsubwod_d_w, LSX, gvec_vvv, MO_32, do_vsubwod_s)
+TRANS(vsubwod_q_d, LSX, gvec_vvv, MO_64, do_vsubwod_s)
+TRANS(xvsubwod_h_b, LASX, gvec_xxx, MO_8, do_vsubwod_s)
+TRANS(xvsubwod_w_h, LASX, gvec_xxx, MO_16, do_vsubwod_s)
+TRANS(xvsubwod_d_w, LASX, gvec_xxx, MO_32, do_vsubwod_s)
+TRANS(xvsubwod_q_d, LASX, gvec_xxx, MO_64, do_vsubwod_s)
+
+static void gen_vaddwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t1, t2, t3;
+
+ t1 = tcg_temp_new_vec_matching(a);
+ t2 = tcg_temp_new_vec_matching(b);
+ t3 = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
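+ /* Zero-extend the even elements from a and b by masking off the high half. */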
+ tcg_gen_and_vec(vece, t1, a, t3);
+ tcg_gen_and_vec(vece, t2, b, t3);
+ tcg_gen_add_vec(vece, t, t1, t2);
+}
+
+static void gen_vaddwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t1, t2;
+
+ t1 = tcg_temp_new_i32();
+ t2 = tcg_temp_new_i32();
+ tcg_gen_ext16u_i32(t1, a);
+ tcg_gen_ext16u_i32(t2, b);
+ tcg_gen_add_i32(t, t1, t2);
+}
+
+static void gen_vaddwev_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t1, t2;
+
+ t1 = tcg_temp_new_i64();
+ t2 = tcg_temp_new_i64();
+ tcg_gen_ext32u_i64(t1, a);
+ tcg_gen_ext32u_i64(t2, b);
+ tcg_gen_add_i64(t, t1, t2);
+}
+
+static void do_vaddwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_add_vec, 0
+ };
+ static const GVecGen3 op[4] = {
+ {
+ .fniv = gen_vaddwev_u,
+ .fno = gen_helper_vaddwev_h_bu,
+ .opt_opc = vecop_list,
+ .vece = MO_16
+ },
+ {
+ .fni4 = gen_vaddwev_w_hu,
+ .fniv = gen_vaddwev_u,
+ .fno = gen_helper_vaddwev_w_hu,
+ .opt_opc = vecop_list,
+ .vece = MO_32
+ },
+ {
+ .fni8 = gen_vaddwev_d_wu,
+ .fniv = gen_vaddwev_u,
+ .fno = gen_helper_vaddwev_d_wu,
+ .opt_opc = vecop_list,
+ .vece = MO_64
+ },
+ {
+ .fno = gen_helper_vaddwev_q_du,
+ .vece = MO_128
+ },
+ };
+
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
+}
+
+TRANS(vaddwev_h_bu, LSX, gvec_vvv, MO_8, do_vaddwev_u)
+TRANS(vaddwev_w_hu, LSX, gvec_vvv, MO_16, do_vaddwev_u)
+TRANS(vaddwev_d_wu, LSX, gvec_vvv, MO_32, do_vaddwev_u)
+TRANS(vaddwev_q_du, LSX, gvec_vvv, MO_64, do_vaddwev_u)
+TRANS(xvaddwev_h_bu, LASX, gvec_xxx, MO_8, do_vaddwev_u)
+TRANS(xvaddwev_w_hu, LASX, gvec_xxx, MO_16, do_vaddwev_u)
+TRANS(xvaddwev_d_wu, LASX, gvec_xxx, MO_32, do_vaddwev_u)
+TRANS(xvaddwev_q_du, LASX, gvec_xxx, MO_64, do_vaddwev_u)
+
+static void gen_vaddwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t1, t2;
+
+ int halfbits = 4 << vece;
+
+ t1 = tcg_temp_new_vec_matching(a);
+ t2 = tcg_temp_new_vec_matching(b);
+
+ /* Zero-extend the odd elements from a and b */
+ tcg_gen_shri_vec(vece, t1, a, halfbits);
+ tcg_gen_shri_vec(vece, t2, b, halfbits);
+
+ tcg_gen_add_vec(vece, t, t1, t2);
+}
+
+static void gen_vaddwod_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t1, t2;
+
+ t1 = tcg_temp_new_i32();
+ t2 = tcg_temp_new_i32();
+ tcg_gen_shri_i32(t1, a, 16);
+ tcg_gen_shri_i32(t2, b, 16);
+ tcg_gen_add_i32(t, t1, t2);
+}
+
+static void gen_vaddwod_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t1, t2;
+
+ t1 = tcg_temp_new_i64();
+ t2 = tcg_temp_new_i64();
+ tcg_gen_shri_i64(t1, a, 32);
+ tcg_gen_shri_i64(t2, b, 32);
+ tcg_gen_add_i64(t, t1, t2);
+}
+
+static void do_vaddwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shri_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen3 op[4] = {
+ {
+ .fniv = gen_vaddwod_u,
+ .fno = gen_helper_vaddwod_h_bu,
+ .opt_opc = vecop_list,
+ .vece = MO_16
+ },
+ {
+ .fni4 = gen_vaddwod_w_hu,
+ .fniv = gen_vaddwod_u,
+ .fno = gen_helper_vaddwod_w_hu,
+ .opt_opc = vecop_list,
+ .vece = MO_32
+ },
+ {
+ .fni8 = gen_vaddwod_d_wu,
+ .fniv = gen_vaddwod_u,
+ .fno = gen_helper_vaddwod_d_wu,
+ .opt_opc = vecop_list,
+ .vece = MO_64
+ },
+ {
+ .fno = gen_helper_vaddwod_q_du,
+ .vece = MO_128
+ },
+ };
+
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
+}
+
+TRANS(vaddwod_h_bu, LSX, gvec_vvv, MO_8, do_vaddwod_u)
+TRANS(vaddwod_w_hu, LSX, gvec_vvv, MO_16, do_vaddwod_u)
+TRANS(vaddwod_d_wu, LSX, gvec_vvv, MO_32, do_vaddwod_u)
+TRANS(vaddwod_q_du, LSX, gvec_vvv, MO_64, do_vaddwod_u)
+TRANS(xvaddwod_h_bu, LASX, gvec_xxx, MO_8, do_vaddwod_u)
+TRANS(xvaddwod_w_hu, LASX, gvec_xxx, MO_16, do_vaddwod_u)
+TRANS(xvaddwod_d_wu, LASX, gvec_xxx, MO_32, do_vaddwod_u)
+TRANS(xvaddwod_q_du, LASX, gvec_xxx, MO_64, do_vaddwod_u)
+
+static void gen_vsubwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t1, t2, t3;
+
+ t1 = tcg_temp_new_vec_matching(a);
+ t2 = tcg_temp_new_vec_matching(b);
+ t3 = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
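+ /* Zero-extend the even elements from a and b by masking off the high half. */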
+ tcg_gen_and_vec(vece, t1, a, t3);
+ tcg_gen_and_vec(vece, t2, b, t3);
+ tcg_gen_sub_vec(vece, t, t1, t2);
+}
+
+static void gen_vsubwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t1, t2;
+
+ t1 = tcg_temp_new_i32();
+ t2 = tcg_temp_new_i32();
+ tcg_gen_ext16u_i32(t1, a);
+ tcg_gen_ext16u_i32(t2, b);
+ tcg_gen_sub_i32(t, t1, t2);
+}
+
+static void gen_vsubwev_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t1, t2;
+
+ t1 = tcg_temp_new_i64();
+ t2 = tcg_temp_new_i64();
+ tcg_gen_ext32u_i64(t1, a);
+ tcg_gen_ext32u_i64(t2, b);
+ tcg_gen_sub_i64(t, t1, t2);
+}
+
+static void do_vsubwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_sub_vec, 0
+ };
+ static const GVecGen3 op[4] = {
+ {
+ .fniv = gen_vsubwev_u,
+ .fno = gen_helper_vsubwev_h_bu,
+ .opt_opc = vecop_list,
+ .vece = MO_16
+ },
+ {
+ .fni4 = gen_vsubwev_w_hu,
+ .fniv = gen_vsubwev_u,
+ .fno = gen_helper_vsubwev_w_hu,
+ .opt_opc = vecop_list,
+ .vece = MO_32
+ },
+ {
+ .fni8 = gen_vsubwev_d_wu,
+ .fniv = gen_vsubwev_u,
+ .fno = gen_helper_vsubwev_d_wu,
+ .opt_opc = vecop_list,
+ .vece = MO_64
+ },
+ {
+ .fno = gen_helper_vsubwev_q_du,
+ .vece = MO_128
+ },
+ };
+
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
+}
+
+TRANS(vsubwev_h_bu, LSX, gvec_vvv, MO_8, do_vsubwev_u)
+TRANS(vsubwev_w_hu, LSX, gvec_vvv, MO_16, do_vsubwev_u)
+TRANS(vsubwev_d_wu, LSX, gvec_vvv, MO_32, do_vsubwev_u)
+TRANS(vsubwev_q_du, LSX, gvec_vvv, MO_64, do_vsubwev_u)
+TRANS(xvsubwev_h_bu, LASX, gvec_xxx, MO_8, do_vsubwev_u)
+TRANS(xvsubwev_w_hu, LASX, gvec_xxx, MO_16, do_vsubwev_u)
+TRANS(xvsubwev_d_wu, LASX, gvec_xxx, MO_32, do_vsubwev_u)
+TRANS(xvsubwev_q_du, LASX, gvec_xxx, MO_64, do_vsubwev_u)
+
+static void gen_vsubwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t1, t2;
+
+ int halfbits = 4 << vece;
+
+ t1 = tcg_temp_new_vec_matching(a);
+ t2 = tcg_temp_new_vec_matching(b);
+
+ /* Zero-extend the odd elements from a and b */
+ tcg_gen_shri_vec(vece, t1, a, halfbits);
+ tcg_gen_shri_vec(vece, t2, b, halfbits);
+
+ tcg_gen_sub_vec(vece, t, t1, t2);
+}
+
+static void gen_vsubwod_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t1, t2;
+
+ t1 = tcg_temp_new_i32();
+ t2 = tcg_temp_new_i32();
+ tcg_gen_shri_i32(t1, a, 16);
+ tcg_gen_shri_i32(t2, b, 16);
+ tcg_gen_sub_i32(t, t1, t2);
+}
+
+static void gen_vsubwod_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t1, t2;
+
+ t1 = tcg_temp_new_i64();
+ t2 = tcg_temp_new_i64();
+ tcg_gen_shri_i64(t1, a, 32);
+ tcg_gen_shri_i64(t2, b, 32);
+ tcg_gen_sub_i64(t, t1, t2);
+}
+
+static void do_vsubwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shri_vec, INDEX_op_sub_vec, 0
+ };
+ static const GVecGen3 op[4] = {
+ {
+ .fniv = gen_vsubwod_u,
+ .fno = gen_helper_vsubwod_h_bu,
+ .opt_opc = vecop_list,
+ .vece = MO_16
+ },
+ {
+ .fni4 = gen_vsubwod_w_hu,
+ .fniv = gen_vsubwod_u,
+ .fno = gen_helper_vsubwod_w_hu,
+ .opt_opc = vecop_list,
+ .vece = MO_32
+ },
+ {
+ .fni8 = gen_vsubwod_d_wu,
+ .fniv = gen_vsubwod_u,
+ .fno = gen_helper_vsubwod_d_wu,
+ .opt_opc = vecop_list,
+ .vece = MO_64
+ },
+ {
+ .fno = gen_helper_vsubwod_q_du,
+ .vece = MO_128
+ },
+ };
+
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
+}
+
+TRANS(vsubwod_h_bu, LSX, gvec_vvv, MO_8, do_vsubwod_u)
+TRANS(vsubwod_w_hu, LSX, gvec_vvv, MO_16, do_vsubwod_u)
+TRANS(vsubwod_d_wu, LSX, gvec_vvv, MO_32, do_vsubwod_u)
+TRANS(vsubwod_q_du, LSX, gvec_vvv, MO_64, do_vsubwod_u)
+TRANS(xvsubwod_h_bu, LASX, gvec_xxx, MO_8, do_vsubwod_u)
+TRANS(xvsubwod_w_hu, LASX, gvec_xxx, MO_16, do_vsubwod_u)
+TRANS(xvsubwod_d_wu, LASX, gvec_xxx, MO_32, do_vsubwod_u)
+TRANS(xvsubwod_q_du, LASX, gvec_xxx, MO_64, do_vsubwod_u)
+
+static void gen_vaddwev_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t1, t2, t3;
+
+ int halfbits = 4 << vece;
+
+ t1 = tcg_temp_new_vec_matching(a);
+ t2 = tcg_temp_new_vec_matching(b);
+ t3 = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, halfbits));
+
+ /* Zero-extend the even elements from a */
+ tcg_gen_and_vec(vece, t1, a, t3);
+
+ /* Sign-extend the even elements from b */
+ tcg_gen_shli_vec(vece, t2, b, halfbits);
+ tcg_gen_sari_vec(vece, t2, t2, halfbits);
+
+ tcg_gen_add_vec(vece, t, t1, t2);
+}
+
+static void gen_vaddwev_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t1, t2;
+
+ t1 = tcg_temp_new_i32();
+ t2 = tcg_temp_new_i32();
+ tcg_gen_ext16u_i32(t1, a);
+ tcg_gen_ext16s_i32(t2, b);
+ tcg_gen_add_i32(t, t1, t2);
+}
+
+static void gen_vaddwev_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t1, t2;
+
+ t1 = tcg_temp_new_i64();
+ t2 = tcg_temp_new_i64();
+ tcg_gen_ext32u_i64(t1, a);
+ tcg_gen_ext32s_i64(t2, b);
+ tcg_gen_add_i64(t, t1, t2);
+}
+
+static void do_vaddwev_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen3 op[4] = {
+ {
+ .fniv = gen_vaddwev_u_s,
+ .fno = gen_helper_vaddwev_h_bu_b,
+ .opt_opc = vecop_list,
+ .vece = MO_16
+ },
+ {
+ .fni4 = gen_vaddwev_w_hu_h,
+ .fniv = gen_vaddwev_u_s,
+ .fno = gen_helper_vaddwev_w_hu_h,
+ .opt_opc = vecop_list,
+ .vece = MO_32
+ },
+ {
+ .fni8 = gen_vaddwev_d_wu_w,
+ .fniv = gen_vaddwev_u_s,
+ .fno = gen_helper_vaddwev_d_wu_w,
+ .opt_opc = vecop_list,
+ .vece = MO_64
+ },
+ {
+ .fno = gen_helper_vaddwev_q_du_d,
+ .vece = MO_128
+ },
+ };
+
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
+}
+
+TRANS(vaddwev_h_bu_b, LSX, gvec_vvv, MO_8, do_vaddwev_u_s)
+TRANS(vaddwev_w_hu_h, LSX, gvec_vvv, MO_16, do_vaddwev_u_s)
+TRANS(vaddwev_d_wu_w, LSX, gvec_vvv, MO_32, do_vaddwev_u_s)
+TRANS(vaddwev_q_du_d, LSX, gvec_vvv, MO_64, do_vaddwev_u_s)
+TRANS(xvaddwev_h_bu_b, LASX, gvec_xxx, MO_8, do_vaddwev_u_s)
+TRANS(xvaddwev_w_hu_h, LASX, gvec_xxx, MO_16, do_vaddwev_u_s)
+TRANS(xvaddwev_d_wu_w, LASX, gvec_xxx, MO_32, do_vaddwev_u_s)
+TRANS(xvaddwev_q_du_d, LASX, gvec_xxx, MO_64, do_vaddwev_u_s)
+
+static void gen_vaddwod_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t1, t2;
+
+ int halfbits = 4 << vece;
+
+ t1 = tcg_temp_new_vec_matching(a);
+ t2 = tcg_temp_new_vec_matching(b);
+
+ /* Zero-extend the odd elements from a */
+ tcg_gen_shri_vec(vece, t1, a, halfbits);
+ /* Sign-extend the odd elements from b */
+ tcg_gen_sari_vec(vece, t2, b, halfbits);
+
+ tcg_gen_add_vec(vece, t, t1, t2);
+}
+
+static void gen_vaddwod_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t1, t2;
+
+ t1 = tcg_temp_new_i32();
+ t2 = tcg_temp_new_i32();
+ tcg_gen_shri_i32(t1, a, 16);
+ tcg_gen_sari_i32(t2, b, 16);
+ tcg_gen_add_i32(t, t1, t2);
+}
+
+static void gen_vaddwod_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t1, t2;
+
+ t1 = tcg_temp_new_i64();
+ t2 = tcg_temp_new_i64();
+ tcg_gen_shri_i64(t1, a, 32);
+ tcg_gen_sari_i64(t2, b, 32);
+ tcg_gen_add_i64(t, t1, t2);
+}
+
+static void do_vaddwod_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen3 op[4] = {
+ {
+ .fniv = gen_vaddwod_u_s,
+ .fno = gen_helper_vaddwod_h_bu_b,
+ .opt_opc = vecop_list,
+ .vece = MO_16
+ },
+ {
+ .fni4 = gen_vaddwod_w_hu_h,
+ .fniv = gen_vaddwod_u_s,
+ .fno = gen_helper_vaddwod_w_hu_h,
+ .opt_opc = vecop_list,
+ .vece = MO_32
+ },
+ {
+ .fni8 = gen_vaddwod_d_wu_w,
+ .fniv = gen_vaddwod_u_s,
+ .fno = gen_helper_vaddwod_d_wu_w,
+ .opt_opc = vecop_list,
+ .vece = MO_64
+ },
+ {
+ .fno = gen_helper_vaddwod_q_du_d,
+ .vece = MO_128
+ },
+ };
+
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
+}
+
+TRANS(vaddwod_h_bu_b, LSX, gvec_vvv, MO_8, do_vaddwod_u_s)
+TRANS(vaddwod_w_hu_h, LSX, gvec_vvv, MO_16, do_vaddwod_u_s)
+TRANS(vaddwod_d_wu_w, LSX, gvec_vvv, MO_32, do_vaddwod_u_s)
+TRANS(vaddwod_q_du_d, LSX, gvec_vvv, MO_64, do_vaddwod_u_s)
+TRANS(xvaddwod_h_bu_b, LASX, gvec_xxx, MO_8, do_vaddwod_u_s)
+TRANS(xvaddwod_w_hu_h, LASX, gvec_xxx, MO_16, do_vaddwod_u_s)
+TRANS(xvaddwod_d_wu_w, LASX, gvec_xxx, MO_32, do_vaddwod_u_s)
+TRANS(xvaddwod_q_du_d, LASX, gvec_xxx, MO_64, do_vaddwod_u_s)
+
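+/*
+ * avg(a, b) = (a >> 1) + (b >> 1) + carry, where the carry bit is
+ * (a & b) & 1 for the truncating average and (a | b) & 1 for the
+ * rounding average; this avoids overflow of the intermediate sum.
+ */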
+static void do_vavg(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b,
+ void (*gen_shr_vec)(unsigned, TCGv_vec,
+ TCGv_vec, int64_t),
+ void (*gen_round_vec)(unsigned, TCGv_vec,
+ TCGv_vec, TCGv_vec))
+{
+ TCGv_vec tmp = tcg_temp_new_vec_matching(t);
+ gen_round_vec(vece, tmp, a, b);
+ tcg_gen_and_vec(vece, tmp, tmp, tcg_constant_vec_matching(t, vece, 1));
+ gen_shr_vec(vece, a, a, 1);
+ gen_shr_vec(vece, b, b, 1);
+ tcg_gen_add_vec(vece, t, a, b);
+ tcg_gen_add_vec(vece, t, t, tmp);
+}
+
+static void gen_vavg_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
+{
+ do_vavg(vece, t, a, b, tcg_gen_sari_vec, tcg_gen_and_vec);
+}
+
+static void gen_vavg_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
+{
+ do_vavg(vece, t, a, b, tcg_gen_shri_vec, tcg_gen_and_vec);
+}
+
+static void gen_vavgr_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
+{
+ do_vavg(vece, t, a, b, tcg_gen_sari_vec, tcg_gen_or_vec);
+}
+
+static void gen_vavgr_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
+{
+ do_vavg(vece, t, a, b, tcg_gen_shri_vec, tcg_gen_or_vec);
+}
+
+static void do_vavg_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_sari_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen3 op[4] = {
+ {
+ .fniv = gen_vavg_s,
+ .fno = gen_helper_vavg_b,
+ .opt_opc = vecop_list,
+ .vece = MO_8
+ },
+ {
+ .fniv = gen_vavg_s,
+ .fno = gen_helper_vavg_h,
+ .opt_opc = vecop_list,
+ .vece = MO_16
+ },
+ {
+ .fniv = gen_vavg_s,
+ .fno = gen_helper_vavg_w,
+ .opt_opc = vecop_list,
+ .vece = MO_32
+ },
+ {
+ .fniv = gen_vavg_s,
+ .fno = gen_helper_vavg_d,
+ .opt_opc = vecop_list,
+ .vece = MO_64
+ },
+ };
+
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
+}
+
+static void do_vavg_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shri_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen3 op[4] = {
+ {
+ .fniv = gen_vavg_u,
+ .fno = gen_helper_vavg_bu,
+ .opt_opc = vecop_list,
+ .vece = MO_8
+ },
+ {
+ .fniv = gen_vavg_u,
+ .fno = gen_helper_vavg_hu,
+ .opt_opc = vecop_list,
+ .vece = MO_16
+ },
+ {
+ .fniv = gen_vavg_u,
+ .fno = gen_helper_vavg_wu,
+ .opt_opc = vecop_list,
+ .vece = MO_32
+ },
+ {
+ .fniv = gen_vavg_u,
+ .fno = gen_helper_vavg_du,
+ .opt_opc = vecop_list,
+ .vece = MO_64
+ },
+ };
+
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
+}
+
+TRANS(vavg_b, LSX, gvec_vvv, MO_8, do_vavg_s)
+TRANS(vavg_h, LSX, gvec_vvv, MO_16, do_vavg_s)
+TRANS(vavg_w, LSX, gvec_vvv, MO_32, do_vavg_s)
+TRANS(vavg_d, LSX, gvec_vvv, MO_64, do_vavg_s)
+TRANS(vavg_bu, LSX, gvec_vvv, MO_8, do_vavg_u)
+TRANS(vavg_hu, LSX, gvec_vvv, MO_16, do_vavg_u)
+TRANS(vavg_wu, LSX, gvec_vvv, MO_32, do_vavg_u)
+TRANS(vavg_du, LSX, gvec_vvv, MO_64, do_vavg_u)
+TRANS(xvavg_b, LASX, gvec_xxx, MO_8, do_vavg_s)
+TRANS(xvavg_h, LASX, gvec_xxx, MO_16, do_vavg_s)
+TRANS(xvavg_w, LASX, gvec_xxx, MO_32, do_vavg_s)
+TRANS(xvavg_d, LASX, gvec_xxx, MO_64, do_vavg_s)
+TRANS(xvavg_bu, LASX, gvec_xxx, MO_8, do_vavg_u)
+TRANS(xvavg_hu, LASX, gvec_xxx, MO_16, do_vavg_u)
+TRANS(xvavg_wu, LASX, gvec_xxx, MO_32, do_vavg_u)
+TRANS(xvavg_du, LASX, gvec_xxx, MO_64, do_vavg_u)
+
+static void do_vavgr_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_sari_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen3 op[4] = {
+ {
+ .fniv = gen_vavgr_s,
+ .fno = gen_helper_vavgr_b,
+ .opt_opc = vecop_list,
+ .vece = MO_8
+ },
+ {
+ .fniv = gen_vavgr_s,
+ .fno = gen_helper_vavgr_h,
+ .opt_opc = vecop_list,
+ .vece = MO_16
+ },
+ {
+ .fniv = gen_vavgr_s,
+ .fno = gen_helper_vavgr_w,
+ .opt_opc = vecop_list,
+ .vece = MO_32
+ },
+ {
+ .fniv = gen_vavgr_s,
+ .fno = gen_helper_vavgr_d,
+ .opt_opc = vecop_list,
+ .vece = MO_64
+ },
+ };
+
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
+}
+
+static void do_vavgr_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shri_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen3 op[4] = {
+ {
+ .fniv = gen_vavgr_u,
+ .fno = gen_helper_vavgr_bu,
+ .opt_opc = vecop_list,
+ .vece = MO_8
+ },
+ {
+ .fniv = gen_vavgr_u,
+ .fno = gen_helper_vavgr_hu,
+ .opt_opc = vecop_list,
+ .vece = MO_16
+ },
+ {
+ .fniv = gen_vavgr_u,
+ .fno = gen_helper_vavgr_wu,
+ .opt_opc = vecop_list,
+ .vece = MO_32
+ },
+ {
+ .fniv = gen_vavgr_u,
+ .fno = gen_helper_vavgr_du,
+ .opt_opc = vecop_list,
+ .vece = MO_64
+ },
+ };
+
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
+}
+
+TRANS(vavgr_b, LSX, gvec_vvv, MO_8, do_vavgr_s)
+TRANS(vavgr_h, LSX, gvec_vvv, MO_16, do_vavgr_s)
+TRANS(vavgr_w, LSX, gvec_vvv, MO_32, do_vavgr_s)
+TRANS(vavgr_d, LSX, gvec_vvv, MO_64, do_vavgr_s)
+TRANS(vavgr_bu, LSX, gvec_vvv, MO_8, do_vavgr_u)
+TRANS(vavgr_hu, LSX, gvec_vvv, MO_16, do_vavgr_u)
+TRANS(vavgr_wu, LSX, gvec_vvv, MO_32, do_vavgr_u)
+TRANS(vavgr_du, LSX, gvec_vvv, MO_64, do_vavgr_u)
+TRANS(xvavgr_b, LASX, gvec_xxx, MO_8, do_vavgr_s)
+TRANS(xvavgr_h, LASX, gvec_xxx, MO_16, do_vavgr_s)
+TRANS(xvavgr_w, LASX, gvec_xxx, MO_32, do_vavgr_s)
+TRANS(xvavgr_d, LASX, gvec_xxx, MO_64, do_vavgr_s)
+TRANS(xvavgr_bu, LASX, gvec_xxx, MO_8, do_vavgr_u)
+TRANS(xvavgr_hu, LASX, gvec_xxx, MO_16, do_vavgr_u)
+TRANS(xvavgr_wu, LASX, gvec_xxx, MO_32, do_vavgr_u)
+TRANS(xvavgr_du, LASX, gvec_xxx, MO_64, do_vavgr_u)
+
+static void gen_vabsd_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
+{
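+ /* |a - b| = max(a, b) - min(a, b) */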
+ tcg_gen_smax_vec(vece, t, a, b);
+ tcg_gen_smin_vec(vece, a, a, b);
+ tcg_gen_sub_vec(vece, t, t, a);
+}
+
+static void do_vabsd_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_smax_vec, INDEX_op_smin_vec, INDEX_op_sub_vec, 0
+ };
+ static const GVecGen3 op[4] = {
+ {
+ .fniv = gen_vabsd_s,
+ .fno = gen_helper_vabsd_b,
+ .opt_opc = vecop_list,
+ .vece = MO_8
+ },
+ {
+ .fniv = gen_vabsd_s,
+ .fno = gen_helper_vabsd_h,
+ .opt_opc = vecop_list,
+ .vece = MO_16
+ },
+ {
+ .fniv = gen_vabsd_s,
+ .fno = gen_helper_vabsd_w,
+ .opt_opc = vecop_list,
+ .vece = MO_32
+ },
+ {
+ .fniv = gen_vabsd_s,
+ .fno = gen_helper_vabsd_d,
+ .opt_opc = vecop_list,
+ .vece = MO_64
+ },
+ };
+
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
+}
+
+static void gen_vabsd_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
+{
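+ /* Unsigned |a - b| = umax(a, b) - umin(a, b) */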
+ tcg_gen_umax_vec(vece, t, a, b);
+ tcg_gen_umin_vec(vece, a, a, b);
+ tcg_gen_sub_vec(vece, t, t, a);
+}
+
+static void do_vabsd_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_umax_vec, INDEX_op_umin_vec, INDEX_op_sub_vec, 0
+ };
+ static const GVecGen3 op[4] = {
+ {
+ .fniv = gen_vabsd_u,
+ .fno = gen_helper_vabsd_bu,
+ .opt_opc = vecop_list,
+ .vece = MO_8
+ },
+ {
+ .fniv = gen_vabsd_u,
+ .fno = gen_helper_vabsd_hu,
+ .opt_opc = vecop_list,
+ .vece = MO_16
+ },
+ {
+ .fniv = gen_vabsd_u,
+ .fno = gen_helper_vabsd_wu,
+ .opt_opc = vecop_list,
+ .vece = MO_32
+ },
+ {
+ .fniv = gen_vabsd_u,
+ .fno = gen_helper_vabsd_du,
+ .opt_opc = vecop_list,
+ .vece = MO_64
+ },
+ };
+
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
+}
+
+TRANS(vabsd_b, LSX, gvec_vvv, MO_8, do_vabsd_s)
+TRANS(vabsd_h, LSX, gvec_vvv, MO_16, do_vabsd_s)
+TRANS(vabsd_w, LSX, gvec_vvv, MO_32, do_vabsd_s)
+TRANS(vabsd_d, LSX, gvec_vvv, MO_64, do_vabsd_s)
+TRANS(vabsd_bu, LSX, gvec_vvv, MO_8, do_vabsd_u)
+TRANS(vabsd_hu, LSX, gvec_vvv, MO_16, do_vabsd_u)
+TRANS(vabsd_wu, LSX, gvec_vvv, MO_32, do_vabsd_u)
+TRANS(vabsd_du, LSX, gvec_vvv, MO_64, do_vabsd_u)
+TRANS(xvabsd_b, LASX, gvec_xxx, MO_8, do_vabsd_s)
+TRANS(xvabsd_h, LASX, gvec_xxx, MO_16, do_vabsd_s)
+TRANS(xvabsd_w, LASX, gvec_xxx, MO_32, do_vabsd_s)
+TRANS(xvabsd_d, LASX, gvec_xxx, MO_64, do_vabsd_s)
+TRANS(xvabsd_bu, LASX, gvec_xxx, MO_8, do_vabsd_u)
+TRANS(xvabsd_hu, LASX, gvec_xxx, MO_16, do_vabsd_u)
+TRANS(xvabsd_wu, LASX, gvec_xxx, MO_32, do_vabsd_u)
+TRANS(xvabsd_du, LASX, gvec_xxx, MO_64, do_vabsd_u)
+
+static void gen_vadda(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t1, t2;
+
+ t1 = tcg_temp_new_vec_matching(a);
+ t2 = tcg_temp_new_vec_matching(b);
+
+ tcg_gen_abs_vec(vece, t1, a);
+ tcg_gen_abs_vec(vece, t2, b);
+ tcg_gen_add_vec(vece, t, t1, t2);
+}
+
+static void do_vadda(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_abs_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen3 op[4] = {
+ {
+ .fniv = gen_vadda,
+ .fno = gen_helper_vadda_b,
+ .opt_opc = vecop_list,
+ .vece = MO_8
+ },
+ {
+ .fniv = gen_vadda,
+ .fno = gen_helper_vadda_h,
+ .opt_opc = vecop_list,
+ .vece = MO_16
+ },
+ {
+ .fniv = gen_vadda,
+ .fno = gen_helper_vadda_w,
+ .opt_opc = vecop_list,
+ .vece = MO_32
+ },
+ {
+ .fniv = gen_vadda,
+ .fno = gen_helper_vadda_d,
+ .opt_opc = vecop_list,
+ .vece = MO_64
+ },
+ };
+
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
+}
+
+TRANS(vadda_b, LSX, gvec_vvv, MO_8, do_vadda)
+TRANS(vadda_h, LSX, gvec_vvv, MO_16, do_vadda)
+TRANS(vadda_w, LSX, gvec_vvv, MO_32, do_vadda)
+TRANS(vadda_d, LSX, gvec_vvv, MO_64, do_vadda)
+TRANS(xvadda_b, LASX, gvec_xxx, MO_8, do_vadda)
+TRANS(xvadda_h, LASX, gvec_xxx, MO_16, do_vadda)
+TRANS(xvadda_w, LASX, gvec_xxx, MO_32, do_vadda)
+TRANS(xvadda_d, LASX, gvec_xxx, MO_64, do_vadda)
+
+TRANS(vmax_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_smax)
+TRANS(vmax_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_smax)
+TRANS(vmax_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_smax)
+TRANS(vmax_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_smax)
+TRANS(vmax_bu, LSX, gvec_vvv, MO_8, tcg_gen_gvec_umax)
+TRANS(vmax_hu, LSX, gvec_vvv, MO_16, tcg_gen_gvec_umax)
+TRANS(vmax_wu, LSX, gvec_vvv, MO_32, tcg_gen_gvec_umax)
+TRANS(vmax_du, LSX, gvec_vvv, MO_64, tcg_gen_gvec_umax)
+TRANS(xvmax_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_smax)
+TRANS(xvmax_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_smax)
+TRANS(xvmax_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_smax)
+TRANS(xvmax_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_smax)
+TRANS(xvmax_bu, LASX, gvec_xxx, MO_8, tcg_gen_gvec_umax)
+TRANS(xvmax_hu, LASX, gvec_xxx, MO_16, tcg_gen_gvec_umax)
+TRANS(xvmax_wu, LASX, gvec_xxx, MO_32, tcg_gen_gvec_umax)
+TRANS(xvmax_du, LASX, gvec_xxx, MO_64, tcg_gen_gvec_umax)
+
+TRANS(vmin_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_smin)
+TRANS(vmin_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_smin)
+TRANS(vmin_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_smin)
+TRANS(vmin_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_smin)
+TRANS(vmin_bu, LSX, gvec_vvv, MO_8, tcg_gen_gvec_umin)
+TRANS(vmin_hu, LSX, gvec_vvv, MO_16, tcg_gen_gvec_umin)
+TRANS(vmin_wu, LSX, gvec_vvv, MO_32, tcg_gen_gvec_umin)
+TRANS(vmin_du, LSX, gvec_vvv, MO_64, tcg_gen_gvec_umin)
+TRANS(xvmin_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_smin)
+TRANS(xvmin_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_smin)
+TRANS(xvmin_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_smin)
+TRANS(xvmin_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_smin)
+TRANS(xvmin_bu, LASX, gvec_xxx, MO_8, tcg_gen_gvec_umin)
+TRANS(xvmin_hu, LASX, gvec_xxx, MO_16, tcg_gen_gvec_umin)
+TRANS(xvmin_wu, LASX, gvec_xxx, MO_32, tcg_gen_gvec_umin)
+TRANS(xvmin_du, LASX, gvec_xxx, MO_64, tcg_gen_gvec_umin)
+
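+/*
+ * [x]vmini/[x]vmaxi: signed or unsigned min/max against the immediate,
+ * expanded by replicating the immediate into a matching vector constant.
+ */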
+static void gen_vmini_s(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
+{
+ tcg_gen_smin_vec(vece, t, a, tcg_constant_vec_matching(t, vece, imm));
+}
+
+static void gen_vmini_u(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
+{
+ tcg_gen_umin_vec(vece, t, a, tcg_constant_vec_matching(t, vece, imm));
+}
+
+static void gen_vmaxi_s(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
+{
+ tcg_gen_smax_vec(vece, t, a, tcg_constant_vec_matching(t, vece, imm));
+}
+
+static void gen_vmaxi_u(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
+{
+ tcg_gen_umax_vec(vece, t, a, tcg_constant_vec_matching(t, vece, imm));
+}
+
+static void do_vmini_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+ int64_t imm, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_smin_vec, 0
+ };
+ static const GVecGen2i op[4] = {
+ {
+ .fniv = gen_vmini_s,
+ .fnoi = gen_helper_vmini_b,
+ .opt_opc = vecop_list,
+ .vece = MO_8
+ },
+ {
+ .fniv = gen_vmini_s,
+ .fnoi = gen_helper_vmini_h,
+ .opt_opc = vecop_list,
+ .vece = MO_16
+ },
+ {
+ .fniv = gen_vmini_s,
+ .fnoi = gen_helper_vmini_w,
+ .opt_opc = vecop_list,
+ .vece = MO_32
+ },
+ {
+ .fniv = gen_vmini_s,
+ .fnoi = gen_helper_vmini_d,
+ .opt_opc = vecop_list,
+ .vece = MO_64
+ },
+ };
+
+ tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
+}
+
+static void do_vmini_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+ int64_t imm, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_umin_vec, 0
+ };
+ static const GVecGen2i op[4] = {
+ {
+ .fniv = gen_vmini_u,
+ .fnoi = gen_helper_vmini_bu,
+ .opt_opc = vecop_list,
+ .vece = MO_8
+ },
+ {
+ .fniv = gen_vmini_u,
+ .fnoi = gen_helper_vmini_hu,
+ .opt_opc = vecop_list,
+ .vece = MO_16
+ },
+ {
+ .fniv = gen_vmini_u,
+ .fnoi = gen_helper_vmini_wu,
+ .opt_opc = vecop_list,
+ .vece = MO_32
+ },
+ {
+ .fniv = gen_vmini_u,
+ .fnoi = gen_helper_vmini_du,
+ .opt_opc = vecop_list,
+ .vece = MO_64
+ },
+ };
+
+ tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
+}
+
+TRANS(vmini_b, LSX, gvec_vv_i, MO_8, do_vmini_s)
+TRANS(vmini_h, LSX, gvec_vv_i, MO_16, do_vmini_s)
+TRANS(vmini_w, LSX, gvec_vv_i, MO_32, do_vmini_s)
+TRANS(vmini_d, LSX, gvec_vv_i, MO_64, do_vmini_s)
+TRANS(vmini_bu, LSX, gvec_vv_i, MO_8, do_vmini_u)
+TRANS(vmini_hu, LSX, gvec_vv_i, MO_16, do_vmini_u)
+TRANS(vmini_wu, LSX, gvec_vv_i, MO_32, do_vmini_u)
+TRANS(vmini_du, LSX, gvec_vv_i, MO_64, do_vmini_u)
+TRANS(xvmini_b, LASX, gvec_xx_i, MO_8, do_vmini_s)
+TRANS(xvmini_h, LASX, gvec_xx_i, MO_16, do_vmini_s)
+TRANS(xvmini_w, LASX, gvec_xx_i, MO_32, do_vmini_s)
+TRANS(xvmini_d, LASX, gvec_xx_i, MO_64, do_vmini_s)
+TRANS(xvmini_bu, LASX, gvec_xx_i, MO_8, do_vmini_u)
+TRANS(xvmini_hu, LASX, gvec_xx_i, MO_16, do_vmini_u)
+TRANS(xvmini_wu, LASX, gvec_xx_i, MO_32, do_vmini_u)
+TRANS(xvmini_du, LASX, gvec_xx_i, MO_64, do_vmini_u)
+
+static void do_vmaxi_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+ int64_t imm, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_smax_vec, 0
+ };
+ static const GVecGen2i op[4] = {
+ {
+ .fniv = gen_vmaxi_s,
+ .fnoi = gen_helper_vmaxi_b,
+ .opt_opc = vecop_list,
+ .vece = MO_8
+ },
+ {
+ .fniv = gen_vmaxi_s,
+ .fnoi = gen_helper_vmaxi_h,
+ .opt_opc = vecop_list,
+ .vece = MO_16
+ },
+ {
+ .fniv = gen_vmaxi_s,
+ .fnoi = gen_helper_vmaxi_w,
+ .opt_opc = vecop_list,
+ .vece = MO_32
+ },
+ {
+ .fniv = gen_vmaxi_s,
+ .fnoi = gen_helper_vmaxi_d,
+ .opt_opc = vecop_list,
+ .vece = MO_64
+ },
+ };
+
+ tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
+}
+
+static void do_vmaxi_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+ int64_t imm, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_umax_vec, 0
+ };
+ static const GVecGen2i op[4] = {
+ {
+ .fniv = gen_vmaxi_u,
+ .fnoi = gen_helper_vmaxi_bu,
+ .opt_opc = vecop_list,
+ .vece = MO_8
+ },
+ {
+ .fniv = gen_vmaxi_u,
+ .fnoi = gen_helper_vmaxi_hu,
+ .opt_opc = vecop_list,
+ .vece = MO_16
+ },
+ {
+ .fniv = gen_vmaxi_u,
+ .fnoi = gen_helper_vmaxi_wu,
+ .opt_opc = vecop_list,
+ .vece = MO_32
+ },
+ {
+ .fniv = gen_vmaxi_u,
+ .fnoi = gen_helper_vmaxi_du,
+ .opt_opc = vecop_list,
+ .vece = MO_64
+ },
+ };
+
+ tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
+}
+
+TRANS(vmaxi_b, LSX, gvec_vv_i, MO_8, do_vmaxi_s)
+TRANS(vmaxi_h, LSX, gvec_vv_i, MO_16, do_vmaxi_s)
+TRANS(vmaxi_w, LSX, gvec_vv_i, MO_32, do_vmaxi_s)
+TRANS(vmaxi_d, LSX, gvec_vv_i, MO_64, do_vmaxi_s)
+TRANS(vmaxi_bu, LSX, gvec_vv_i, MO_8, do_vmaxi_u)
+TRANS(vmaxi_hu, LSX, gvec_vv_i, MO_16, do_vmaxi_u)
+TRANS(vmaxi_wu, LSX, gvec_vv_i, MO_32, do_vmaxi_u)
+TRANS(vmaxi_du, LSX, gvec_vv_i, MO_64, do_vmaxi_u)
+TRANS(xvmaxi_b, LASX, gvec_xx_i, MO_8, do_vmaxi_s)
+TRANS(xvmaxi_h, LASX, gvec_xx_i, MO_16, do_vmaxi_s)
+TRANS(xvmaxi_w, LASX, gvec_xx_i, MO_32, do_vmaxi_s)
+TRANS(xvmaxi_d, LASX, gvec_xx_i, MO_64, do_vmaxi_s)
+TRANS(xvmaxi_bu, LASX, gvec_xx_i, MO_8, do_vmaxi_u)
+TRANS(xvmaxi_hu, LASX, gvec_xx_i, MO_16, do_vmaxi_u)
+TRANS(xvmaxi_wu, LASX, gvec_xx_i, MO_32, do_vmaxi_u)
+TRANS(xvmaxi_du, LASX, gvec_xx_i, MO_64, do_vmaxi_u)
+
+TRANS(vmul_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_mul)
+TRANS(vmul_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_mul)
+TRANS(vmul_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_mul)
+TRANS(vmul_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_mul)
+TRANS(xvmul_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_mul)
+TRANS(xvmul_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_mul)
+TRANS(xvmul_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_mul)
+TRANS(xvmul_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_mul)
+
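+/*
+ * [x]vmuh.{b/h/w/d}[u]: multiply and keep the high half of the
+ * double-width product.  Only the 32- and 64-bit element sizes expand
+ * inline via muls2/mulu2 (discarding the low half); the byte and
+ * halfword cases always go through the out-of-line helpers.
+ */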
+static void gen_vmuh_w(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 discard = tcg_temp_new_i32();
+ tcg_gen_muls2_i32(discard, t, a, b);
+}
+
+static void gen_vmuh_d(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 discard = tcg_temp_new_i64();
+ tcg_gen_muls2_i64(discard, t, a, b);
+}
+
+static void do_vmuh_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen3 op[4] = {
+ {
+ .fno = gen_helper_vmuh_b,
+ .vece = MO_8
+ },
+ {
+ .fno = gen_helper_vmuh_h,
+ .vece = MO_16
+ },
+ {
+ .fni4 = gen_vmuh_w,
+ .fno = gen_helper_vmuh_w,
+ .vece = MO_32
+ },
+ {
+ .fni8 = gen_vmuh_d,
+ .fno = gen_helper_vmuh_d,
+ .vece = MO_64
+ },
+ };
+
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
+}
+
+TRANS(vmuh_b, LSX, gvec_vvv, MO_8, do_vmuh_s)
+TRANS(vmuh_h, LSX, gvec_vvv, MO_16, do_vmuh_s)
+TRANS(vmuh_w, LSX, gvec_vvv, MO_32, do_vmuh_s)
+TRANS(vmuh_d, LSX, gvec_vvv, MO_64, do_vmuh_s)
+TRANS(xvmuh_b, LASX, gvec_xxx, MO_8, do_vmuh_s)
+TRANS(xvmuh_h, LASX, gvec_xxx, MO_16, do_vmuh_s)
+TRANS(xvmuh_w, LASX, gvec_xxx, MO_32, do_vmuh_s)
+TRANS(xvmuh_d, LASX, gvec_xxx, MO_64, do_vmuh_s)
+
+static void gen_vmuh_wu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 discard = tcg_temp_new_i32();
+ tcg_gen_mulu2_i32(discard, t, a, b);
+}
+
+static void gen_vmuh_du(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 discard = tcg_temp_new_i64();
+ tcg_gen_mulu2_i64(discard, t, a, b);
+}
+
+static void do_vmuh_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen3 op[4] = {
+ {
+ .fno = gen_helper_vmuh_bu,
+ .vece = MO_8
+ },
+ {
+ .fno = gen_helper_vmuh_hu,
+ .vece = MO_16
+ },
+ {
+ .fni4 = gen_vmuh_wu,
+ .fno = gen_helper_vmuh_wu,
+ .vece = MO_32
+ },
+ {
+ .fni8 = gen_vmuh_du,
+ .fno = gen_helper_vmuh_du,
+ .vece = MO_64
+ },
+ };
+
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
+}
+
+TRANS(vmuh_bu, LSX, gvec_vvv, MO_8, do_vmuh_u)
+TRANS(vmuh_hu, LSX, gvec_vvv, MO_16, do_vmuh_u)
+TRANS(vmuh_wu, LSX, gvec_vvv, MO_32, do_vmuh_u)
+TRANS(vmuh_du, LSX, gvec_vvv, MO_64, do_vmuh_u)
+TRANS(xvmuh_bu, LASX, gvec_xxx, MO_8, do_vmuh_u)
+TRANS(xvmuh_hu, LASX, gvec_xxx, MO_16, do_vmuh_u)
+TRANS(xvmuh_wu, LASX, gvec_xxx, MO_32, do_vmuh_u)
+TRANS(xvmuh_du, LASX, gvec_xxx, MO_64, do_vmuh_u)
+
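+/*
+ * vmulwev (signed): widening multiply of the even-numbered elements.
+ * Within each double-width lane the low half is sign-extended in place
+ * with a shli/sari pair by halfbits, then the two extended values are
+ * multiplied.
+ */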
+static void gen_vmulwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t1, t2;
+ int halfbits = 4 << vece;
+
+ t1 = tcg_temp_new_vec_matching(a);
+ t2 = tcg_temp_new_vec_matching(b);
+ tcg_gen_shli_vec(vece, t1, a, halfbits);
+ tcg_gen_sari_vec(vece, t1, t1, halfbits);
+ tcg_gen_shli_vec(vece, t2, b, halfbits);
+ tcg_gen_sari_vec(vece, t2, t2, halfbits);
+ tcg_gen_mul_vec(vece, t, t1, t2);
+}
+
+static void gen_vmulwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t1, t2;
+
+ t1 = tcg_temp_new_i32();
+ t2 = tcg_temp_new_i32();
+ tcg_gen_ext16s_i32(t1, a);
+ tcg_gen_ext16s_i32(t2, b);
+ tcg_gen_mul_i32(t, t1, t2);
+}
+
+static void gen_vmulwev_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t1, t2;
+
+ t1 = tcg_temp_new_i64();
+ t2 = tcg_temp_new_i64();
+ tcg_gen_ext32s_i64(t1, a);
+ tcg_gen_ext32s_i64(t2, b);
+ tcg_gen_mul_i64(t, t1, t2);
+}
+
+static void do_vmulwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_mul_vec, 0
+ };
+ static const GVecGen3 op[3] = {
+ {
+ .fniv = gen_vmulwev_s,
+ .fno = gen_helper_vmulwev_h_b,
+ .opt_opc = vecop_list,
+ .vece = MO_16
+ },
+ {
+ .fni4 = gen_vmulwev_w_h,
+ .fniv = gen_vmulwev_s,
+ .fno = gen_helper_vmulwev_w_h,
+ .opt_opc = vecop_list,
+ .vece = MO_32
+ },
+ {
+ .fni8 = gen_vmulwev_d_w,
+ .fniv = gen_vmulwev_s,
+ .fno = gen_helper_vmulwev_d_w,
+ .opt_opc = vecop_list,
+ .vece = MO_64
+ },
+ };
+
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
+}
+
+TRANS(vmulwev_h_b, LSX, gvec_vvv, MO_8, do_vmulwev_s)
+TRANS(vmulwev_w_h, LSX, gvec_vvv, MO_16, do_vmulwev_s)
+TRANS(vmulwev_d_w, LSX, gvec_vvv, MO_32, do_vmulwev_s)
+TRANS(xvmulwev_h_b, LASX, gvec_xxx, MO_8, do_vmulwev_s)
+TRANS(xvmulwev_w_h, LASX, gvec_xxx, MO_16, do_vmulwev_s)
+TRANS(xvmulwev_d_w, LASX, gvec_xxx, MO_32, do_vmulwev_s)
+
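+/*
+ * TCG provides no mulus2 (unsigned * signed); synthesize it by
+ * swapping the operands of mulsu2 (signed * unsigned).
+ */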
+static void tcg_gen_mulus2_i64(TCGv_i64 rl, TCGv_i64 rh,
+ TCGv_i64 arg1, TCGv_i64 arg2)
+{
+ tcg_gen_mulsu2_i64(rl, rh, arg2, arg1);
+}
+
+static bool gen_vmul_q_vl(DisasContext *ctx,
+ arg_vvv *a, uint32_t oprsz, int idx1, int idx2,
+ void (*func)(TCGv_i64, TCGv_i64,
+ TCGv_i64, TCGv_i64))
+{
+ TCGv_i64 rh, rl, arg1, arg2;
+ int i;
+
+ if (!check_vec(ctx, oprsz)) {
+ return true;
+ }
+
+ rh = tcg_temp_new_i64();
+ rl = tcg_temp_new_i64();
+ arg1 = tcg_temp_new_i64();
+ arg2 = tcg_temp_new_i64();
+
+ for (i = 0; i < oprsz / 16; i++) {
+ get_vreg64(arg1, a->vj, 2 * i + idx1);
+ get_vreg64(arg2, a->vk, 2 * i + idx2);
+
+ func(rl, rh, arg1, arg2);
+
+ set_vreg64(rh, a->vd, 2 * i + 1);
+ set_vreg64(rl, a->vd, 2 * i);
+ }
+
+ return true;
+}
+
+static bool gen_vmul_q(DisasContext *ctx, arg_vvv *a, int idx1, int idx2,
+ void (*func)(TCGv_i64, TCGv_i64,
+ TCGv_i64, TCGv_i64))
+{
+ return gen_vmul_q_vl(ctx, a, 16, idx1, idx2, func);
+}
+
+static bool gen_xvmul_q(DisasContext *ctx, arg_vvv *a, int idx1, int idx2,
+ void (*func)(TCGv_i64, TCGv_i64,
+ TCGv_i64, TCGv_i64))
+{
+ return gen_vmul_q_vl(ctx, a, 32, idx1, idx2, func);
+}
+
+TRANS(vmulwev_q_d, LSX, gen_vmul_q, 0, 0, tcg_gen_muls2_i64)
+TRANS(vmulwod_q_d, LSX, gen_vmul_q, 1, 1, tcg_gen_muls2_i64)
+TRANS(vmulwev_q_du, LSX, gen_vmul_q, 0, 0, tcg_gen_mulu2_i64)
+TRANS(vmulwod_q_du, LSX, gen_vmul_q, 1, 1, tcg_gen_mulu2_i64)
+TRANS(vmulwev_q_du_d, LSX, gen_vmul_q, 0, 0, tcg_gen_mulus2_i64)
+TRANS(vmulwod_q_du_d, LSX, gen_vmul_q, 1, 1, tcg_gen_mulus2_i64)
+TRANS(xvmulwev_q_d, LASX, gen_xvmul_q, 0, 0, tcg_gen_muls2_i64)
+TRANS(xvmulwod_q_d, LASX, gen_xvmul_q, 1, 1, tcg_gen_muls2_i64)
+TRANS(xvmulwev_q_du, LASX, gen_xvmul_q, 0, 0, tcg_gen_mulu2_i64)
+TRANS(xvmulwod_q_du, LASX, gen_xvmul_q, 1, 1, tcg_gen_mulu2_i64)
+TRANS(xvmulwev_q_du_d, LASX, gen_xvmul_q, 0, 0, tcg_gen_mulus2_i64)
+TRANS(xvmulwod_q_du_d, LASX, gen_xvmul_q, 1, 1, tcg_gen_mulus2_i64)
+
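+/*
+ * vmulwod (signed): widening multiply of the odd-numbered elements.
+ * The high half of each double-width lane is extracted with an
+ * arithmetic right shift by halfbits before multiplying.
+ */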
+static void gen_vmulwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t1, t2;
+ int halfbits = 4 << vece;
+
+ t1 = tcg_temp_new_vec_matching(a);
+ t2 = tcg_temp_new_vec_matching(b);
+ tcg_gen_sari_vec(vece, t1, a, halfbits);
+ tcg_gen_sari_vec(vece, t2, b, halfbits);
+ tcg_gen_mul_vec(vece, t, t1, t2);
+}
+
+static void gen_vmulwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t1, t2;
+
+ t1 = tcg_temp_new_i32();
+ t2 = tcg_temp_new_i32();
+ tcg_gen_sari_i32(t1, a, 16);
+ tcg_gen_sari_i32(t2, b, 16);
+ tcg_gen_mul_i32(t, t1, t2);
+}
+
+static void gen_vmulwod_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t1, t2;
+
+ t1 = tcg_temp_new_i64();
+ t2 = tcg_temp_new_i64();
+ tcg_gen_sari_i64(t1, a, 32);
+ tcg_gen_sari_i64(t2, b, 32);
+ tcg_gen_mul_i64(t, t1, t2);
+}
+
+static void do_vmulwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_sari_vec, INDEX_op_mul_vec, 0
+ };
+ static const GVecGen3 op[3] = {
+ {
+ .fniv = gen_vmulwod_s,
+ .fno = gen_helper_vmulwod_h_b,
+ .opt_opc = vecop_list,
+ .vece = MO_16
+ },
+ {
+ .fni4 = gen_vmulwod_w_h,
+ .fniv = gen_vmulwod_s,
+ .fno = gen_helper_vmulwod_w_h,
+ .opt_opc = vecop_list,
+ .vece = MO_32
+ },
+ {
+ .fni8 = gen_vmulwod_d_w,
+ .fniv = gen_vmulwod_s,
+ .fno = gen_helper_vmulwod_d_w,
+ .opt_opc = vecop_list,
+ .vece = MO_64
+ },
+ };
+
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
+}
+
+TRANS(vmulwod_h_b, LSX, gvec_vvv, MO_8, do_vmulwod_s)
+TRANS(vmulwod_w_h, LSX, gvec_vvv, MO_16, do_vmulwod_s)
+TRANS(vmulwod_d_w, LSX, gvec_vvv, MO_32, do_vmulwod_s)
+TRANS(xvmulwod_h_b, LASX, gvec_xxx, MO_8, do_vmulwod_s)
+TRANS(xvmulwod_w_h, LASX, gvec_xxx, MO_16, do_vmulwod_s)
+TRANS(xvmulwod_d_w, LASX, gvec_xxx, MO_32, do_vmulwod_s)
+
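+/*
+ * vmulwev (unsigned): the even elements are isolated by masking the
+ * low halfbits of each lane instead of the shl/sar sign extension used
+ * by the signed form.
+ */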
+static void gen_vmulwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t1, t2, mask;
+
+ t1 = tcg_temp_new_vec_matching(a);
+ t2 = tcg_temp_new_vec_matching(b);
+ mask = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
+ tcg_gen_and_vec(vece, t1, a, mask);
+ tcg_gen_and_vec(vece, t2, b, mask);
+ tcg_gen_mul_vec(vece, t, t1, t2);
+}
+
+static void gen_vmulwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t1, t2;
+
+ t1 = tcg_temp_new_i32();
+ t2 = tcg_temp_new_i32();
+ tcg_gen_ext16u_i32(t1, a);
+ tcg_gen_ext16u_i32(t2, b);
+ tcg_gen_mul_i32(t, t1, t2);
+}
+
+static void gen_vmulwev_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t1, t2;
+
+ t1 = tcg_temp_new_i64();
+ t2 = tcg_temp_new_i64();
+ tcg_gen_ext32u_i64(t1, a);
+ tcg_gen_ext32u_i64(t2, b);
+ tcg_gen_mul_i64(t, t1, t2);
+}
+
+static void do_vmulwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_mul_vec, 0
+ };
+ static const GVecGen3 op[3] = {
+ {
+ .fniv = gen_vmulwev_u,
+ .fno = gen_helper_vmulwev_h_bu,
+ .opt_opc = vecop_list,
+ .vece = MO_16
+ },
+ {
+ .fni4 = gen_vmulwev_w_hu,
+ .fniv = gen_vmulwev_u,
+ .fno = gen_helper_vmulwev_w_hu,
+ .opt_opc = vecop_list,
+ .vece = MO_32
+ },
+ {
+ .fni8 = gen_vmulwev_d_wu,
+ .fniv = gen_vmulwev_u,
+ .fno = gen_helper_vmulwev_d_wu,
+ .opt_opc = vecop_list,
+ .vece = MO_64
+ },
+ };
+
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
+}
+
+TRANS(vmulwev_h_bu, LSX, gvec_vvv, MO_8, do_vmulwev_u)
+TRANS(vmulwev_w_hu, LSX, gvec_vvv, MO_16, do_vmulwev_u)
+TRANS(vmulwev_d_wu, LSX, gvec_vvv, MO_32, do_vmulwev_u)
+TRANS(xvmulwev_h_bu, LASX, gvec_xxx, MO_8, do_vmulwev_u)
+TRANS(xvmulwev_w_hu, LASX, gvec_xxx, MO_16, do_vmulwev_u)
+TRANS(xvmulwev_d_wu, LASX, gvec_xxx, MO_32, do_vmulwev_u)
+
+static void gen_vmulwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t1, t2;
+ int halfbits = 4 << vece;
+
+ t1 = tcg_temp_new_vec_matching(a);
+ t2 = tcg_temp_new_vec_matching(b);
+ tcg_gen_shri_vec(vece, t1, a, halfbits);
+ tcg_gen_shri_vec(vece, t2, b, halfbits);
+ tcg_gen_mul_vec(vece, t, t1, t2);
+}
+
+static void gen_vmulwod_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t1, t2;
+
+ t1 = tcg_temp_new_i32();
+ t2 = tcg_temp_new_i32();
+ tcg_gen_shri_i32(t1, a, 16);
+ tcg_gen_shri_i32(t2, b, 16);
+ tcg_gen_mul_i32(t, t1, t2);
+}
+
+static void gen_vmulwod_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t1, t2;
+
+ t1 = tcg_temp_new_i64();
+ t2 = tcg_temp_new_i64();
+ tcg_gen_shri_i64(t1, a, 32);
+ tcg_gen_shri_i64(t2, b, 32);
+ tcg_gen_mul_i64(t, t1, t2);
+}
+
+static void do_vmulwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shri_vec, INDEX_op_mul_vec, 0
+ };
+ static const GVecGen3 op[3] = {
+ {
+ .fniv = gen_vmulwod_u,
+ .fno = gen_helper_vmulwod_h_bu,
+ .opt_opc = vecop_list,
+ .vece = MO_16
+ },
+ {
+ .fni4 = gen_vmulwod_w_hu,
+ .fniv = gen_vmulwod_u,
+ .fno = gen_helper_vmulwod_w_hu,
+ .opt_opc = vecop_list,
+ .vece = MO_32
+ },
+ {
+ .fni8 = gen_vmulwod_d_wu,
+ .fniv = gen_vmulwod_u,
+ .fno = gen_helper_vmulwod_d_wu,
+ .opt_opc = vecop_list,
+ .vece = MO_64
+ },
+ };
+
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
+}
+
+TRANS(vmulwod_h_bu, LSX, gvec_vvv, MO_8, do_vmulwod_u)
+TRANS(vmulwod_w_hu, LSX, gvec_vvv, MO_16, do_vmulwod_u)
+TRANS(vmulwod_d_wu, LSX, gvec_vvv, MO_32, do_vmulwod_u)
+TRANS(xvmulwod_h_bu, LASX, gvec_xxx, MO_8, do_vmulwod_u)
+TRANS(xvmulwod_w_hu, LASX, gvec_xxx, MO_16, do_vmulwod_u)
+TRANS(xvmulwod_d_wu, LASX, gvec_xxx, MO_32, do_vmulwod_u)
+
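+/*
+ * vmulwev.*u.*: mixed-signedness even-element multiply; the vj element
+ * is zero-extended (AND mask) and the vk element sign-extended
+ * (shl/sar) before the multiply.
+ */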
+static void gen_vmulwev_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t1, t2, mask;
+ int halfbits = 4 << vece;
+
+ t1 = tcg_temp_new_vec_matching(a);
+ t2 = tcg_temp_new_vec_matching(b);
+ mask = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
+ tcg_gen_and_vec(vece, t1, a, mask);
+ tcg_gen_shli_vec(vece, t2, b, halfbits);
+ tcg_gen_sari_vec(vece, t2, t2, halfbits);
+ tcg_gen_mul_vec(vece, t, t1, t2);
+}
+
+static void gen_vmulwev_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t1, t2;
+
+ t1 = tcg_temp_new_i32();
+ t2 = tcg_temp_new_i32();
+ tcg_gen_ext16u_i32(t1, a);
+ tcg_gen_ext16s_i32(t2, b);
+ tcg_gen_mul_i32(t, t1, t2);
+}
+
+static void gen_vmulwev_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t1, t2;
+
+ t1 = tcg_temp_new_i64();
+ t2 = tcg_temp_new_i64();
+ tcg_gen_ext32u_i64(t1, a);
+ tcg_gen_ext32s_i64(t2, b);
+ tcg_gen_mul_i64(t, t1, t2);
+}
+
+static void do_vmulwev_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shli_vec, INDEX_op_sari_vec, INDEX_op_mul_vec, 0
+ };
+ static const GVecGen3 op[3] = {
+ {
+ .fniv = gen_vmulwev_u_s,
+ .fno = gen_helper_vmulwev_h_bu_b,
+ .opt_opc = vecop_list,
+ .vece = MO_16
+ },
+ {
+ .fni4 = gen_vmulwev_w_hu_h,
+ .fniv = gen_vmulwev_u_s,
+ .fno = gen_helper_vmulwev_w_hu_h,
+ .opt_opc = vecop_list,
+ .vece = MO_32
+ },
+ {
+ .fni8 = gen_vmulwev_d_wu_w,
+ .fniv = gen_vmulwev_u_s,
+ .fno = gen_helper_vmulwev_d_wu_w,
+ .opt_opc = vecop_list,
+ .vece = MO_64
+ },
+ };
+
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
+}
+
+TRANS(vmulwev_h_bu_b, LSX, gvec_vvv, MO_8, do_vmulwev_u_s)
+TRANS(vmulwev_w_hu_h, LSX, gvec_vvv, MO_16, do_vmulwev_u_s)
+TRANS(vmulwev_d_wu_w, LSX, gvec_vvv, MO_32, do_vmulwev_u_s)
+TRANS(xvmulwev_h_bu_b, LASX, gvec_xxx, MO_8, do_vmulwev_u_s)
+TRANS(xvmulwev_w_hu_h, LASX, gvec_xxx, MO_16, do_vmulwev_u_s)
+TRANS(xvmulwev_d_wu_w, LASX, gvec_xxx, MO_32, do_vmulwev_u_s)
+
+static void gen_vmulwod_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t1, t2;
+ int halfbits = 4 << vece;
+
+ t1 = tcg_temp_new_vec_matching(a);
+ t2 = tcg_temp_new_vec_matching(b);
+ tcg_gen_shri_vec(vece, t1, a, halfbits);
+ tcg_gen_sari_vec(vece, t2, b, halfbits);
+ tcg_gen_mul_vec(vece, t, t1, t2);
+}
+
+static void gen_vmulwod_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t1, t2;
+
+ t1 = tcg_temp_new_i32();
+ t2 = tcg_temp_new_i32();
+ tcg_gen_shri_i32(t1, a, 16);
+ tcg_gen_sari_i32(t2, b, 16);
+ tcg_gen_mul_i32(t, t1, t2);
+}
+
+static void gen_vmulwod_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t1, t2;
+
+ t1 = tcg_temp_new_i64();
+ t2 = tcg_temp_new_i64();
+ tcg_gen_shri_i64(t1, a, 32);
+ tcg_gen_sari_i64(t2, b, 32);
+ tcg_gen_mul_i64(t, t1, t2);
+}
+
+static void do_vmulwod_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_mul_vec, 0
+ };
+ static const GVecGen3 op[3] = {
+ {
+ .fniv = gen_vmulwod_u_s,
+ .fno = gen_helper_vmulwod_h_bu_b,
+ .opt_opc = vecop_list,
+ .vece = MO_16
+ },
+ {
+ .fni4 = gen_vmulwod_w_hu_h,
+ .fniv = gen_vmulwod_u_s,
+ .fno = gen_helper_vmulwod_w_hu_h,
+ .opt_opc = vecop_list,
+ .vece = MO_32
+ },
+ {
+ .fni8 = gen_vmulwod_d_wu_w,
+ .fniv = gen_vmulwod_u_s,
+ .fno = gen_helper_vmulwod_d_wu_w,
+ .opt_opc = vecop_list,
+ .vece = MO_64
+ },
+ };
+
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
+}
+
+TRANS(vmulwod_h_bu_b, LSX, gvec_vvv, MO_8, do_vmulwod_u_s)
+TRANS(vmulwod_w_hu_h, LSX, gvec_vvv, MO_16, do_vmulwod_u_s)
+TRANS(vmulwod_d_wu_w, LSX, gvec_vvv, MO_32, do_vmulwod_u_s)
+TRANS(xvmulwod_h_bu_b, LASX, gvec_xxx, MO_8, do_vmulwod_u_s)
+TRANS(xvmulwod_w_hu_h, LASX, gvec_xxx, MO_16, do_vmulwod_u_s)
+TRANS(xvmulwod_d_wu_w, LASX, gvec_xxx, MO_32, do_vmulwod_u_s)
+
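+/*
+ * [x]vmadd: vd += vj * vk.  load_dest is set in the GVecGen3 ops so
+ * the destination is also read as the accumulator.
+ */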
+static void gen_vmadd(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t1;
+
+ t1 = tcg_temp_new_vec_matching(t);
+ tcg_gen_mul_vec(vece, t1, a, b);
+ tcg_gen_add_vec(vece, t, t, t1);
+}
+
+static void gen_vmadd_w(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t1;
+
+ t1 = tcg_temp_new_i32();
+ tcg_gen_mul_i32(t1, a, b);
+ tcg_gen_add_i32(t, t, t1);
+}
+
+static void gen_vmadd_d(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t1;
+
+ t1 = tcg_temp_new_i64();
+ tcg_gen_mul_i64(t1, a, b);
+ tcg_gen_add_i64(t, t, t1);
+}
+
+static void do_vmadd(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_mul_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen3 op[4] = {
+ {
+ .fniv = gen_vmadd,
+ .fno = gen_helper_vmadd_b,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_8
+ },
+ {
+ .fniv = gen_vmadd,
+ .fno = gen_helper_vmadd_h,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_16
+ },
+ {
+ .fni4 = gen_vmadd_w,
+ .fniv = gen_vmadd,
+ .fno = gen_helper_vmadd_w,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_32
+ },
+ {
+ .fni8 = gen_vmadd_d,
+ .fniv = gen_vmadd,
+ .fno = gen_helper_vmadd_d,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_64
+ },
+ };
+
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
+}
+
+TRANS(vmadd_b, LSX, gvec_vvv, MO_8, do_vmadd)
+TRANS(vmadd_h, LSX, gvec_vvv, MO_16, do_vmadd)
+TRANS(vmadd_w, LSX, gvec_vvv, MO_32, do_vmadd)
+TRANS(vmadd_d, LSX, gvec_vvv, MO_64, do_vmadd)
+TRANS(xvmadd_b, LASX, gvec_xxx, MO_8, do_vmadd)
+TRANS(xvmadd_h, LASX, gvec_xxx, MO_16, do_vmadd)
+TRANS(xvmadd_w, LASX, gvec_xxx, MO_32, do_vmadd)
+TRANS(xvmadd_d, LASX, gvec_xxx, MO_64, do_vmadd)
+
+static void gen_vmsub(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t1;
+
+ t1 = tcg_temp_new_vec_matching(t);
+ tcg_gen_mul_vec(vece, t1, a, b);
+ tcg_gen_sub_vec(vece, t, t, t1);
+}
+
+static void gen_vmsub_w(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t1;
+
+ t1 = tcg_temp_new_i32();
+ tcg_gen_mul_i32(t1, a, b);
+ tcg_gen_sub_i32(t, t, t1);
+}
+
+static void gen_vmsub_d(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t1;
+
+ t1 = tcg_temp_new_i64();
+ tcg_gen_mul_i64(t1, a, b);
+ tcg_gen_sub_i64(t, t, t1);
+}
+
+static void do_vmsub(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_mul_vec, INDEX_op_sub_vec, 0
+ };
+ static const GVecGen3 op[4] = {
+ {
+ .fniv = gen_vmsub,
+ .fno = gen_helper_vmsub_b,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_8
+ },
+ {
+ .fniv = gen_vmsub,
+ .fno = gen_helper_vmsub_h,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_16
+ },
+ {
+ .fni4 = gen_vmsub_w,
+ .fniv = gen_vmsub,
+ .fno = gen_helper_vmsub_w,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_32
+ },
+ {
+ .fni8 = gen_vmsub_d,
+ .fniv = gen_vmsub,
+ .fno = gen_helper_vmsub_d,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_64
+ },
+ };
+
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
+}
+
+TRANS(vmsub_b, LSX, gvec_vvv, MO_8, do_vmsub)
+TRANS(vmsub_h, LSX, gvec_vvv, MO_16, do_vmsub)
+TRANS(vmsub_w, LSX, gvec_vvv, MO_32, do_vmsub)
+TRANS(vmsub_d, LSX, gvec_vvv, MO_64, do_vmsub)
+TRANS(xvmsub_b, LASX, gvec_xxx, MO_8, do_vmsub)
+TRANS(xvmsub_h, LASX, gvec_xxx, MO_16, do_vmsub)
+TRANS(xvmsub_w, LASX, gvec_xxx, MO_32, do_vmsub)
+TRANS(xvmsub_d, LASX, gvec_xxx, MO_64, do_vmsub)
+
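+/*
+ * vmaddwev (signed): widening multiply-accumulate of the even
+ * elements, vd += sext(even(vj)) * sext(even(vk)); the scalar cases
+ * reuse the vmulwev expanders and add the result to the destination.
+ */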
+static void gen_vmaddwev_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t1, t2, t3;
+ int halfbits = 4 << vece;
+
+ t1 = tcg_temp_new_vec_matching(a);
+ t2 = tcg_temp_new_vec_matching(b);
+ t3 = tcg_temp_new_vec_matching(t);
+ tcg_gen_shli_vec(vece, t1, a, halfbits);
+ tcg_gen_sari_vec(vece, t1, t1, halfbits);
+ tcg_gen_shli_vec(vece, t2, b, halfbits);
+ tcg_gen_sari_vec(vece, t2, t2, halfbits);
+ tcg_gen_mul_vec(vece, t3, t1, t2);
+ tcg_gen_add_vec(vece, t, t, t3);
+}
+
+static void gen_vmaddwev_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t1;
+
+ t1 = tcg_temp_new_i32();
+ gen_vmulwev_w_h(t1, a, b);
+ tcg_gen_add_i32(t, t, t1);
+}
+
+static void gen_vmaddwev_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t1;
+
+ t1 = tcg_temp_new_i64();
+ gen_vmulwev_d_w(t1, a, b);
+ tcg_gen_add_i64(t, t, t1);
+}
+
+static void do_vmaddwev_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shli_vec, INDEX_op_sari_vec,
+ INDEX_op_mul_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen3 op[3] = {
+ {
+ .fniv = gen_vmaddwev_s,
+ .fno = gen_helper_vmaddwev_h_b,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_16
+ },
+ {
+ .fni4 = gen_vmaddwev_w_h,
+ .fniv = gen_vmaddwev_s,
+ .fno = gen_helper_vmaddwev_w_h,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_32
+ },
+ {
+ .fni8 = gen_vmaddwev_d_w,
+ .fniv = gen_vmaddwev_s,
+ .fno = gen_helper_vmaddwev_d_w,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_64
+ },
+ };
+
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
+}
+
+TRANS(vmaddwev_h_b, LSX, gvec_vvv, MO_8, do_vmaddwev_s)
+TRANS(vmaddwev_w_h, LSX, gvec_vvv, MO_16, do_vmaddwev_s)
+TRANS(vmaddwev_d_w, LSX, gvec_vvv, MO_32, do_vmaddwev_s)
+TRANS(xvmaddwev_h_b, LASX, gvec_xxx, MO_8, do_vmaddwev_s)
+TRANS(xvmaddwev_w_h, LASX, gvec_xxx, MO_16, do_vmaddwev_s)
+TRANS(xvmaddwev_d_w, LASX, gvec_xxx, MO_32, do_vmaddwev_s)
+
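+/*
+ * 128-bit widening multiply-accumulate: for each 128-bit group, take
+ * the selected 64-bit source elements, form the 128-bit product with
+ * muls2/mulu2/mulus2 and add it into the 128-bit destination with
+ * add2.
+ */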
+static bool gen_vmadd_q_vl(DisasContext *ctx,
+ arg_vvv *a, uint32_t oprsz, int idx1, int idx2,
+ void (*func)(TCGv_i64, TCGv_i64,
+ TCGv_i64, TCGv_i64))
+{
+ TCGv_i64 rh, rl, arg1, arg2, th, tl;
+ int i;
+
+ if (!check_vec(ctx, oprsz)) {
+ return true;
+ }
+
+ rh = tcg_temp_new_i64();
+ rl = tcg_temp_new_i64();
+ arg1 = tcg_temp_new_i64();
+ arg2 = tcg_temp_new_i64();
+ th = tcg_temp_new_i64();
+ tl = tcg_temp_new_i64();
+
+ for (i = 0; i < oprsz / 16; i++) {
+ get_vreg64(arg1, a->vj, 2 * i + idx1);
+ get_vreg64(arg2, a->vk, 2 * i + idx2);
+ get_vreg64(rh, a->vd, 2 * i + 1);
+ get_vreg64(rl, a->vd, 2 * i);
+
+ func(tl, th, arg1, arg2);
+ tcg_gen_add2_i64(rl, rh, rl, rh, tl, th);
+
+ set_vreg64(rh, a->vd, 2 * i + 1);
+ set_vreg64(rl, a->vd, 2 * i);
+ }
+
+ return true;
+}
+
+static bool gen_vmadd_q(DisasContext *ctx, arg_vvv *a, int idx1, int idx2,
+ void (*func)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
+{
+ return gen_vmadd_q_vl(ctx, a, 16, idx1, idx2, func);
+}
+
+static bool gen_xvmadd_q(DisasContext *ctx, arg_vvv *a, int idx1, int idx2,
+ void (*func)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
+{
+ return gen_vmadd_q_vl(ctx, a, 32, idx1, idx2, func);
+}
+
+TRANS(vmaddwev_q_d, LSX, gen_vmadd_q, 0, 0, tcg_gen_muls2_i64)
+TRANS(vmaddwod_q_d, LSX, gen_vmadd_q, 1, 1, tcg_gen_muls2_i64)
+TRANS(vmaddwev_q_du, LSX, gen_vmadd_q, 0, 0, tcg_gen_mulu2_i64)
+TRANS(vmaddwod_q_du, LSX, gen_vmadd_q, 1, 1, tcg_gen_mulu2_i64)
+TRANS(vmaddwev_q_du_d, LSX, gen_vmadd_q, 0, 0, tcg_gen_mulus2_i64)
+TRANS(vmaddwod_q_du_d, LSX, gen_vmadd_q, 1, 1, tcg_gen_mulus2_i64)
+TRANS(xvmaddwev_q_d, LASX, gen_xvmadd_q, 0, 0, tcg_gen_muls2_i64)
+TRANS(xvmaddwod_q_d, LASX, gen_xvmadd_q, 1, 1, tcg_gen_muls2_i64)
+TRANS(xvmaddwev_q_du, LASX, gen_xvmadd_q, 0, 0, tcg_gen_mulu2_i64)
+TRANS(xvmaddwod_q_du, LASX, gen_xvmadd_q, 1, 1, tcg_gen_mulu2_i64)
+TRANS(xvmaddwev_q_du_d, LASX, gen_xvmadd_q, 0, 0, tcg_gen_mulus2_i64)
+TRANS(xvmaddwod_q_du_d, LASX, gen_xvmadd_q, 1, 1, tcg_gen_mulus2_i64)
+
+static void gen_vmaddwod_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t1, t2, t3;
+ int halfbits = 4 << vece;
+
+ t1 = tcg_temp_new_vec_matching(a);
+ t2 = tcg_temp_new_vec_matching(b);
+ t3 = tcg_temp_new_vec_matching(t);
+ tcg_gen_sari_vec(vece, t1, a, halfbits);
+ tcg_gen_sari_vec(vece, t2, b, halfbits);
+ tcg_gen_mul_vec(vece, t3, t1, t2);
+ tcg_gen_add_vec(vece, t, t, t3);
+}
+
+static void gen_vmaddwod_w_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t1;
+
+ t1 = tcg_temp_new_i32();
+ gen_vmulwod_w_h(t1, a, b);
+ tcg_gen_add_i32(t, t, t1);
+}
+
+static void gen_vmaddwod_d_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t1;
+
+ t1 = tcg_temp_new_i64();
+ gen_vmulwod_d_w(t1, a, b);
+ tcg_gen_add_i64(t, t, t1);
+}
+
+static void do_vmaddwod_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_sari_vec, INDEX_op_mul_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen3 op[3] = {
+ {
+ .fniv = gen_vmaddwod_s,
+ .fno = gen_helper_vmaddwod_h_b,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_16
+ },
+ {
+ .fni4 = gen_vmaddwod_w_h,
+ .fniv = gen_vmaddwod_s,
+ .fno = gen_helper_vmaddwod_w_h,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_32
+ },
+ {
+ .fni8 = gen_vmaddwod_d_w,
+ .fniv = gen_vmaddwod_s,
+ .fno = gen_helper_vmaddwod_d_w,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_64
+ },
+ };
+
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
+}
+
+TRANS(vmaddwod_h_b, LSX, gvec_vvv, MO_8, do_vmaddwod_s)
+TRANS(vmaddwod_w_h, LSX, gvec_vvv, MO_16, do_vmaddwod_s)
+TRANS(vmaddwod_d_w, LSX, gvec_vvv, MO_32, do_vmaddwod_s)
+TRANS(xvmaddwod_h_b, LASX, gvec_xxx, MO_8, do_vmaddwod_s)
+TRANS(xvmaddwod_w_h, LASX, gvec_xxx, MO_16, do_vmaddwod_s)
+TRANS(xvmaddwod_d_w, LASX, gvec_xxx, MO_32, do_vmaddwod_s)
+
+static void gen_vmaddwev_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t1, t2, mask;
+
+ t1 = tcg_temp_new_vec_matching(t);
+ t2 = tcg_temp_new_vec_matching(b);
+ mask = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
+ tcg_gen_and_vec(vece, t1, a, mask);
+ tcg_gen_and_vec(vece, t2, b, mask);
+ tcg_gen_mul_vec(vece, t1, t1, t2);
+ tcg_gen_add_vec(vece, t, t, t1);
+}
+
+static void gen_vmaddwev_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t1;
+
+ t1 = tcg_temp_new_i32();
+ gen_vmulwev_w_hu(t1, a, b);
+ tcg_gen_add_i32(t, t, t1);
+}
+
+static void gen_vmaddwev_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t1;
+
+ t1 = tcg_temp_new_i64();
+ gen_vmulwev_d_wu(t1, a, b);
+ tcg_gen_add_i64(t, t, t1);
+}
+
+static void do_vmaddwev_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_mul_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen3 op[3] = {
+ {
+ .fniv = gen_vmaddwev_u,
+ .fno = gen_helper_vmaddwev_h_bu,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_16
+ },
+ {
+ .fni4 = gen_vmaddwev_w_hu,
+ .fniv = gen_vmaddwev_u,
+ .fno = gen_helper_vmaddwev_w_hu,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_32
+ },
+ {
+ .fni8 = gen_vmaddwev_d_wu,
+ .fniv = gen_vmaddwev_u,
+ .fno = gen_helper_vmaddwev_d_wu,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_64
+ },
+ };
+
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
+}
+
+TRANS(vmaddwev_h_bu, LSX, gvec_vvv, MO_8, do_vmaddwev_u)
+TRANS(vmaddwev_w_hu, LSX, gvec_vvv, MO_16, do_vmaddwev_u)
+TRANS(vmaddwev_d_wu, LSX, gvec_vvv, MO_32, do_vmaddwev_u)
+TRANS(xvmaddwev_h_bu, LASX, gvec_xxx, MO_8, do_vmaddwev_u)
+TRANS(xvmaddwev_w_hu, LASX, gvec_xxx, MO_16, do_vmaddwev_u)
+TRANS(xvmaddwev_d_wu, LASX, gvec_xxx, MO_32, do_vmaddwev_u)
+
+static void gen_vmaddwod_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t1, t2, t3;
+ int halfbits = 4 << vece;
+
+ t1 = tcg_temp_new_vec_matching(a);
+ t2 = tcg_temp_new_vec_matching(b);
+ t3 = tcg_temp_new_vec_matching(t);
+ tcg_gen_shri_vec(vece, t1, a, halfbits);
+ tcg_gen_shri_vec(vece, t2, b, halfbits);
+ tcg_gen_mul_vec(vece, t3, t1, t2);
+ tcg_gen_add_vec(vece, t, t, t3);
+}
+
+static void gen_vmaddwod_w_hu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t1;
+
+ t1 = tcg_temp_new_i32();
+ gen_vmulwod_w_hu(t1, a, b);
+ tcg_gen_add_i32(t, t, t1);
+}
+
+static void gen_vmaddwod_d_wu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t1;
+
+ t1 = tcg_temp_new_i64();
+ gen_vmulwod_d_wu(t1, a, b);
+ tcg_gen_add_i64(t, t, t1);
+}
+
+static void do_vmaddwod_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shri_vec, INDEX_op_mul_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen3 op[3] = {
+ {
+ .fniv = gen_vmaddwod_u,
+ .fno = gen_helper_vmaddwod_h_bu,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_16
+ },
+ {
+ .fni4 = gen_vmaddwod_w_hu,
+ .fniv = gen_vmaddwod_u,
+ .fno = gen_helper_vmaddwod_w_hu,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_32
+ },
+ {
+ .fni8 = gen_vmaddwod_d_wu,
+ .fniv = gen_vmaddwod_u,
+ .fno = gen_helper_vmaddwod_d_wu,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_64
+ },
+ };
+
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
+}
+
+TRANS(vmaddwod_h_bu, LSX, gvec_vvv, MO_8, do_vmaddwod_u)
+TRANS(vmaddwod_w_hu, LSX, gvec_vvv, MO_16, do_vmaddwod_u)
+TRANS(vmaddwod_d_wu, LSX, gvec_vvv, MO_32, do_vmaddwod_u)
+TRANS(xvmaddwod_h_bu, LASX, gvec_xxx, MO_8, do_vmaddwod_u)
+TRANS(xvmaddwod_w_hu, LASX, gvec_xxx, MO_16, do_vmaddwod_u)
+TRANS(xvmaddwod_d_wu, LASX, gvec_xxx, MO_32, do_vmaddwod_u)
+
+static void gen_vmaddwev_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t1, t2, mask;
+ int halfbits = 4 << vece;
+
+ t1 = tcg_temp_new_vec_matching(a);
+ t2 = tcg_temp_new_vec_matching(b);
+ mask = tcg_constant_vec_matching(t, vece, MAKE_64BIT_MASK(0, 4 << vece));
+ tcg_gen_and_vec(vece, t1, a, mask);
+ tcg_gen_shli_vec(vece, t2, b, halfbits);
+ tcg_gen_sari_vec(vece, t2, t2, halfbits);
+ tcg_gen_mul_vec(vece, t1, t1, t2);
+ tcg_gen_add_vec(vece, t, t, t1);
+}
+
+static void gen_vmaddwev_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t1;
+
+ t1 = tcg_temp_new_i32();
+ gen_vmulwev_w_hu_h(t1, a, b);
+ tcg_gen_add_i32(t, t, t1);
+}
+
+static void gen_vmaddwev_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t1;
+
+ t1 = tcg_temp_new_i64();
+ gen_vmulwev_d_wu_w(t1, a, b);
+ tcg_gen_add_i64(t, t, t1);
+}
+
+static void do_vmaddwev_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shli_vec, INDEX_op_sari_vec,
+ INDEX_op_mul_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen3 op[3] = {
+ {
+ .fniv = gen_vmaddwev_u_s,
+ .fno = gen_helper_vmaddwev_h_bu_b,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_16
+ },
+ {
+ .fni4 = gen_vmaddwev_w_hu_h,
+ .fniv = gen_vmaddwev_u_s,
+ .fno = gen_helper_vmaddwev_w_hu_h,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_32
+ },
+ {
+ .fni8 = gen_vmaddwev_d_wu_w,
+ .fniv = gen_vmaddwev_u_s,
+ .fno = gen_helper_vmaddwev_d_wu_w,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_64
+ },
+ };
+
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
+}
+
+TRANS(vmaddwev_h_bu_b, LSX, gvec_vvv, MO_8, do_vmaddwev_u_s)
+TRANS(vmaddwev_w_hu_h, LSX, gvec_vvv, MO_16, do_vmaddwev_u_s)
+TRANS(vmaddwev_d_wu_w, LSX, gvec_vvv, MO_32, do_vmaddwev_u_s)
+TRANS(xvmaddwev_h_bu_b, LASX, gvec_xxx, MO_8, do_vmaddwev_u_s)
+TRANS(xvmaddwev_w_hu_h, LASX, gvec_xxx, MO_16, do_vmaddwev_u_s)
+TRANS(xvmaddwev_d_wu_w, LASX, gvec_xxx, MO_32, do_vmaddwev_u_s)
+
+static void gen_vmaddwod_u_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t1, t2, t3;
+ int halfbits = 4 << vece;
+
+ t1 = tcg_temp_new_vec_matching(a);
+ t2 = tcg_temp_new_vec_matching(b);
+ t3 = tcg_temp_new_vec_matching(t);
+ tcg_gen_shri_vec(vece, t1, a, halfbits);
+ tcg_gen_sari_vec(vece, t2, b, halfbits);
+ tcg_gen_mul_vec(vece, t3, t1, t2);
+ tcg_gen_add_vec(vece, t, t, t3);
+}
+
+static void gen_vmaddwod_w_hu_h(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t1;
+
+ t1 = tcg_temp_new_i32();
+ gen_vmulwod_w_hu_h(t1, a, b);
+ tcg_gen_add_i32(t, t, t1);
+}
+
+static void gen_vmaddwod_d_wu_w(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t1;
+
+ t1 = tcg_temp_new_i64();
+ gen_vmulwod_d_wu_w(t1, a, b);
+ tcg_gen_add_i64(t, t, t1);
+}
+
+static void do_vmaddwod_u_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shri_vec, INDEX_op_sari_vec,
+ INDEX_op_mul_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen3 op[3] = {
+ {
+ .fniv = gen_vmaddwod_u_s,
+ .fno = gen_helper_vmaddwod_h_bu_b,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_16
+ },
+ {
+ .fni4 = gen_vmaddwod_w_hu_h,
+ .fniv = gen_vmaddwod_u_s,
+ .fno = gen_helper_vmaddwod_w_hu_h,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_32
+ },
+ {
+ .fni8 = gen_vmaddwod_d_wu_w,
+ .fniv = gen_vmaddwod_u_s,
+ .fno = gen_helper_vmaddwod_d_wu_w,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_64
+ },
+ };
+
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
+}
+
+TRANS(vmaddwod_h_bu_b, LSX, gvec_vvv, MO_8, do_vmaddwod_u_s)
+TRANS(vmaddwod_w_hu_h, LSX, gvec_vvv, MO_16, do_vmaddwod_u_s)
+TRANS(vmaddwod_d_wu_w, LSX, gvec_vvv, MO_32, do_vmaddwod_u_s)
+TRANS(xvmaddwod_h_bu_b, LASX, gvec_xxx, MO_8, do_vmaddwod_u_s)
+TRANS(xvmaddwod_w_hu_h, LASX, gvec_xxx, MO_16, do_vmaddwod_u_s)
+TRANS(xvmaddwod_d_wu_w, LASX, gvec_xxx, MO_32, do_vmaddwod_u_s)
+
+TRANS(vdiv_b, LSX, gen_vvv, gen_helper_vdiv_b)
+TRANS(vdiv_h, LSX, gen_vvv, gen_helper_vdiv_h)
+TRANS(vdiv_w, LSX, gen_vvv, gen_helper_vdiv_w)
+TRANS(vdiv_d, LSX, gen_vvv, gen_helper_vdiv_d)
+TRANS(vdiv_bu, LSX, gen_vvv, gen_helper_vdiv_bu)
+TRANS(vdiv_hu, LSX, gen_vvv, gen_helper_vdiv_hu)
+TRANS(vdiv_wu, LSX, gen_vvv, gen_helper_vdiv_wu)
+TRANS(vdiv_du, LSX, gen_vvv, gen_helper_vdiv_du)
+TRANS(vmod_b, LSX, gen_vvv, gen_helper_vmod_b)
+TRANS(vmod_h, LSX, gen_vvv, gen_helper_vmod_h)
+TRANS(vmod_w, LSX, gen_vvv, gen_helper_vmod_w)
+TRANS(vmod_d, LSX, gen_vvv, gen_helper_vmod_d)
+TRANS(vmod_bu, LSX, gen_vvv, gen_helper_vmod_bu)
+TRANS(vmod_hu, LSX, gen_vvv, gen_helper_vmod_hu)
+TRANS(vmod_wu, LSX, gen_vvv, gen_helper_vmod_wu)
+TRANS(vmod_du, LSX, gen_vvv, gen_helper_vmod_du)
+TRANS(xvdiv_b, LASX, gen_xxx, gen_helper_vdiv_b)
+TRANS(xvdiv_h, LASX, gen_xxx, gen_helper_vdiv_h)
+TRANS(xvdiv_w, LASX, gen_xxx, gen_helper_vdiv_w)
+TRANS(xvdiv_d, LASX, gen_xxx, gen_helper_vdiv_d)
+TRANS(xvdiv_bu, LASX, gen_xxx, gen_helper_vdiv_bu)
+TRANS(xvdiv_hu, LASX, gen_xxx, gen_helper_vdiv_hu)
+TRANS(xvdiv_wu, LASX, gen_xxx, gen_helper_vdiv_wu)
+TRANS(xvdiv_du, LASX, gen_xxx, gen_helper_vdiv_du)
+TRANS(xvmod_b, LASX, gen_xxx, gen_helper_vmod_b)
+TRANS(xvmod_h, LASX, gen_xxx, gen_helper_vmod_h)
+TRANS(xvmod_w, LASX, gen_xxx, gen_helper_vmod_w)
+TRANS(xvmod_d, LASX, gen_xxx, gen_helper_vmod_d)
+TRANS(xvmod_bu, LASX, gen_xxx, gen_helper_vmod_bu)
+TRANS(xvmod_hu, LASX, gen_xxx, gen_helper_vmod_hu)
+TRANS(xvmod_wu, LASX, gen_xxx, gen_helper_vmod_wu)
+TRANS(xvmod_du, LASX, gen_xxx, gen_helper_vmod_du)
+
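+/*
+ * [x]vsat (signed): clamp each element to [~max, max], i.e.
+ * [-2^imm, 2^imm - 1]; ~max is the matching minimum.
+ */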
+static void gen_vsat_s(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec max)
+{
+ TCGv_vec min;
+
+ min = tcg_temp_new_vec_matching(t);
+ tcg_gen_not_vec(vece, min, max);
+ tcg_gen_smax_vec(vece, t, a, min);
+ tcg_gen_smin_vec(vece, t, t, max);
+}
+
+static void do_vsat_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+ int64_t imm, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_smax_vec, INDEX_op_smin_vec, 0
+ };
+ static const GVecGen2s op[4] = {
+ {
+ .fniv = gen_vsat_s,
+ .fno = gen_helper_vsat_b,
+ .opt_opc = vecop_list,
+ .vece = MO_8
+ },
+ {
+ .fniv = gen_vsat_s,
+ .fno = gen_helper_vsat_h,
+ .opt_opc = vecop_list,
+ .vece = MO_16
+ },
+ {
+ .fniv = gen_vsat_s,
+ .fno = gen_helper_vsat_w,
+ .opt_opc = vecop_list,
+ .vece = MO_32
+ },
+ {
+ .fniv = gen_vsat_s,
+ .fno = gen_helper_vsat_d,
+ .opt_opc = vecop_list,
+ .vece = MO_64
+ },
+ };
+
+ tcg_gen_gvec_2s(vd_ofs, vj_ofs, oprsz, maxsz,
+ tcg_constant_i64((1ll << imm) - 1), &op[vece]);
+}
+
+TRANS(vsat_b, LSX, gvec_vv_i, MO_8, do_vsat_s)
+TRANS(vsat_h, LSX, gvec_vv_i, MO_16, do_vsat_s)
+TRANS(vsat_w, LSX, gvec_vv_i, MO_32, do_vsat_s)
+TRANS(vsat_d, LSX, gvec_vv_i, MO_64, do_vsat_s)
+TRANS(xvsat_b, LASX, gvec_xx_i, MO_8, do_vsat_s)
+TRANS(xvsat_h, LASX, gvec_xx_i, MO_16, do_vsat_s)
+TRANS(xvsat_w, LASX, gvec_xx_i, MO_32, do_vsat_s)
+TRANS(xvsat_d, LASX, gvec_xx_i, MO_64, do_vsat_s)
+
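+/*
+ * [x]vsat (unsigned): clamp each element to [0, 2^(imm+1) - 1];
+ * imm == 0x3f (the 64-bit case) uses UINT64_MAX directly to avoid the
+ * out-of-range shift.
+ */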
+static void gen_vsat_u(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec max)
+{
+ tcg_gen_umin_vec(vece, t, a, max);
+}
+
+static void do_vsat_u(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+ int64_t imm, uint32_t oprsz, uint32_t maxsz)
+{
+ uint64_t max;
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_umin_vec, 0
+ };
+ static const GVecGen2s op[4] = {
+ {
+ .fniv = gen_vsat_u,
+ .fno = gen_helper_vsat_bu,
+ .opt_opc = vecop_list,
+ .vece = MO_8
+ },
+ {
+ .fniv = gen_vsat_u,
+ .fno = gen_helper_vsat_hu,
+ .opt_opc = vecop_list,
+ .vece = MO_16
+ },
+ {
+ .fniv = gen_vsat_u,
+ .fno = gen_helper_vsat_wu,
+ .opt_opc = vecop_list,
+ .vece = MO_32
+ },
+ {
+ .fniv = gen_vsat_u,
+ .fno = gen_helper_vsat_du,
+ .opt_opc = vecop_list,
+ .vece = MO_64
+ },
+ };
+
+ max = (imm == 0x3f) ? UINT64_MAX : (1ull << (imm + 1)) - 1;
+ tcg_gen_gvec_2s(vd_ofs, vj_ofs, oprsz, maxsz,
+ tcg_constant_i64(max), &op[vece]);
+}
+
+TRANS(vsat_bu, LSX, gvec_vv_i, MO_8, do_vsat_u)
+TRANS(vsat_hu, LSX, gvec_vv_i, MO_16, do_vsat_u)
+TRANS(vsat_wu, LSX, gvec_vv_i, MO_32, do_vsat_u)
+TRANS(vsat_du, LSX, gvec_vv_i, MO_64, do_vsat_u)
+TRANS(xvsat_bu, LASX, gvec_xx_i, MO_8, do_vsat_u)
+TRANS(xvsat_hu, LASX, gvec_xx_i, MO_16, do_vsat_u)
+TRANS(xvsat_wu, LASX, gvec_xx_i, MO_32, do_vsat_u)
+TRANS(xvsat_du, LASX, gvec_xx_i, MO_64, do_vsat_u)
+
+TRANS(vexth_h_b, LSX, gen_vv, gen_helper_vexth_h_b)
+TRANS(vexth_w_h, LSX, gen_vv, gen_helper_vexth_w_h)
+TRANS(vexth_d_w, LSX, gen_vv, gen_helper_vexth_d_w)
+TRANS(vexth_q_d, LSX, gen_vv, gen_helper_vexth_q_d)
+TRANS(vexth_hu_bu, LSX, gen_vv, gen_helper_vexth_hu_bu)
+TRANS(vexth_wu_hu, LSX, gen_vv, gen_helper_vexth_wu_hu)
+TRANS(vexth_du_wu, LSX, gen_vv, gen_helper_vexth_du_wu)
+TRANS(vexth_qu_du, LSX, gen_vv, gen_helper_vexth_qu_du)
+TRANS(xvexth_h_b, LASX, gen_xx, gen_helper_vexth_h_b)
+TRANS(xvexth_w_h, LASX, gen_xx, gen_helper_vexth_w_h)
+TRANS(xvexth_d_w, LASX, gen_xx, gen_helper_vexth_d_w)
+TRANS(xvexth_q_d, LASX, gen_xx, gen_helper_vexth_q_d)
+TRANS(xvexth_hu_bu, LASX, gen_xx, gen_helper_vexth_hu_bu)
+TRANS(xvexth_wu_hu, LASX, gen_xx, gen_helper_vexth_wu_hu)
+TRANS(xvexth_du_wu, LASX, gen_xx, gen_helper_vexth_du_wu)
+TRANS(xvexth_qu_du, LASX, gen_xx, gen_helper_vexth_qu_du)
+
+TRANS(vext2xv_h_b, LASX, gen_xx, gen_helper_vext2xv_h_b)
+TRANS(vext2xv_w_b, LASX, gen_xx, gen_helper_vext2xv_w_b)
+TRANS(vext2xv_d_b, LASX, gen_xx, gen_helper_vext2xv_d_b)
+TRANS(vext2xv_w_h, LASX, gen_xx, gen_helper_vext2xv_w_h)
+TRANS(vext2xv_d_h, LASX, gen_xx, gen_helper_vext2xv_d_h)
+TRANS(vext2xv_d_w, LASX, gen_xx, gen_helper_vext2xv_d_w)
+TRANS(vext2xv_hu_bu, LASX, gen_xx, gen_helper_vext2xv_hu_bu)
+TRANS(vext2xv_wu_bu, LASX, gen_xx, gen_helper_vext2xv_wu_bu)
+TRANS(vext2xv_du_bu, LASX, gen_xx, gen_helper_vext2xv_du_bu)
+TRANS(vext2xv_wu_hu, LASX, gen_xx, gen_helper_vext2xv_wu_hu)
+TRANS(vext2xv_du_hu, LASX, gen_xx, gen_helper_vext2xv_du_hu)
+TRANS(vext2xv_du_wu, LASX, gen_xx, gen_helper_vext2xv_du_wu)
+
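+/*
+ * [x]vsigncov: t = (a < 0) ? -b : (a == 0) ? 0 : b, i.e. copy b with
+ * the sign of a, or zero when a is zero.
+ */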
+static void gen_vsigncov(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t1, zero;
+
+ t1 = tcg_temp_new_vec_matching(t);
+ zero = tcg_constant_vec_matching(t, vece, 0);
+
+ tcg_gen_neg_vec(vece, t1, b);
+ tcg_gen_cmpsel_vec(TCG_COND_LT, vece, t, a, zero, t1, b);
+ tcg_gen_cmpsel_vec(TCG_COND_EQ, vece, t, a, zero, zero, t);
+}
+
+static void do_vsigncov(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_neg_vec, INDEX_op_cmpsel_vec, 0
+ };
+ static const GVecGen3 op[4] = {
+ {
+ .fniv = gen_vsigncov,
+ .fno = gen_helper_vsigncov_b,
+ .opt_opc = vecop_list,
+ .vece = MO_8
+ },
+ {
+ .fniv = gen_vsigncov,
+ .fno = gen_helper_vsigncov_h,
+ .opt_opc = vecop_list,
+ .vece = MO_16
+ },
+ {
+ .fniv = gen_vsigncov,
+ .fno = gen_helper_vsigncov_w,
+ .opt_opc = vecop_list,
+ .vece = MO_32
+ },
+ {
+ .fniv = gen_vsigncov,
+ .fno = gen_helper_vsigncov_d,
+ .opt_opc = vecop_list,
+ .vece = MO_64
+ },
+ };
+
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
+}
+
+TRANS(vsigncov_b, LSX, gvec_vvv, MO_8, do_vsigncov)
+TRANS(vsigncov_h, LSX, gvec_vvv, MO_16, do_vsigncov)
+TRANS(vsigncov_w, LSX, gvec_vvv, MO_32, do_vsigncov)
+TRANS(vsigncov_d, LSX, gvec_vvv, MO_64, do_vsigncov)
+TRANS(xvsigncov_b, LASX, gvec_xxx, MO_8, do_vsigncov)
+TRANS(xvsigncov_h, LASX, gvec_xxx, MO_16, do_vsigncov)
+TRANS(xvsigncov_w, LASX, gvec_xxx, MO_32, do_vsigncov)
+TRANS(xvsigncov_d, LASX, gvec_xxx, MO_64, do_vsigncov)
+
+TRANS(vmskltz_b, LSX, gen_vv, gen_helper_vmskltz_b)
+TRANS(vmskltz_h, LSX, gen_vv, gen_helper_vmskltz_h)
+TRANS(vmskltz_w, LSX, gen_vv, gen_helper_vmskltz_w)
+TRANS(vmskltz_d, LSX, gen_vv, gen_helper_vmskltz_d)
+TRANS(vmskgez_b, LSX, gen_vv, gen_helper_vmskgez_b)
+TRANS(vmsknz_b, LSX, gen_vv, gen_helper_vmsknz_b)
+TRANS(xvmskltz_b, LASX, gen_xx, gen_helper_vmskltz_b)
+TRANS(xvmskltz_h, LASX, gen_xx, gen_helper_vmskltz_h)
+TRANS(xvmskltz_w, LASX, gen_xx, gen_helper_vmskltz_w)
+TRANS(xvmskltz_d, LASX, gen_xx, gen_helper_vmskltz_d)
+TRANS(xvmskgez_b, LASX, gen_xx, gen_helper_vmskgez_b)
+TRANS(xvmsknz_b, LASX, gen_xx, gen_helper_vmsknz_b)
+
+#define EXPAND_BYTE(bit) ((uint64_t)(bit ? 0xff : 0))
+
+static uint64_t vldi_get_value(DisasContext *ctx, uint32_t imm)
+{
+ int mode;
+ uint64_t data, t;
+
+ /*
+ * Bits [11:8] of imm select the mode; valid modes are 0-12,
+ * all other values are invalid.
+ */
+ mode = (imm >> 8) & 0xf;
+ t = imm & 0xff;
+ switch (mode) {
+ case 0:
+ /* data: {2{24'0, imm[7:0]}} */
+ data = (t << 32) | t;
+ break;
+ case 1:
+ /* data: {2{16'0, imm[7:0], 8'0}} */
+ data = (t << 40) | (t << 8);
+ break;
+ case 2:
+ /* data: {2{8'0, imm[7:0], 16'0}} */
+ data = (t << 48) | (t << 16);
+ break;
+ case 3:
+ /* data: {2{imm[7:0], 24'0}} */
+ data = (t << 56) | (t << 24);
+ break;
+ case 4:
+ /* data: {4{8'0, imm[7:0]}} */
+ data = (t << 48) | (t << 32) | (t << 16) | t;
+ break;
+ case 5:
+ /* data: {4{imm[7:0], 8'0}} */
+ data = (t << 56) | (t << 40) | (t << 24) | (t << 8);
+ break;
+ case 6:
+ /* data: {2{16'0, imm[7:0], 8'1}} */
+ data = (t << 40) | ((uint64_t)0xff << 32) | (t << 8) | 0xff;
+ break;
+ case 7:
+ /* data: {2{8'0, imm[7:0], 16'1}} */
+ data = (t << 48) | ((uint64_t)0xffff << 32) | (t << 16) | 0xffff;
+ break;
+ case 8:
+ /* data: {8{imm[7:0]}} */
+ data = (t << 56) | (t << 48) | (t << 40) | (t << 32) |
+ (t << 24) | (t << 16) | (t << 8) | t;
+ break;
+ case 9:
+ /* data: {{8{imm[7]}, ..., 8{imm[0]}}} */
+ {
+ uint64_t b0, b1, b2, b3, b4, b5, b6, b7;
+ b0 = t & 0x1;
+ b1 = (t & 0x2) >> 1;
+ b2 = (t & 0x4) >> 2;
+ b3 = (t & 0x8) >> 3;
+ b4 = (t & 0x10) >> 4;
+ b5 = (t & 0x20) >> 5;
+ b6 = (t & 0x40) >> 6;
+ b7 = (t & 0x80) >> 7;
+ data = (EXPAND_BYTE(b7) << 56) |
+ (EXPAND_BYTE(b6) << 48) |
+ (EXPAND_BYTE(b5) << 40) |
+ (EXPAND_BYTE(b4) << 32) |
+ (EXPAND_BYTE(b3) << 24) |
+ (EXPAND_BYTE(b2) << 16) |
+ (EXPAND_BYTE(b1) << 8) |
+ EXPAND_BYTE(b0);
+ }
+ break;
+ case 10:
+ /* data: {2{imm[7], ~imm[6], {5{imm[6]}}, imm[5:0], 19'0}} */
+ {
+ uint64_t b6, b7;
+ uint64_t t0, t1;
+ b6 = (imm & 0x40) >> 6;
+ b7 = (imm & 0x80) >> 7;
+ t0 = (imm & 0x3f);
+ t1 = (b7 << 6) | ((1 - b6) << 5) | (uint64_t)(b6 ? 0x1f : 0);
+ data = (t1 << 57) | (t0 << 51) | (t1 << 25) | (t0 << 19);
+ }
+ break;
+ case 11:
+ /* data: {32'0, imm[7], ~{imm[6]}, 5{imm[6]}, imm[5:0], 19'0} */
+ {
+ uint64_t b6, b7;
+ uint64_t t0, t1;
+ b6 = (imm & 0x40) >> 6;
+ b7 = (imm & 0x80) >> 7;
+ t0 = (imm & 0x3f);
+ t1 = (b7 << 6) | ((1 - b6) << 5) | (b6 ? 0x1f : 0);
+ data = (t1 << 25) | (t0 << 19);
+ }
+ break;
+ case 12:
+ /* data: {imm[7], ~imm[6], 8{imm[6]}, imm[5:0], 48'0} */
+ {
+ uint64_t b6, b7;
+ uint64_t t0, t1;
+ b6 = (imm & 0x40) >> 6;
+ b7 = (imm & 0x80) >> 7;
+ t0 = (imm & 0x3f);
+ t1 = (b7 << 9) | ((1 - b6) << 8) | (b6 ? 0xff : 0);
+ data = (t1 << 54) | (t0 << 48);
+ }
+ break;
+ default:
+ generate_exception(ctx, EXCCODE_INE);
+ g_assert_not_reached();
+ }
+ return data;
+}
+
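+/*
+ * [x]vldi: when imm[12] is set, the 64-bit pattern comes from
+ * vldi_get_value() above; otherwise imm[11:10] selects the element
+ * size and imm[9:0] is a sign-extended 10-bit value.  Either way the
+ * value is broadcast across the whole register.
+ */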
+static bool gen_vldi(DisasContext *ctx, arg_vldi *a, uint32_t oprsz)
+{
+ int sel, vece;
+ uint64_t value;
+
+ if (!check_vec(ctx, oprsz)) {
+ return true;
+ }
+
+ sel = (a->imm >> 12) & 0x1;
+
+ if (sel) {
+ value = vldi_get_value(ctx, a->imm);
+ vece = MO_64;
+ } else {
+ value = ((int32_t)(a->imm << 22)) >> 22;
+ vece = (a->imm >> 10) & 0x3;
+ }
+
+ tcg_gen_gvec_dup_i64(vece, vec_full_offset(a->vd), oprsz, ctx->vl / 8,
+ tcg_constant_i64(value));
+ return true;
+}
+
+TRANS(vldi, LSX, gen_vldi, 16)
+TRANS(xvldi, LASX, gen_vldi, 32)
+
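+/*
+ * [x]vandn.v computes vd = ~vj & vk, which is gvec andc with the
+ * source operands swapped.
+ */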
+static bool gen_vandn_v(DisasContext *ctx, arg_vvv *a, uint32_t oprsz)
+{
+ uint32_t vd_ofs, vj_ofs, vk_ofs;
+
+ if (!check_vec(ctx, oprsz)) {
+ return true;
+ }
+
+ vd_ofs = vec_full_offset(a->vd);
+ vj_ofs = vec_full_offset(a->vj);
+ vk_ofs = vec_full_offset(a->vk);
+
+ tcg_gen_gvec_andc(MO_64, vd_ofs, vk_ofs, vj_ofs, oprsz, ctx->vl / 8);
+ return true;
+}
+
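+/* [x]vnori.b: vd = ~(vj | imm) on each byte. */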
+static void gen_vnori(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
+{
+ TCGv_vec t1;
+
+ t1 = tcg_constant_vec_matching(t, vece, imm);
+ tcg_gen_nor_vec(vece, t, a, t1);
+}
+
+static void gen_vnori_b(TCGv_i64 t, TCGv_i64 a, int64_t imm)
+{
+ tcg_gen_movi_i64(t, dup_const(MO_8, imm));
+ tcg_gen_nor_i64(t, a, t);
+}
+
+static void do_vnori_b(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+ int64_t imm, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_nor_vec, 0
+ };
+ static const GVecGen2i op = {
+ .fni8 = gen_vnori_b,
+ .fniv = gen_vnori,
+ .fnoi = gen_helper_vnori_b,
+ .opt_opc = vecop_list,
+ .vece = MO_8
+ };
+
+ tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op);
+}
+
+TRANS(vand_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_and)
+TRANS(vor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_or)
+TRANS(vxor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_xor)
+TRANS(vnor_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_nor)
+TRANS(vandn_v, LSX, gen_vandn_v, 16)
+TRANS(vorn_v, LSX, gvec_vvv, MO_64, tcg_gen_gvec_orc)
+TRANS(vandi_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_andi)
+TRANS(vori_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_ori)
+TRANS(vxori_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_xori)
+TRANS(vnori_b, LSX, gvec_vv_i, MO_8, do_vnori_b)
+TRANS(xvand_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_and)
+TRANS(xvor_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_or)
+TRANS(xvxor_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_xor)
+TRANS(xvnor_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_nor)
+TRANS(xvandn_v, LASX, gen_vandn_v, 32)
+TRANS(xvorn_v, LASX, gvec_xxx, MO_64, tcg_gen_gvec_orc)
+TRANS(xvandi_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_andi)
+TRANS(xvori_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_ori)
+TRANS(xvxori_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_xori)
+TRANS(xvnori_b, LASX, gvec_xx_i, MO_8, do_vnori_b)
+
+TRANS(vsll_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_shlv)
+TRANS(vsll_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_shlv)
+TRANS(vsll_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_shlv)
+TRANS(vsll_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_shlv)
+TRANS(vslli_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_shli)
+TRANS(vslli_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_shli)
+TRANS(vslli_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_shli)
+TRANS(vslli_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_shli)
+TRANS(xvsll_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_shlv)
+TRANS(xvsll_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_shlv)
+TRANS(xvsll_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_shlv)
+TRANS(xvsll_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_shlv)
+TRANS(xvslli_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_shli)
+TRANS(xvslli_h, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_shli)
+TRANS(xvslli_w, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_shli)
+TRANS(xvslli_d, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_shli)
+
+TRANS(vsrl_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_shrv)
+TRANS(vsrl_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_shrv)
+TRANS(vsrl_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_shrv)
+TRANS(vsrl_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_shrv)
+TRANS(vsrli_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_shri)
+TRANS(vsrli_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_shri)
+TRANS(vsrli_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_shri)
+TRANS(vsrli_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_shri)
+TRANS(xvsrl_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_shrv)
+TRANS(xvsrl_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_shrv)
+TRANS(xvsrl_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_shrv)
+TRANS(xvsrl_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_shrv)
+TRANS(xvsrli_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_shri)
+TRANS(xvsrli_h, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_shri)
+TRANS(xvsrli_w, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_shri)
+TRANS(xvsrli_d, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_shri)
+
+TRANS(vsra_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_sarv)
+TRANS(vsra_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_sarv)
+TRANS(vsra_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_sarv)
+TRANS(vsra_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_sarv)
+TRANS(vsrai_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_sari)
+TRANS(vsrai_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_sari)
+TRANS(vsrai_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_sari)
+TRANS(vsrai_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_sari)
+TRANS(xvsra_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_sarv)
+TRANS(xvsra_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_sarv)
+TRANS(xvsra_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_sarv)
+TRANS(xvsra_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_sarv)
+TRANS(xvsrai_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_sari)
+TRANS(xvsrai_h, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_sari)
+TRANS(xvsrai_w, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_sari)
+TRANS(xvsrai_d, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_sari)
+
+TRANS(vrotr_b, LSX, gvec_vvv, MO_8, tcg_gen_gvec_rotrv)
+TRANS(vrotr_h, LSX, gvec_vvv, MO_16, tcg_gen_gvec_rotrv)
+TRANS(vrotr_w, LSX, gvec_vvv, MO_32, tcg_gen_gvec_rotrv)
+TRANS(vrotr_d, LSX, gvec_vvv, MO_64, tcg_gen_gvec_rotrv)
+TRANS(vrotri_b, LSX, gvec_vv_i, MO_8, tcg_gen_gvec_rotri)
+TRANS(vrotri_h, LSX, gvec_vv_i, MO_16, tcg_gen_gvec_rotri)
+TRANS(vrotri_w, LSX, gvec_vv_i, MO_32, tcg_gen_gvec_rotri)
+TRANS(vrotri_d, LSX, gvec_vv_i, MO_64, tcg_gen_gvec_rotri)
+TRANS(xvrotr_b, LASX, gvec_xxx, MO_8, tcg_gen_gvec_rotrv)
+TRANS(xvrotr_h, LASX, gvec_xxx, MO_16, tcg_gen_gvec_rotrv)
+TRANS(xvrotr_w, LASX, gvec_xxx, MO_32, tcg_gen_gvec_rotrv)
+TRANS(xvrotr_d, LASX, gvec_xxx, MO_64, tcg_gen_gvec_rotrv)
+TRANS(xvrotri_b, LASX, gvec_xx_i, MO_8, tcg_gen_gvec_rotri)
+TRANS(xvrotri_h, LASX, gvec_xx_i, MO_16, tcg_gen_gvec_rotri)
+TRANS(xvrotri_w, LASX, gvec_xx_i, MO_32, tcg_gen_gvec_rotri)
+TRANS(xvrotri_d, LASX, gvec_xx_i, MO_64, tcg_gen_gvec_rotri)
+
+TRANS(vsllwil_h_b, LSX, gen_vv_i, gen_helper_vsllwil_h_b)
+TRANS(vsllwil_w_h, LSX, gen_vv_i, gen_helper_vsllwil_w_h)
+TRANS(vsllwil_d_w, LSX, gen_vv_i, gen_helper_vsllwil_d_w)
+TRANS(vextl_q_d, LSX, gen_vv, gen_helper_vextl_q_d)
+TRANS(vsllwil_hu_bu, LSX, gen_vv_i, gen_helper_vsllwil_hu_bu)
+TRANS(vsllwil_wu_hu, LSX, gen_vv_i, gen_helper_vsllwil_wu_hu)
+TRANS(vsllwil_du_wu, LSX, gen_vv_i, gen_helper_vsllwil_du_wu)
+TRANS(vextl_qu_du, LSX, gen_vv, gen_helper_vextl_qu_du)
+TRANS(xvsllwil_h_b, LASX, gen_xx_i, gen_helper_vsllwil_h_b)
+TRANS(xvsllwil_w_h, LASX, gen_xx_i, gen_helper_vsllwil_w_h)
+TRANS(xvsllwil_d_w, LASX, gen_xx_i, gen_helper_vsllwil_d_w)
+TRANS(xvextl_q_d, LASX, gen_xx, gen_helper_vextl_q_d)
+TRANS(xvsllwil_hu_bu, LASX, gen_xx_i, gen_helper_vsllwil_hu_bu)
+TRANS(xvsllwil_wu_hu, LASX, gen_xx_i, gen_helper_vsllwil_wu_hu)
+TRANS(xvsllwil_du_wu, LASX, gen_xx_i, gen_helper_vsllwil_du_wu)
+TRANS(xvextl_qu_du, LASX, gen_xx, gen_helper_vextl_qu_du)
+
+TRANS(vsrlr_b, LSX, gen_vvv, gen_helper_vsrlr_b)
+TRANS(vsrlr_h, LSX, gen_vvv, gen_helper_vsrlr_h)
+TRANS(vsrlr_w, LSX, gen_vvv, gen_helper_vsrlr_w)
+TRANS(vsrlr_d, LSX, gen_vvv, gen_helper_vsrlr_d)
+TRANS(vsrlri_b, LSX, gen_vv_i, gen_helper_vsrlri_b)
+TRANS(vsrlri_h, LSX, gen_vv_i, gen_helper_vsrlri_h)
+TRANS(vsrlri_w, LSX, gen_vv_i, gen_helper_vsrlri_w)
+TRANS(vsrlri_d, LSX, gen_vv_i, gen_helper_vsrlri_d)
+TRANS(xvsrlr_b, LASX, gen_xxx, gen_helper_vsrlr_b)
+TRANS(xvsrlr_h, LASX, gen_xxx, gen_helper_vsrlr_h)
+TRANS(xvsrlr_w, LASX, gen_xxx, gen_helper_vsrlr_w)
+TRANS(xvsrlr_d, LASX, gen_xxx, gen_helper_vsrlr_d)
+TRANS(xvsrlri_b, LASX, gen_xx_i, gen_helper_vsrlri_b)
+TRANS(xvsrlri_h, LASX, gen_xx_i, gen_helper_vsrlri_h)
+TRANS(xvsrlri_w, LASX, gen_xx_i, gen_helper_vsrlri_w)
+TRANS(xvsrlri_d, LASX, gen_xx_i, gen_helper_vsrlri_d)
+
+TRANS(vsrar_b, LSX, gen_vvv, gen_helper_vsrar_b)
+TRANS(vsrar_h, LSX, gen_vvv, gen_helper_vsrar_h)
+TRANS(vsrar_w, LSX, gen_vvv, gen_helper_vsrar_w)
+TRANS(vsrar_d, LSX, gen_vvv, gen_helper_vsrar_d)
+TRANS(vsrari_b, LSX, gen_vv_i, gen_helper_vsrari_b)
+TRANS(vsrari_h, LSX, gen_vv_i, gen_helper_vsrari_h)
+TRANS(vsrari_w, LSX, gen_vv_i, gen_helper_vsrari_w)
+TRANS(vsrari_d, LSX, gen_vv_i, gen_helper_vsrari_d)
+TRANS(xvsrar_b, LASX, gen_xxx, gen_helper_vsrar_b)
+TRANS(xvsrar_h, LASX, gen_xxx, gen_helper_vsrar_h)
+TRANS(xvsrar_w, LASX, gen_xxx, gen_helper_vsrar_w)
+TRANS(xvsrar_d, LASX, gen_xxx, gen_helper_vsrar_d)
+TRANS(xvsrari_b, LASX, gen_xx_i, gen_helper_vsrari_b)
+TRANS(xvsrari_h, LASX, gen_xx_i, gen_helper_vsrari_h)
+TRANS(xvsrari_w, LASX, gen_xx_i, gen_helper_vsrari_w)
+TRANS(xvsrari_d, LASX, gen_xx_i, gen_helper_vsrari_d)
+
+TRANS(vsrln_b_h, LSX, gen_vvv, gen_helper_vsrln_b_h)
+TRANS(vsrln_h_w, LSX, gen_vvv, gen_helper_vsrln_h_w)
+TRANS(vsrln_w_d, LSX, gen_vvv, gen_helper_vsrln_w_d)
+TRANS(vsran_b_h, LSX, gen_vvv, gen_helper_vsran_b_h)
+TRANS(vsran_h_w, LSX, gen_vvv, gen_helper_vsran_h_w)
+TRANS(vsran_w_d, LSX, gen_vvv, gen_helper_vsran_w_d)
+TRANS(xvsrln_b_h, LASX, gen_xxx, gen_helper_vsrln_b_h)
+TRANS(xvsrln_h_w, LASX, gen_xxx, gen_helper_vsrln_h_w)
+TRANS(xvsrln_w_d, LASX, gen_xxx, gen_helper_vsrln_w_d)
+TRANS(xvsran_b_h, LASX, gen_xxx, gen_helper_vsran_b_h)
+TRANS(xvsran_h_w, LASX, gen_xxx, gen_helper_vsran_h_w)
+TRANS(xvsran_w_d, LASX, gen_xxx, gen_helper_vsran_w_d)
+
+TRANS(vsrlni_b_h, LSX, gen_vv_i, gen_helper_vsrlni_b_h)
+TRANS(vsrlni_h_w, LSX, gen_vv_i, gen_helper_vsrlni_h_w)
+TRANS(vsrlni_w_d, LSX, gen_vv_i, gen_helper_vsrlni_w_d)
+TRANS(vsrlni_d_q, LSX, gen_vv_i, gen_helper_vsrlni_d_q)
+TRANS(vsrani_b_h, LSX, gen_vv_i, gen_helper_vsrani_b_h)
+TRANS(vsrani_h_w, LSX, gen_vv_i, gen_helper_vsrani_h_w)
+TRANS(vsrani_w_d, LSX, gen_vv_i, gen_helper_vsrani_w_d)
+TRANS(vsrani_d_q, LSX, gen_vv_i, gen_helper_vsrani_d_q)
+TRANS(xvsrlni_b_h, LASX, gen_xx_i, gen_helper_vsrlni_b_h)
+TRANS(xvsrlni_h_w, LASX, gen_xx_i, gen_helper_vsrlni_h_w)
+TRANS(xvsrlni_w_d, LASX, gen_xx_i, gen_helper_vsrlni_w_d)
+TRANS(xvsrlni_d_q, LASX, gen_xx_i, gen_helper_vsrlni_d_q)
+TRANS(xvsrani_b_h, LASX, gen_xx_i, gen_helper_vsrani_b_h)
+TRANS(xvsrani_h_w, LASX, gen_xx_i, gen_helper_vsrani_h_w)
+TRANS(xvsrani_w_d, LASX, gen_xx_i, gen_helper_vsrani_w_d)
+TRANS(xvsrani_d_q, LASX, gen_xx_i, gen_helper_vsrani_d_q)
+
+TRANS(vsrlrn_b_h, LSX, gen_vvv, gen_helper_vsrlrn_b_h)
+TRANS(vsrlrn_h_w, LSX, gen_vvv, gen_helper_vsrlrn_h_w)
+TRANS(vsrlrn_w_d, LSX, gen_vvv, gen_helper_vsrlrn_w_d)
+TRANS(vsrarn_b_h, LSX, gen_vvv, gen_helper_vsrarn_b_h)
+TRANS(vsrarn_h_w, LSX, gen_vvv, gen_helper_vsrarn_h_w)
+TRANS(vsrarn_w_d, LSX, gen_vvv, gen_helper_vsrarn_w_d)
+TRANS(xvsrlrn_b_h, LASX, gen_xxx, gen_helper_vsrlrn_b_h)
+TRANS(xvsrlrn_h_w, LASX, gen_xxx, gen_helper_vsrlrn_h_w)
+TRANS(xvsrlrn_w_d, LASX, gen_xxx, gen_helper_vsrlrn_w_d)
+TRANS(xvsrarn_b_h, LASX, gen_xxx, gen_helper_vsrarn_b_h)
+TRANS(xvsrarn_h_w, LASX, gen_xxx, gen_helper_vsrarn_h_w)
+TRANS(xvsrarn_w_d, LASX, gen_xxx, gen_helper_vsrarn_w_d)
+
+TRANS(vsrlrni_b_h, LSX, gen_vv_i, gen_helper_vsrlrni_b_h)
+TRANS(vsrlrni_h_w, LSX, gen_vv_i, gen_helper_vsrlrni_h_w)
+TRANS(vsrlrni_w_d, LSX, gen_vv_i, gen_helper_vsrlrni_w_d)
+TRANS(vsrlrni_d_q, LSX, gen_vv_i, gen_helper_vsrlrni_d_q)
+TRANS(vsrarni_b_h, LSX, gen_vv_i, gen_helper_vsrarni_b_h)
+TRANS(vsrarni_h_w, LSX, gen_vv_i, gen_helper_vsrarni_h_w)
+TRANS(vsrarni_w_d, LSX, gen_vv_i, gen_helper_vsrarni_w_d)
+TRANS(vsrarni_d_q, LSX, gen_vv_i, gen_helper_vsrarni_d_q)
+TRANS(xvsrlrni_b_h, LASX, gen_xx_i, gen_helper_vsrlrni_b_h)
+TRANS(xvsrlrni_h_w, LASX, gen_xx_i, gen_helper_vsrlrni_h_w)
+TRANS(xvsrlrni_w_d, LASX, gen_xx_i, gen_helper_vsrlrni_w_d)
+TRANS(xvsrlrni_d_q, LASX, gen_xx_i, gen_helper_vsrlrni_d_q)
+TRANS(xvsrarni_b_h, LASX, gen_xx_i, gen_helper_vsrarni_b_h)
+TRANS(xvsrarni_h_w, LASX, gen_xx_i, gen_helper_vsrarni_h_w)
+TRANS(xvsrarni_w_d, LASX, gen_xx_i, gen_helper_vsrarni_w_d)
+TRANS(xvsrarni_d_q, LASX, gen_xx_i, gen_helper_vsrarni_d_q)
+
+TRANS(vssrln_b_h, LSX, gen_vvv, gen_helper_vssrln_b_h)
+TRANS(vssrln_h_w, LSX, gen_vvv, gen_helper_vssrln_h_w)
+TRANS(vssrln_w_d, LSX, gen_vvv, gen_helper_vssrln_w_d)
+TRANS(vssran_b_h, LSX, gen_vvv, gen_helper_vssran_b_h)
+TRANS(vssran_h_w, LSX, gen_vvv, gen_helper_vssran_h_w)
+TRANS(vssran_w_d, LSX, gen_vvv, gen_helper_vssran_w_d)
+TRANS(vssrln_bu_h, LSX, gen_vvv, gen_helper_vssrln_bu_h)
+TRANS(vssrln_hu_w, LSX, gen_vvv, gen_helper_vssrln_hu_w)
+TRANS(vssrln_wu_d, LSX, gen_vvv, gen_helper_vssrln_wu_d)
+TRANS(vssran_bu_h, LSX, gen_vvv, gen_helper_vssran_bu_h)
+TRANS(vssran_hu_w, LSX, gen_vvv, gen_helper_vssran_hu_w)
+TRANS(vssran_wu_d, LSX, gen_vvv, gen_helper_vssran_wu_d)
+TRANS(xvssrln_b_h, LASX, gen_xxx, gen_helper_vssrln_b_h)
+TRANS(xvssrln_h_w, LASX, gen_xxx, gen_helper_vssrln_h_w)
+TRANS(xvssrln_w_d, LASX, gen_xxx, gen_helper_vssrln_w_d)
+TRANS(xvssran_b_h, LASX, gen_xxx, gen_helper_vssran_b_h)
+TRANS(xvssran_h_w, LASX, gen_xxx, gen_helper_vssran_h_w)
+TRANS(xvssran_w_d, LASX, gen_xxx, gen_helper_vssran_w_d)
+TRANS(xvssrln_bu_h, LASX, gen_xxx, gen_helper_vssrln_bu_h)
+TRANS(xvssrln_hu_w, LASX, gen_xxx, gen_helper_vssrln_hu_w)
+TRANS(xvssrln_wu_d, LASX, gen_xxx, gen_helper_vssrln_wu_d)
+TRANS(xvssran_bu_h, LASX, gen_xxx, gen_helper_vssran_bu_h)
+TRANS(xvssran_hu_w, LASX, gen_xxx, gen_helper_vssran_hu_w)
+TRANS(xvssran_wu_d, LASX, gen_xxx, gen_helper_vssran_wu_d)
+
+TRANS(vssrlni_b_h, LSX, gen_vv_i, gen_helper_vssrlni_b_h)
+TRANS(vssrlni_h_w, LSX, gen_vv_i, gen_helper_vssrlni_h_w)
+TRANS(vssrlni_w_d, LSX, gen_vv_i, gen_helper_vssrlni_w_d)
+TRANS(vssrlni_d_q, LSX, gen_vv_i, gen_helper_vssrlni_d_q)
+TRANS(vssrani_b_h, LSX, gen_vv_i, gen_helper_vssrani_b_h)
+TRANS(vssrani_h_w, LSX, gen_vv_i, gen_helper_vssrani_h_w)
+TRANS(vssrani_w_d, LSX, gen_vv_i, gen_helper_vssrani_w_d)
+TRANS(vssrani_d_q, LSX, gen_vv_i, gen_helper_vssrani_d_q)
+TRANS(vssrlni_bu_h, LSX, gen_vv_i, gen_helper_vssrlni_bu_h)
+TRANS(vssrlni_hu_w, LSX, gen_vv_i, gen_helper_vssrlni_hu_w)
+TRANS(vssrlni_wu_d, LSX, gen_vv_i, gen_helper_vssrlni_wu_d)
+TRANS(vssrlni_du_q, LSX, gen_vv_i, gen_helper_vssrlni_du_q)
+TRANS(vssrani_bu_h, LSX, gen_vv_i, gen_helper_vssrani_bu_h)
+TRANS(vssrani_hu_w, LSX, gen_vv_i, gen_helper_vssrani_hu_w)
+TRANS(vssrani_wu_d, LSX, gen_vv_i, gen_helper_vssrani_wu_d)
+TRANS(vssrani_du_q, LSX, gen_vv_i, gen_helper_vssrani_du_q)
+TRANS(xvssrlni_b_h, LASX, gen_xx_i, gen_helper_vssrlni_b_h)
+TRANS(xvssrlni_h_w, LASX, gen_xx_i, gen_helper_vssrlni_h_w)
+TRANS(xvssrlni_w_d, LASX, gen_xx_i, gen_helper_vssrlni_w_d)
+TRANS(xvssrlni_d_q, LASX, gen_xx_i, gen_helper_vssrlni_d_q)
+TRANS(xvssrani_b_h, LASX, gen_xx_i, gen_helper_vssrani_b_h)
+TRANS(xvssrani_h_w, LASX, gen_xx_i, gen_helper_vssrani_h_w)
+TRANS(xvssrani_w_d, LASX, gen_xx_i, gen_helper_vssrani_w_d)
+TRANS(xvssrani_d_q, LASX, gen_xx_i, gen_helper_vssrani_d_q)
+TRANS(xvssrlni_bu_h, LASX, gen_xx_i, gen_helper_vssrlni_bu_h)
+TRANS(xvssrlni_hu_w, LASX, gen_xx_i, gen_helper_vssrlni_hu_w)
+TRANS(xvssrlni_wu_d, LASX, gen_xx_i, gen_helper_vssrlni_wu_d)
+TRANS(xvssrlni_du_q, LASX, gen_xx_i, gen_helper_vssrlni_du_q)
+TRANS(xvssrani_bu_h, LASX, gen_xx_i, gen_helper_vssrani_bu_h)
+TRANS(xvssrani_hu_w, LASX, gen_xx_i, gen_helper_vssrani_hu_w)
+TRANS(xvssrani_wu_d, LASX, gen_xx_i, gen_helper_vssrani_wu_d)
+TRANS(xvssrani_du_q, LASX, gen_xx_i, gen_helper_vssrani_du_q)
+
+TRANS(vssrlrn_b_h, LSX, gen_vvv, gen_helper_vssrlrn_b_h)
+TRANS(vssrlrn_h_w, LSX, gen_vvv, gen_helper_vssrlrn_h_w)
+TRANS(vssrlrn_w_d, LSX, gen_vvv, gen_helper_vssrlrn_w_d)
+TRANS(vssrarn_b_h, LSX, gen_vvv, gen_helper_vssrarn_b_h)
+TRANS(vssrarn_h_w, LSX, gen_vvv, gen_helper_vssrarn_h_w)
+TRANS(vssrarn_w_d, LSX, gen_vvv, gen_helper_vssrarn_w_d)
+TRANS(vssrlrn_bu_h, LSX, gen_vvv, gen_helper_vssrlrn_bu_h)
+TRANS(vssrlrn_hu_w, LSX, gen_vvv, gen_helper_vssrlrn_hu_w)
+TRANS(vssrlrn_wu_d, LSX, gen_vvv, gen_helper_vssrlrn_wu_d)
+TRANS(vssrarn_bu_h, LSX, gen_vvv, gen_helper_vssrarn_bu_h)
+TRANS(vssrarn_hu_w, LSX, gen_vvv, gen_helper_vssrarn_hu_w)
+TRANS(vssrarn_wu_d, LSX, gen_vvv, gen_helper_vssrarn_wu_d)
+TRANS(xvssrlrn_b_h, LASX, gen_xxx, gen_helper_vssrlrn_b_h)
+TRANS(xvssrlrn_h_w, LASX, gen_xxx, gen_helper_vssrlrn_h_w)
+TRANS(xvssrlrn_w_d, LASX, gen_xxx, gen_helper_vssrlrn_w_d)
+TRANS(xvssrarn_b_h, LASX, gen_xxx, gen_helper_vssrarn_b_h)
+TRANS(xvssrarn_h_w, LASX, gen_xxx, gen_helper_vssrarn_h_w)
+TRANS(xvssrarn_w_d, LASX, gen_xxx, gen_helper_vssrarn_w_d)
+TRANS(xvssrlrn_bu_h, LASX, gen_xxx, gen_helper_vssrlrn_bu_h)
+TRANS(xvssrlrn_hu_w, LASX, gen_xxx, gen_helper_vssrlrn_hu_w)
+TRANS(xvssrlrn_wu_d, LASX, gen_xxx, gen_helper_vssrlrn_wu_d)
+TRANS(xvssrarn_bu_h, LASX, gen_xxx, gen_helper_vssrarn_bu_h)
+TRANS(xvssrarn_hu_w, LASX, gen_xxx, gen_helper_vssrarn_hu_w)
+TRANS(xvssrarn_wu_d, LASX, gen_xxx, gen_helper_vssrarn_wu_d)
+
+TRANS(vssrlrni_b_h, LSX, gen_vv_i, gen_helper_vssrlrni_b_h)
+TRANS(vssrlrni_h_w, LSX, gen_vv_i, gen_helper_vssrlrni_h_w)
+TRANS(vssrlrni_w_d, LSX, gen_vv_i, gen_helper_vssrlrni_w_d)
+TRANS(vssrlrni_d_q, LSX, gen_vv_i, gen_helper_vssrlrni_d_q)
+TRANS(vssrarni_b_h, LSX, gen_vv_i, gen_helper_vssrarni_b_h)
+TRANS(vssrarni_h_w, LSX, gen_vv_i, gen_helper_vssrarni_h_w)
+TRANS(vssrarni_w_d, LSX, gen_vv_i, gen_helper_vssrarni_w_d)
+TRANS(vssrarni_d_q, LSX, gen_vv_i, gen_helper_vssrarni_d_q)
+TRANS(vssrlrni_bu_h, LSX, gen_vv_i, gen_helper_vssrlrni_bu_h)
+TRANS(vssrlrni_hu_w, LSX, gen_vv_i, gen_helper_vssrlrni_hu_w)
+TRANS(vssrlrni_wu_d, LSX, gen_vv_i, gen_helper_vssrlrni_wu_d)
+TRANS(vssrlrni_du_q, LSX, gen_vv_i, gen_helper_vssrlrni_du_q)
+TRANS(vssrarni_bu_h, LSX, gen_vv_i, gen_helper_vssrarni_bu_h)
+TRANS(vssrarni_hu_w, LSX, gen_vv_i, gen_helper_vssrarni_hu_w)
+TRANS(vssrarni_wu_d, LSX, gen_vv_i, gen_helper_vssrarni_wu_d)
+TRANS(vssrarni_du_q, LSX, gen_vv_i, gen_helper_vssrarni_du_q)
+TRANS(xvssrlrni_b_h, LASX, gen_xx_i, gen_helper_vssrlrni_b_h)
+TRANS(xvssrlrni_h_w, LASX, gen_xx_i, gen_helper_vssrlrni_h_w)
+TRANS(xvssrlrni_w_d, LASX, gen_xx_i, gen_helper_vssrlrni_w_d)
+TRANS(xvssrlrni_d_q, LASX, gen_xx_i, gen_helper_vssrlrni_d_q)
+TRANS(xvssrarni_b_h, LASX, gen_xx_i, gen_helper_vssrarni_b_h)
+TRANS(xvssrarni_h_w, LASX, gen_xx_i, gen_helper_vssrarni_h_w)
+TRANS(xvssrarni_w_d, LASX, gen_xx_i, gen_helper_vssrarni_w_d)
+TRANS(xvssrarni_d_q, LASX, gen_xx_i, gen_helper_vssrarni_d_q)
+TRANS(xvssrlrni_bu_h, LASX, gen_xx_i, gen_helper_vssrlrni_bu_h)
+TRANS(xvssrlrni_hu_w, LASX, gen_xx_i, gen_helper_vssrlrni_hu_w)
+TRANS(xvssrlrni_wu_d, LASX, gen_xx_i, gen_helper_vssrlrni_wu_d)
+TRANS(xvssrlrni_du_q, LASX, gen_xx_i, gen_helper_vssrlrni_du_q)
+TRANS(xvssrarni_bu_h, LASX, gen_xx_i, gen_helper_vssrarni_bu_h)
+TRANS(xvssrarni_hu_w, LASX, gen_xx_i, gen_helper_vssrarni_hu_w)
+TRANS(xvssrarni_wu_d, LASX, gen_xx_i, gen_helper_vssrarni_wu_d)
+TRANS(xvssrarni_du_q, LASX, gen_xx_i, gen_helper_vssrarni_du_q)
+
+TRANS(vclo_b, LSX, gen_vv, gen_helper_vclo_b)
+TRANS(vclo_h, LSX, gen_vv, gen_helper_vclo_h)
+TRANS(vclo_w, LSX, gen_vv, gen_helper_vclo_w)
+TRANS(vclo_d, LSX, gen_vv, gen_helper_vclo_d)
+TRANS(vclz_b, LSX, gen_vv, gen_helper_vclz_b)
+TRANS(vclz_h, LSX, gen_vv, gen_helper_vclz_h)
+TRANS(vclz_w, LSX, gen_vv, gen_helper_vclz_w)
+TRANS(vclz_d, LSX, gen_vv, gen_helper_vclz_d)
+TRANS(xvclo_b, LASX, gen_xx, gen_helper_vclo_b)
+TRANS(xvclo_h, LASX, gen_xx, gen_helper_vclo_h)
+TRANS(xvclo_w, LASX, gen_xx, gen_helper_vclo_w)
+TRANS(xvclo_d, LASX, gen_xx, gen_helper_vclo_d)
+TRANS(xvclz_b, LASX, gen_xx, gen_helper_vclz_b)
+TRANS(xvclz_h, LASX, gen_xx, gen_helper_vclz_h)
+TRANS(xvclz_w, LASX, gen_xx, gen_helper_vclz_w)
+TRANS(xvclz_d, LASX, gen_xx, gen_helper_vclz_d)
+
+TRANS(vpcnt_b, LSX, gen_vv, gen_helper_vpcnt_b)
+TRANS(vpcnt_h, LSX, gen_vv, gen_helper_vpcnt_h)
+TRANS(vpcnt_w, LSX, gen_vv, gen_helper_vpcnt_w)
+TRANS(vpcnt_d, LSX, gen_vv, gen_helper_vpcnt_d)
+TRANS(xvpcnt_b, LASX, gen_xx, gen_helper_vpcnt_b)
+TRANS(xvpcnt_h, LASX, gen_xx, gen_helper_vpcnt_h)
+TRANS(xvpcnt_w, LASX, gen_xx, gen_helper_vpcnt_w)
+TRANS(xvpcnt_d, LASX, gen_xx, gen_helper_vpcnt_d)
+
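+/*
+ * Build the per-element mask 1 << (b % element_bits) and combine it with
+ * operand a using @func (andc, or, xor), which yields the register forms
+ * of vbitclr/vbitset/vbitrev.
+ */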
+static void do_vbit(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b,
+ void (*func)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec))
+{
+ TCGv_vec mask, lsh, t1, one;
+
+ lsh = tcg_temp_new_vec_matching(t);
+ t1 = tcg_temp_new_vec_matching(t);
+ mask = tcg_constant_vec_matching(t, vece, (8 << vece) - 1);
+ one = tcg_constant_vec_matching(t, vece, 1);
+
+ tcg_gen_and_vec(vece, lsh, b, mask);
+ tcg_gen_shlv_vec(vece, t1, one, lsh);
+ func(vece, t, a, t1);
+}
+
+static void gen_vbitclr(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
+{
+ do_vbit(vece, t, a, b, tcg_gen_andc_vec);
+}
+
+static void gen_vbitset(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
+{
+ do_vbit(vece, t, a, b, tcg_gen_or_vec);
+}
+
+static void gen_vbitrev(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
+{
+ do_vbit(vece, t, a, b, tcg_gen_xor_vec);
+}
+
+static void do_vbitclr(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shlv_vec, INDEX_op_andc_vec, 0
+ };
+ static const GVecGen3 op[4] = {
+ {
+ .fniv = gen_vbitclr,
+ .fno = gen_helper_vbitclr_b,
+ .opt_opc = vecop_list,
+ .vece = MO_8
+ },
+ {
+ .fniv = gen_vbitclr,
+ .fno = gen_helper_vbitclr_h,
+ .opt_opc = vecop_list,
+ .vece = MO_16
+ },
+ {
+ .fniv = gen_vbitclr,
+ .fno = gen_helper_vbitclr_w,
+ .opt_opc = vecop_list,
+ .vece = MO_32
+ },
+ {
+ .fniv = gen_vbitclr,
+ .fno = gen_helper_vbitclr_d,
+ .opt_opc = vecop_list,
+ .vece = MO_64
+ },
+ };
+
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
+}
+
+TRANS(vbitclr_b, LSX, gvec_vvv, MO_8, do_vbitclr)
+TRANS(vbitclr_h, LSX, gvec_vvv, MO_16, do_vbitclr)
+TRANS(vbitclr_w, LSX, gvec_vvv, MO_32, do_vbitclr)
+TRANS(vbitclr_d, LSX, gvec_vvv, MO_64, do_vbitclr)
+TRANS(xvbitclr_b, LASX, gvec_xxx, MO_8, do_vbitclr)
+TRANS(xvbitclr_h, LASX, gvec_xxx, MO_16, do_vbitclr)
+TRANS(xvbitclr_w, LASX, gvec_xxx, MO_32, do_vbitclr)
+TRANS(xvbitclr_d, LASX, gvec_xxx, MO_64, do_vbitclr)
+
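+/* Immediate counterpart of do_vbit(): the bit index is taken from @imm. */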
+static void do_vbiti(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm,
+ void (*func)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec))
+{
+ int lsh;
+ TCGv_vec t1, one;
+
+    lsh = imm & ((8 << vece) - 1);
+ t1 = tcg_temp_new_vec_matching(t);
+ one = tcg_constant_vec_matching(t, vece, 1);
+
+ tcg_gen_shli_vec(vece, t1, one, lsh);
+ func(vece, t, a, t1);
+}
+
+static void gen_vbitclri(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
+{
+ do_vbiti(vece, t, a, imm, tcg_gen_andc_vec);
+}
+
+static void gen_vbitseti(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
+{
+ do_vbiti(vece, t, a, imm, tcg_gen_or_vec);
+}
+
+static void gen_vbitrevi(unsigned vece, TCGv_vec t, TCGv_vec a, int64_t imm)
+{
+ do_vbiti(vece, t, a, imm, tcg_gen_xor_vec);
+}
+
+static void do_vbitclri(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+ int64_t imm, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shli_vec, INDEX_op_andc_vec, 0
+ };
+ static const GVecGen2i op[4] = {
+ {
+ .fniv = gen_vbitclri,
+ .fnoi = gen_helper_vbitclri_b,
+ .opt_opc = vecop_list,
+ .vece = MO_8
+ },
+ {
+ .fniv = gen_vbitclri,
+ .fnoi = gen_helper_vbitclri_h,
+ .opt_opc = vecop_list,
+ .vece = MO_16
+ },
+ {
+ .fniv = gen_vbitclri,
+ .fnoi = gen_helper_vbitclri_w,
+ .opt_opc = vecop_list,
+ .vece = MO_32
+ },
+ {
+ .fniv = gen_vbitclri,
+ .fnoi = gen_helper_vbitclri_d,
+ .opt_opc = vecop_list,
+ .vece = MO_64
+ },
+ };
+
+ tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
+}
+
+TRANS(vbitclri_b, LSX, gvec_vv_i, MO_8, do_vbitclri)
+TRANS(vbitclri_h, LSX, gvec_vv_i, MO_16, do_vbitclri)
+TRANS(vbitclri_w, LSX, gvec_vv_i, MO_32, do_vbitclri)
+TRANS(vbitclri_d, LSX, gvec_vv_i, MO_64, do_vbitclri)
+TRANS(xvbitclri_b, LASX, gvec_xx_i, MO_8, do_vbitclri)
+TRANS(xvbitclri_h, LASX, gvec_xx_i, MO_16, do_vbitclri)
+TRANS(xvbitclri_w, LASX, gvec_xx_i, MO_32, do_vbitclri)
+TRANS(xvbitclri_d, LASX, gvec_xx_i, MO_64, do_vbitclri)
+
+static void do_vbitset(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shlv_vec, 0
+ };
+ static const GVecGen3 op[4] = {
+ {
+ .fniv = gen_vbitset,
+ .fno = gen_helper_vbitset_b,
+ .opt_opc = vecop_list,
+ .vece = MO_8
+ },
+ {
+ .fniv = gen_vbitset,
+ .fno = gen_helper_vbitset_h,
+ .opt_opc = vecop_list,
+ .vece = MO_16
+ },
+ {
+ .fniv = gen_vbitset,
+ .fno = gen_helper_vbitset_w,
+ .opt_opc = vecop_list,
+ .vece = MO_32
+ },
+ {
+ .fniv = gen_vbitset,
+ .fno = gen_helper_vbitset_d,
+ .opt_opc = vecop_list,
+ .vece = MO_64
+ },
+ };
+
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
+}
+
+TRANS(vbitset_b, LSX, gvec_vvv, MO_8, do_vbitset)
+TRANS(vbitset_h, LSX, gvec_vvv, MO_16, do_vbitset)
+TRANS(vbitset_w, LSX, gvec_vvv, MO_32, do_vbitset)
+TRANS(vbitset_d, LSX, gvec_vvv, MO_64, do_vbitset)
+TRANS(xvbitset_b, LASX, gvec_xxx, MO_8, do_vbitset)
+TRANS(xvbitset_h, LASX, gvec_xxx, MO_16, do_vbitset)
+TRANS(xvbitset_w, LASX, gvec_xxx, MO_32, do_vbitset)
+TRANS(xvbitset_d, LASX, gvec_xxx, MO_64, do_vbitset)
+
+static void do_vbitseti(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+ int64_t imm, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shli_vec, 0
+ };
+ static const GVecGen2i op[4] = {
+ {
+ .fniv = gen_vbitseti,
+ .fnoi = gen_helper_vbitseti_b,
+ .opt_opc = vecop_list,
+ .vece = MO_8
+ },
+ {
+ .fniv = gen_vbitseti,
+ .fnoi = gen_helper_vbitseti_h,
+ .opt_opc = vecop_list,
+ .vece = MO_16
+ },
+ {
+ .fniv = gen_vbitseti,
+ .fnoi = gen_helper_vbitseti_w,
+ .opt_opc = vecop_list,
+ .vece = MO_32
+ },
+ {
+ .fniv = gen_vbitseti,
+ .fnoi = gen_helper_vbitseti_d,
+ .opt_opc = vecop_list,
+ .vece = MO_64
+ },
+ };
+
+ tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
+}
+
+TRANS(vbitseti_b, LSX, gvec_vv_i, MO_8, do_vbitseti)
+TRANS(vbitseti_h, LSX, gvec_vv_i, MO_16, do_vbitseti)
+TRANS(vbitseti_w, LSX, gvec_vv_i, MO_32, do_vbitseti)
+TRANS(vbitseti_d, LSX, gvec_vv_i, MO_64, do_vbitseti)
+TRANS(xvbitseti_b, LASX, gvec_xx_i, MO_8, do_vbitseti)
+TRANS(xvbitseti_h, LASX, gvec_xx_i, MO_16, do_vbitseti)
+TRANS(xvbitseti_w, LASX, gvec_xx_i, MO_32, do_vbitseti)
+TRANS(xvbitseti_d, LASX, gvec_xx_i, MO_64, do_vbitseti)
+
+static void do_vbitrev(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+ uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shlv_vec, 0
+ };
+ static const GVecGen3 op[4] = {
+ {
+ .fniv = gen_vbitrev,
+ .fno = gen_helper_vbitrev_b,
+ .opt_opc = vecop_list,
+ .vece = MO_8
+ },
+ {
+ .fniv = gen_vbitrev,
+ .fno = gen_helper_vbitrev_h,
+ .opt_opc = vecop_list,
+ .vece = MO_16
+ },
+ {
+ .fniv = gen_vbitrev,
+ .fno = gen_helper_vbitrev_w,
+ .opt_opc = vecop_list,
+ .vece = MO_32
+ },
+ {
+ .fniv = gen_vbitrev,
+ .fno = gen_helper_vbitrev_d,
+ .opt_opc = vecop_list,
+ .vece = MO_64
+ },
+ };
+
+ tcg_gen_gvec_3(vd_ofs, vj_ofs, vk_ofs, oprsz, maxsz, &op[vece]);
+}
+
+TRANS(vbitrev_b, LSX, gvec_vvv, MO_8, do_vbitrev)
+TRANS(vbitrev_h, LSX, gvec_vvv, MO_16, do_vbitrev)
+TRANS(vbitrev_w, LSX, gvec_vvv, MO_32, do_vbitrev)
+TRANS(vbitrev_d, LSX, gvec_vvv, MO_64, do_vbitrev)
+TRANS(xvbitrev_b, LASX, gvec_xxx, MO_8, do_vbitrev)
+TRANS(xvbitrev_h, LASX, gvec_xxx, MO_16, do_vbitrev)
+TRANS(xvbitrev_w, LASX, gvec_xxx, MO_32, do_vbitrev)
+TRANS(xvbitrev_d, LASX, gvec_xxx, MO_64, do_vbitrev)
+
+static void do_vbitrevi(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+ int64_t imm, uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shli_vec, 0
+ };
+ static const GVecGen2i op[4] = {
+ {
+ .fniv = gen_vbitrevi,
+ .fnoi = gen_helper_vbitrevi_b,
+ .opt_opc = vecop_list,
+ .vece = MO_8
+ },
+ {
+ .fniv = gen_vbitrevi,
+ .fnoi = gen_helper_vbitrevi_h,
+ .opt_opc = vecop_list,
+ .vece = MO_16
+ },
+ {
+ .fniv = gen_vbitrevi,
+ .fnoi = gen_helper_vbitrevi_w,
+ .opt_opc = vecop_list,
+ .vece = MO_32
+ },
+ {
+ .fniv = gen_vbitrevi,
+ .fnoi = gen_helper_vbitrevi_d,
+ .opt_opc = vecop_list,
+ .vece = MO_64
+ },
+ };
+
+ tcg_gen_gvec_2i(vd_ofs, vj_ofs, oprsz, maxsz, imm, &op[vece]);
+}
+
+TRANS(vbitrevi_b, LSX, gvec_vv_i, MO_8, do_vbitrevi)
+TRANS(vbitrevi_h, LSX, gvec_vv_i, MO_16, do_vbitrevi)
+TRANS(vbitrevi_w, LSX, gvec_vv_i, MO_32, do_vbitrevi)
+TRANS(vbitrevi_d, LSX, gvec_vv_i, MO_64, do_vbitrevi)
+TRANS(xvbitrevi_b, LASX, gvec_xx_i, MO_8, do_vbitrevi)
+TRANS(xvbitrevi_h, LASX, gvec_xx_i, MO_16, do_vbitrevi)
+TRANS(xvbitrevi_w, LASX, gvec_xx_i, MO_32, do_vbitrevi)
+TRANS(xvbitrevi_d, LASX, gvec_xx_i, MO_64, do_vbitrevi)
+
+TRANS(vfrstp_b, LSX, gen_vvv, gen_helper_vfrstp_b)
+TRANS(vfrstp_h, LSX, gen_vvv, gen_helper_vfrstp_h)
+TRANS(vfrstpi_b, LSX, gen_vv_i, gen_helper_vfrstpi_b)
+TRANS(vfrstpi_h, LSX, gen_vv_i, gen_helper_vfrstpi_h)
+TRANS(xvfrstp_b, LASX, gen_xxx, gen_helper_vfrstp_b)
+TRANS(xvfrstp_h, LASX, gen_xxx, gen_helper_vfrstp_h)
+TRANS(xvfrstpi_b, LASX, gen_xx_i, gen_helper_vfrstpi_b)
+TRANS(xvfrstpi_h, LASX, gen_xx_i, gen_helper_vfrstpi_h)
+
+TRANS(vfadd_s, LSX, gen_vvv_ptr, gen_helper_vfadd_s)
+TRANS(vfadd_d, LSX, gen_vvv_ptr, gen_helper_vfadd_d)
+TRANS(vfsub_s, LSX, gen_vvv_ptr, gen_helper_vfsub_s)
+TRANS(vfsub_d, LSX, gen_vvv_ptr, gen_helper_vfsub_d)
+TRANS(vfmul_s, LSX, gen_vvv_ptr, gen_helper_vfmul_s)
+TRANS(vfmul_d, LSX, gen_vvv_ptr, gen_helper_vfmul_d)
+TRANS(vfdiv_s, LSX, gen_vvv_ptr, gen_helper_vfdiv_s)
+TRANS(vfdiv_d, LSX, gen_vvv_ptr, gen_helper_vfdiv_d)
+TRANS(xvfadd_s, LASX, gen_xxx_ptr, gen_helper_vfadd_s)
+TRANS(xvfadd_d, LASX, gen_xxx_ptr, gen_helper_vfadd_d)
+TRANS(xvfsub_s, LASX, gen_xxx_ptr, gen_helper_vfsub_s)
+TRANS(xvfsub_d, LASX, gen_xxx_ptr, gen_helper_vfsub_d)
+TRANS(xvfmul_s, LASX, gen_xxx_ptr, gen_helper_vfmul_s)
+TRANS(xvfmul_d, LASX, gen_xxx_ptr, gen_helper_vfmul_d)
+TRANS(xvfdiv_s, LASX, gen_xxx_ptr, gen_helper_vfdiv_s)
+TRANS(xvfdiv_d, LASX, gen_xxx_ptr, gen_helper_vfdiv_d)
+
+TRANS(vfmadd_s, LSX, gen_vvvv_ptr, gen_helper_vfmadd_s)
+TRANS(vfmadd_d, LSX, gen_vvvv_ptr, gen_helper_vfmadd_d)
+TRANS(vfmsub_s, LSX, gen_vvvv_ptr, gen_helper_vfmsub_s)
+TRANS(vfmsub_d, LSX, gen_vvvv_ptr, gen_helper_vfmsub_d)
+TRANS(vfnmadd_s, LSX, gen_vvvv_ptr, gen_helper_vfnmadd_s)
+TRANS(vfnmadd_d, LSX, gen_vvvv_ptr, gen_helper_vfnmadd_d)
+TRANS(vfnmsub_s, LSX, gen_vvvv_ptr, gen_helper_vfnmsub_s)
+TRANS(vfnmsub_d, LSX, gen_vvvv_ptr, gen_helper_vfnmsub_d)
+TRANS(xvfmadd_s, LASX, gen_xxxx_ptr, gen_helper_vfmadd_s)
+TRANS(xvfmadd_d, LASX, gen_xxxx_ptr, gen_helper_vfmadd_d)
+TRANS(xvfmsub_s, LASX, gen_xxxx_ptr, gen_helper_vfmsub_s)
+TRANS(xvfmsub_d, LASX, gen_xxxx_ptr, gen_helper_vfmsub_d)
+TRANS(xvfnmadd_s, LASX, gen_xxxx_ptr, gen_helper_vfnmadd_s)
+TRANS(xvfnmadd_d, LASX, gen_xxxx_ptr, gen_helper_vfnmadd_d)
+TRANS(xvfnmsub_s, LASX, gen_xxxx_ptr, gen_helper_vfnmsub_s)
+TRANS(xvfnmsub_d, LASX, gen_xxxx_ptr, gen_helper_vfnmsub_d)
+
+TRANS(vfmax_s, LSX, gen_vvv_ptr, gen_helper_vfmax_s)
+TRANS(vfmax_d, LSX, gen_vvv_ptr, gen_helper_vfmax_d)
+TRANS(vfmin_s, LSX, gen_vvv_ptr, gen_helper_vfmin_s)
+TRANS(vfmin_d, LSX, gen_vvv_ptr, gen_helper_vfmin_d)
+TRANS(xvfmax_s, LASX, gen_xxx_ptr, gen_helper_vfmax_s)
+TRANS(xvfmax_d, LASX, gen_xxx_ptr, gen_helper_vfmax_d)
+TRANS(xvfmin_s, LASX, gen_xxx_ptr, gen_helper_vfmin_s)
+TRANS(xvfmin_d, LASX, gen_xxx_ptr, gen_helper_vfmin_d)
+
+TRANS(vfmaxa_s, LSX, gen_vvv_ptr, gen_helper_vfmaxa_s)
+TRANS(vfmaxa_d, LSX, gen_vvv_ptr, gen_helper_vfmaxa_d)
+TRANS(vfmina_s, LSX, gen_vvv_ptr, gen_helper_vfmina_s)
+TRANS(vfmina_d, LSX, gen_vvv_ptr, gen_helper_vfmina_d)
+TRANS(xvfmaxa_s, LASX, gen_xxx_ptr, gen_helper_vfmaxa_s)
+TRANS(xvfmaxa_d, LASX, gen_xxx_ptr, gen_helper_vfmaxa_d)
+TRANS(xvfmina_s, LASX, gen_xxx_ptr, gen_helper_vfmina_s)
+TRANS(xvfmina_d, LASX, gen_xxx_ptr, gen_helper_vfmina_d)
+
+TRANS(vflogb_s, LSX, gen_vv_ptr, gen_helper_vflogb_s)
+TRANS(vflogb_d, LSX, gen_vv_ptr, gen_helper_vflogb_d)
+TRANS(xvflogb_s, LASX, gen_xx_ptr, gen_helper_vflogb_s)
+TRANS(xvflogb_d, LASX, gen_xx_ptr, gen_helper_vflogb_d)
+
+TRANS(vfclass_s, LSX, gen_vv_ptr, gen_helper_vfclass_s)
+TRANS(vfclass_d, LSX, gen_vv_ptr, gen_helper_vfclass_d)
+TRANS(xvfclass_s, LASX, gen_xx_ptr, gen_helper_vfclass_s)
+TRANS(xvfclass_d, LASX, gen_xx_ptr, gen_helper_vfclass_d)
+
+TRANS(vfsqrt_s, LSX, gen_vv_ptr, gen_helper_vfsqrt_s)
+TRANS(vfsqrt_d, LSX, gen_vv_ptr, gen_helper_vfsqrt_d)
+TRANS(vfrecip_s, LSX, gen_vv_ptr, gen_helper_vfrecip_s)
+TRANS(vfrecip_d, LSX, gen_vv_ptr, gen_helper_vfrecip_d)
+TRANS(vfrsqrt_s, LSX, gen_vv_ptr, gen_helper_vfrsqrt_s)
+TRANS(vfrsqrt_d, LSX, gen_vv_ptr, gen_helper_vfrsqrt_d)
+TRANS(xvfsqrt_s, LASX, gen_xx_ptr, gen_helper_vfsqrt_s)
+TRANS(xvfsqrt_d, LASX, gen_xx_ptr, gen_helper_vfsqrt_d)
+TRANS(xvfrecip_s, LASX, gen_xx_ptr, gen_helper_vfrecip_s)
+TRANS(xvfrecip_d, LASX, gen_xx_ptr, gen_helper_vfrecip_d)
+TRANS(xvfrsqrt_s, LASX, gen_xx_ptr, gen_helper_vfrsqrt_s)
+TRANS(xvfrsqrt_d, LASX, gen_xx_ptr, gen_helper_vfrsqrt_d)
+
+TRANS(vfcvtl_s_h, LSX, gen_vv_ptr, gen_helper_vfcvtl_s_h)
+TRANS(vfcvth_s_h, LSX, gen_vv_ptr, gen_helper_vfcvth_s_h)
+TRANS(vfcvtl_d_s, LSX, gen_vv_ptr, gen_helper_vfcvtl_d_s)
+TRANS(vfcvth_d_s, LSX, gen_vv_ptr, gen_helper_vfcvth_d_s)
+TRANS(vfcvt_h_s, LSX, gen_vvv_ptr, gen_helper_vfcvt_h_s)
+TRANS(vfcvt_s_d, LSX, gen_vvv_ptr, gen_helper_vfcvt_s_d)
+TRANS(xvfcvtl_s_h, LASX, gen_xx_ptr, gen_helper_vfcvtl_s_h)
+TRANS(xvfcvth_s_h, LASX, gen_xx_ptr, gen_helper_vfcvth_s_h)
+TRANS(xvfcvtl_d_s, LASX, gen_xx_ptr, gen_helper_vfcvtl_d_s)
+TRANS(xvfcvth_d_s, LASX, gen_xx_ptr, gen_helper_vfcvth_d_s)
+TRANS(xvfcvt_h_s, LASX, gen_xxx_ptr, gen_helper_vfcvt_h_s)
+TRANS(xvfcvt_s_d, LASX, gen_xxx_ptr, gen_helper_vfcvt_s_d)
+
+TRANS(vfrintrne_s, LSX, gen_vv_ptr, gen_helper_vfrintrne_s)
+TRANS(vfrintrne_d, LSX, gen_vv_ptr, gen_helper_vfrintrne_d)
+TRANS(vfrintrz_s, LSX, gen_vv_ptr, gen_helper_vfrintrz_s)
+TRANS(vfrintrz_d, LSX, gen_vv_ptr, gen_helper_vfrintrz_d)
+TRANS(vfrintrp_s, LSX, gen_vv_ptr, gen_helper_vfrintrp_s)
+TRANS(vfrintrp_d, LSX, gen_vv_ptr, gen_helper_vfrintrp_d)
+TRANS(vfrintrm_s, LSX, gen_vv_ptr, gen_helper_vfrintrm_s)
+TRANS(vfrintrm_d, LSX, gen_vv_ptr, gen_helper_vfrintrm_d)
+TRANS(vfrint_s, LSX, gen_vv_ptr, gen_helper_vfrint_s)
+TRANS(vfrint_d, LSX, gen_vv_ptr, gen_helper_vfrint_d)
+TRANS(xvfrintrne_s, LASX, gen_xx_ptr, gen_helper_vfrintrne_s)
+TRANS(xvfrintrne_d, LASX, gen_xx_ptr, gen_helper_vfrintrne_d)
+TRANS(xvfrintrz_s, LASX, gen_xx_ptr, gen_helper_vfrintrz_s)
+TRANS(xvfrintrz_d, LASX, gen_xx_ptr, gen_helper_vfrintrz_d)
+TRANS(xvfrintrp_s, LASX, gen_xx_ptr, gen_helper_vfrintrp_s)
+TRANS(xvfrintrp_d, LASX, gen_xx_ptr, gen_helper_vfrintrp_d)
+TRANS(xvfrintrm_s, LASX, gen_xx_ptr, gen_helper_vfrintrm_s)
+TRANS(xvfrintrm_d, LASX, gen_xx_ptr, gen_helper_vfrintrm_d)
+TRANS(xvfrint_s, LASX, gen_xx_ptr, gen_helper_vfrint_s)
+TRANS(xvfrint_d, LASX, gen_xx_ptr, gen_helper_vfrint_d)
+
+TRANS(vftintrne_w_s, LSX, gen_vv_ptr, gen_helper_vftintrne_w_s)
+TRANS(vftintrne_l_d, LSX, gen_vv_ptr, gen_helper_vftintrne_l_d)
+TRANS(vftintrz_w_s, LSX, gen_vv_ptr, gen_helper_vftintrz_w_s)
+TRANS(vftintrz_l_d, LSX, gen_vv_ptr, gen_helper_vftintrz_l_d)
+TRANS(vftintrp_w_s, LSX, gen_vv_ptr, gen_helper_vftintrp_w_s)
+TRANS(vftintrp_l_d, LSX, gen_vv_ptr, gen_helper_vftintrp_l_d)
+TRANS(vftintrm_w_s, LSX, gen_vv_ptr, gen_helper_vftintrm_w_s)
+TRANS(vftintrm_l_d, LSX, gen_vv_ptr, gen_helper_vftintrm_l_d)
+TRANS(vftint_w_s, LSX, gen_vv_ptr, gen_helper_vftint_w_s)
+TRANS(vftint_l_d, LSX, gen_vv_ptr, gen_helper_vftint_l_d)
+TRANS(vftintrz_wu_s, LSX, gen_vv_ptr, gen_helper_vftintrz_wu_s)
+TRANS(vftintrz_lu_d, LSX, gen_vv_ptr, gen_helper_vftintrz_lu_d)
+TRANS(vftint_wu_s, LSX, gen_vv_ptr, gen_helper_vftint_wu_s)
+TRANS(vftint_lu_d, LSX, gen_vv_ptr, gen_helper_vftint_lu_d)
+TRANS(vftintrne_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrne_w_d)
+TRANS(vftintrz_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrz_w_d)
+TRANS(vftintrp_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrp_w_d)
+TRANS(vftintrm_w_d, LSX, gen_vvv_ptr, gen_helper_vftintrm_w_d)
+TRANS(vftint_w_d, LSX, gen_vvv_ptr, gen_helper_vftint_w_d)
+TRANS(vftintrnel_l_s, LSX, gen_vv_ptr, gen_helper_vftintrnel_l_s)
+TRANS(vftintrneh_l_s, LSX, gen_vv_ptr, gen_helper_vftintrneh_l_s)
+TRANS(vftintrzl_l_s, LSX, gen_vv_ptr, gen_helper_vftintrzl_l_s)
+TRANS(vftintrzh_l_s, LSX, gen_vv_ptr, gen_helper_vftintrzh_l_s)
+TRANS(vftintrpl_l_s, LSX, gen_vv_ptr, gen_helper_vftintrpl_l_s)
+TRANS(vftintrph_l_s, LSX, gen_vv_ptr, gen_helper_vftintrph_l_s)
+TRANS(vftintrml_l_s, LSX, gen_vv_ptr, gen_helper_vftintrml_l_s)
+TRANS(vftintrmh_l_s, LSX, gen_vv_ptr, gen_helper_vftintrmh_l_s)
+TRANS(vftintl_l_s, LSX, gen_vv_ptr, gen_helper_vftintl_l_s)
+TRANS(vftinth_l_s, LSX, gen_vv_ptr, gen_helper_vftinth_l_s)
+TRANS(xvftintrne_w_s, LASX, gen_xx_ptr, gen_helper_vftintrne_w_s)
+TRANS(xvftintrne_l_d, LASX, gen_xx_ptr, gen_helper_vftintrne_l_d)
+TRANS(xvftintrz_w_s, LASX, gen_xx_ptr, gen_helper_vftintrz_w_s)
+TRANS(xvftintrz_l_d, LASX, gen_xx_ptr, gen_helper_vftintrz_l_d)
+TRANS(xvftintrp_w_s, LASX, gen_xx_ptr, gen_helper_vftintrp_w_s)
+TRANS(xvftintrp_l_d, LASX, gen_xx_ptr, gen_helper_vftintrp_l_d)
+TRANS(xvftintrm_w_s, LASX, gen_xx_ptr, gen_helper_vftintrm_w_s)
+TRANS(xvftintrm_l_d, LASX, gen_xx_ptr, gen_helper_vftintrm_l_d)
+TRANS(xvftint_w_s, LASX, gen_xx_ptr, gen_helper_vftint_w_s)
+TRANS(xvftint_l_d, LASX, gen_xx_ptr, gen_helper_vftint_l_d)
+TRANS(xvftintrz_wu_s, LASX, gen_xx_ptr, gen_helper_vftintrz_wu_s)
+TRANS(xvftintrz_lu_d, LASX, gen_xx_ptr, gen_helper_vftintrz_lu_d)
+TRANS(xvftint_wu_s, LASX, gen_xx_ptr, gen_helper_vftint_wu_s)
+TRANS(xvftint_lu_d, LASX, gen_xx_ptr, gen_helper_vftint_lu_d)
+TRANS(xvftintrne_w_d, LASX, gen_xxx_ptr, gen_helper_vftintrne_w_d)
+TRANS(xvftintrz_w_d, LASX, gen_xxx_ptr, gen_helper_vftintrz_w_d)
+TRANS(xvftintrp_w_d, LASX, gen_xxx_ptr, gen_helper_vftintrp_w_d)
+TRANS(xvftintrm_w_d, LASX, gen_xxx_ptr, gen_helper_vftintrm_w_d)
+TRANS(xvftint_w_d, LASX, gen_xxx_ptr, gen_helper_vftint_w_d)
+TRANS(xvftintrnel_l_s, LASX, gen_xx_ptr, gen_helper_vftintrnel_l_s)
+TRANS(xvftintrneh_l_s, LASX, gen_xx_ptr, gen_helper_vftintrneh_l_s)
+TRANS(xvftintrzl_l_s, LASX, gen_xx_ptr, gen_helper_vftintrzl_l_s)
+TRANS(xvftintrzh_l_s, LASX, gen_xx_ptr, gen_helper_vftintrzh_l_s)
+TRANS(xvftintrpl_l_s, LASX, gen_xx_ptr, gen_helper_vftintrpl_l_s)
+TRANS(xvftintrph_l_s, LASX, gen_xx_ptr, gen_helper_vftintrph_l_s)
+TRANS(xvftintrml_l_s, LASX, gen_xx_ptr, gen_helper_vftintrml_l_s)
+TRANS(xvftintrmh_l_s, LASX, gen_xx_ptr, gen_helper_vftintrmh_l_s)
+TRANS(xvftintl_l_s, LASX, gen_xx_ptr, gen_helper_vftintl_l_s)
+TRANS(xvftinth_l_s, LASX, gen_xx_ptr, gen_helper_vftinth_l_s)
+
+TRANS(vffint_s_w, LSX, gen_vv_ptr, gen_helper_vffint_s_w)
+TRANS(vffint_d_l, LSX, gen_vv_ptr, gen_helper_vffint_d_l)
+TRANS(vffint_s_wu, LSX, gen_vv_ptr, gen_helper_vffint_s_wu)
+TRANS(vffint_d_lu, LSX, gen_vv_ptr, gen_helper_vffint_d_lu)
+TRANS(vffintl_d_w, LSX, gen_vv_ptr, gen_helper_vffintl_d_w)
+TRANS(vffinth_d_w, LSX, gen_vv_ptr, gen_helper_vffinth_d_w)
+TRANS(vffint_s_l, LSX, gen_vvv_ptr, gen_helper_vffint_s_l)
+TRANS(xvffint_s_w, LASX, gen_xx_ptr, gen_helper_vffint_s_w)
+TRANS(xvffint_d_l, LASX, gen_xx_ptr, gen_helper_vffint_d_l)
+TRANS(xvffint_s_wu, LASX, gen_xx_ptr, gen_helper_vffint_s_wu)
+TRANS(xvffint_d_lu, LASX, gen_xx_ptr, gen_helper_vffint_d_lu)
+TRANS(xvffintl_d_w, LASX, gen_xx_ptr, gen_helper_vffintl_d_w)
+TRANS(xvffinth_d_w, LASX, gen_xx_ptr, gen_helper_vffinth_d_w)
+TRANS(xvffint_s_l, LASX, gen_xxx_ptr, gen_helper_vffint_s_l)
+
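+/*
+ * Integer compares expand straight to gvec cmp ops: each element of vd is
+ * set to all ones when the condition holds, otherwise to zero.  oprsz is
+ * 16 bytes for LSX and 32 bytes for LASX.
+ */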
+static bool do_cmp_vl(DisasContext *ctx, arg_vvv *a,
+ uint32_t oprsz, MemOp mop, TCGCond cond)
+{
+ uint32_t vd_ofs, vj_ofs, vk_ofs;
+
+ if (!check_vec(ctx, oprsz)) {
+ return true;
+ }
+
+ vd_ofs = vec_full_offset(a->vd);
+ vj_ofs = vec_full_offset(a->vj);
+ vk_ofs = vec_full_offset(a->vk);
+
+ tcg_gen_gvec_cmp(cond, mop, vd_ofs, vj_ofs, vk_ofs, oprsz, ctx->vl / 8);
+ return true;
+}
+
+static bool do_cmp(DisasContext *ctx, arg_vvv *a,
+ MemOp mop, TCGCond cond)
+{
+ return do_cmp_vl(ctx, a, 16, mop, cond);
+}
+
+static bool do_xcmp(DisasContext *ctx, arg_vvv *a,
+ MemOp mop, TCGCond cond)
+{
+ return do_cmp_vl(ctx, a, 32, mop, cond);
+}
+
+static bool do_cmpi_vl(DisasContext *ctx, arg_vv_i *a,
+ uint32_t oprsz, MemOp mop, TCGCond cond)
+{
+ uint32_t vd_ofs, vj_ofs;
+
+ if (!check_vec(ctx, oprsz)) {
+ return true;
+ }
+
+ vd_ofs = vec_full_offset(a->vd);
+ vj_ofs = vec_full_offset(a->vj);
+
+ tcg_gen_gvec_cmpi(cond, mop, vd_ofs, vj_ofs, a->imm, oprsz, ctx->vl / 8);
+ return true;
+}
+
+static bool do_cmpi(DisasContext *ctx, arg_vv_i *a,
+ MemOp mop, TCGCond cond)
+{
+ return do_cmpi_vl(ctx, a, 16, mop, cond);
+}
+
+static bool do_xcmpi(DisasContext *ctx, arg_vv_i *a,
+ MemOp mop, TCGCond cond)
+{
+ return do_cmpi_vl(ctx, a, 32, mop, cond);
+}
+
+TRANS(vseq_b, LSX, do_cmp, MO_8, TCG_COND_EQ)
+TRANS(vseq_h, LSX, do_cmp, MO_16, TCG_COND_EQ)
+TRANS(vseq_w, LSX, do_cmp, MO_32, TCG_COND_EQ)
+TRANS(vseq_d, LSX, do_cmp, MO_64, TCG_COND_EQ)
+TRANS(vseqi_b, LSX, do_cmpi, MO_8, TCG_COND_EQ)
+TRANS(vseqi_h, LSX, do_cmpi, MO_16, TCG_COND_EQ)
+TRANS(vseqi_w, LSX, do_cmpi, MO_32, TCG_COND_EQ)
+TRANS(vseqi_d, LSX, do_cmpi, MO_64, TCG_COND_EQ)
+TRANS(xvseq_b, LASX, do_xcmp, MO_8, TCG_COND_EQ)
+TRANS(xvseq_h, LASX, do_xcmp, MO_16, TCG_COND_EQ)
+TRANS(xvseq_w, LASX, do_xcmp, MO_32, TCG_COND_EQ)
+TRANS(xvseq_d, LASX, do_xcmp, MO_64, TCG_COND_EQ)
+TRANS(xvseqi_b, LASX, do_xcmpi, MO_8, TCG_COND_EQ)
+TRANS(xvseqi_h, LASX, do_xcmpi, MO_16, TCG_COND_EQ)
+TRANS(xvseqi_w, LASX, do_xcmpi, MO_32, TCG_COND_EQ)
+TRANS(xvseqi_d, LASX, do_xcmpi, MO_64, TCG_COND_EQ)
+
+TRANS(vsle_b, LSX, do_cmp, MO_8, TCG_COND_LE)
+TRANS(vsle_h, LSX, do_cmp, MO_16, TCG_COND_LE)
+TRANS(vsle_w, LSX, do_cmp, MO_32, TCG_COND_LE)
+TRANS(vsle_d, LSX, do_cmp, MO_64, TCG_COND_LE)
+TRANS(vslei_b, LSX, do_cmpi, MO_8, TCG_COND_LE)
+TRANS(vslei_h, LSX, do_cmpi, MO_16, TCG_COND_LE)
+TRANS(vslei_w, LSX, do_cmpi, MO_32, TCG_COND_LE)
+TRANS(vslei_d, LSX, do_cmpi, MO_64, TCG_COND_LE)
+TRANS(vsle_bu, LSX, do_cmp, MO_8, TCG_COND_LEU)
+TRANS(vsle_hu, LSX, do_cmp, MO_16, TCG_COND_LEU)
+TRANS(vsle_wu, LSX, do_cmp, MO_32, TCG_COND_LEU)
+TRANS(vsle_du, LSX, do_cmp, MO_64, TCG_COND_LEU)
+TRANS(vslei_bu, LSX, do_cmpi, MO_8, TCG_COND_LEU)
+TRANS(vslei_hu, LSX, do_cmpi, MO_16, TCG_COND_LEU)
+TRANS(vslei_wu, LSX, do_cmpi, MO_32, TCG_COND_LEU)
+TRANS(vslei_du, LSX, do_cmpi, MO_64, TCG_COND_LEU)
+TRANS(xvsle_b, LASX, do_xcmp, MO_8, TCG_COND_LE)
+TRANS(xvsle_h, LASX, do_xcmp, MO_16, TCG_COND_LE)
+TRANS(xvsle_w, LASX, do_xcmp, MO_32, TCG_COND_LE)
+TRANS(xvsle_d, LASX, do_xcmp, MO_64, TCG_COND_LE)
+TRANS(xvslei_b, LASX, do_xcmpi, MO_8, TCG_COND_LE)
+TRANS(xvslei_h, LASX, do_xcmpi, MO_16, TCG_COND_LE)
+TRANS(xvslei_w, LASX, do_xcmpi, MO_32, TCG_COND_LE)
+TRANS(xvslei_d, LASX, do_xcmpi, MO_64, TCG_COND_LE)
+TRANS(xvsle_bu, LASX, do_xcmp, MO_8, TCG_COND_LEU)
+TRANS(xvsle_hu, LASX, do_xcmp, MO_16, TCG_COND_LEU)
+TRANS(xvsle_wu, LASX, do_xcmp, MO_32, TCG_COND_LEU)
+TRANS(xvsle_du, LASX, do_xcmp, MO_64, TCG_COND_LEU)
+TRANS(xvslei_bu, LASX, do_xcmpi, MO_8, TCG_COND_LEU)
+TRANS(xvslei_hu, LASX, do_xcmpi, MO_16, TCG_COND_LEU)
+TRANS(xvslei_wu, LASX, do_xcmpi, MO_32, TCG_COND_LEU)
+TRANS(xvslei_du, LASX, do_xcmpi, MO_64, TCG_COND_LEU)
+
+TRANS(vslt_b, LSX, do_cmp, MO_8, TCG_COND_LT)
+TRANS(vslt_h, LSX, do_cmp, MO_16, TCG_COND_LT)
+TRANS(vslt_w, LSX, do_cmp, MO_32, TCG_COND_LT)
+TRANS(vslt_d, LSX, do_cmp, MO_64, TCG_COND_LT)
+TRANS(vslti_b, LSX, do_cmpi, MO_8, TCG_COND_LT)
+TRANS(vslti_h, LSX, do_cmpi, MO_16, TCG_COND_LT)
+TRANS(vslti_w, LSX, do_cmpi, MO_32, TCG_COND_LT)
+TRANS(vslti_d, LSX, do_cmpi, MO_64, TCG_COND_LT)
+TRANS(vslt_bu, LSX, do_cmp, MO_8, TCG_COND_LTU)
+TRANS(vslt_hu, LSX, do_cmp, MO_16, TCG_COND_LTU)
+TRANS(vslt_wu, LSX, do_cmp, MO_32, TCG_COND_LTU)
+TRANS(vslt_du, LSX, do_cmp, MO_64, TCG_COND_LTU)
+TRANS(vslti_bu, LSX, do_cmpi, MO_8, TCG_COND_LTU)
+TRANS(vslti_hu, LSX, do_cmpi, MO_16, TCG_COND_LTU)
+TRANS(vslti_wu, LSX, do_cmpi, MO_32, TCG_COND_LTU)
+TRANS(vslti_du, LSX, do_cmpi, MO_64, TCG_COND_LTU)
+TRANS(xvslt_b, LASX, do_xcmp, MO_8, TCG_COND_LT)
+TRANS(xvslt_h, LASX, do_xcmp, MO_16, TCG_COND_LT)
+TRANS(xvslt_w, LASX, do_xcmp, MO_32, TCG_COND_LT)
+TRANS(xvslt_d, LASX, do_xcmp, MO_64, TCG_COND_LT)
+TRANS(xvslti_b, LASX, do_xcmpi, MO_8, TCG_COND_LT)
+TRANS(xvslti_h, LASX, do_xcmpi, MO_16, TCG_COND_LT)
+TRANS(xvslti_w, LASX, do_xcmpi, MO_32, TCG_COND_LT)
+TRANS(xvslti_d, LASX, do_xcmpi, MO_64, TCG_COND_LT)
+TRANS(xvslt_bu, LASX, do_xcmp, MO_8, TCG_COND_LTU)
+TRANS(xvslt_hu, LASX, do_xcmp, MO_16, TCG_COND_LTU)
+TRANS(xvslt_wu, LASX, do_xcmp, MO_32, TCG_COND_LTU)
+TRANS(xvslt_du, LASX, do_xcmp, MO_64, TCG_COND_LTU)
+TRANS(xvslti_bu, LASX, do_xcmpi, MO_8, TCG_COND_LTU)
+TRANS(xvslti_hu, LASX, do_xcmpi, MO_16, TCG_COND_LTU)
+TRANS(xvslti_wu, LASX, do_xcmpi, MO_32, TCG_COND_LTU)
+TRANS(xvslti_du, LASX, do_xcmpi, MO_64, TCG_COND_LTU)
+
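+/*
+ * FP compares go through helpers: bit 0 of fcond picks the signaling
+ * (vfcmp_s_*) or quiet (vfcmp_c_*) variant, and the remaining bits are
+ * mapped to softfloat compare flags by get_fcmp_flags().
+ */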
+static bool do_vfcmp_cond_s(DisasContext *ctx, arg_vvv_fcond *a, uint32_t sz)
+{
+ uint32_t flags;
+ void (*fn)(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32);
+ TCGv_i32 vd = tcg_constant_i32(a->vd);
+ TCGv_i32 vj = tcg_constant_i32(a->vj);
+ TCGv_i32 vk = tcg_constant_i32(a->vk);
+ TCGv_i32 oprsz = tcg_constant_i32(sz);
+
+ if (!check_vec(ctx, sz)) {
+ return true;
+ }
+
+ fn = (a->fcond & 1 ? gen_helper_vfcmp_s_s : gen_helper_vfcmp_c_s);
+ flags = get_fcmp_flags(a->fcond >> 1);
+ fn(tcg_env, oprsz, vd, vj, vk, tcg_constant_i32(flags));
+
+ return true;
+}
+
+static bool do_vfcmp_cond_d(DisasContext *ctx, arg_vvv_fcond *a, uint32_t sz)
+{
+ uint32_t flags;
+ void (*fn)(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32);
+ TCGv_i32 vd = tcg_constant_i32(a->vd);
+ TCGv_i32 vj = tcg_constant_i32(a->vj);
+ TCGv_i32 vk = tcg_constant_i32(a->vk);
+ TCGv_i32 oprsz = tcg_constant_i32(sz);
+
+ if (!check_vec(ctx, sz)) {
+ return true;
+ }
+
+ fn = (a->fcond & 1 ? gen_helper_vfcmp_s_d : gen_helper_vfcmp_c_d);
+ flags = get_fcmp_flags(a->fcond >> 1);
+ fn(tcg_env, oprsz, vd, vj, vk, tcg_constant_i32(flags));
+
+ return true;
+}
+
+TRANS(vfcmp_cond_s, LSX, do_vfcmp_cond_s, 16)
+TRANS(vfcmp_cond_d, LSX, do_vfcmp_cond_d, 16)
+TRANS(xvfcmp_cond_s, LASX, do_vfcmp_cond_s, 32)
+TRANS(xvfcmp_cond_d, LASX, do_vfcmp_cond_d, 32)
+
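+/* vbitsel.v: vd = (vk & va) | (vj & ~va), expanded as a 64-bit gvec bitsel. */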
+static bool do_vbitsel_v(DisasContext *ctx, arg_vvvv *a, uint32_t oprsz)
+{
+ if (!check_vec(ctx, oprsz)) {
+ return true;
+ }
+
+ tcg_gen_gvec_bitsel(MO_64, vec_full_offset(a->vd), vec_full_offset(a->va),
+ vec_full_offset(a->vk), vec_full_offset(a->vj),
+ oprsz, ctx->vl / 8);
+ return true;
+}
+
+TRANS(vbitsel_v, LSX, do_vbitsel_v, 16)
+TRANS(xvbitsel_v, LASX, do_vbitsel_v, 32)
+
+static void gen_vbitseli(unsigned vece, TCGv_vec a, TCGv_vec b, int64_t imm)
+{
+ tcg_gen_bitsel_vec(vece, a, a, tcg_constant_vec_matching(a, vece, imm), b);
+}
+
+static bool do_vbitseli_b(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz)
+{
+ static const GVecGen2i op = {
+ .fniv = gen_vbitseli,
+ .fnoi = gen_helper_vbitseli_b,
+ .vece = MO_8,
+ .load_dest = true
+ };
+
+ if (!check_vec(ctx, oprsz)) {
+ return true;
+ }
+
+ tcg_gen_gvec_2i(vec_full_offset(a->vd), vec_full_offset(a->vj),
+                    oprsz, ctx->vl / 8, a->imm, &op);
+ return true;
+}
+
+TRANS(vbitseli_b, LSX, do_vbitseli_b, 16)
+TRANS(xvbitseli_b, LASX, do_vbitseli_b, 32)
+
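+/*
+ * vseteqz.v / vsetnez.v: OR the two 64-bit halves of vj together and set
+ * condition flag cd according to whether the 128-bit value is zero.
+ */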
+#define VSET(NAME, COND) \
+static bool trans_## NAME (DisasContext *ctx, arg_cv *a) \
+{ \
+ TCGv_i64 t1, al, ah; \
+ \
+ al = tcg_temp_new_i64(); \
+ ah = tcg_temp_new_i64(); \
+ t1 = tcg_temp_new_i64(); \
+ \
+ get_vreg64(ah, a->vj, 1); \
+ get_vreg64(al, a->vj, 0); \
+ \
+ if (!avail_LSX(ctx)) { \
+ return false; \
+ } \
+ \
+ if (!check_vec(ctx, 16)) { \
+ return true; \
+ } \
+ \
+ tcg_gen_or_i64(t1, al, ah); \
+ tcg_gen_setcondi_i64(COND, t1, t1, 0); \
+ tcg_gen_st8_tl(t1, tcg_env, offsetof(CPULoongArchState, cf[a->cd & 0x7])); \
+ \
+ return true; \
+}
+
+VSET(vseteqz_v, TCG_COND_EQ)
+VSET(vsetnez_v, TCG_COND_NE)
+
+TRANS(vsetanyeqz_b, LSX, gen_cv, gen_helper_vsetanyeqz_b)
+TRANS(vsetanyeqz_h, LSX, gen_cv, gen_helper_vsetanyeqz_h)
+TRANS(vsetanyeqz_w, LSX, gen_cv, gen_helper_vsetanyeqz_w)
+TRANS(vsetanyeqz_d, LSX, gen_cv, gen_helper_vsetanyeqz_d)
+TRANS(vsetallnez_b, LSX, gen_cv, gen_helper_vsetallnez_b)
+TRANS(vsetallnez_h, LSX, gen_cv, gen_helper_vsetallnez_h)
+TRANS(vsetallnez_w, LSX, gen_cv, gen_helper_vsetallnez_w)
+TRANS(vsetallnez_d, LSX, gen_cv, gen_helper_vsetallnez_d)
+
+#define XVSET(NAME, COND) \
+static bool trans_## NAME(DisasContext *ctx, arg_cv * a) \
+{ \
+ TCGv_i64 t1, t2, d[4]; \
+ \
+ d[0] = tcg_temp_new_i64(); \
+ d[1] = tcg_temp_new_i64(); \
+ d[2] = tcg_temp_new_i64(); \
+ d[3] = tcg_temp_new_i64(); \
+ t1 = tcg_temp_new_i64(); \
+ t2 = tcg_temp_new_i64(); \
+ \
+ get_vreg64(d[0], a->vj, 0); \
+ get_vreg64(d[1], a->vj, 1); \
+ get_vreg64(d[2], a->vj, 2); \
+ get_vreg64(d[3], a->vj, 3); \
+ \
+ if (!avail_LASX(ctx)) { \
+ return false; \
+ } \
+ \
+ if (!check_vec(ctx, 32)) { \
+ return true; \
+ } \
+ \
+ tcg_gen_or_i64(t1, d[0], d[1]); \
+ tcg_gen_or_i64(t2, d[2], d[3]); \
+ tcg_gen_or_i64(t1, t2, t1); \
+ tcg_gen_setcondi_i64(COND, t1, t1, 0); \
+ tcg_gen_st8_tl(t1, tcg_env, offsetof(CPULoongArchState, cf[a->cd & 0x7])); \
+ \
+ return true; \
+}
+
+XVSET(xvseteqz_v, TCG_COND_EQ)
+XVSET(xvsetnez_v, TCG_COND_NE)
+
+TRANS(xvsetanyeqz_b, LASX, gen_cx, gen_helper_vsetanyeqz_b)
+TRANS(xvsetanyeqz_h, LASX, gen_cx, gen_helper_vsetanyeqz_h)
+TRANS(xvsetanyeqz_w, LASX, gen_cx, gen_helper_vsetanyeqz_w)
+TRANS(xvsetanyeqz_d, LASX, gen_cx, gen_helper_vsetanyeqz_d)
+TRANS(xvsetallnez_b, LASX, gen_cx, gen_helper_vsetallnez_b)
+TRANS(xvsetallnez_h, LASX, gen_cx, gen_helper_vsetallnez_h)
+TRANS(xvsetallnez_w, LASX, gen_cx, gen_helper_vsetallnez_w)
+TRANS(xvsetallnez_d, LASX, gen_cx, gen_helper_vsetallnez_d)
+
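+/* vinsgr2vr: insert GPR rj into element imm of vector register vd. */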
+static bool gen_g2v_vl(DisasContext *ctx, arg_vr_i *a, uint32_t oprsz, MemOp mop,
+ void (*func)(TCGv, TCGv_ptr, tcg_target_long))
+{
+ TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
+
+ if (!check_vec(ctx, oprsz)) {
+ return true;
+ }
+
+ func(src, tcg_env, vec_reg_offset(a->vd, a->imm, mop));
+
+ return true;
+}
+
+static bool gen_g2v(DisasContext *ctx, arg_vr_i *a, MemOp mop,
+ void (*func)(TCGv, TCGv_ptr, tcg_target_long))
+{
+ return gen_g2v_vl(ctx, a, 16, mop, func);
+}
+
+static bool gen_g2x(DisasContext *ctx, arg_vr_i *a, MemOp mop,
+ void (*func)(TCGv, TCGv_ptr, tcg_target_long))
+{
+ return gen_g2v_vl(ctx, a, 32, mop, func);
+}
+
+TRANS(vinsgr2vr_b, LSX, gen_g2v, MO_8, tcg_gen_st8_i64)
+TRANS(vinsgr2vr_h, LSX, gen_g2v, MO_16, tcg_gen_st16_i64)
+TRANS(vinsgr2vr_w, LSX, gen_g2v, MO_32, tcg_gen_st32_i64)
+TRANS(vinsgr2vr_d, LSX, gen_g2v, MO_64, tcg_gen_st_i64)
+TRANS(xvinsgr2vr_w, LASX, gen_g2x, MO_32, tcg_gen_st32_i64)
+TRANS(xvinsgr2vr_d, LASX, gen_g2x, MO_64, tcg_gen_st_i64)
+
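+/* vpickve2gr: extract element imm of vector register vj into GPR rd. */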
+static bool gen_v2g_vl(DisasContext *ctx, arg_rv_i *a, uint32_t oprsz, MemOp mop,
+ void (*func)(TCGv, TCGv_ptr, tcg_target_long))
+{
+ TCGv dst = gpr_dst(ctx, a->rd, EXT_NONE);
+
+ if (!check_vec(ctx, oprsz)) {
+ return true;
+ }
+
+ func(dst, tcg_env, vec_reg_offset(a->vj, a->imm, mop));
+
+ return true;
+}
+
+static bool gen_v2g(DisasContext *ctx, arg_rv_i *a, MemOp mop,
+ void (*func)(TCGv, TCGv_ptr, tcg_target_long))
+{
+ return gen_v2g_vl(ctx, a, 16, mop, func);
+}
+
+static bool gen_x2g(DisasContext *ctx, arg_rv_i *a, MemOp mop,
+ void (*func)(TCGv, TCGv_ptr, tcg_target_long))
+{
+ return gen_v2g_vl(ctx, a, 32, mop, func);
+}
+
+TRANS(vpickve2gr_b, LSX, gen_v2g, MO_8, tcg_gen_ld8s_i64)
+TRANS(vpickve2gr_h, LSX, gen_v2g, MO_16, tcg_gen_ld16s_i64)
+TRANS(vpickve2gr_w, LSX, gen_v2g, MO_32, tcg_gen_ld32s_i64)
+TRANS(vpickve2gr_d, LSX, gen_v2g, MO_64, tcg_gen_ld_i64)
+TRANS(vpickve2gr_bu, LSX, gen_v2g, MO_8, tcg_gen_ld8u_i64)
+TRANS(vpickve2gr_hu, LSX, gen_v2g, MO_16, tcg_gen_ld16u_i64)
+TRANS(vpickve2gr_wu, LSX, gen_v2g, MO_32, tcg_gen_ld32u_i64)
+TRANS(vpickve2gr_du, LSX, gen_v2g, MO_64, tcg_gen_ld_i64)
+TRANS(xvpickve2gr_w, LASX, gen_x2g, MO_32, tcg_gen_ld32s_i64)
+TRANS(xvpickve2gr_d, LASX, gen_x2g, MO_64, tcg_gen_ld_i64)
+TRANS(xvpickve2gr_wu, LASX, gen_x2g, MO_32, tcg_gen_ld32u_i64)
+TRANS(xvpickve2gr_du, LASX, gen_x2g, MO_64, tcg_gen_ld_i64)
+
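+/* vreplgr2vr: replicate GPR rj into every element of vd. */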
+static bool gvec_dup_vl(DisasContext *ctx, arg_vr *a,
+ uint32_t oprsz, MemOp mop)
+{
+ TCGv src = gpr_src(ctx, a->rj, EXT_NONE);
+
+ if (!check_vec(ctx, oprsz)) {
+ return true;
+ }
+
+ tcg_gen_gvec_dup_i64(mop, vec_full_offset(a->vd),
+                         oprsz, ctx->vl / 8, src);
+ return true;
+}
+
+static bool gvec_dup(DisasContext *ctx, arg_vr *a, MemOp mop)
+{
+ return gvec_dup_vl(ctx, a, 16, mop);
+}
+
+static bool gvec_dupx(DisasContext *ctx, arg_vr *a, MemOp mop)
+{
+ return gvec_dup_vl(ctx, a, 32, mop);
+}
+
+TRANS(vreplgr2vr_b, LSX, gvec_dup, MO_8)
+TRANS(vreplgr2vr_h, LSX, gvec_dup, MO_16)
+TRANS(vreplgr2vr_w, LSX, gvec_dup, MO_32)
+TRANS(vreplgr2vr_d, LSX, gvec_dup, MO_64)
+TRANS(xvreplgr2vr_b, LASX, gvec_dupx, MO_8)
+TRANS(xvreplgr2vr_h, LASX, gvec_dupx, MO_16)
+TRANS(xvreplgr2vr_w, LASX, gvec_dupx, MO_32)
+TRANS(xvreplgr2vr_d, LASX, gvec_dupx, MO_64)
+
+static bool trans_vreplvei_b(DisasContext *ctx, arg_vv_i *a)
+{
+ if (!avail_LSX(ctx)) {
+ return false;
+ }
+
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
+    tcg_gen_gvec_dup_mem(MO_8, vec_full_offset(a->vd),
+                         offsetof(CPULoongArchState,
+                                  fpr[a->vj].vreg.B((a->imm))),
+                         16, ctx->vl / 8);
+ return true;
+}
+
+static bool trans_vreplvei_h(DisasContext *ctx, arg_vv_i *a)
+{
+ if (!avail_LSX(ctx)) {
+ return false;
+ }
+
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
+ tcg_gen_gvec_dup_mem(MO_16, vec_full_offset(a->vd),
+ offsetof(CPULoongArchState,
+ fpr[a->vj].vreg.H((a->imm))),
+                         16, ctx->vl / 8);
+ return true;
+}
+
+static bool trans_vreplvei_w(DisasContext *ctx, arg_vv_i *a)
+{
+ if (!avail_LSX(ctx)) {
+ return false;
+ }
+
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
+ tcg_gen_gvec_dup_mem(MO_32, vec_full_offset(a->vd),
+ offsetof(CPULoongArchState,
+ fpr[a->vj].vreg.W((a->imm))),
+                         16, ctx->vl / 8);
+ return true;
+}
+
+static bool trans_vreplvei_d(DisasContext *ctx, arg_vv_i *a)
+{
+ if (!avail_LSX(ctx)) {
+ return false;
+ }
+
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
+ tcg_gen_gvec_dup_mem(MO_64, vec_full_offset(a->vd),
+ offsetof(CPULoongArchState,
+ fpr[a->vj].vreg.D((a->imm))),
+                         16, ctx->vl / 8);
+ return true;
+}
+
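+/*
+ * vreplve: replicate the element selected by GPR rk (modulo the number of
+ * elements in a 128-bit lane); for LASX each 128-bit lane is replicated
+ * from its own element with the same index.
+ */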
+static bool gen_vreplve_vl(DisasContext *ctx, arg_vvr *a,
+ uint32_t oprsz, int vece, int bit,
+ void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long))
+{
+ int i;
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_ptr t1 = tcg_temp_new_ptr();
+ TCGv_i64 t2 = tcg_temp_new_i64();
+
+ if (!check_vec(ctx, oprsz)) {
+ return true;
+ }
+
+ tcg_gen_andi_i64(t0, gpr_src(ctx, a->rk, EXT_NONE), (LSX_LEN / bit) - 1);
+ tcg_gen_shli_i64(t0, t0, vece);
+ if (HOST_BIG_ENDIAN) {
+ tcg_gen_xori_i64(t0, t0, vece << ((LSX_LEN / bit) - 1));
+ }
+
+ tcg_gen_trunc_i64_ptr(t1, t0);
+ tcg_gen_add_ptr(t1, t1, tcg_env);
+
+ for (i = 0; i < oprsz; i += 16) {
+ func(t2, t1, vec_full_offset(a->vj) + i);
+ tcg_gen_gvec_dup_i64(vece, vec_full_offset(a->vd) + i, 16, 16, t2);
+ }
+
+ return true;
+}
+
+static bool gen_vreplve(DisasContext *ctx, arg_vvr *a, int vece, int bit,
+ void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long))
+{
+ return gen_vreplve_vl(ctx, a, 16, vece, bit, func);
+}
+
+static bool gen_xvreplve(DisasContext *ctx, arg_vvr *a, int vece, int bit,
+ void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long))
+{
+ return gen_vreplve_vl(ctx, a, 32, vece, bit, func);
+}
+
+TRANS(vreplve_b, LSX, gen_vreplve, MO_8, 8, tcg_gen_ld8u_i64)
+TRANS(vreplve_h, LSX, gen_vreplve, MO_16, 16, tcg_gen_ld16u_i64)
+TRANS(vreplve_w, LSX, gen_vreplve, MO_32, 32, tcg_gen_ld32u_i64)
+TRANS(vreplve_d, LSX, gen_vreplve, MO_64, 64, tcg_gen_ld_i64)
+TRANS(xvreplve_b, LASX, gen_xvreplve, MO_8, 8, tcg_gen_ld8u_i64)
+TRANS(xvreplve_h, LASX, gen_xvreplve, MO_16, 16, tcg_gen_ld16u_i64)
+TRANS(xvreplve_w, LASX, gen_xvreplve, MO_32, 32, tcg_gen_ld32u_i64)
+TRANS(xvreplve_d, LASX, gen_xvreplve, MO_64, 64, tcg_gen_ld_i64)
+
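+/* xvrepl128vei: replicate element imm within each 128-bit lane of xj. */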
+static bool gen_xvrepl128(DisasContext *ctx, arg_vv_i *a, MemOp mop)
+{
+ int i;
+
+ if (!check_vec(ctx, 32)) {
+ return true;
+ }
+
+ for (i = 0; i < 32; i += 16) {
+ tcg_gen_gvec_dup_mem(mop, vec_full_offset(a->vd) + i,
+ vec_reg_offset(a->vj, a->imm, mop) + i, 16, 16);
+    }
+ return true;
+}
+
+TRANS(xvrepl128vei_b, LASX, gen_xvrepl128, MO_8)
+TRANS(xvrepl128vei_h, LASX, gen_xvrepl128, MO_16)
+TRANS(xvrepl128vei_w, LASX, gen_xvrepl128, MO_32)
+TRANS(xvrepl128vei_d, LASX, gen_xvrepl128, MO_64)
+
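+/* xvreplve0: replicate element 0 of xj across the whole 256-bit xd. */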
+static bool gen_xvreplve0(DisasContext *ctx, arg_vv *a, MemOp mop)
+{
+ if (!check_vec(ctx, 32)) {
+ return true;
+ }
+
+ tcg_gen_gvec_dup_mem(mop, vec_full_offset(a->vd),
+ vec_full_offset(a->vj), 32, 32);
+ return true;
+}
+
+TRANS(xvreplve0_b, LASX, gen_xvreplve0, MO_8)
+TRANS(xvreplve0_h, LASX, gen_xvreplve0, MO_16)
+TRANS(xvreplve0_w, LASX, gen_xvreplve0, MO_32)
+TRANS(xvreplve0_d, LASX, gen_xvreplve0, MO_64)
+TRANS(xvreplve0_q, LASX, gen_xvreplve0, MO_128)
+
+TRANS(xvinsve0_w, LASX, gen_xx_i, gen_helper_xvinsve0_w)
+TRANS(xvinsve0_d, LASX, gen_xx_i, gen_helper_xvinsve0_d)
+
+TRANS(xvpickve_w, LASX, gen_xx_i, gen_helper_xvpickve_w)
+TRANS(xvpickve_d, LASX, gen_xx_i, gen_helper_xvpickve_d)
+
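+/*
+ * vbsll.v / vbsrl.v: shift each 128-bit lane left or right by imm bytes,
+ * implemented with extract2/shift on the two 64-bit halves of the lane.
+ */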
+static bool do_vbsll_v(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz)
+{
+ int i, ofs;
+
+ if (!check_vec(ctx, oprsz)) {
+ return true;
+ }
+
+ for (i = 0; i < oprsz / 16; i++) {
+ TCGv desthigh = tcg_temp_new_i64();
+ TCGv destlow = tcg_temp_new_i64();
+ TCGv high = tcg_temp_new_i64();
+ TCGv low = tcg_temp_new_i64();
+
+ get_vreg64(low, a->vj, 2 * i);
+
+ ofs = ((a->imm) & 0xf) * 8;
+ if (ofs < 64) {
+ get_vreg64(high, a->vj, 2 * i + 1);
+ tcg_gen_extract2_i64(desthigh, low, high, 64 - ofs);
+ tcg_gen_shli_i64(destlow, low, ofs);
+ } else {
+ tcg_gen_shli_i64(desthigh, low, ofs - 64);
+ destlow = tcg_constant_i64(0);
+ }
+ set_vreg64(desthigh, a->vd, 2 * i + 1);
+ set_vreg64(destlow, a->vd, 2 * i);
+ }
+
+ return true;
+}
+
+static bool do_vbsrl_v(DisasContext *ctx, arg_vv_i *a, uint32_t oprsz)
+{
+ int i, ofs;
+
+    if (!check_vec(ctx, oprsz)) {
+ return true;
+ }
+
+ for (i = 0; i < oprsz / 16; i++) {
+ TCGv desthigh = tcg_temp_new_i64();
+ TCGv destlow = tcg_temp_new_i64();
+ TCGv high = tcg_temp_new_i64();
+ TCGv low = tcg_temp_new_i64();
+ get_vreg64(high, a->vj, 2 * i + 1);
+
+ ofs = ((a->imm) & 0xf) * 8;
+ if (ofs < 64) {
+ get_vreg64(low, a->vj, 2 * i);
+ tcg_gen_extract2_i64(destlow, low, high, ofs);
+ tcg_gen_shri_i64(desthigh, high, ofs);
+ } else {
+ tcg_gen_shri_i64(destlow, high, ofs - 64);
+ desthigh = tcg_constant_i64(0);
+ }
+ set_vreg64(desthigh, a->vd, 2 * i + 1);
+ set_vreg64(destlow, a->vd, 2 * i);
+ }
+
+ return true;
+}
+
+TRANS(vbsll_v, LSX, do_vbsll_v, 16)
+TRANS(vbsrl_v, LSX, do_vbsrl_v, 16)
+TRANS(xvbsll_v, LASX, do_vbsll_v, 32)
+TRANS(xvbsrl_v, LASX, do_vbsrl_v, 32)
+
+TRANS(vpackev_b, LSX, gen_vvv, gen_helper_vpackev_b)
+TRANS(vpackev_h, LSX, gen_vvv, gen_helper_vpackev_h)
+TRANS(vpackev_w, LSX, gen_vvv, gen_helper_vpackev_w)
+TRANS(vpackev_d, LSX, gen_vvv, gen_helper_vpackev_d)
+TRANS(vpackod_b, LSX, gen_vvv, gen_helper_vpackod_b)
+TRANS(vpackod_h, LSX, gen_vvv, gen_helper_vpackod_h)
+TRANS(vpackod_w, LSX, gen_vvv, gen_helper_vpackod_w)
+TRANS(vpackod_d, LSX, gen_vvv, gen_helper_vpackod_d)
+TRANS(xvpackev_b, LASX, gen_xxx, gen_helper_vpackev_b)
+TRANS(xvpackev_h, LASX, gen_xxx, gen_helper_vpackev_h)
+TRANS(xvpackev_w, LASX, gen_xxx, gen_helper_vpackev_w)
+TRANS(xvpackev_d, LASX, gen_xxx, gen_helper_vpackev_d)
+TRANS(xvpackod_b, LASX, gen_xxx, gen_helper_vpackod_b)
+TRANS(xvpackod_h, LASX, gen_xxx, gen_helper_vpackod_h)
+TRANS(xvpackod_w, LASX, gen_xxx, gen_helper_vpackod_w)
+TRANS(xvpackod_d, LASX, gen_xxx, gen_helper_vpackod_d)
+
+TRANS(vpickev_b, LSX, gen_vvv, gen_helper_vpickev_b)
+TRANS(vpickev_h, LSX, gen_vvv, gen_helper_vpickev_h)
+TRANS(vpickev_w, LSX, gen_vvv, gen_helper_vpickev_w)
+TRANS(vpickev_d, LSX, gen_vvv, gen_helper_vpickev_d)
+TRANS(vpickod_b, LSX, gen_vvv, gen_helper_vpickod_b)
+TRANS(vpickod_h, LSX, gen_vvv, gen_helper_vpickod_h)
+TRANS(vpickod_w, LSX, gen_vvv, gen_helper_vpickod_w)
+TRANS(vpickod_d, LSX, gen_vvv, gen_helper_vpickod_d)
+TRANS(xvpickev_b, LASX, gen_xxx, gen_helper_vpickev_b)
+TRANS(xvpickev_h, LASX, gen_xxx, gen_helper_vpickev_h)
+TRANS(xvpickev_w, LASX, gen_xxx, gen_helper_vpickev_w)
+TRANS(xvpickev_d, LASX, gen_xxx, gen_helper_vpickev_d)
+TRANS(xvpickod_b, LASX, gen_xxx, gen_helper_vpickod_b)
+TRANS(xvpickod_h, LASX, gen_xxx, gen_helper_vpickod_h)
+TRANS(xvpickod_w, LASX, gen_xxx, gen_helper_vpickod_w)
+TRANS(xvpickod_d, LASX, gen_xxx, gen_helper_vpickod_d)
+
+TRANS(vilvl_b, LSX, gen_vvv, gen_helper_vilvl_b)
+TRANS(vilvl_h, LSX, gen_vvv, gen_helper_vilvl_h)
+TRANS(vilvl_w, LSX, gen_vvv, gen_helper_vilvl_w)
+TRANS(vilvl_d, LSX, gen_vvv, gen_helper_vilvl_d)
+TRANS(vilvh_b, LSX, gen_vvv, gen_helper_vilvh_b)
+TRANS(vilvh_h, LSX, gen_vvv, gen_helper_vilvh_h)
+TRANS(vilvh_w, LSX, gen_vvv, gen_helper_vilvh_w)
+TRANS(vilvh_d, LSX, gen_vvv, gen_helper_vilvh_d)
+TRANS(xvilvl_b, LASX, gen_xxx, gen_helper_vilvl_b)
+TRANS(xvilvl_h, LASX, gen_xxx, gen_helper_vilvl_h)
+TRANS(xvilvl_w, LASX, gen_xxx, gen_helper_vilvl_w)
+TRANS(xvilvl_d, LASX, gen_xxx, gen_helper_vilvl_d)
+TRANS(xvilvh_b, LASX, gen_xxx, gen_helper_vilvh_b)
+TRANS(xvilvh_h, LASX, gen_xxx, gen_helper_vilvh_h)
+TRANS(xvilvh_w, LASX, gen_xxx, gen_helper_vilvh_w)
+TRANS(xvilvh_d, LASX, gen_xxx, gen_helper_vilvh_d)
+
+TRANS(vshuf_b, LSX, gen_vvvv, gen_helper_vshuf_b)
+TRANS(vshuf_h, LSX, gen_vvv, gen_helper_vshuf_h)
+TRANS(vshuf_w, LSX, gen_vvv, gen_helper_vshuf_w)
+TRANS(vshuf_d, LSX, gen_vvv, gen_helper_vshuf_d)
+TRANS(xvshuf_b, LASX, gen_xxxx, gen_helper_vshuf_b)
+TRANS(xvshuf_h, LASX, gen_xxx, gen_helper_vshuf_h)
+TRANS(xvshuf_w, LASX, gen_xxx, gen_helper_vshuf_w)
+TRANS(xvshuf_d, LASX, gen_xxx, gen_helper_vshuf_d)
+TRANS(vshuf4i_b, LSX, gen_vv_i, gen_helper_vshuf4i_b)
+TRANS(vshuf4i_h, LSX, gen_vv_i, gen_helper_vshuf4i_h)
+TRANS(vshuf4i_w, LSX, gen_vv_i, gen_helper_vshuf4i_w)
+TRANS(vshuf4i_d, LSX, gen_vv_i, gen_helper_vshuf4i_d)
+TRANS(xvshuf4i_b, LASX, gen_xx_i, gen_helper_vshuf4i_b)
+TRANS(xvshuf4i_h, LASX, gen_xx_i, gen_helper_vshuf4i_h)
+TRANS(xvshuf4i_w, LASX, gen_xx_i, gen_helper_vshuf4i_w)
+TRANS(xvshuf4i_d, LASX, gen_xx_i, gen_helper_vshuf4i_d)
+
+TRANS(xvperm_w, LASX, gen_xxx, gen_helper_vperm_w)
+TRANS(vpermi_w, LSX, gen_vv_i, gen_helper_vpermi_w)
+TRANS(xvpermi_w, LASX, gen_xx_i, gen_helper_vpermi_w)
+TRANS(xvpermi_d, LASX, gen_xx_i, gen_helper_vpermi_d)
+TRANS(xvpermi_q, LASX, gen_xx_i, gen_helper_vpermi_q)
+
+TRANS(vextrins_b, LSX, gen_vv_i, gen_helper_vextrins_b)
+TRANS(vextrins_h, LSX, gen_vv_i, gen_helper_vextrins_h)
+TRANS(vextrins_w, LSX, gen_vv_i, gen_helper_vextrins_w)
+TRANS(vextrins_d, LSX, gen_vv_i, gen_helper_vextrins_d)
+TRANS(xvextrins_b, LASX, gen_xx_i, gen_helper_vextrins_b)
+TRANS(xvextrins_h, LASX, gen_xx_i, gen_helper_vextrins_h)
+TRANS(xvextrins_w, LASX, gen_xx_i, gen_helper_vextrins_w)
+TRANS(xvextrins_d, LASX, gen_xx_i, gen_helper_vextrins_d)
+
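+/*
+ * vld/vst: 128-bit vector load and store with register + immediate
+ * addressing; vldx/vstx below use register + register addressing.
+ */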
+static bool trans_vld(DisasContext *ctx, arg_vr_i *a)
+{
+ TCGv addr;
+ TCGv_i64 rl, rh;
+ TCGv_i128 val;
+
+ if (!avail_LSX(ctx)) {
+ return false;
+ }
+
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
+ addr = gpr_src(ctx, a->rj, EXT_NONE);
+ val = tcg_temp_new_i128();
+ rl = tcg_temp_new_i64();
+ rh = tcg_temp_new_i64();
+
+ addr = make_address_i(ctx, addr, a->imm);
+
+ tcg_gen_qemu_ld_i128(val, addr, ctx->mem_idx, MO_128 | MO_TE);
+ tcg_gen_extr_i128_i64(rl, rh, val);
+ set_vreg64(rh, a->vd, 1);
+ set_vreg64(rl, a->vd, 0);
+
+ return true;
+}
+
+static bool trans_vst(DisasContext *ctx, arg_vr_i *a)
+{
+ TCGv addr;
+ TCGv_i128 val;
+ TCGv_i64 ah, al;
+
+ if (!avail_LSX(ctx)) {
+ return false;
+ }
+
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
+ addr = gpr_src(ctx, a->rj, EXT_NONE);
+ val = tcg_temp_new_i128();
+ ah = tcg_temp_new_i64();
+ al = tcg_temp_new_i64();
+
+ addr = make_address_i(ctx, addr, a->imm);
+
+ get_vreg64(ah, a->vd, 1);
+ get_vreg64(al, a->vd, 0);
+ tcg_gen_concat_i64_i128(val, al, ah);
+ tcg_gen_qemu_st_i128(val, addr, ctx->mem_idx, MO_128 | MO_TE);
+
+ return true;
+}
+
+static bool trans_vldx(DisasContext *ctx, arg_vrr *a)
+{
+ TCGv addr, src1, src2;
+ TCGv_i64 rl, rh;
+ TCGv_i128 val;
+
+ if (!avail_LSX(ctx)) {
+ return false;
+ }
+
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
+ src1 = gpr_src(ctx, a->rj, EXT_NONE);
+ src2 = gpr_src(ctx, a->rk, EXT_NONE);
+ val = tcg_temp_new_i128();
+ rl = tcg_temp_new_i64();
+ rh = tcg_temp_new_i64();
+
+ addr = make_address_x(ctx, src1, src2);
+ tcg_gen_qemu_ld_i128(val, addr, ctx->mem_idx, MO_128 | MO_TE);
+ tcg_gen_extr_i128_i64(rl, rh, val);
+ set_vreg64(rh, a->vd, 1);
+ set_vreg64(rl, a->vd, 0);
+
+ return true;
+}
+
+static bool trans_vstx(DisasContext *ctx, arg_vrr *a)
+{
+ TCGv addr, src1, src2;
+ TCGv_i64 ah, al;
+ TCGv_i128 val;
+
+ if (!avail_LSX(ctx)) {
+ return false;
+ }
+
+ if (!check_vec(ctx, 16)) {
+ return true;
+ }
+
+ src1 = gpr_src(ctx, a->rj, EXT_NONE);
+ src2 = gpr_src(ctx, a->rk, EXT_NONE);
+ val = tcg_temp_new_i128();
+ ah = tcg_temp_new_i64();
+ al = tcg_temp_new_i64();
+
+ addr = make_address_x(ctx, src1, src2);
+ get_vreg64(ah, a->vd, 1);
+ get_vreg64(al, a->vd, 0);
+ tcg_gen_concat_i64_i128(val, al, ah);
+ tcg_gen_qemu_st_i128(val, addr, ctx->mem_idx, MO_128 | MO_TE);
+
+ return true;
+}
+
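+/* Load one element of size 'mop' and replicate it to every vector lane. */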
+static bool do_vldrepl_vl(DisasContext *ctx, arg_vr_i *a,
+ uint32_t oprsz, MemOp mop)
+{
+ TCGv addr;
+ TCGv_i64 val;
+
+ if (!check_vec(ctx, oprsz)) {
+ return true;
+ }
+
+ addr = gpr_src(ctx, a->rj, EXT_NONE);
+ val = tcg_temp_new_i64();
+
+ addr = make_address_i(ctx, addr, a->imm);
+
+ tcg_gen_qemu_ld_i64(val, addr, ctx->mem_idx, mop);
+ tcg_gen_gvec_dup_i64(mop, vec_full_offset(a->vd), oprsz, ctx->vl / 8, val);
+
+ return true;
+}
+
+static bool do_vldrepl(DisasContext *ctx, arg_vr_i *a, MemOp mop)
+{
+ return do_vldrepl_vl(ctx, a, 16, mop);
+}
+
+static bool do_xvldrepl(DisasContext *ctx, arg_vr_i *a, MemOp mop)
+{
+ return do_vldrepl_vl(ctx, a, 32, mop);
+}
+
+TRANS(vldrepl_b, LSX, do_vldrepl, MO_8)
+TRANS(vldrepl_h, LSX, do_vldrepl, MO_16)
+TRANS(vldrepl_w, LSX, do_vldrepl, MO_32)
+TRANS(vldrepl_d, LSX, do_vldrepl, MO_64)
+TRANS(xvldrepl_b, LASX, do_xvldrepl, MO_8)
+TRANS(xvldrepl_h, LASX, do_xvldrepl, MO_16)
+TRANS(xvldrepl_w, LASX, do_xvldrepl, MO_32)
+TRANS(xvldrepl_d, LASX, do_xvldrepl, MO_64)
+
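+/* Store the vector element selected by a->imm2 to memory at rj + imm. */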
+static bool do_vstelm_vl(DisasContext *ctx,
+ arg_vr_ii *a, uint32_t oprsz, MemOp mop)
+{
+ TCGv addr;
+ TCGv_i64 val;
+
+ if (!check_vec(ctx, oprsz)) {
+ return true;
+ }
+
+ addr = gpr_src(ctx, a->rj, EXT_NONE);
+ val = tcg_temp_new_i64();
+
+ addr = make_address_i(ctx, addr, a->imm);
+ tcg_gen_ld_i64(val, tcg_env, vec_reg_offset(a->vd, a->imm2, mop));
+ tcg_gen_qemu_st_i64(val, addr, ctx->mem_idx, mop);
+ return true;
+}
+
+static bool do_vstelm(DisasContext *ctx, arg_vr_ii *a, MemOp mop)
+{
+ return do_vstelm_vl(ctx, a, 16, mop);
+}
+
+static bool do_xvstelm(DisasContext *ctx, arg_vr_ii *a, MemOp mop)
+{
+ return do_vstelm_vl(ctx, a, 32, mop);
+}
+
+TRANS(vstelm_b, LSX, do_vstelm, MO_8)
+TRANS(vstelm_h, LSX, do_vstelm, MO_16)
+TRANS(vstelm_w, LSX, do_vstelm, MO_32)
+TRANS(vstelm_d, LSX, do_vstelm, MO_64)
+TRANS(xvstelm_b, LASX, do_xvstelm, MO_8)
+TRANS(xvstelm_h, LASX, do_xvstelm, MO_16)
+TRANS(xvstelm_w, LASX, do_xvstelm, MO_32)
+TRANS(xvstelm_d, LASX, do_xvstelm, MO_64)
+
+static bool gen_lasx_memory(DisasContext *ctx, arg_vr_i *a,
+ void (*func)(DisasContext *, int, TCGv))
+{
+ TCGv addr = gpr_src(ctx, a->rj, EXT_NONE);
+ TCGv temp = NULL;
+
+ if (!check_vec(ctx, 32)) {
+ return true;
+ }
+
+ if (a->imm) {
+ temp = tcg_temp_new();
+ tcg_gen_addi_tl(temp, addr, a->imm);
+ addr = temp;
+ }
+
+ func(ctx, a->vd, addr);
+ return true;
+}
+
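+/* Whole-register LASX access, split into four 64-bit loads/stores. */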
+static void gen_xvld(DisasContext *ctx, int vreg, TCGv addr)
+{
+ int i;
+ TCGv temp = tcg_temp_new();
+ TCGv dest = tcg_temp_new();
+
+ tcg_gen_qemu_ld_i64(dest, addr, ctx->mem_idx, MO_TEUQ);
+ set_vreg64(dest, vreg, 0);
+
+ for (i = 1; i < 4; i++) {
+ tcg_gen_addi_tl(temp, addr, 8 * i);
+ tcg_gen_qemu_ld_i64(dest, temp, ctx->mem_idx, MO_TEUQ);
+ set_vreg64(dest, vreg, i);
+ }
+}
+
+static void gen_xvst(DisasContext *ctx, int vreg, TCGv addr)
+{
+ int i;
+ TCGv temp = tcg_temp_new();
+ TCGv dest = tcg_temp_new();
+
+ get_vreg64(dest, vreg, 0);
+ tcg_gen_qemu_st_i64(dest, addr, ctx->mem_idx, MO_TEUQ);
+
+ for (i = 1; i < 4; i++) {
+ tcg_gen_addi_tl(temp, addr, 8 * i);
+ get_vreg64(dest, vreg, i);
+ tcg_gen_qemu_st_i64(dest, temp, ctx->mem_idx, MO_TEUQ);
+ }
+}
+
+TRANS(xvld, LASX, gen_lasx_memory, gen_xvld)
+TRANS(xvst, LASX, gen_lasx_memory, gen_xvst)
+
+static bool gen_lasx_memoryx(DisasContext *ctx, arg_vrr *a,
+                             void (*func)(DisasContext *, int, TCGv))
+{
+ TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE);
+ TCGv src2 = gpr_src(ctx, a->rk, EXT_NONE);
+ TCGv addr = tcg_temp_new();
+
+ if (!check_vec(ctx, 32)) {
+ return true;
+ }
+
+ tcg_gen_add_tl(addr, src1, src2);
+ func(ctx, a->vd, addr);
+
+ return true;
+}
+
+TRANS(xvldx, LASX, gen_lasx_memoryx, gen_xvld)
+TRANS(xvstx, LASX, gen_lasx_memoryx, gen_xvst)
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2021 Loongson Technology Corporation Limited
+ *
+ * Helpers for IOCSR reads/writes
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "qemu/host-utils.h"
+#include "exec/helper-proto.h"
+#include "exec/exec-all.h"
+#include "exec/cpu_ldst.h"
+
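+/* IOCSR accesses are tagged with the issuing CPU's index as requester id. */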
+#define GET_MEMTXATTRS(cas) \
+ ((MemTxAttrs){.requester_id = env_cpu(cas)->cpu_index})
+
+uint64_t helper_iocsrrd_b(CPULoongArchState *env, target_ulong r_addr)
+{
+ return address_space_ldub(&env->address_space_iocsr, r_addr,
+ GET_MEMTXATTRS(env), NULL);
+}
+
+uint64_t helper_iocsrrd_h(CPULoongArchState *env, target_ulong r_addr)
+{
+ return address_space_lduw(&env->address_space_iocsr, r_addr,
+ GET_MEMTXATTRS(env), NULL);
+}
+
+uint64_t helper_iocsrrd_w(CPULoongArchState *env, target_ulong r_addr)
+{
+ return address_space_ldl(&env->address_space_iocsr, r_addr,
+ GET_MEMTXATTRS(env), NULL);
+}
+
+uint64_t helper_iocsrrd_d(CPULoongArchState *env, target_ulong r_addr)
+{
+ return address_space_ldq(&env->address_space_iocsr, r_addr,
+ GET_MEMTXATTRS(env), NULL);
+}
+
+void helper_iocsrwr_b(CPULoongArchState *env, target_ulong w_addr,
+ target_ulong val)
+{
+ address_space_stb(&env->address_space_iocsr, w_addr,
+ val, GET_MEMTXATTRS(env), NULL);
+}
+
+void helper_iocsrwr_h(CPULoongArchState *env, target_ulong w_addr,
+ target_ulong val)
+{
+ address_space_stw(&env->address_space_iocsr, w_addr,
+ val, GET_MEMTXATTRS(env), NULL);
+}
+
+void helper_iocsrwr_w(CPULoongArchState *env, target_ulong w_addr,
+ target_ulong val)
+{
+ address_space_stl(&env->address_space_iocsr, w_addr,
+ val, GET_MEMTXATTRS(env), NULL);
+}
+
+void helper_iocsrwr_d(CPULoongArchState *env, target_ulong w_addr,
+ target_ulong val)
+{
+ address_space_stq(&env->address_space_iocsr, w_addr,
+ val, GET_MEMTXATTRS(env), NULL);
+}
--- /dev/null
+if 'CONFIG_TCG' not in config_all_accel
+ subdir_done()
+endif
+
+loongarch_ss.add([zlib, gen])
+
+loongarch_ss.add(files(
+ 'fpu_helper.c',
+ 'op_helper.c',
+ 'translate.c',
+ 'vec_helper.c',
+))
+
+loongarch_system_ss.add(files(
+ 'constant_timer.c',
+ 'csr_helper.c',
+ 'iocsr_helper.c',
+ 'tlb_helper.c',
+))
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * LoongArch emulation helpers for QEMU.
+ *
+ * Copyright (c) 2021 Loongson Technology Corporation Limited
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "cpu.h"
+#include "qemu/host-utils.h"
+#include "exec/helper-proto.h"
+#include "exec/exec-all.h"
+#include "exec/cpu_ldst.h"
+#include "internals.h"
+#include "qemu/crc32c.h"
+#include <zlib.h>
+#include "cpu-csr.h"
+
+/* Exceptions helpers */
+void helper_raise_exception(CPULoongArchState *env, uint32_t exception)
+{
+ do_raise_exception(env, exception, GETPC());
+}
+
+target_ulong helper_bitrev_w(target_ulong rj)
+{
+ return (int32_t)revbit32(rj);
+}
+
+target_ulong helper_bitrev_d(target_ulong rj)
+{
+ return revbit64(rj);
+}
+
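+/* Reverse the bit order within each byte of the operand. */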
+target_ulong helper_bitswap(target_ulong v)
+{
+ v = ((v >> 1) & (target_ulong)0x5555555555555555ULL) |
+ ((v & (target_ulong)0x5555555555555555ULL) << 1);
+ v = ((v >> 2) & (target_ulong)0x3333333333333333ULL) |
+ ((v & (target_ulong)0x3333333333333333ULL) << 2);
+ v = ((v >> 4) & (target_ulong)0x0F0F0F0F0F0F0F0FULL) |
+ ((v & (target_ulong)0x0F0F0F0F0F0F0F0FULL) << 4);
+ return v;
+}
+
+/* LoongArch bound-check assert operations */
+void helper_asrtle_d(CPULoongArchState *env, target_ulong rj, target_ulong rk)
+{
+ if (rj > rk) {
+ env->CSR_BADV = rj;
+ do_raise_exception(env, EXCCODE_BCE, GETPC());
+ }
+}
+
+void helper_asrtgt_d(CPULoongArchState *env, target_ulong rj, target_ulong rk)
+{
+ if (rj <= rk) {
+ env->CSR_BADV = rj;
+ do_raise_exception(env, EXCCODE_BCE, GETPC());
+ }
+}
+
+target_ulong helper_crc32(target_ulong val, target_ulong m, uint64_t sz)
+{
+ uint8_t buf[8];
+ target_ulong mask = ((sz * 8) == 64) ? -1ULL : ((1ULL << (sz * 8)) - 1);
+
+ m &= mask;
+ stq_le_p(buf, m);
+ return (int32_t) (crc32(val ^ 0xffffffff, buf, sz) ^ 0xffffffff);
+}
+
+target_ulong helper_crc32c(target_ulong val, target_ulong m, uint64_t sz)
+{
+ uint8_t buf[8];
+ target_ulong mask = ((sz * 8) == 64) ? -1ULL : ((1ULL << (sz * 8)) - 1);
+ m &= mask;
+ stq_le_p(buf, m);
+ return (int32_t) (crc32c(val, buf, sz) ^ 0xffffffff);
+}
+
+target_ulong helper_cpucfg(CPULoongArchState *env, target_ulong rj)
+{
+ return rj >= ARRAY_SIZE(env->cpucfg) ? 0 : env->cpucfg[rj];
+}
+
+uint64_t helper_rdtime_d(CPULoongArchState *env)
+{
+#ifdef CONFIG_USER_ONLY
+ return cpu_get_host_ticks();
+#else
+ uint64_t plv;
+ LoongArchCPU *cpu = env_archcpu(env);
+
+ plv = FIELD_EX64(env->CSR_CRMD, CSR_CRMD, PLV);
+ if (extract64(env->CSR_MISC, R_CSR_MISC_DRDTL_SHIFT + plv, 1)) {
+ do_raise_exception(env, EXCCODE_IPE, GETPC());
+ }
+
+ return cpu_loongarch_get_constant_timer_counter(cpu);
+#endif
+}
+
+#ifndef CONFIG_USER_ONLY
+void helper_ertn(CPULoongArchState *env)
+{
+ uint64_t csr_pplv, csr_pie;
+ if (FIELD_EX64(env->CSR_TLBRERA, CSR_TLBRERA, ISTLBR)) {
+ csr_pplv = FIELD_EX64(env->CSR_TLBRPRMD, CSR_TLBRPRMD, PPLV);
+ csr_pie = FIELD_EX64(env->CSR_TLBRPRMD, CSR_TLBRPRMD, PIE);
+
+ env->CSR_TLBRERA = FIELD_DP64(env->CSR_TLBRERA, CSR_TLBRERA, ISTLBR, 0);
+ env->CSR_CRMD = FIELD_DP64(env->CSR_CRMD, CSR_CRMD, DA, 0);
+ env->CSR_CRMD = FIELD_DP64(env->CSR_CRMD, CSR_CRMD, PG, 1);
+ set_pc(env, env->CSR_TLBRERA);
+ qemu_log_mask(CPU_LOG_INT, "%s: TLBRERA " TARGET_FMT_lx "\n",
+ __func__, env->CSR_TLBRERA);
+ } else {
+ csr_pplv = FIELD_EX64(env->CSR_PRMD, CSR_PRMD, PPLV);
+ csr_pie = FIELD_EX64(env->CSR_PRMD, CSR_PRMD, PIE);
+
+ set_pc(env, env->CSR_ERA);
+ qemu_log_mask(CPU_LOG_INT, "%s: ERA " TARGET_FMT_lx "\n",
+ __func__, env->CSR_ERA);
+ }
+ env->CSR_CRMD = FIELD_DP64(env->CSR_CRMD, CSR_CRMD, PLV, csr_pplv);
+ env->CSR_CRMD = FIELD_DP64(env->CSR_CRMD, CSR_CRMD, IE, csr_pie);
+
+ env->lladdr = 1;
+}
+
+void helper_idle(CPULoongArchState *env)
+{
+ CPUState *cs = env_cpu(env);
+
+ cs->halted = 1;
+ do_raise_exception(env, EXCP_HLT, 0);
+}
+#endif
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * QEMU LoongArch TLB helpers
+ *
+ * Copyright (c) 2021 Loongson Technology Corporation Limited
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/guest-random.h"
+
+#include "cpu.h"
+#include "internals.h"
+#include "exec/helper-proto.h"
+#include "exec/exec-all.h"
+#include "exec/cpu_ldst.h"
+#include "exec/log.h"
+#include "cpu-csr.h"
+
+enum {
+ TLBRET_MATCH = 0,
+ TLBRET_BADADDR = 1,
+ TLBRET_NOMATCH = 2,
+ TLBRET_INVALID = 3,
+ TLBRET_DIRTY = 4,
+ TLBRET_RI = 5,
+ TLBRET_XI = 6,
+ TLBRET_PE = 7,
+};
+
+static int loongarch_map_tlb_entry(CPULoongArchState *env, hwaddr *physical,
+ int *prot, target_ulong address,
+ int access_type, int index, int mmu_idx)
+{
+ LoongArchTLB *tlb = &env->tlb[index];
+ uint64_t plv = mmu_idx;
+ uint64_t tlb_entry, tlb_ppn;
+ uint8_t tlb_ps, n, tlb_v, tlb_d, tlb_plv, tlb_nx, tlb_nr, tlb_rplv;
+
+ if (index >= LOONGARCH_STLB) {
+ tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS);
+ } else {
+ tlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS);
+ }
+    n = (address >> tlb_ps) & 0x1; /* Odd or even */
+
+ tlb_entry = n ? tlb->tlb_entry1 : tlb->tlb_entry0;
+ tlb_v = FIELD_EX64(tlb_entry, TLBENTRY, V);
+ tlb_d = FIELD_EX64(tlb_entry, TLBENTRY, D);
+ tlb_plv = FIELD_EX64(tlb_entry, TLBENTRY, PLV);
+ if (is_la64(env)) {
+ tlb_ppn = FIELD_EX64(tlb_entry, TLBENTRY_64, PPN);
+ tlb_nx = FIELD_EX64(tlb_entry, TLBENTRY_64, NX);
+ tlb_nr = FIELD_EX64(tlb_entry, TLBENTRY_64, NR);
+ tlb_rplv = FIELD_EX64(tlb_entry, TLBENTRY_64, RPLV);
+ } else {
+ tlb_ppn = FIELD_EX64(tlb_entry, TLBENTRY_32, PPN);
+ tlb_nx = 0;
+ tlb_nr = 0;
+ tlb_rplv = 0;
+ }
+
+    /* Remove the software bits between bit 12 and bit PS */
+    tlb_ppn = tlb_ppn & ~((0x1UL << (tlb_ps - 12)) - 1);
+
+ /* Check access rights */
+ if (!tlb_v) {
+ return TLBRET_INVALID;
+ }
+
+ if (access_type == MMU_INST_FETCH && tlb_nx) {
+ return TLBRET_XI;
+ }
+
+ if (access_type == MMU_DATA_LOAD && tlb_nr) {
+ return TLBRET_RI;
+ }
+
+ if (((tlb_rplv == 0) && (plv > tlb_plv)) ||
+ ((tlb_rplv == 1) && (plv != tlb_plv))) {
+ return TLBRET_PE;
+ }
+
+ if ((access_type == MMU_DATA_STORE) && !tlb_d) {
+ return TLBRET_DIRTY;
+ }
+
+ *physical = (tlb_ppn << R_TLBENTRY_64_PPN_SHIFT) |
+ (address & MAKE_64BIT_MASK(0, tlb_ps));
+ *prot = PAGE_READ;
+ if (tlb_d) {
+ *prot |= PAGE_WRITE;
+ }
+ if (!tlb_nx) {
+ *prot |= PAGE_EXEC;
+ }
+ return TLBRET_MATCH;
+}
+
+/*
+ * One TLB entry holds an adjacent odd/even page pair, so the VPN used
+ * for comparison is the virtual page number divided by 2, i.e. bits
+ * [47:15] for a 16KiB page. The VPPN field of a TLB entry contains
+ * bits [47:13], so it must be shifted before the comparison.
+ * virt_vpn = vaddr[47:13]
+ */
+static bool loongarch_tlb_search(CPULoongArchState *env, target_ulong vaddr,
+ int *index)
+{
+ LoongArchTLB *tlb;
+ uint16_t csr_asid, tlb_asid, stlb_idx;
+ uint8_t tlb_e, tlb_ps, tlb_g, stlb_ps;
+ int i, compare_shift;
+ uint64_t vpn, tlb_vppn;
+
+ csr_asid = FIELD_EX64(env->CSR_ASID, CSR_ASID, ASID);
+ stlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS);
+ vpn = (vaddr & TARGET_VIRT_MASK) >> (stlb_ps + 1);
+ stlb_idx = vpn & 0xff; /* VA[25:15] <==> TLBIDX.index for 16KiB Page */
+ compare_shift = stlb_ps + 1 - R_TLB_MISC_VPPN_SHIFT;
+
+ /* Search STLB */
+ for (i = 0; i < 8; ++i) {
+ tlb = &env->tlb[i * 256 + stlb_idx];
+ tlb_e = FIELD_EX64(tlb->tlb_misc, TLB_MISC, E);
+ if (tlb_e) {
+ tlb_vppn = FIELD_EX64(tlb->tlb_misc, TLB_MISC, VPPN);
+ tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID);
+ tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G);
+
+ if ((tlb_g == 1 || tlb_asid == csr_asid) &&
+ (vpn == (tlb_vppn >> compare_shift))) {
+ *index = i * 256 + stlb_idx;
+ return true;
+ }
+ }
+ }
+
+ /* Search MTLB */
+ for (i = LOONGARCH_STLB; i < LOONGARCH_TLB_MAX; ++i) {
+ tlb = &env->tlb[i];
+ tlb_e = FIELD_EX64(tlb->tlb_misc, TLB_MISC, E);
+ if (tlb_e) {
+ tlb_vppn = FIELD_EX64(tlb->tlb_misc, TLB_MISC, VPPN);
+ tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS);
+ tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID);
+ tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G);
+ compare_shift = tlb_ps + 1 - R_TLB_MISC_VPPN_SHIFT;
+ vpn = (vaddr & TARGET_VIRT_MASK) >> (tlb_ps + 1);
+ if ((tlb_g == 1 || tlb_asid == csr_asid) &&
+ (vpn == (tlb_vppn >> compare_shift))) {
+ *index = i;
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+static int loongarch_map_address(CPULoongArchState *env, hwaddr *physical,
+ int *prot, target_ulong address,
+ MMUAccessType access_type, int mmu_idx)
+{
+ int index, match;
+
+ match = loongarch_tlb_search(env, address, &index);
+ if (match) {
+ return loongarch_map_tlb_entry(env, physical, prot,
+ address, access_type, index, mmu_idx);
+ }
+
+ return TLBRET_NOMATCH;
+}
+
+static hwaddr dmw_va2pa(CPULoongArchState *env, target_ulong va,
+ target_ulong dmw)
+{
+ if (is_la64(env)) {
+ return va & TARGET_VIRT_MASK;
+ } else {
+ uint32_t pseg = FIELD_EX32(dmw, CSR_DMW_32, PSEG);
+ return (va & MAKE_64BIT_MASK(0, R_CSR_DMW_32_VSEG_SHIFT)) | \
+ (pseg << R_CSR_DMW_32_VSEG_SHIFT);
+ }
+}
+
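+/*
+ * Virtual address translation: direct address mode first, then the
+ * direct mapped windows, then an address range check, finally the TLB.
+ */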
+static int get_physical_address(CPULoongArchState *env, hwaddr *physical,
+ int *prot, target_ulong address,
+ MMUAccessType access_type, int mmu_idx)
+{
+ int user_mode = mmu_idx == MMU_IDX_USER;
+ int kernel_mode = mmu_idx == MMU_IDX_KERNEL;
+ uint32_t plv, base_c, base_v;
+ int64_t addr_high;
+ uint8_t da = FIELD_EX64(env->CSR_CRMD, CSR_CRMD, DA);
+ uint8_t pg = FIELD_EX64(env->CSR_CRMD, CSR_CRMD, PG);
+
+ /* Check PG and DA */
+ if (da & !pg) {
+ *physical = address & TARGET_PHYS_MASK;
+ *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
+ return TLBRET_MATCH;
+ }
+
+ plv = kernel_mode | (user_mode << R_CSR_DMW_PLV3_SHIFT);
+ if (is_la64(env)) {
+ base_v = address >> R_CSR_DMW_64_VSEG_SHIFT;
+ } else {
+ base_v = address >> R_CSR_DMW_32_VSEG_SHIFT;
+ }
+ /* Check direct map window */
+ for (int i = 0; i < 4; i++) {
+ if (is_la64(env)) {
+ base_c = FIELD_EX64(env->CSR_DMW[i], CSR_DMW_64, VSEG);
+ } else {
+ base_c = FIELD_EX64(env->CSR_DMW[i], CSR_DMW_32, VSEG);
+ }
+ if ((plv & env->CSR_DMW[i]) && (base_c == base_v)) {
+ *physical = dmw_va2pa(env, address, env->CSR_DMW[i]);
+ *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
+ return TLBRET_MATCH;
+ }
+ }
+
+ /* Check valid extension */
+ addr_high = sextract64(address, TARGET_VIRT_ADDR_SPACE_BITS, 16);
+ if (!(addr_high == 0 || addr_high == -1)) {
+ return TLBRET_BADADDR;
+ }
+
+ /* Mapped address */
+ return loongarch_map_address(env, physical, prot, address,
+ access_type, mmu_idx);
+}
+
+hwaddr loongarch_cpu_get_phys_page_debug(CPUState *cs, vaddr addr)
+{
+ LoongArchCPU *cpu = LOONGARCH_CPU(cs);
+ CPULoongArchState *env = &cpu->env;
+ hwaddr phys_addr;
+ int prot;
+
+ if (get_physical_address(env, &phys_addr, &prot, addr, MMU_DATA_LOAD,
+ cpu_mmu_index(env, false)) != 0) {
+ return -1;
+ }
+ return phys_addr;
+}
+
+static void raise_mmu_exception(CPULoongArchState *env, target_ulong address,
+ MMUAccessType access_type, int tlb_error)
+{
+ CPUState *cs = env_cpu(env);
+
+ switch (tlb_error) {
+ default:
+ case TLBRET_BADADDR:
+ cs->exception_index = access_type == MMU_INST_FETCH
+ ? EXCCODE_ADEF : EXCCODE_ADEM;
+ break;
+ case TLBRET_NOMATCH:
+ /* No TLB match for a mapped address */
+ if (access_type == MMU_DATA_LOAD) {
+ cs->exception_index = EXCCODE_PIL;
+ } else if (access_type == MMU_DATA_STORE) {
+ cs->exception_index = EXCCODE_PIS;
+ } else if (access_type == MMU_INST_FETCH) {
+ cs->exception_index = EXCCODE_PIF;
+ }
+ env->CSR_TLBRERA = FIELD_DP64(env->CSR_TLBRERA, CSR_TLBRERA, ISTLBR, 1);
+ break;
+ case TLBRET_INVALID:
+ /* TLB match with no valid bit */
+ if (access_type == MMU_DATA_LOAD) {
+ cs->exception_index = EXCCODE_PIL;
+ } else if (access_type == MMU_DATA_STORE) {
+ cs->exception_index = EXCCODE_PIS;
+ } else if (access_type == MMU_INST_FETCH) {
+ cs->exception_index = EXCCODE_PIF;
+ }
+ break;
+ case TLBRET_DIRTY:
+ /* TLB match but 'D' bit is cleared */
+ cs->exception_index = EXCCODE_PME;
+ break;
+ case TLBRET_XI:
+ /* Execute-Inhibit Exception */
+ cs->exception_index = EXCCODE_PNX;
+ break;
+ case TLBRET_RI:
+ /* Read-Inhibit Exception */
+ cs->exception_index = EXCCODE_PNR;
+ break;
+ case TLBRET_PE:
+ /* Privileged Exception */
+ cs->exception_index = EXCCODE_PPI;
+ break;
+ }
+
+ if (tlb_error == TLBRET_NOMATCH) {
+ env->CSR_TLBRBADV = address;
+ if (is_la64(env)) {
+ env->CSR_TLBREHI = FIELD_DP64(env->CSR_TLBREHI, CSR_TLBREHI_64,
+ VPPN, extract64(address, 13, 35));
+ } else {
+ env->CSR_TLBREHI = FIELD_DP64(env->CSR_TLBREHI, CSR_TLBREHI_32,
+ VPPN, extract64(address, 13, 19));
+ }
+ } else {
+ if (!FIELD_EX64(env->CSR_DBG, CSR_DBG, DST)) {
+ env->CSR_BADV = address;
+ }
+ env->CSR_TLBEHI = address & (TARGET_PAGE_MASK << 1);
+ }
+}
+
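+/* Flush softmmu mappings for the even/odd pages covered by a TLB entry. */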
+static void invalidate_tlb_entry(CPULoongArchState *env, int index)
+{
+ target_ulong addr, mask, pagesize;
+ uint8_t tlb_ps;
+ LoongArchTLB *tlb = &env->tlb[index];
+
+ int mmu_idx = cpu_mmu_index(env, false);
+ uint8_t tlb_v0 = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, V);
+ uint8_t tlb_v1 = FIELD_EX64(tlb->tlb_entry1, TLBENTRY, V);
+ uint64_t tlb_vppn = FIELD_EX64(tlb->tlb_misc, TLB_MISC, VPPN);
+
+ if (index >= LOONGARCH_STLB) {
+ tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS);
+ } else {
+ tlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS);
+ }
+ pagesize = MAKE_64BIT_MASK(tlb_ps, 1);
+ mask = MAKE_64BIT_MASK(0, tlb_ps + 1);
+
+ if (tlb_v0) {
+ addr = (tlb_vppn << R_TLB_MISC_VPPN_SHIFT) & ~mask; /* even */
+ tlb_flush_range_by_mmuidx(env_cpu(env), addr, pagesize,
+ mmu_idx, TARGET_LONG_BITS);
+ }
+
+ if (tlb_v1) {
+ addr = (tlb_vppn << R_TLB_MISC_VPPN_SHIFT) & pagesize; /* odd */
+ tlb_flush_range_by_mmuidx(env_cpu(env), addr, pagesize,
+ mmu_idx, TARGET_LONG_BITS);
+ }
+}
+
+static void invalidate_tlb(CPULoongArchState *env, int index)
+{
+ LoongArchTLB *tlb;
+ uint16_t csr_asid, tlb_asid, tlb_g;
+
+ csr_asid = FIELD_EX64(env->CSR_ASID, CSR_ASID, ASID);
+ tlb = &env->tlb[index];
+ tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID);
+ tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G);
+ if (tlb_g == 0 && tlb_asid != csr_asid) {
+ return;
+ }
+ invalidate_tlb_entry(env, index);
+}
+
+static void fill_tlb_entry(CPULoongArchState *env, int index)
+{
+ LoongArchTLB *tlb = &env->tlb[index];
+ uint64_t lo0, lo1, csr_vppn;
+ uint16_t csr_asid;
+ uint8_t csr_ps;
+
+ if (FIELD_EX64(env->CSR_TLBRERA, CSR_TLBRERA, ISTLBR)) {
+ csr_ps = FIELD_EX64(env->CSR_TLBREHI, CSR_TLBREHI, PS);
+ if (is_la64(env)) {
+ csr_vppn = FIELD_EX64(env->CSR_TLBREHI, CSR_TLBREHI_64, VPPN);
+ } else {
+ csr_vppn = FIELD_EX64(env->CSR_TLBREHI, CSR_TLBREHI_32, VPPN);
+ }
+ lo0 = env->CSR_TLBRELO0;
+ lo1 = env->CSR_TLBRELO1;
+ } else {
+ csr_ps = FIELD_EX64(env->CSR_TLBIDX, CSR_TLBIDX, PS);
+ if (is_la64(env)) {
+ csr_vppn = FIELD_EX64(env->CSR_TLBEHI, CSR_TLBEHI_64, VPPN);
+ } else {
+ csr_vppn = FIELD_EX64(env->CSR_TLBEHI, CSR_TLBEHI_32, VPPN);
+ }
+ lo0 = env->CSR_TLBELO0;
+ lo1 = env->CSR_TLBELO1;
+ }
+
+ if (csr_ps == 0) {
+ qemu_log_mask(CPU_LOG_MMU, "page size is 0\n");
+ }
+
+    /* Only the MTLB has a per-entry ps field */
+ if (index >= LOONGARCH_STLB) {
+ tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, PS, csr_ps);
+ }
+
+ tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, VPPN, csr_vppn);
+ tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, E, 1);
+ csr_asid = FIELD_EX64(env->CSR_ASID, CSR_ASID, ASID);
+ tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, ASID, csr_asid);
+
+ tlb->tlb_entry0 = lo0;
+ tlb->tlb_entry1 = lo1;
+}
+
+/* Return a random value between low and high, inclusive */
+static uint32_t get_random_tlb(uint32_t low, uint32_t high)
+{
+ uint32_t val;
+
+ qemu_guest_getrandom_nofail(&val, sizeof(val));
+ return val % (high - low + 1) + low;
+}
+
+void helper_tlbsrch(CPULoongArchState *env)
+{
+ int index, match;
+
+ if (FIELD_EX64(env->CSR_TLBRERA, CSR_TLBRERA, ISTLBR)) {
+ match = loongarch_tlb_search(env, env->CSR_TLBREHI, &index);
+ } else {
+ match = loongarch_tlb_search(env, env->CSR_TLBEHI, &index);
+ }
+
+ if (match) {
+ env->CSR_TLBIDX = FIELD_DP64(env->CSR_TLBIDX, CSR_TLBIDX, INDEX, index);
+ env->CSR_TLBIDX = FIELD_DP64(env->CSR_TLBIDX, CSR_TLBIDX, NE, 0);
+ return;
+ }
+
+ env->CSR_TLBIDX = FIELD_DP64(env->CSR_TLBIDX, CSR_TLBIDX, NE, 1);
+}
+
+void helper_tlbrd(CPULoongArchState *env)
+{
+ LoongArchTLB *tlb;
+ int index;
+ uint8_t tlb_ps, tlb_e;
+
+ index = FIELD_EX64(env->CSR_TLBIDX, CSR_TLBIDX, INDEX);
+ tlb = &env->tlb[index];
+
+ if (index >= LOONGARCH_STLB) {
+ tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS);
+ } else {
+ tlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS);
+ }
+ tlb_e = FIELD_EX64(tlb->tlb_misc, TLB_MISC, E);
+
+ if (!tlb_e) {
+ /* Invalid TLB entry */
+ env->CSR_TLBIDX = FIELD_DP64(env->CSR_TLBIDX, CSR_TLBIDX, NE, 1);
+ env->CSR_ASID = FIELD_DP64(env->CSR_ASID, CSR_ASID, ASID, 0);
+ env->CSR_TLBEHI = 0;
+ env->CSR_TLBELO0 = 0;
+ env->CSR_TLBELO1 = 0;
+ env->CSR_TLBIDX = FIELD_DP64(env->CSR_TLBIDX, CSR_TLBIDX, PS, 0);
+ } else {
+ /* Valid TLB entry */
+ env->CSR_TLBIDX = FIELD_DP64(env->CSR_TLBIDX, CSR_TLBIDX, NE, 0);
+ env->CSR_TLBIDX = FIELD_DP64(env->CSR_TLBIDX, CSR_TLBIDX,
+ PS, (tlb_ps & 0x3f));
+ env->CSR_TLBEHI = FIELD_EX64(tlb->tlb_misc, TLB_MISC, VPPN) <<
+ R_TLB_MISC_VPPN_SHIFT;
+ env->CSR_TLBELO0 = tlb->tlb_entry0;
+ env->CSR_TLBELO1 = tlb->tlb_entry1;
+ }
+}
+
+void helper_tlbwr(CPULoongArchState *env)
+{
+ int index = FIELD_EX64(env->CSR_TLBIDX, CSR_TLBIDX, INDEX);
+
+ invalidate_tlb(env, index);
+
+ if (FIELD_EX64(env->CSR_TLBIDX, CSR_TLBIDX, NE)) {
+ env->tlb[index].tlb_misc = FIELD_DP64(env->tlb[index].tlb_misc,
+ TLB_MISC, E, 0);
+ return;
+ }
+
+ fill_tlb_entry(env, index);
+}
+
+void helper_tlbfill(CPULoongArchState *env)
+{
+ uint64_t address, entryhi;
+ int index, set, stlb_idx;
+ uint16_t pagesize, stlb_ps;
+
+ if (FIELD_EX64(env->CSR_TLBRERA, CSR_TLBRERA, ISTLBR)) {
+ entryhi = env->CSR_TLBREHI;
+ pagesize = FIELD_EX64(env->CSR_TLBREHI, CSR_TLBREHI, PS);
+ } else {
+ entryhi = env->CSR_TLBEHI;
+ pagesize = FIELD_EX64(env->CSR_TLBIDX, CSR_TLBIDX, PS);
+ }
+
+ stlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS);
+
+ if (pagesize == stlb_ps) {
+ /* Only write into STLB bits [47:13] */
+ address = entryhi & ~MAKE_64BIT_MASK(0, R_CSR_TLBEHI_64_VPPN_SHIFT);
+
+        /* Choose one set randomly */
+ set = get_random_tlb(0, 7);
+
+ /* Index in one set */
+ stlb_idx = (address >> (stlb_ps + 1)) & 0xff; /* [0,255] */
+
+ index = set * 256 + stlb_idx;
+ } else {
+ /* Only write into MTLB */
+ index = get_random_tlb(LOONGARCH_STLB, LOONGARCH_TLB_MAX - 1);
+ }
+
+ invalidate_tlb(env, index);
+ fill_tlb_entry(env, index);
+}
+
+void helper_tlbclr(CPULoongArchState *env)
+{
+ LoongArchTLB *tlb;
+ int i, index;
+ uint16_t csr_asid, tlb_asid, tlb_g;
+
+ csr_asid = FIELD_EX64(env->CSR_ASID, CSR_ASID, ASID);
+ index = FIELD_EX64(env->CSR_TLBIDX, CSR_TLBIDX, INDEX);
+
+ if (index < LOONGARCH_STLB) {
+ /* STLB. One line per operation */
+ for (i = 0; i < 8; i++) {
+ tlb = &env->tlb[i * 256 + (index % 256)];
+ tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID);
+ tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G);
+ if (!tlb_g && tlb_asid == csr_asid) {
+ tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, E, 0);
+ }
+ }
+ } else if (index < LOONGARCH_TLB_MAX) {
+ /* All MTLB entries */
+ for (i = LOONGARCH_STLB; i < LOONGARCH_TLB_MAX; i++) {
+ tlb = &env->tlb[i];
+ tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID);
+ tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G);
+ if (!tlb_g && tlb_asid == csr_asid) {
+ tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, E, 0);
+ }
+ }
+ }
+
+ tlb_flush(env_cpu(env));
+}
+
+void helper_tlbflush(CPULoongArchState *env)
+{
+ int i, index;
+
+ index = FIELD_EX64(env->CSR_TLBIDX, CSR_TLBIDX, INDEX);
+
+ if (index < LOONGARCH_STLB) {
+ /* STLB. One line per operation */
+ for (i = 0; i < 8; i++) {
+ int s_idx = i * 256 + (index % 256);
+ env->tlb[s_idx].tlb_misc = FIELD_DP64(env->tlb[s_idx].tlb_misc,
+ TLB_MISC, E, 0);
+ }
+ } else if (index < LOONGARCH_TLB_MAX) {
+ /* All MTLB entries */
+ for (i = LOONGARCH_STLB; i < LOONGARCH_TLB_MAX; i++) {
+ env->tlb[i].tlb_misc = FIELD_DP64(env->tlb[i].tlb_misc,
+ TLB_MISC, E, 0);
+ }
+ }
+
+ tlb_flush(env_cpu(env));
+}
+
+void helper_invtlb_all(CPULoongArchState *env)
+{
+ for (int i = 0; i < LOONGARCH_TLB_MAX; i++) {
+ env->tlb[i].tlb_misc = FIELD_DP64(env->tlb[i].tlb_misc,
+ TLB_MISC, E, 0);
+ }
+ tlb_flush(env_cpu(env));
+}
+
+void helper_invtlb_all_g(CPULoongArchState *env, uint32_t g)
+{
+ for (int i = 0; i < LOONGARCH_TLB_MAX; i++) {
+ LoongArchTLB *tlb = &env->tlb[i];
+ uint8_t tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G);
+
+ if (tlb_g == g) {
+ tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, E, 0);
+ }
+ }
+ tlb_flush(env_cpu(env));
+}
+
+void helper_invtlb_all_asid(CPULoongArchState *env, target_ulong info)
+{
+ uint16_t asid = info & R_CSR_ASID_ASID_MASK;
+
+ for (int i = 0; i < LOONGARCH_TLB_MAX; i++) {
+ LoongArchTLB *tlb = &env->tlb[i];
+ uint8_t tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G);
+ uint16_t tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID);
+
+ if (!tlb_g && (tlb_asid == asid)) {
+ tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, E, 0);
+ }
+ }
+ tlb_flush(env_cpu(env));
+}
+
+void helper_invtlb_page_asid(CPULoongArchState *env, target_ulong info,
+ target_ulong addr)
+{
+ uint16_t asid = info & 0x3ff;
+
+ for (int i = 0; i < LOONGARCH_TLB_MAX; i++) {
+ LoongArchTLB *tlb = &env->tlb[i];
+ uint8_t tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G);
+ uint16_t tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID);
+ uint64_t vpn, tlb_vppn;
+ uint8_t tlb_ps, compare_shift;
+
+ if (i >= LOONGARCH_STLB) {
+ tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS);
+ } else {
+ tlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS);
+ }
+ tlb_vppn = FIELD_EX64(tlb->tlb_misc, TLB_MISC, VPPN);
+ vpn = (addr & TARGET_VIRT_MASK) >> (tlb_ps + 1);
+ compare_shift = tlb_ps + 1 - R_TLB_MISC_VPPN_SHIFT;
+
+ if (!tlb_g && (tlb_asid == asid) &&
+ (vpn == (tlb_vppn >> compare_shift))) {
+ tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, E, 0);
+ }
+ }
+ tlb_flush(env_cpu(env));
+}
+
+void helper_invtlb_page_asid_or_g(CPULoongArchState *env,
+ target_ulong info, target_ulong addr)
+{
+ uint16_t asid = info & 0x3ff;
+
+ for (int i = 0; i < LOONGARCH_TLB_MAX; i++) {
+ LoongArchTLB *tlb = &env->tlb[i];
+ uint8_t tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G);
+ uint16_t tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID);
+ uint64_t vpn, tlb_vppn;
+ uint8_t tlb_ps, compare_shift;
+
+ if (i >= LOONGARCH_STLB) {
+ tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS);
+ } else {
+ tlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS);
+ }
+ tlb_vppn = FIELD_EX64(tlb->tlb_misc, TLB_MISC, VPPN);
+ vpn = (addr & TARGET_VIRT_MASK) >> (tlb_ps + 1);
+ compare_shift = tlb_ps + 1 - R_TLB_MISC_VPPN_SHIFT;
+
+ if ((tlb_g || (tlb_asid == asid)) &&
+ (vpn == (tlb_vppn >> compare_shift))) {
+ tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, E, 0);
+ }
+ }
+ tlb_flush(env_cpu(env));
+}
+
+bool loongarch_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
+ MMUAccessType access_type, int mmu_idx,
+ bool probe, uintptr_t retaddr)
+{
+ LoongArchCPU *cpu = LOONGARCH_CPU(cs);
+ CPULoongArchState *env = &cpu->env;
+ hwaddr physical;
+ int prot;
+ int ret;
+
+ /* Data access */
+ ret = get_physical_address(env, &physical, &prot, address,
+ access_type, mmu_idx);
+
+ if (ret == TLBRET_MATCH) {
+ tlb_set_page(cs, address & TARGET_PAGE_MASK,
+ physical & TARGET_PAGE_MASK, prot,
+ mmu_idx, TARGET_PAGE_SIZE);
+ qemu_log_mask(CPU_LOG_MMU,
+ "%s address=%" VADDR_PRIx " physical " HWADDR_FMT_plx
+ " prot %d\n", __func__, address, physical, prot);
+ return true;
+ } else {
+ qemu_log_mask(CPU_LOG_MMU,
+ "%s address=%" VADDR_PRIx " ret %d\n", __func__, address,
+ ret);
+ }
+ if (probe) {
+ return false;
+ }
+ raise_mmu_exception(env, address, access_type, ret);
+ cpu_loop_exit_restore(cs, retaddr);
+}
+
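+/*
+ * LDDIR: walk one level of the software page table. Return the base of
+ * the next-level table loaded from memory, or 'base' unchanged when it
+ * already points at a huge page leaf.
+ */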
+target_ulong helper_lddir(CPULoongArchState *env, target_ulong base,
+ target_ulong level, uint32_t mem_idx)
+{
+ CPUState *cs = env_cpu(env);
+ target_ulong badvaddr, index, phys, ret;
+ int shift;
+ uint64_t dir_base, dir_width;
+ bool huge = (base >> LOONGARCH_PAGE_HUGE_SHIFT) & 0x1;
+
+ badvaddr = env->CSR_TLBRBADV;
+ base = base & TARGET_PHYS_MASK;
+
+ /* 0:64bit, 1:128bit, 2:192bit, 3:256bit */
+ shift = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, PTEWIDTH);
+ shift = (shift + 1) * 3;
+
+ if (huge) {
+ return base;
+ }
+ switch (level) {
+ case 1:
+ dir_base = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, DIR1_BASE);
+ dir_width = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, DIR1_WIDTH);
+ break;
+ case 2:
+ dir_base = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, DIR2_BASE);
+ dir_width = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, DIR2_WIDTH);
+ break;
+ case 3:
+ dir_base = FIELD_EX64(env->CSR_PWCH, CSR_PWCH, DIR3_BASE);
+ dir_width = FIELD_EX64(env->CSR_PWCH, CSR_PWCH, DIR3_WIDTH);
+ break;
+ case 4:
+ dir_base = FIELD_EX64(env->CSR_PWCH, CSR_PWCH, DIR4_BASE);
+ dir_width = FIELD_EX64(env->CSR_PWCH, CSR_PWCH, DIR4_WIDTH);
+ break;
+ default:
+ do_raise_exception(env, EXCCODE_INE, GETPC());
+ return 0;
+ }
+ index = (badvaddr >> dir_base) & ((1 << dir_width) - 1);
+ phys = base | index << shift;
+ ret = ldq_phys(cs->as, phys) & TARGET_PHYS_MASK;
+ return ret;
+}
+
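+/*
+ * LDPTE: fetch the even or odd page table entry (selected by 'odd') into
+ * CSR_TLBRELO0/1 and record the page size in CSR_TLBREHI for the refill.
+ */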
+void helper_ldpte(CPULoongArchState *env, target_ulong base, target_ulong odd,
+ uint32_t mem_idx)
+{
+ CPUState *cs = env_cpu(env);
+ target_ulong phys, tmp0, ptindex, ptoffset0, ptoffset1, ps, badv;
+ int shift;
+ bool huge = (base >> LOONGARCH_PAGE_HUGE_SHIFT) & 0x1;
+ uint64_t ptbase = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, PTBASE);
+ uint64_t ptwidth = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, PTWIDTH);
+
+ base = base & TARGET_PHYS_MASK;
+
+ if (huge) {
+ /* Huge Page. base is paddr */
+ tmp0 = base ^ (1 << LOONGARCH_PAGE_HUGE_SHIFT);
+ /* Move Global bit */
+ tmp0 = ((tmp0 & (1 << LOONGARCH_HGLOBAL_SHIFT)) >>
+ LOONGARCH_HGLOBAL_SHIFT) << R_TLBENTRY_G_SHIFT |
+ (tmp0 & (~(1 << LOONGARCH_HGLOBAL_SHIFT)));
+ ps = ptbase + ptwidth - 1;
+ if (odd) {
+ tmp0 += MAKE_64BIT_MASK(ps, 1);
+ }
+ } else {
+ /* 0:64bit, 1:128bit, 2:192bit, 3:256bit */
+ shift = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, PTEWIDTH);
+ shift = (shift + 1) * 3;
+ badv = env->CSR_TLBRBADV;
+
+ ptindex = (badv >> ptbase) & ((1 << ptwidth) - 1);
+ ptindex = ptindex & ~0x1; /* clear bit 0 */
+ ptoffset0 = ptindex << shift;
+ ptoffset1 = (ptindex + 1) << shift;
+
+ phys = base | (odd ? ptoffset1 : ptoffset0);
+ tmp0 = ldq_phys(cs->as, phys) & TARGET_PHYS_MASK;
+ ps = ptbase;
+ }
+
+ if (odd) {
+ env->CSR_TLBRELO1 = tmp0;
+ } else {
+ env->CSR_TLBRELO0 = tmp0;
+ }
+ env->CSR_TLBREHI = FIELD_DP64(env->CSR_TLBREHI, CSR_TLBREHI, PS, ps);
+}
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * LoongArch emulation for QEMU - main translation routines.
+ *
+ * Copyright (c) 2021 Loongson Technology Corporation Limited
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "tcg/tcg-op.h"
+#include "tcg/tcg-op-gvec.h"
+#include "exec/translation-block.h"
+#include "exec/translator.h"
+#include "exec/helper-proto.h"
+#include "exec/helper-gen.h"
+#include "exec/log.h"
+#include "qemu/qemu-print.h"
+#include "fpu/softfloat.h"
+#include "translate.h"
+#include "internals.h"
+#include "vec.h"
+
+/* Global register indices */
+TCGv cpu_gpr[32], cpu_pc;
+static TCGv cpu_lladdr, cpu_llval;
+
+#define HELPER_H "helper.h"
+#include "exec/helper-info.c.inc"
+#undef HELPER_H
+
+#define DISAS_STOP DISAS_TARGET_0
+#define DISAS_EXIT DISAS_TARGET_1
+#define DISAS_EXIT_UPDATE DISAS_TARGET_2
+
+static inline int vec_full_offset(int regno)
+{
+ return offsetof(CPULoongArchState, fpr[regno]);
+}
+
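+/*
+ * Byte offset of element 'index' (of size 1 << mop bytes) within vector
+ * register 'regno', adjusted for element ordering on big-endian hosts.
+ */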
+static inline int vec_reg_offset(int regno, int index, MemOp mop)
+{
+ const uint8_t size = 1 << mop;
+ int offs = index * size;
+
+    if (HOST_BIG_ENDIAN && size < 8) {
+ offs ^= (8 - size);
+ }
+
+ return offs + vec_full_offset(regno);
+}
+
+static inline void get_vreg64(TCGv_i64 dest, int regno, int index)
+{
+ tcg_gen_ld_i64(dest, tcg_env,
+ offsetof(CPULoongArchState, fpr[regno].vreg.D(index)));
+}
+
+static inline void set_vreg64(TCGv_i64 src, int regno, int index)
+{
+ tcg_gen_st_i64(src, tcg_env,
+ offsetof(CPULoongArchState, fpr[regno].vreg.D(index)));
+}
+
+static inline int plus_1(DisasContext *ctx, int x)
+{
+ return x + 1;
+}
+
+static inline int shl_1(DisasContext *ctx, int x)
+{
+ return x << 1;
+}
+
+static inline int shl_2(DisasContext *ctx, int x)
+{
+ return x << 2;
+}
+
+static inline int shl_3(DisasContext *ctx, int x)
+{
+ return x << 3;
+}
+
+/*
+ * On LoongArch, the upper 32 bits of a single-precision value held in an
+ * FP register are undefined ("can be any value"). QEMU chooses to nanbox,
+ * because it is most likely to show guest bugs early.
+ */
+static void gen_nanbox_s(TCGv_i64 out, TCGv_i64 in)
+{
+ tcg_gen_ori_i64(out, in, MAKE_64BIT_MASK(32, 32));
+}
+
+void generate_exception(DisasContext *ctx, int excp)
+{
+ tcg_gen_movi_tl(cpu_pc, ctx->base.pc_next);
+ gen_helper_raise_exception(tcg_env, tcg_constant_i32(excp));
+ ctx->base.is_jmp = DISAS_NORETURN;
+}
+
+static inline void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest)
+{
+ if (ctx->va32) {
+ dest = (uint32_t) dest;
+ }
+
+ if (translator_use_goto_tb(&ctx->base, dest)) {
+ tcg_gen_goto_tb(n);
+ tcg_gen_movi_tl(cpu_pc, dest);
+ tcg_gen_exit_tb(ctx->base.tb, n);
+ } else {
+ tcg_gen_movi_tl(cpu_pc, dest);
+ tcg_gen_lookup_and_goto_ptr();
+ }
+}
+
+static void loongarch_tr_init_disas_context(DisasContextBase *dcbase,
+ CPUState *cs)
+{
+ int64_t bound;
+ CPULoongArchState *env = cpu_env(cs);
+ DisasContext *ctx = container_of(dcbase, DisasContext, base);
+
+ ctx->page_start = ctx->base.pc_first & TARGET_PAGE_MASK;
+ ctx->plv = ctx->base.tb->flags & HW_FLAGS_PLV_MASK;
+ if (ctx->base.tb->flags & HW_FLAGS_CRMD_PG) {
+ ctx->mem_idx = ctx->plv;
+ } else {
+ ctx->mem_idx = MMU_IDX_DA;
+ }
+
+ /* Bound the number of insns to execute to those left on the page. */
+ bound = -(ctx->base.pc_first | TARGET_PAGE_MASK) / 4;
+ ctx->base.max_insns = MIN(ctx->base.max_insns, bound);
+
+ if (FIELD_EX64(env->cpucfg[2], CPUCFG2, LSX)) {
+ ctx->vl = LSX_LEN;
+ }
+
+ if (FIELD_EX64(env->cpucfg[2], CPUCFG2, LASX)) {
+ ctx->vl = LASX_LEN;
+ }
+
+ ctx->la64 = is_la64(env);
+ ctx->va32 = (ctx->base.tb->flags & HW_FLAGS_VA32) != 0;
+
+ ctx->zero = tcg_constant_tl(0);
+
+ ctx->cpucfg1 = env->cpucfg[1];
+ ctx->cpucfg2 = env->cpucfg[2];
+}
+
+static void loongarch_tr_tb_start(DisasContextBase *dcbase, CPUState *cs)
+{
+}
+
+static void loongarch_tr_insn_start(DisasContextBase *dcbase, CPUState *cs)
+{
+ DisasContext *ctx = container_of(dcbase, DisasContext, base);
+
+ tcg_gen_insn_start(ctx->base.pc_next);
+}
+
+/*
+ * Wrappers for getting reg values.
+ *
+ * The $zero register does not have cpu_gpr[0] allocated -- we supply the
+ * constant zero as a source, and an uninitialized sink as destination.
+ *
+ * Further, we may provide an extension for word operations.
+ */
+static TCGv gpr_src(DisasContext *ctx, int reg_num, DisasExtend src_ext)
+{
+ TCGv t;
+
+ if (reg_num == 0) {
+ return ctx->zero;
+ }
+
+ switch (src_ext) {
+ case EXT_NONE:
+ return cpu_gpr[reg_num];
+ case EXT_SIGN:
+ t = tcg_temp_new();
+ tcg_gen_ext32s_tl(t, cpu_gpr[reg_num]);
+ return t;
+ case EXT_ZERO:
+ t = tcg_temp_new();
+ tcg_gen_ext32u_tl(t, cpu_gpr[reg_num]);
+ return t;
+ }
+ g_assert_not_reached();
+}
+
+static TCGv gpr_dst(DisasContext *ctx, int reg_num, DisasExtend dst_ext)
+{
+ if (reg_num == 0 || dst_ext) {
+ return tcg_temp_new();
+ }
+ return cpu_gpr[reg_num];
+}
+
+static void gen_set_gpr(int reg_num, TCGv t, DisasExtend dst_ext)
+{
+ if (reg_num != 0) {
+ switch (dst_ext) {
+ case EXT_NONE:
+ tcg_gen_mov_tl(cpu_gpr[reg_num], t);
+ break;
+ case EXT_SIGN:
+ tcg_gen_ext32s_tl(cpu_gpr[reg_num], t);
+ break;
+ case EXT_ZERO:
+ tcg_gen_ext32u_tl(cpu_gpr[reg_num], t);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ }
+}
+
+static TCGv get_fpr(DisasContext *ctx, int reg_num)
+{
+ TCGv t = tcg_temp_new();
+ tcg_gen_ld_i64(t, tcg_env,
+ offsetof(CPULoongArchState, fpr[reg_num].vreg.D(0)));
+ return t;
+}
+
+static void set_fpr(int reg_num, TCGv val)
+{
+ tcg_gen_st_i64(val, tcg_env,
+ offsetof(CPULoongArchState, fpr[reg_num].vreg.D(0)));
+}
+
+static TCGv make_address_x(DisasContext *ctx, TCGv base, TCGv addend)
+{
+ TCGv temp = NULL;
+
+ if (addend || ctx->va32) {
+ temp = tcg_temp_new();
+ }
+ if (addend) {
+ tcg_gen_add_tl(temp, base, addend);
+ base = temp;
+ }
+ if (ctx->va32) {
+ tcg_gen_ext32u_tl(temp, base);
+ base = temp;
+ }
+ return base;
+}
+
+static TCGv make_address_i(DisasContext *ctx, TCGv base, target_long ofs)
+{
+ TCGv addend = ofs ? tcg_constant_tl(ofs) : NULL;
+ return make_address_x(ctx, base, addend);
+}
+
+static uint64_t make_address_pc(DisasContext *ctx, uint64_t addr)
+{
+ if (ctx->va32) {
+ addr = (int32_t)addr;
+ }
+ return addr;
+}
+
+#include "decode-insns.c.inc"
+#include "insn_trans/trans_arith.c.inc"
+#include "insn_trans/trans_shift.c.inc"
+#include "insn_trans/trans_bit.c.inc"
+#include "insn_trans/trans_memory.c.inc"
+#include "insn_trans/trans_atomic.c.inc"
+#include "insn_trans/trans_extra.c.inc"
+#include "insn_trans/trans_farith.c.inc"
+#include "insn_trans/trans_fcmp.c.inc"
+#include "insn_trans/trans_fcnv.c.inc"
+#include "insn_trans/trans_fmov.c.inc"
+#include "insn_trans/trans_fmemory.c.inc"
+#include "insn_trans/trans_branch.c.inc"
+#include "insn_trans/trans_privileged.c.inc"
+#include "insn_trans/trans_vec.c.inc"
+
+static void loongarch_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
+{
+ CPULoongArchState *env = cpu_env(cs);
+ DisasContext *ctx = container_of(dcbase, DisasContext, base);
+
+ ctx->opcode = translator_ldl(env, &ctx->base, ctx->base.pc_next);
+
+ if (!decode(ctx, ctx->opcode)) {
+ qemu_log_mask(LOG_UNIMP, "Error: unknown opcode. "
+ TARGET_FMT_lx ": 0x%x\n",
+ ctx->base.pc_next, ctx->opcode);
+ generate_exception(ctx, EXCCODE_INE);
+ }
+
+ ctx->base.pc_next += 4;
+
+ if (ctx->va32) {
+ ctx->base.pc_next = (uint32_t)ctx->base.pc_next;
+ }
+}
+
+static void loongarch_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
+{
+ DisasContext *ctx = container_of(dcbase, DisasContext, base);
+
+ switch (ctx->base.is_jmp) {
+ case DISAS_STOP:
+ tcg_gen_movi_tl(cpu_pc, ctx->base.pc_next);
+ tcg_gen_lookup_and_goto_ptr();
+ break;
+ case DISAS_TOO_MANY:
+ gen_goto_tb(ctx, 0, ctx->base.pc_next);
+ break;
+ case DISAS_NORETURN:
+ break;
+ case DISAS_EXIT_UPDATE:
+ tcg_gen_movi_tl(cpu_pc, ctx->base.pc_next);
+ QEMU_FALLTHROUGH;
+ case DISAS_EXIT:
+ tcg_gen_exit_tb(NULL, 0);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static void loongarch_tr_disas_log(const DisasContextBase *dcbase,
+ CPUState *cpu, FILE *logfile)
+{
+ qemu_log("IN: %s\n", lookup_symbol(dcbase->pc_first));
+ target_disas(logfile, cpu, dcbase->pc_first, dcbase->tb->size);
+}
+
+static const TranslatorOps loongarch_tr_ops = {
+ .init_disas_context = loongarch_tr_init_disas_context,
+ .tb_start = loongarch_tr_tb_start,
+ .insn_start = loongarch_tr_insn_start,
+ .translate_insn = loongarch_tr_translate_insn,
+ .tb_stop = loongarch_tr_tb_stop,
+ .disas_log = loongarch_tr_disas_log,
+};
+
+void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
+ target_ulong pc, void *host_pc)
+{
+ DisasContext ctx;
+
+ translator_loop(cs, tb, max_insns, pc, host_pc,
+ &loongarch_tr_ops, &ctx.base);
+}
+
+void loongarch_translate_init(void)
+{
+ int i;
+
+ cpu_gpr[0] = NULL;
+ for (i = 1; i < 32; i++) {
+ cpu_gpr[i] = tcg_global_mem_new(tcg_env,
+ offsetof(CPULoongArchState, gpr[i]),
+ regnames[i]);
+ }
+
+ cpu_pc = tcg_global_mem_new(tcg_env, offsetof(CPULoongArchState, pc), "pc");
+ cpu_lladdr = tcg_global_mem_new(tcg_env,
+ offsetof(CPULoongArchState, lladdr), "lladdr");
+ cpu_llval = tcg_global_mem_new(tcg_env,
+ offsetof(CPULoongArchState, llval), "llval");
+}
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * QEMU LoongArch vector helper functions.
+ *
+ * Copyright (c) 2022-2023 Loongson Technology Corporation Limited
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "exec/exec-all.h"
+#include "exec/helper-proto.h"
+#include "fpu/softfloat.h"
+#include "internals.h"
+#include "tcg/tcg.h"
+#include "vec.h"
+#include "tcg/tcg-gvec-desc.h"
+
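+/*
+ * Widening horizontal ops: each result lane combines the odd element of
+ * Vj with the even element of Vk.
+ */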
+#define DO_ODD_EVEN(NAME, BIT, E1, E2, DO_OP) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ typedef __typeof(Vd->E1(0)) TD; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+ Vd->E1(i) = DO_OP((TD)Vj->E2(2 * i + 1), (TD)Vk->E2(2 * i)); \
+ } \
+}
+
+DO_ODD_EVEN(vhaddw_h_b, 16, H, B, DO_ADD)
+DO_ODD_EVEN(vhaddw_w_h, 32, W, H, DO_ADD)
+DO_ODD_EVEN(vhaddw_d_w, 64, D, W, DO_ADD)
+
+void HELPER(vhaddw_q_d)(void *vd, void *vj, void *vk, uint32_t desc)
+{
+ int i;
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);
+
+    for (i = 0; i < oprsz / 16; i++) {
+ Vd->Q(i) = int128_add(int128_makes64(Vj->D(2 * i + 1)),
+ int128_makes64(Vk->D(2 * i)));
+ }
+}
+
+DO_ODD_EVEN(vhsubw_h_b, 16, H, B, DO_SUB)
+DO_ODD_EVEN(vhsubw_w_h, 32, W, H, DO_SUB)
+DO_ODD_EVEN(vhsubw_d_w, 64, D, W, DO_SUB)
+
+void HELPER(vhsubw_q_d)(void *vd, void *vj, void *vk, uint32_t desc)
+{
+ int i;
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);
+
+ for (i = 0; i < oprsz / 16; i++) {
+ Vd->Q(i) = int128_sub(int128_makes64(Vj->D(2 * i + 1)),
+ int128_makes64(Vk->D(2 * i)));
+ }
+}
+
+DO_ODD_EVEN(vhaddw_hu_bu, 16, UH, UB, DO_ADD)
+DO_ODD_EVEN(vhaddw_wu_hu, 32, UW, UH, DO_ADD)
+DO_ODD_EVEN(vhaddw_du_wu, 64, UD, UW, DO_ADD)
+
+void HELPER(vhaddw_qu_du)(void *vd, void *vj, void *vk, uint32_t desc)
+{
+ int i;
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);
+
+    for (i = 0; i < oprsz / 16; i++) {
+ Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i + 1)),
+ int128_make64(Vk->UD(2 * i)));
+ }
+}
+
+DO_ODD_EVEN(vhsubw_hu_bu, 16, UH, UB, DO_SUB)
+DO_ODD_EVEN(vhsubw_wu_hu, 32, UW, UH, DO_SUB)
+DO_ODD_EVEN(vhsubw_du_wu, 64, UD, UW, DO_SUB)
+
+void HELPER(vhsubw_qu_du)(void *vd, void *vj, void *vk, uint32_t desc)
+{
+ int i;
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);
+
+ for (i = 0; i < oprsz / 16; i++) {
+ Vd->Q(i) = int128_sub(int128_make64(Vj->UD(2 * i + 1)),
+ int128_make64(Vk->UD(2 * i)));
+ }
+}
+
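+/* Widening ops on even (DO_EVEN) or odd (DO_ODD) elements of both sources. */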
+#define DO_EVEN(NAME, BIT, E1, E2, DO_OP) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ typedef __typeof(Vd->E1(0)) TD; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+        Vd->E1(i) = DO_OP((TD)Vj->E2(2 * i), (TD)Vk->E2(2 * i));         \
+ } \
+}
+
+#define DO_ODD(NAME, BIT, E1, E2, DO_OP) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ typedef __typeof(Vd->E1(0)) TD; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+ Vd->E1(i) = DO_OP((TD)Vj->E2(2 * i + 1), (TD)Vk->E2(2 * i + 1)); \
+ } \
+}
+
+void HELPER(vaddwev_q_d)(void *vd, void *vj, void *vk, uint32_t desc)
+{
+ int i;
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);
+
+ for (i = 0; i < oprsz / 16; i++) {
+ Vd->Q(i) = int128_add(int128_makes64(Vj->D(2 * i)),
+ int128_makes64(Vk->D(2 * i)));
+ }
+}
+
+DO_EVEN(vaddwev_h_b, 16, H, B, DO_ADD)
+DO_EVEN(vaddwev_w_h, 32, W, H, DO_ADD)
+DO_EVEN(vaddwev_d_w, 64, D, W, DO_ADD)
+
+void HELPER(vaddwod_q_d)(void *vd, void *vj, void *vk, uint32_t desc)
+{
+ int i;
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);
+
+ for (i = 0; i < oprsz / 16; i++) {
+        Vd->Q(i) = int128_add(int128_makes64(Vj->D(2 * i + 1)),
+                              int128_makes64(Vk->D(2 * i + 1)));
+ }
+}
+
+DO_ODD(vaddwod_h_b, 16, H, B, DO_ADD)
+DO_ODD(vaddwod_w_h, 32, W, H, DO_ADD)
+DO_ODD(vaddwod_d_w, 64, D, W, DO_ADD)
+
+void HELPER(vsubwev_q_d)(void *vd, void *vj, void *vk, uint32_t desc)
+{
+ int i;
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);
+
+ for (i = 0; i < oprsz / 16; i++) {
+ Vd->Q(i) = int128_sub(int128_makes64(Vj->D(2 * i)),
+ int128_makes64(Vk->D(2 * i)));
+ }
+}
+
+DO_EVEN(vsubwev_h_b, 16, H, B, DO_SUB)
+DO_EVEN(vsubwev_w_h, 32, W, H, DO_SUB)
+DO_EVEN(vsubwev_d_w, 64, D, W, DO_SUB)
+
+void HELPER(vsubwod_q_d)(void *vd, void *vj, void *vk, uint32_t desc)
+{
+ int i;
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);
+
+ for (i = 0; i < oprsz / 16; i++) {
+ Vd->Q(i) = int128_sub(int128_makes64(Vj->D(2 * i + 1)),
+ int128_makes64(Vk->D(2 * i + 1)));
+ }
+}
+
+DO_ODD(vsubwod_h_b, 16, H, B, DO_SUB)
+DO_ODD(vsubwod_w_h, 32, W, H, DO_SUB)
+DO_ODD(vsubwod_d_w, 64, D, W, DO_SUB)
+
+void HELPER(vaddwev_q_du)(void *vd, void *vj, void *vk, uint32_t desc)
+{
+ int i;
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);
+
+ for (i = 0; i < oprsz / 16; i++) {
+ Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i)),
+ int128_make64(Vk->UD(2 * i)));
+ }
+}
+
+DO_EVEN(vaddwev_h_bu, 16, UH, UB, DO_ADD)
+DO_EVEN(vaddwev_w_hu, 32, UW, UH, DO_ADD)
+DO_EVEN(vaddwev_d_wu, 64, UD, UW, DO_ADD)
+
+void HELPER(vaddwod_q_du)(void *vd, void *vj, void *vk, uint32_t desc)
+{
+ int i;
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);
+
+ for (i = 0; i < oprsz / 16; i++) {
+ Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i + 1)),
+ int128_make64(Vk->UD(2 * i + 1)));
+ }
+}
+
+DO_ODD(vaddwod_h_bu, 16, UH, UB, DO_ADD)
+DO_ODD(vaddwod_w_hu, 32, UW, UH, DO_ADD)
+DO_ODD(vaddwod_d_wu, 64, UD, UW, DO_ADD)
+
+void HELPER(vsubwev_q_du)(void *vd, void *vj, void *vk, uint32_t desc)
+{
+ int i;
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);
+
+ for (i = 0; i < oprsz / 16; i++) {
+ Vd->Q(i) = int128_sub(int128_make64(Vj->UD(2 * i)),
+ int128_make64(Vk->UD(2 * i)));
+ }
+}
+
+DO_EVEN(vsubwev_h_bu, 16, UH, UB, DO_SUB)
+DO_EVEN(vsubwev_w_hu, 32, UW, UH, DO_SUB)
+DO_EVEN(vsubwev_d_wu, 64, UD, UW, DO_SUB)
+
+void HELPER(vsubwod_q_du)(void *vd, void *vj, void *vk, uint32_t desc)
+{
+ int i;
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);
+
+ for (i = 0; i < oprsz / 16; i++) {
+ Vd->Q(i) = int128_sub(int128_make64(Vj->UD(2 * i + 1)),
+ int128_make64(Vk->UD(2 * i + 1)));
+ }
+}
+
+DO_ODD(vsubwod_h_bu, 16, UH, UB, DO_SUB)
+DO_ODD(vsubwod_w_hu, 32, UW, UH, DO_SUB)
+DO_ODD(vsubwod_d_wu, 64, UD, UW, DO_SUB)
+
+#define DO_EVEN_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ typedef __typeof(Vd->ES1(0)) TDS; \
+ typedef __typeof(Vd->EU1(0)) TDU; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+        Vd->ES1(i) = DO_OP((TDU)Vj->EU2(2 * i), (TDS)Vk->ES2(2 * i));    \
+ } \
+}
+
+#define DO_ODD_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ typedef __typeof(Vd->ES1(0)) TDS; \
+ typedef __typeof(Vd->EU1(0)) TDU; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+ Vd->ES1(i) = DO_OP((TDU)Vj->EU2(2 * i + 1), (TDS)Vk->ES2(2 * i + 1)); \
+ } \
+}
+
+void HELPER(vaddwev_q_du_d)(void *vd, void *vj, void *vk, uint32_t desc)
+{
+ int i;
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);
+
+ for (i = 0; i < oprsz / 16; i++) {
+ Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i)),
+ int128_makes64(Vk->D(2 * i)));
+ }
+}
+
+DO_EVEN_U_S(vaddwev_h_bu_b, 16, H, UH, B, UB, DO_ADD)
+DO_EVEN_U_S(vaddwev_w_hu_h, 32, W, UW, H, UH, DO_ADD)
+DO_EVEN_U_S(vaddwev_d_wu_w, 64, D, UD, W, UW, DO_ADD)
+
+void HELPER(vaddwod_q_du_d)(void *vd, void *vj, void *vk, uint32_t desc)
+{
+ int i;
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);
+
+ for (i = 0; i < oprsz / 16; i++) {
+ Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i + 1)),
+ int128_makes64(Vk->D(2 * i + 1)));
+ }
+}
+
+DO_ODD_U_S(vaddwod_h_bu_b, 16, H, UH, B, UB, DO_ADD)
+DO_ODD_U_S(vaddwod_w_hu_h, 32, W, UW, H, UH, DO_ADD)
+DO_ODD_U_S(vaddwod_d_wu_w, 64, D, UD, W, UW, DO_ADD)
+
+#define DO_3OP(NAME, BIT, E, DO_OP) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+ Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)); \
+ } \
+}
+
+DO_3OP(vavg_b, 8, B, DO_VAVG)
+DO_3OP(vavg_h, 16, H, DO_VAVG)
+DO_3OP(vavg_w, 32, W, DO_VAVG)
+DO_3OP(vavg_d, 64, D, DO_VAVG)
+DO_3OP(vavgr_b, 8, B, DO_VAVGR)
+DO_3OP(vavgr_h, 16, H, DO_VAVGR)
+DO_3OP(vavgr_w, 32, W, DO_VAVGR)
+DO_3OP(vavgr_d, 64, D, DO_VAVGR)
+DO_3OP(vavg_bu, 8, UB, DO_VAVG)
+DO_3OP(vavg_hu, 16, UH, DO_VAVG)
+DO_3OP(vavg_wu, 32, UW, DO_VAVG)
+DO_3OP(vavg_du, 64, UD, DO_VAVG)
+DO_3OP(vavgr_bu, 8, UB, DO_VAVGR)
+DO_3OP(vavgr_hu, 16, UH, DO_VAVGR)
+DO_3OP(vavgr_wu, 32, UW, DO_VAVGR)
+DO_3OP(vavgr_du, 64, UD, DO_VAVGR)
+
+DO_3OP(vabsd_b, 8, B, DO_VABSD)
+DO_3OP(vabsd_h, 16, H, DO_VABSD)
+DO_3OP(vabsd_w, 32, W, DO_VABSD)
+DO_3OP(vabsd_d, 64, D, DO_VABSD)
+DO_3OP(vabsd_bu, 8, UB, DO_VABSD)
+DO_3OP(vabsd_hu, 16, UH, DO_VABSD)
+DO_3OP(vabsd_wu, 32, UW, DO_VABSD)
+DO_3OP(vabsd_du, 64, UD, DO_VABSD)
+
+#define DO_VADDA(NAME, BIT, E) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+ Vd->E(i) = DO_VABS(Vj->E(i)) + DO_VABS(Vk->E(i)); \
+ } \
+}
+
+DO_VADDA(vadda_b, 8, B)
+DO_VADDA(vadda_h, 16, H)
+DO_VADDA(vadda_w, 32, W)
+DO_VADDA(vadda_d, 64, D)
+
+#define VMINMAXI(NAME, BIT, E, DO_OP) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ typedef __typeof(Vd->E(0)) TD; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+ Vd->E(i) = DO_OP(Vj->E(i), (TD)imm); \
+ } \
+}
+
+VMINMAXI(vmini_b, 8, B, DO_MIN)
+VMINMAXI(vmini_h, 16, H, DO_MIN)
+VMINMAXI(vmini_w, 32, W, DO_MIN)
+VMINMAXI(vmini_d, 64, D, DO_MIN)
+VMINMAXI(vmaxi_b, 8, B, DO_MAX)
+VMINMAXI(vmaxi_h, 16, H, DO_MAX)
+VMINMAXI(vmaxi_w, 32, W, DO_MAX)
+VMINMAXI(vmaxi_d, 64, D, DO_MAX)
+VMINMAXI(vmini_bu, 8, UB, DO_MIN)
+VMINMAXI(vmini_hu, 16, UH, DO_MIN)
+VMINMAXI(vmini_wu, 32, UW, DO_MIN)
+VMINMAXI(vmini_du, 64, UD, DO_MIN)
+VMINMAXI(vmaxi_bu, 8, UB, DO_MAX)
+VMINMAXI(vmaxi_hu, 16, UH, DO_MAX)
+VMINMAXI(vmaxi_wu, 32, UW, DO_MAX)
+VMINMAXI(vmaxi_du, 64, UD, DO_MAX)
+
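+/*
+ * High half of the widened product: multiply in the wider type E1, then
+ * shift right by the element width BIT.
+ */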
+#define DO_VMUH(NAME, BIT, E1, E2, DO_OP) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ typedef __typeof(Vd->E1(0)) T; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+ Vd->E2(i) = ((T)Vj->E2(i)) * ((T)Vk->E2(i)) >> BIT; \
+ } \
+}
+
+void HELPER(vmuh_d)(void *vd, void *vj, void *vk, uint32_t desc)
+{
+ int i;
+ uint64_t l, h;
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);
+
+ for (i = 0; i < oprsz / 8; i++) {
+ muls64(&l, &h, Vj->D(i), Vk->D(i));
+ Vd->D(i) = h;
+ }
+}
+
+DO_VMUH(vmuh_b, 8, H, B, DO_MUH)
+DO_VMUH(vmuh_h, 16, W, H, DO_MUH)
+DO_VMUH(vmuh_w, 32, D, W, DO_MUH)
+
+void HELPER(vmuh_du)(void *vd, void *vj, void *vk, uint32_t desc)
+{
+ int i;
+ uint64_t l, h;
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);
+
+ for (i = 0; i < oprsz / 8; i++) {
+ mulu64(&l, &h, Vj->D(i), Vk->D(i));
+ Vd->D(i) = h;
+ }
+}
+
+DO_VMUH(vmuh_bu, 8, UH, UB, DO_MUH)
+DO_VMUH(vmuh_hu, 16, UW, UH, DO_MUH)
+DO_VMUH(vmuh_wu, 32, UD, UW, DO_MUH)
+
+DO_EVEN(vmulwev_h_b, 16, H, B, DO_MUL)
+DO_EVEN(vmulwev_w_h, 32, W, H, DO_MUL)
+DO_EVEN(vmulwev_d_w, 64, D, W, DO_MUL)
+
+DO_ODD(vmulwod_h_b, 16, H, B, DO_MUL)
+DO_ODD(vmulwod_w_h, 32, W, H, DO_MUL)
+DO_ODD(vmulwod_d_w, 64, D, W, DO_MUL)
+
+DO_EVEN(vmulwev_h_bu, 16, UH, UB, DO_MUL)
+DO_EVEN(vmulwev_w_hu, 32, UW, UH, DO_MUL)
+DO_EVEN(vmulwev_d_wu, 64, UD, UW, DO_MUL)
+
+DO_ODD(vmulwod_h_bu, 16, UH, UB, DO_MUL)
+DO_ODD(vmulwod_w_hu, 32, UW, UH, DO_MUL)
+DO_ODD(vmulwod_d_wu, 64, UD, UW, DO_MUL)
+
+DO_EVEN_U_S(vmulwev_h_bu_b, 16, H, UH, B, UB, DO_MUL)
+DO_EVEN_U_S(vmulwev_w_hu_h, 32, W, UW, H, UH, DO_MUL)
+DO_EVEN_U_S(vmulwev_d_wu_w, 64, D, UD, W, UW, DO_MUL)
+
+DO_ODD_U_S(vmulwod_h_bu_b, 16, H, UH, B, UB, DO_MUL)
+DO_ODD_U_S(vmulwod_w_hu_h, 32, W, UW, H, UH, DO_MUL)
+DO_ODD_U_S(vmulwod_d_wu_w, 64, D, UD, W, UW, DO_MUL)
+
+#define VMADDSUB(NAME, BIT, E, DO_OP) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+ Vd->E(i) = DO_OP(Vd->E(i), Vj->E(i), Vk->E(i)); \
+ } \
+}
+
+VMADDSUB(vmadd_b, 8, B, DO_MADD)
+VMADDSUB(vmadd_h, 16, H, DO_MADD)
+VMADDSUB(vmadd_w, 32, W, DO_MADD)
+VMADDSUB(vmadd_d, 64, D, DO_MADD)
+VMADDSUB(vmsub_b, 8, B, DO_MSUB)
+VMADDSUB(vmsub_h, 16, H, DO_MSUB)
+VMADDSUB(vmsub_w, 32, W, DO_MSUB)
+VMADDSUB(vmsub_d, 64, D, DO_MSUB)
+
+#define VMADDWEV(NAME, BIT, E1, E2, DO_OP) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ typedef __typeof(Vd->E1(0)) TD; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+ Vd->E1(i) += DO_OP((TD)Vj->E2(2 * i), (TD)Vk->E2(2 * i)); \
+ } \
+}
+
+VMADDWEV(vmaddwev_h_b, 16, H, B, DO_MUL)
+VMADDWEV(vmaddwev_w_h, 32, W, H, DO_MUL)
+VMADDWEV(vmaddwev_d_w, 64, D, W, DO_MUL)
+VMADDWEV(vmaddwev_h_bu, 16, UH, UB, DO_MUL)
+VMADDWEV(vmaddwev_w_hu, 32, UW, UH, DO_MUL)
+VMADDWEV(vmaddwev_d_wu, 64, UD, UW, DO_MUL)
+
+#define VMADDWOD(NAME, BIT, E1, E2, DO_OP) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ typedef __typeof(Vd->E1(0)) TD; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+ Vd->E1(i) += DO_OP((TD)Vj->E2(2 * i + 1), \
+ (TD)Vk->E2(2 * i + 1)); \
+ } \
+}
+
+VMADDWOD(vmaddwod_h_b, 16, H, B, DO_MUL)
+VMADDWOD(vmaddwod_w_h, 32, W, H, DO_MUL)
+VMADDWOD(vmaddwod_d_w, 64, D, W, DO_MUL)
+VMADDWOD(vmaddwod_h_bu, 16, UH, UB, DO_MUL)
+VMADDWOD(vmaddwod_w_hu, 32, UW, UH, DO_MUL)
+VMADDWOD(vmaddwod_d_wu, 64, UD, UW, DO_MUL)
+
+#define VMADDWEV_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ typedef __typeof(Vd->ES1(0)) TS1; \
+ typedef __typeof(Vd->EU1(0)) TU1; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+ Vd->ES1(i) += DO_OP((TU1)Vj->EU2(2 * i), \
+ (TS1)Vk->ES2(2 * i)); \
+ } \
+}
+
+VMADDWEV_U_S(vmaddwev_h_bu_b, 16, H, UH, B, UB, DO_MUL)
+VMADDWEV_U_S(vmaddwev_w_hu_h, 32, W, UW, H, UH, DO_MUL)
+VMADDWEV_U_S(vmaddwev_d_wu_w, 64, D, UD, W, UW, DO_MUL)
+
+#define VMADDWOD_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ typedef __typeof(Vd->ES1(0)) TS1; \
+ typedef __typeof(Vd->EU1(0)) TU1; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+ Vd->ES1(i) += DO_OP((TU1)Vj->EU2(2 * i + 1), \
+ (TS1)Vk->ES2(2 * i + 1)); \
+ } \
+}
+
+VMADDWOD_U_S(vmaddwod_h_bu_b, 16, H, UH, B, UB, DO_MUL)
+VMADDWOD_U_S(vmaddwod_w_hu_h, 32, W, UW, H, UH, DO_MUL)
+VMADDWOD_U_S(vmaddwod_d_wu_w, 64, D, UD, W, UW, DO_MUL)
+
+#define VDIV(NAME, BIT, E, DO_OP) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+ Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)); \
+ } \
+}
+
+VDIV(vdiv_b, 8, B, DO_DIV)
+VDIV(vdiv_h, 16, H, DO_DIV)
+VDIV(vdiv_w, 32, W, DO_DIV)
+VDIV(vdiv_d, 64, D, DO_DIV)
+VDIV(vdiv_bu, 8, UB, DO_DIVU)
+VDIV(vdiv_hu, 16, UH, DO_DIVU)
+VDIV(vdiv_wu, 32, UW, DO_DIVU)
+VDIV(vdiv_du, 64, UD, DO_DIVU)
+VDIV(vmod_b, 8, B, DO_REM)
+VDIV(vmod_h, 16, H, DO_REM)
+VDIV(vmod_w, 32, W, DO_REM)
+VDIV(vmod_d, 64, D, DO_REM)
+VDIV(vmod_bu, 8, UB, DO_REMU)
+VDIV(vmod_hu, 16, UH, DO_REMU)
+VDIV(vmod_wu, 32, UW, DO_REMU)
+VDIV(vmod_du, 64, UD, DO_REMU)
+
+#define VSAT_S(NAME, BIT, E) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t max, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ typedef __typeof(Vd->E(0)) TD; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+ Vd->E(i) = Vj->E(i) > (TD)max ? (TD)max : \
+ Vj->E(i) < (TD)~max ? (TD)~max : Vj->E(i); \
+ } \
+}
+
+VSAT_S(vsat_b, 8, B)
+VSAT_S(vsat_h, 16, H)
+VSAT_S(vsat_w, 32, W)
+VSAT_S(vsat_d, 64, D)
+
+#define VSAT_U(NAME, BIT, E) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t max, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ typedef __typeof(Vd->E(0)) TD; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+ Vd->E(i) = Vj->E(i) > (TD)max ? (TD)max : Vj->E(i); \
+ } \
+}
+
+VSAT_U(vsat_bu, 8, UB)
+VSAT_U(vsat_hu, 16, UH)
+VSAT_U(vsat_wu, 32, UW)
+VSAT_U(vsat_du, 64, UD)
+
+#define VEXTH(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ Vd->E1(j + i * ofs) = Vj->E2(j + ofs + ofs * 2 * i); \
+ } \
+ } \
+}
+
+void HELPER(vexth_q_d)(void *vd, void *vj, uint32_t desc)
+{
+ int i;
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
+
+ for (i = 0; i < oprsz / 16; i++) {
+ Vd->Q(i) = int128_makes64(Vj->D(2 * i + 1));
+ }
+}
+
+void HELPER(vexth_qu_du)(void *vd, void *vj, uint32_t desc)
+{
+ int i;
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
+
+ for (i = 0; i < oprsz / 16; i++) {
+ Vd->Q(i) = int128_make64(Vj->UD(2 * i + 1));
+ }
+}
+
+VEXTH(vexth_h_b, 16, H, B)
+VEXTH(vexth_w_h, 32, W, H)
+VEXTH(vexth_d_w, 64, D, W)
+VEXTH(vexth_hu_bu, 16, UH, UB)
+VEXTH(vexth_wu_hu, 32, UW, UH)
+VEXTH(vexth_du_wu, 64, UD, UW)
+
+#define VEXT2XV(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \
+{ \
+ int i; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+ temp.E1(i) = Vj->E2(i); \
+ } \
+ *Vd = temp; \
+}
+
+VEXT2XV(vext2xv_h_b, 16, H, B)
+VEXT2XV(vext2xv_w_b, 32, W, B)
+VEXT2XV(vext2xv_d_b, 64, D, B)
+VEXT2XV(vext2xv_w_h, 32, W, H)
+VEXT2XV(vext2xv_d_h, 64, D, H)
+VEXT2XV(vext2xv_d_w, 64, D, W)
+VEXT2XV(vext2xv_hu_bu, 16, UH, UB)
+VEXT2XV(vext2xv_wu_bu, 32, UW, UB)
+VEXT2XV(vext2xv_du_bu, 64, UD, UB)
+VEXT2XV(vext2xv_wu_hu, 32, UW, UH)
+VEXT2XV(vext2xv_du_hu, 64, UD, UH)
+VEXT2XV(vext2xv_du_wu, 64, UD, UW)
+
+DO_3OP(vsigncov_b, 8, B, DO_SIGNCOV)
+DO_3OP(vsigncov_h, 16, H, DO_SIGNCOV)
+DO_3OP(vsigncov_w, 32, W, DO_SIGNCOV)
+DO_3OP(vsigncov_d, 64, D, DO_SIGNCOV)
+
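+/* Gather the sign bit of each byte of VAL into the low 8 bits of the result. */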
+static uint64_t do_vmskltz_b(int64_t val)
+{
+ uint64_t m = 0x8080808080808080ULL;
+ uint64_t c = val & m;
+ c |= c << 7;
+ c |= c << 14;
+ c |= c << 28;
+ return c >> 56;
+}
+
+void HELPER(vmskltz_b)(void *vd, void *vj, uint32_t desc)
+{
+ int i;
+ uint16_t temp = 0;
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
+
+ for (i = 0; i < oprsz / 16; i++) {
+ temp = 0;
+ temp = do_vmskltz_b(Vj->D(2 * i));
+ temp |= (do_vmskltz_b(Vj->D(2 * i + 1)) << 8);
+ Vd->D(2 * i) = temp;
+ Vd->D(2 * i + 1) = 0;
+ }
+}
+
+static uint64_t do_vmskltz_h(int64_t val)
+{
+ uint64_t m = 0x8000800080008000ULL;
+ uint64_t c = val & m;
+ c |= c << 15;
+ c |= c << 30;
+ return c >> 60;
+}
+
+void HELPER(vmskltz_h)(void *vd, void *vj, uint32_t desc)
+{
+ int i;
+ uint16_t temp = 0;
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
+
+ for (i = 0; i < oprsz / 16; i++) {
+ temp = 0;
+ temp = do_vmskltz_h(Vj->D(2 * i));
+ temp |= (do_vmskltz_h(Vj->D(2 * i + 1)) << 4);
+ Vd->D(2 * i) = temp;
+ Vd->D(2 * i + 1) = 0;
+ }
+}
+
+static uint64_t do_vmskltz_w(int64_t val)
+{
+ uint64_t m = 0x8000000080000000ULL;
+ uint64_t c = val & m;
+ c |= c << 31;
+ return c >> 62;
+}
+
+void HELPER(vmskltz_w)(void *vd, void *vj, uint32_t desc)
+{
+ int i;
+ uint16_t temp = 0;
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
+
+ for (i = 0; i < oprsz / 16; i++) {
+ temp = 0;
+ temp = do_vmskltz_w(Vj->D(2 * i));
+ temp |= (do_vmskltz_w(Vj->D(2 * i + 1)) << 2);
+ Vd->D(2 * i) = temp;
+ Vd->D(2 * i + 1) = 0;
+ }
+}
+
+static uint64_t do_vmskltz_d(int64_t val)
+{
+ return (uint64_t)val >> 63;
+}
+
+void HELPER(vmskltz_d)(void *vd, void *vj, uint32_t desc)
+{
+ int i;
+ uint16_t temp = 0;
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
+
+ for (i = 0; i < oprsz / 16; i++) {
+ temp = 0;
+ temp = do_vmskltz_d(Vj->D(2 * i));
+ temp |= (do_vmskltz_d(Vj->D(2 * i + 1)) << 1);
+ Vd->D(2 * i) = temp;
+ Vd->D(2 * i + 1) = 0;
+ }
+}
+
+void HELPER(vmskgez_b)(void *vd, void *vj, uint32_t desc)
+{
+ int i;
+ uint16_t temp = 0;
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
+
+ for (i = 0; i < oprsz / 16; i++) {
+ temp = 0;
+ temp = do_vmskltz_b(Vj->D(2 * i));
+ temp |= (do_vmskltz_b(Vj->D(2 * i + 1)) << 8);
+ Vd->D(2 * i) = (uint16_t)(~temp);
+ Vd->D(2 * i + 1) = 0;
+ }
+}
+
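+/*
+ * Set bit n of the result when byte n of A is zero (has-zero-byte trick),
+ * then pack the eight per-byte flags into the low 8 bits.
+ */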
+static uint64_t do_vmskez_b(uint64_t a)
+{
+ uint64_t m = 0x7f7f7f7f7f7f7f7fULL;
+ uint64_t c = ~(((a & m) + m) | a | m);
+ c |= c << 7;
+ c |= c << 14;
+ c |= c << 28;
+ return c >> 56;
+}
+
+void HELPER(vmsknz_b)(void *vd, void *vj, uint32_t desc)
+{
+ int i;
+ uint16_t temp = 0;
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
+
+ for (i = 0; i < oprsz / 16; i++) {
+ temp = 0;
+ temp = do_vmskez_b(Vj->D(2 * i));
+ temp |= (do_vmskez_b(Vj->D(2 * i + 1)) << 8);
+ Vd->D(2 * i) = (uint16_t)(~temp);
+ Vd->D(2 * i + 1) = 0;
+ }
+}
+
+void HELPER(vnori_b)(void *vd, void *vj, uint64_t imm, uint32_t desc)
+{
+ int i;
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+
+ for (i = 0; i < simd_oprsz(desc); i++) {
+ Vd->B(i) = ~(Vj->B(i) | (uint8_t)imm);
+ }
+}
+
+#define VSLLWIL(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ typedef __typeof(temp.E1(0)) TD; \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.E1(j + ofs * i) = (TD)Vj->E2(j + ofs * 2 * i) << (imm % BIT); \
+ } \
+ } \
+ *Vd = temp; \
+}
+
+void HELPER(vextl_q_d)(void *vd, void *vj, uint32_t desc)
+{
+ int i;
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
+
+ for (i = 0; i < oprsz / 16; i++) {
+ Vd->Q(i) = int128_makes64(Vj->D(2 * i));
+ }
+}
+
+void HELPER(vextl_qu_du)(void *vd, void *vj, uint32_t desc)
+{
+ int i;
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
+
+ for (i = 0; i < oprsz / 16; i++) {
+ Vd->Q(i) = int128_make64(Vj->UD(2 * i));
+ }
+}
+
+VSLLWIL(vsllwil_h_b, 16, H, B)
+VSLLWIL(vsllwil_w_h, 32, W, H)
+VSLLWIL(vsllwil_d_w, 64, D, W)
+VSLLWIL(vsllwil_hu_bu, 16, UH, UB)
+VSLLWIL(vsllwil_wu_hu, 32, UW, UH)
+VSLLWIL(vsllwil_du_wu, 64, UD, UW)
+
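+/* Logical right shift by SH, rounding by adding back the last bit shifted out. */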
+#define do_vsrlr(E, T) \
+static T do_vsrlr_ ##E(T s1, int sh) \
+{ \
+ if (sh == 0) { \
+ return s1; \
+ } else { \
+ return (s1 >> sh) + ((s1 >> (sh - 1)) & 0x1); \
+ } \
+}
+
+do_vsrlr(B, uint8_t)
+do_vsrlr(H, uint16_t)
+do_vsrlr(W, uint32_t)
+do_vsrlr(D, uint64_t)
+
+#define VSRLR(NAME, BIT, T, E) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+ Vd->E(i) = do_vsrlr_ ## E(Vj->E(i), ((T)Vk->E(i)) % BIT); \
+ } \
+}
+
+VSRLR(vsrlr_b, 8, uint8_t, B)
+VSRLR(vsrlr_h, 16, uint16_t, H)
+VSRLR(vsrlr_w, 32, uint32_t, W)
+VSRLR(vsrlr_d, 64, uint64_t, D)
+
+#define VSRLRI(NAME, BIT, E) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+ Vd->E(i) = do_vsrlr_ ## E(Vj->E(i), imm); \
+ } \
+}
+
+VSRLRI(vsrlri_b, 8, B)
+VSRLRI(vsrlri_h, 16, H)
+VSRLRI(vsrlri_w, 32, W)
+VSRLRI(vsrlri_d, 64, D)
+
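+/* Arithmetic right shift by SH, rounding by adding back the last bit shifted out. */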
+#define do_vsrar(E, T) \
+static T do_vsrar_ ##E(T s1, int sh) \
+{ \
+ if (sh == 0) { \
+ return s1; \
+ } else { \
+ return (s1 >> sh) + ((s1 >> (sh - 1)) & 0x1); \
+ } \
+}
+
+do_vsrar(B, int8_t)
+do_vsrar(H, int16_t)
+do_vsrar(W, int32_t)
+do_vsrar(D, int64_t)
+
+#define VSRAR(NAME, BIT, T, E) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+ Vd->E(i) = do_vsrar_ ## E(Vj->E(i), ((T)Vk->E(i)) % BIT); \
+ } \
+}
+
+VSRAR(vsrar_b, 8, uint8_t, B)
+VSRAR(vsrar_h, 16, uint16_t, H)
+VSRAR(vsrar_w, 32, uint32_t, W)
+VSRAR(vsrar_d, 64, uint64_t, D)
+
+#define VSRARI(NAME, BIT, E) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+ Vd->E(i) = do_vsrar_ ## E(Vj->E(i), imm); \
+ } \
+}
+
+VSRARI(vsrari_b, 8, B)
+VSRARI(vsrari_h, 16, H)
+VSRARI(vsrari_w, 32, W)
+VSRARI(vsrari_d, 64, D)
+
+#define VSRLN(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ Vd->E1(j + ofs * 2 * i) = R_SHIFT(Vj->E2(j + ofs * i), \
+ Vk->E2(j + ofs * i) % BIT); \
+ } \
+ Vd->D(2 * i + 1) = 0; \
+ } \
+}
+
+VSRLN(vsrln_b_h, 16, B, UH)
+VSRLN(vsrln_h_w, 32, H, UW)
+VSRLN(vsrln_w_d, 64, W, UD)
+
+#define VSRAN(NAME, BIT, E1, E2, E3) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ Vd->E1(j + ofs * 2 * i) = R_SHIFT(Vj->E2(j + ofs * i), \
+ Vk->E3(j + ofs * i) % BIT); \
+ } \
+ Vd->D(2 * i + 1) = 0; \
+ } \
+}
+
+VSRAN(vsran_b_h, 16, B, H, UH)
+VSRAN(vsran_h_w, 32, H, W, UW)
+VSRAN(vsran_w_d, 64, W, D, UD)
+
+#define VSRLNI(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.E1(j + ofs * 2 * i) = R_SHIFT(Vj->E2(j + ofs * i), imm); \
+ temp.E1(j + ofs * (2 * i + 1)) = R_SHIFT(Vd->E2(j + ofs * i), \
+ imm); \
+ } \
+ } \
+ *Vd = temp; \
+}
+
+void HELPER(vsrlni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
+{
+ int i;
+ VReg temp = {};
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+
+ for (i = 0; i < 2; i++) {
+ temp.D(2 * i) = int128_getlo(int128_urshift(Vj->Q(i), imm % 128));
+ temp.D(2 * i + 1) = int128_getlo(int128_urshift(Vd->Q(i), imm % 128));
+ }
+ *Vd = temp;
+}
+
+VSRLNI(vsrlni_b_h, 16, B, UH)
+VSRLNI(vsrlni_h_w, 32, H, UW)
+VSRLNI(vsrlni_w_d, 64, W, UD)
+
+#define VSRANI(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.E1(j + ofs * 2 * i) = R_SHIFT(Vj->E2(j + ofs * i), imm); \
+ temp.E1(j + ofs * (2 * i + 1)) = R_SHIFT(Vd->E2(j + ofs * i), \
+ imm); \
+ } \
+ } \
+ *Vd = temp; \
+}
+
+void HELPER(vsrani_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
+{
+ int i;
+ VReg temp = {};
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+
+ for (i = 0; i < 2; i++) {
+ temp.D(2 * i) = int128_getlo(int128_rshift(Vj->Q(i), imm % 128));
+ temp.D(2 * i + 1) = int128_getlo(int128_rshift(Vd->Q(i), imm % 128));
+ }
+ *Vd = temp;
+}
+
+VSRANI(vsrani_b_h, 16, B, H)
+VSRANI(vsrani_h_w, 32, H, W)
+VSRANI(vsrani_w_d, 64, W, D)
+
+#define VSRLRN(NAME, BIT, E1, E2, E3) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ Vd->E1(j + ofs * 2 * i) = do_vsrlr_ ##E2(Vj->E2(j + ofs * i), \
+ Vk->E3(j + ofs * i) % BIT); \
+ } \
+ Vd->D(2 * i + 1) = 0; \
+ } \
+}
+
+VSRLRN(vsrlrn_b_h, 16, B, H, UH)
+VSRLRN(vsrlrn_h_w, 32, H, W, UW)
+VSRLRN(vsrlrn_w_d, 64, W, D, UD)
+
+#define VSRARN(NAME, BIT, E1, E2, E3) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ Vd->E1(j + ofs * 2 * i) = do_vsrar_ ## E2(Vj->E2(j + ofs * i), \
+ Vk->E3(j + ofs * i) % BIT); \
+ } \
+ Vd->D(2 * i + 1) = 0; \
+ } \
+}
+
+VSRARN(vsrarn_b_h, 16, B, H, UH)
+VSRARN(vsrarn_h_w, 32, H, W, UW)
+VSRARN(vsrarn_w_d, 64, W, D, UD)
+
+#define VSRLRNI(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.E1(j + ofs * 2 * i) = do_vsrlr_ ## E2(Vj->E2(j + ofs * i), imm); \
+ temp.E1(j + ofs * (2 * i + 1)) = do_vsrlr_ ## E2(Vd->E2(j + ofs * i), \
+ imm); \
+ } \
+ } \
+ *Vd = temp; \
+}
+
+void HELPER(vsrlrni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
+{
+ int i;
+ VReg temp = {};
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ Int128 r[4];
+ int oprsz = simd_oprsz(desc);
+
+ for (i = 0; i < oprsz / 16; i++) {
+ if (imm == 0) {
+ temp.D(2 * i) = int128_getlo(Vj->Q(i));
+ temp.D(2 * i + 1) = int128_getlo(Vd->Q(i));
+ } else {
+ r[2 * i] = int128_and(int128_urshift(Vj->Q(i), (imm - 1)),
+ int128_one());
+ r[2 * i + 1] = int128_and(int128_urshift(Vd->Q(i), (imm - 1)),
+ int128_one());
+ temp.D(2 * i) = int128_getlo(int128_add(int128_urshift(Vj->Q(i),
+ imm), r[2 * i]));
+ temp.D(2 * i + 1) = int128_getlo(int128_add(int128_urshift(Vd->Q(i),
+ imm), r[2 * i + 1]));
+ }
+ }
+ *Vd = temp;
+}
+
+VSRLRNI(vsrlrni_b_h, 16, B, H)
+VSRLRNI(vsrlrni_h_w, 32, H, W)
+VSRLRNI(vsrlrni_w_d, 64, W, D)
+
+#define VSRARNI(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.E1(j + ofs * 2 * i) = do_vsrar_ ## E2(Vj->E2(j + ofs * i), imm); \
+ temp.E1(j + ofs * (2 * i + 1)) = do_vsrar_ ## E2(Vd->E2(j + ofs * i), \
+ imm); \
+ } \
+ } \
+ *Vd = temp; \
+}
+
+void HELPER(vsrarni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
+{
+ int i;
+ VReg temp = {};
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ Int128 r[4];
+ int oprsz = simd_oprsz(desc);
+
+ for (i = 0; i < oprsz / 16; i++) {
+ if (imm == 0) {
+ temp.D(2 * i) = int128_getlo(Vj->Q(i));
+ temp.D(2 * i + 1) = int128_getlo(Vd->Q(i));
+ } else {
+ r[2 * i] = int128_and(int128_rshift(Vj->Q(i), (imm - 1)),
+ int128_one());
+ r[2 * i + 1] = int128_and(int128_rshift(Vd->Q(i), (imm - 1)),
+ int128_one());
+ temp.D(2 * i) = int128_getlo(int128_add(int128_rshift(Vj->Q(i),
+ imm), r[2 * i]));
+ temp.D(2 * i + 1) = int128_getlo(int128_add(int128_rshift(Vd->Q(i),
+ imm), r[2 * i + 1]));
+ }
+ }
+ *Vd = temp;
+}
+
+VSRARNI(vsrarni_b_h, 16, B, H)
+VSRARNI(vsrarni_h_w, 32, H, W)
+VSRARNI(vsrarni_w_d, 64, W, D)
+
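+/* Logical right shift by SA, saturating the result to at most (2^SH - 1). */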
+#define SSRLNS(NAME, T1, T2, T3) \
+static T1 do_ssrlns_ ## NAME(T2 e2, int sa, int sh) \
+{ \
+ T1 shft_res; \
+ if (sa == 0) { \
+ shft_res = e2; \
+ } else { \
+ shft_res = (((T1)e2) >> sa); \
+ } \
+ T3 mask; \
+ mask = (1ull << sh) - 1; \
+ if (shft_res > mask) { \
+ return mask; \
+ } else { \
+ return shft_res; \
+ } \
+}
+
+SSRLNS(B, uint16_t, int16_t, uint8_t)
+SSRLNS(H, uint32_t, int32_t, uint16_t)
+SSRLNS(W, uint64_t, int64_t, uint32_t)
+
+#define VSSRLN(NAME, BIT, E1, E2, E3) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ Vd->E1(j + ofs * 2 * i) = do_ssrlns_ ## E1(Vj->E2(j + ofs * i), \
+ Vk->E3(j + ofs * i) % BIT, \
+ BIT / 2 - 1); \
+ } \
+ Vd->D(2 * i + 1) = 0; \
+ } \
+}
+
+VSSRLN(vssrln_b_h, 16, B, H, UH)
+VSSRLN(vssrln_h_w, 32, H, W, UW)
+VSSRLN(vssrln_w_d, 64, W, D, UD)
+
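+/* Arithmetic right shift by SA, saturating to the signed range [-2^SH, 2^SH - 1]. */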
+#define SSRANS(E, T1, T2) \
+static T1 do_ssrans_ ## E(T1 e2, int sa, int sh) \
+{ \
+ T1 shft_res; \
+ if (sa == 0) { \
+ shft_res = e2; \
+ } else { \
+ shft_res = e2 >> sa; \
+ } \
+ T2 mask; \
+ mask = (1ll << sh) - 1; \
+ if (shft_res > mask) { \
+ return mask; \
+ } else if (shft_res < -(mask + 1)) { \
+ return ~mask; \
+ } else { \
+ return shft_res; \
+ } \
+}
+
+SSRANS(B, int16_t, int8_t)
+SSRANS(H, int32_t, int16_t)
+SSRANS(W, int64_t, int32_t)
+
+#define VSSRAN(NAME, BIT, E1, E2, E3) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ Vd->E1(j + ofs * 2 * i) = do_ssrans_ ## E1(Vj->E2(j + ofs * i), \
+ Vk->E3(j + ofs * i) % BIT, \
+ BIT / 2 - 1); \
+ } \
+ Vd->D(2 * i + 1) = 0; \
+ } \
+}
+
+VSSRAN(vssran_b_h, 16, B, H, UH)
+VSSRAN(vssran_h_w, 32, H, W, UW)
+VSSRAN(vssran_w_d, 64, W, D, UD)
+
+#define SSRLNU(E, T1, T2, T3) \
+static T1 do_ssrlnu_ ## E(T3 e2, int sa, int sh) \
+{ \
+ T1 shft_res; \
+ if (sa == 0) { \
+ shft_res = e2; \
+ } else { \
+ shft_res = (((T1)e2) >> sa); \
+ } \
+ T2 mask; \
+ mask = (1ull << sh) - 1; \
+ if (shft_res > mask) { \
+ return mask; \
+ } else { \
+ return shft_res; \
+ } \
+}
+
+SSRLNU(B, uint16_t, uint8_t, int16_t)
+SSRLNU(H, uint32_t, uint16_t, int32_t)
+SSRLNU(W, uint64_t, uint32_t, int64_t)
+
+#define VSSRLNU(NAME, BIT, E1, E2, E3) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ Vd->E1(j + ofs * 2 * i) = do_ssrlnu_ ## E1(Vj->E2(j + ofs * i), \
+ Vk->E3(j + ofs * i) % BIT, \
+ BIT / 2); \
+ } \
+ Vd->D(2 * i + 1) = 0; \
+ } \
+}
+
+VSSRLNU(vssrln_bu_h, 16, B, H, UH)
+VSSRLNU(vssrln_hu_w, 32, H, W, UW)
+VSSRLNU(vssrln_wu_d, 64, W, D, UD)
+
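+/*
+ * Arithmetic right shift by SA; negative inputs yield 0 and results
+ * saturate to at most (2^SH - 1).
+ */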
+#define SSRANU(E, T1, T2, T3) \
+static T1 do_ssranu_ ## E(T3 e2, int sa, int sh) \
+{ \
+ T1 shft_res; \
+ if (sa == 0) { \
+ shft_res = e2; \
+ } else { \
+ shft_res = e2 >> sa; \
+ } \
+ if (e2 < 0) { \
+ shft_res = 0; \
+ } \
+ T2 mask; \
+ mask = (1ull << sh) - 1; \
+ if (shft_res > mask) { \
+ return mask; \
+ } else { \
+ return shft_res; \
+ } \
+}
+
+SSRANU(B, uint16_t, uint8_t, int16_t)
+SSRANU(H, uint32_t, uint16_t, int32_t)
+SSRANU(W, uint64_t, uint32_t, int64_t)
+
+#define VSSRANU(NAME, BIT, E1, E2, E3) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ Vd->E1(j + ofs * 2 * i) = do_ssranu_ ## E1(Vj->E2(j + ofs * i), \
+ Vk->E3(j + ofs * i) % BIT, \
+ BIT / 2); \
+ } \
+ Vd->D(2 * i + 1) = 0; \
+ } \
+}
+
+VSSRANU(vssran_bu_h, 16, B, H, UH)
+VSSRANU(vssran_hu_w, 32, H, W, UW)
+VSSRANU(vssran_wu_d, 64, W, D, UD)
+
+#define VSSRLNI(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.E1(j + ofs * 2 * i) = do_ssrlns_ ## E1(Vj->E2(j + ofs * i), \
+ imm, BIT / 2 - 1); \
+ temp.E1(j + ofs * (2 * i + 1)) = do_ssrlns_ ## E1(Vd->E2(j + ofs * i), \
+ imm, BIT / 2 - 1); \
+ } \
+ } \
+ *Vd = temp; \
+}
+
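+/*
+ * Narrow 128-bit lane IDX: logically shift Vj->Q(idx) and the old Vd->Q(idx)
+ * right by IMM, saturate each to MASK, and store the low 64 bits in the
+ * even/odd doublewords of the destination lane.
+ */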
+static void do_vssrlni_q(VReg *Vd, VReg *Vj,
+ uint64_t imm, int idx, Int128 mask)
+{
+ Int128 shft_res1, shft_res2;
+
+ if (imm == 0) {
+ shft_res1 = Vj->Q(idx);
+ shft_res2 = Vd->Q(idx);
+ } else {
+ shft_res1 = int128_urshift(Vj->Q(idx), imm);
+ shft_res2 = int128_urshift(Vd->Q(idx), imm);
+ }
+
+ if (int128_ult(mask, shft_res1)) {
+ Vd->D(idx * 2) = int128_getlo(mask);
+ } else {
+ Vd->D(idx * 2) = int128_getlo(shft_res1);
+ }
+
+ if (int128_ult(mask, shft_res2)) {
+ Vd->D(idx * 2 + 1) = int128_getlo(mask);
+ } else {
+ Vd->D(idx * 2 + 1) = int128_getlo(shft_res2);
+ }
+}
+
+void HELPER(vssrlni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
+{
+ int i;
+ Int128 mask;
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
+
+ mask = int128_sub(int128_lshift(int128_one(), 63), int128_one());
+
+ for (i = 0; i < oprsz / 16; i++) {
+ do_vssrlni_q(Vd, Vj, imm, i, mask);
+ }
+}
+
+VSSRLNI(vssrlni_b_h, 16, B, H)
+VSSRLNI(vssrlni_h_w, 32, H, W)
+VSSRLNI(vssrlni_w_d, 64, W, D)
+
+#define VSSRANI(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.E1(j + ofs * 2 * i) = do_ssrans_ ## E1(Vj->E2(j + ofs * i), \
+ imm, BIT / 2 - 1); \
+ temp.E1(j + ofs * (2 * i + 1)) = do_ssrans_ ## E1(Vd->E2(j + ofs * i), \
+ imm, BIT / 2 - 1); \
+ } \
+ } \
+ *Vd = temp; \
+}
+
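+/*
+ * Narrow 128-bit lane IDX with an arithmetic right shift by IMM,
+ * saturating each result to the signed 64-bit range before storing it.
+ */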
+static void do_vssrani_d_q(VReg *Vd, VReg *Vj,
+ uint64_t imm, int idx, Int128 mask, Int128 min)
+{
+ Int128 shft_res1, shft_res2;
+
+ if (imm == 0) {
+ shft_res1 = Vj->Q(idx);
+ shft_res2 = Vd->Q(idx);
+ } else {
+ shft_res1 = int128_rshift(Vj->Q(idx), imm);
+ shft_res2 = int128_rshift(Vd->Q(idx), imm);
+ }
+
+ if (int128_gt(shft_res1, mask)) {
+ Vd->D(idx * 2) = int128_getlo(mask);
+ } else if (int128_lt(shft_res1, int128_neg(min))) {
+ Vd->D(idx * 2) = int128_getlo(min);
+ } else {
+ Vd->D(idx * 2) = int128_getlo(shft_res1);
+ }
+
+ if (int128_gt(shft_res2, mask)) {
+ Vd->D(idx * 2 + 1) = int128_getlo(mask);
+ } else if (int128_lt(shft_res2, int128_neg(min))) {
+ Vd->D(idx * 2 + 1) = int128_getlo(min);
+ } else {
+ Vd->D(idx * 2 + 1) = int128_getlo(shft_res2);
+ }
+}
+
+void HELPER(vssrani_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
+{
+ int i;
+ Int128 mask, min;
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
+
+ mask = int128_sub(int128_lshift(int128_one(), 63), int128_one());
+ min = int128_lshift(int128_one(), 63);
+
+ for (i = 0; i < oprsz / 16; i++) {
+ do_vssrani_d_q(Vd, Vj, imm, i, mask, min);
+ }
+}
+
+VSSRANI(vssrani_b_h, 16, B, H)
+VSSRANI(vssrani_h_w, 32, H, W)
+VSSRANI(vssrani_w_d, 64, W, D)
+
+#define VSSRLNUI(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.E1(j + ofs * 2 * i) = do_ssrlnu_ ## E1(Vj->E2(j + ofs * i), \
+ imm, BIT / 2); \
+ temp.E1(j + ofs * (2 * i + 1)) = do_ssrlnu_ ## E1(Vd->E2(j + ofs * i), \
+ imm, BIT / 2); \
+ } \
+ } \
+ *Vd = temp; \
+}
+
+void HELPER(vssrlni_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
+{
+ int i;
+ Int128 mask;
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
+
+ mask = int128_sub(int128_lshift(int128_one(), 64), int128_one());
+
+ for (i = 0; i < oprsz / 16; i++) {
+ do_vssrlni_q(Vd, Vj, imm, i, mask);
+ }
+}
+
+VSSRLNUI(vssrlni_bu_h, 16, B, H)
+VSSRLNUI(vssrlni_hu_w, 32, H, W)
+VSSRLNUI(vssrlni_wu_d, 64, W, D)
+
+#define VSSRANUI(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.E1(j + ofs * 2 * i) = do_ssranu_ ## E1(Vj->E2(j + ofs * i), \
+ imm, BIT / 2); \
+ temp.E1(j + ofs * (2 * i + 1)) = do_ssranu_ ## E1(Vd->E2(j + ofs * i), \
+ imm, BIT / 2); \
+ } \
+ } \
+ *Vd = temp; \
+}
+
+static void do_vssrani_du_q(VReg *Vd, VReg *Vj,
+ uint64_t imm, int idx, Int128 mask)
+{
+ Int128 shft_res1, shft_res2;
+
+ if (imm == 0) {
+ shft_res1 = Vj->Q(idx);
+ shft_res2 = Vd->Q(idx);
+ } else {
+ shft_res1 = int128_rshift(Vj->Q(idx), imm);
+ shft_res2 = int128_rshift(Vd->Q(idx), imm);
+ }
+
+ if (int128_lt(Vj->Q(idx), int128_zero())) {
+ shft_res1 = int128_zero();
+ }
+
+ if (int128_lt(Vd->Q(idx), int128_zero())) {
+ shft_res2 = int128_zero();
+ }
+ if (int128_ult(mask, shft_res1)) {
+ Vd->D(idx * 2) = int128_getlo(mask);
+ } else {
+ Vd->D(idx * 2) = int128_getlo(shft_res1);
+ }
+
+ if (int128_ult(mask, shft_res2)) {
+ Vd->D(idx * 2 + 1) = int128_getlo(mask);
+ } else {
+ Vd->D(idx * 2 + 1) = int128_getlo(shft_res2);
+ }
+}
+
+void HELPER(vssrani_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
+{
+ int i;
+ Int128 mask;
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
+
+ mask = int128_sub(int128_lshift(int128_one(), 64), int128_one());
+
+ for (i = 0; i < oprsz / 16; i++) {
+ do_vssrani_du_q(Vd, Vj, imm, i, mask);
+ }
+}
+
+VSSRANUI(vssrani_bu_h, 16, B, H)
+VSSRANUI(vssrani_hu_w, 32, H, W)
+VSSRANUI(vssrani_wu_d, 64, W, D)
+
+#define SSRLRNS(E1, E2, T1, T2, T3) \
+static T1 do_ssrlrns_ ## E1(T2 e2, int sa, int sh) \
+{ \
+ T1 shft_res; \
+ \
+ shft_res = do_vsrlr_ ## E2(e2, sa); \
+ T1 mask; \
+ mask = (1ull << sh) - 1; \
+ if (shft_res > mask) { \
+ return mask; \
+ } else { \
+ return shft_res; \
+ } \
+}
+
+SSRLRNS(B, H, uint16_t, int16_t, uint8_t)
+SSRLRNS(H, W, uint32_t, int32_t, uint16_t)
+SSRLRNS(W, D, uint64_t, int64_t, uint32_t)
+
+#define VSSRLRN(NAME, BIT, E1, E2, E3) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ Vd->E1(j + ofs * 2 * i) = do_ssrlrns_ ## E1(Vj->E2(j + ofs * i), \
+ Vk->E3(j + ofs * i) % BIT, \
+ BIT / 2 - 1); \
+ } \
+ Vd->D(2 * i + 1) = 0; \
+ } \
+}
+
+VSSRLRN(vssrlrn_b_h, 16, B, H, UH)
+VSSRLRN(vssrlrn_h_w, 32, H, W, UW)
+VSSRLRN(vssrlrn_w_d, 64, W, D, UD)
+
+#define SSRARNS(E1, E2, T1, T2) \
+static T1 do_ssrarns_ ## E1(T1 e2, int sa, int sh) \
+{ \
+ T1 shft_res; \
+ \
+ shft_res = do_vsrar_ ## E2(e2, sa); \
+ T2 mask; \
+ mask = (1ll << sh) - 1; \
+ if (shft_res > mask) { \
+ return mask; \
+ } else if (shft_res < -(mask + 1)) { \
+ return ~mask; \
+ } else { \
+ return shft_res; \
+ } \
+}
+
+SSRARNS(B, H, int16_t, int8_t)
+SSRARNS(H, W, int32_t, int16_t)
+SSRARNS(W, D, int64_t, int32_t)
+
+#define VSSRARN(NAME, BIT, E1, E2, E3) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ Vd->E1(j + ofs * 2 * i) = do_ssrarns_ ## E1(Vj->E2(j + ofs * i), \
+ Vk->E3(j + ofs * i) % BIT, \
+ BIT / 2 - 1); \
+ } \
+ Vd->D(2 * i + 1) = 0; \
+ } \
+}
+
+VSSRARN(vssrarn_b_h, 16, B, H, UH)
+VSSRARN(vssrarn_h_w, 32, H, W, UW)
+VSSRARN(vssrarn_w_d, 64, W, D, UD)
+
+#define SSRLRNU(E1, E2, T1, T2, T3) \
+static T1 do_ssrlrnu_ ## E1(T3 e2, int sa, int sh) \
+{ \
+ T1 shft_res; \
+ \
+ shft_res = do_vsrlr_ ## E2(e2, sa); \
+ \
+ T2 mask; \
+ mask = (1ull << sh) - 1; \
+ if (shft_res > mask) { \
+ return mask; \
+ } else { \
+ return shft_res; \
+ } \
+}
+
+SSRLRNU(B, H, uint16_t, uint8_t, int16_t)
+SSRLRNU(H, W, uint32_t, uint16_t, int32_t)
+SSRLRNU(W, D, uint64_t, uint32_t, int64_t)
+
+#define VSSRLRNU(NAME, BIT, E1, E2, E3) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ Vd->E1(j + ofs * 2 * i) = do_ssrlrnu_ ## E1(Vj->E2(j + ofs * i), \
+ Vk->E3(j + ofs * i) % BIT, \
+ BIT / 2); \
+ } \
+ Vd->D(2 * i + 1) = 0; \
+ } \
+}
+
+VSSRLRNU(vssrlrn_bu_h, 16, B, H, UH)
+VSSRLRNU(vssrlrn_hu_w, 32, H, W, UW)
+VSSRLRNU(vssrlrn_wu_d, 64, W, D, UD)
+
+#define SSRARNU(E1, E2, T1, T2, T3) \
+static T1 do_ssrarnu_ ## E1(T3 e2, int sa, int sh) \
+{ \
+ T1 shft_res; \
+ \
+ if (e2 < 0) { \
+ shft_res = 0; \
+ } else { \
+ shft_res = do_vsrar_ ## E2(e2, sa); \
+ } \
+ T2 mask; \
+ mask = (1ull << sh) - 1; \
+ if (shft_res > mask) { \
+ return mask; \
+ } else { \
+ return shft_res; \
+ } \
+}
+
+SSRARNU(B, H, uint16_t, uint8_t, int16_t)
+SSRARNU(H, W, uint32_t, uint16_t, int32_t)
+SSRARNU(W, D, uint64_t, uint32_t, int64_t)
+
+#define VSSRARNU(NAME, BIT, E1, E2, E3) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ Vd->E1(j + ofs * 2 * i) = do_ssrarnu_ ## E1(Vj->E2(j + ofs * i), \
+ Vk->E3(j + ofs * i) % BIT, \
+ BIT / 2); \
+ } \
+ Vd->D(2 * i + 1) = 0; \
+ } \
+}
+
+VSSRARNU(vssrarn_bu_h, 16, B, H, UH)
+VSSRARNU(vssrarn_hu_w, 32, H, W, UW)
+VSSRARNU(vssrarn_wu_d, 64, W, D, UD)
+
+#define VSSRLRNI(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.E1(j + ofs * 2 * i) = do_ssrlrns_ ## E1(Vj->E2(j + ofs * i), \
+ imm, BIT / 2 - 1); \
+ temp.E1(j + ofs * (2 * i + 1)) = do_ssrlrns_ ## E1(Vd->E2(j + ofs * i), \
+ imm, BIT / 2 - 1); \
+ } \
+ } \
+ *Vd = temp; \
+}
+
+static void do_vssrlrni_q(VReg *Vd, VReg *Vj,
+ uint64_t imm, int idx, Int128 mask)
+{
+ Int128 shft_res1, shft_res2, r1, r2;
+
+ if (imm == 0) {
+ shft_res1 = Vj->Q(idx);
+ shft_res2 = Vd->Q(idx);
+ } else {
+ r1 = int128_and(int128_urshift(Vj->Q(idx), (imm - 1)), int128_one());
+ r2 = int128_and(int128_urshift(Vd->Q(idx), (imm - 1)), int128_one());
+ shft_res1 = (int128_add(int128_urshift(Vj->Q(idx), imm), r1));
+ shft_res2 = (int128_add(int128_urshift(Vd->Q(idx), imm), r2));
+ }
+
+ if (int128_ult(mask, shft_res1)) {
+ Vd->D(idx * 2) = int128_getlo(mask);
+ } else {
+ Vd->D(idx * 2) = int128_getlo(shft_res1);
+ }
+
+ if (int128_ult(mask, shft_res2)) {
+ Vd->D(idx * 2 + 1) = int128_getlo(mask);
+ } else {
+ Vd->D(idx * 2 + 1) = int128_getlo(shft_res2);
+ }
+}
+
+void HELPER(vssrlrni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
+{
+ int i;
+ Int128 mask;
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
+
+ mask = int128_sub(int128_lshift(int128_one(), 63), int128_one());
+
+ for (i = 0; i < oprsz / 16; i++) {
+ do_vssrlrni_q(Vd, Vj, imm, i, mask);
+ }
+}
+
+VSSRLRNI(vssrlrni_b_h, 16, B, H)
+VSSRLRNI(vssrlrni_h_w, 32, H, W)
+VSSRLRNI(vssrlrni_w_d, 64, W, D)
+
+#define VSSRARNI(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.E1(j + ofs * 2 * i) = do_ssrarns_ ## E1(Vj->E2(j + ofs * i), \
+ imm, BIT / 2 - 1); \
+ temp.E1(j + ofs * (2 * i + 1)) = do_ssrarns_ ## E1(Vd->E2(j + ofs * i), \
+ imm, BIT / 2 - 1); \
+ } \
+ } \
+ *Vd = temp; \
+}
+
+static void do_vssrarni_d_q(VReg *Vd, VReg *Vj,
+ uint64_t imm, int idx, Int128 mask1, Int128 mask2)
+{
+ Int128 shft_res1, shft_res2, r1, r2;
+
+ if (imm == 0) {
+ shft_res1 = Vj->Q(idx);
+ shft_res2 = Vd->Q(idx);
+ } else {
+ r1 = int128_and(int128_rshift(Vj->Q(idx), (imm - 1)), int128_one());
+ r2 = int128_and(int128_rshift(Vd->Q(idx), (imm - 1)), int128_one());
+ shft_res1 = int128_add(int128_rshift(Vj->Q(idx), imm), r1);
+ shft_res2 = int128_add(int128_rshift(Vd->Q(idx), imm), r2);
+ }
+ if (int128_gt(shft_res1, mask1)) {
+ Vd->D(idx * 2) = int128_getlo(mask1);
+ } else if (int128_lt(shft_res1, int128_neg(mask2))) {
+ Vd->D(idx * 2) = int128_getlo(mask2);
+ } else {
+ Vd->D(idx * 2) = int128_getlo(shft_res1);
+ }
+
+ if (int128_gt(shft_res2, mask1)) {
+ Vd->D(idx * 2 + 1) = int128_getlo(mask1);
+ } else if (int128_lt(shft_res2, int128_neg(mask2))) {
+ Vd->D(idx * 2 + 1) = int128_getlo(mask2);
+ } else {
+ Vd->D(idx * 2 + 1) = int128_getlo(shft_res2);
+ }
+}
+
+void HELPER(vssrarni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
+{
+ int i;
+ Int128 mask1, mask2;
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
+
+ mask1 = int128_sub(int128_lshift(int128_one(), 63), int128_one());
+ mask2 = int128_lshift(int128_one(), 63);
+
+ for (i = 0; i < oprsz / 16; i++) {
+ do_vssrarni_d_q(Vd, Vj, imm, i, mask1, mask2);
+ }
+}
+
+VSSRARNI(vssrarni_b_h, 16, B, H)
+VSSRARNI(vssrarni_h_w, 32, H, W)
+VSSRARNI(vssrarni_w_d, 64, W, D)
+
+#define VSSRLRNUI(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.E1(j + ofs * 2 * i) = do_ssrlrnu_ ## E1(Vj->E2(j + ofs * i), \
+ imm, BIT / 2); \
+ temp.E1(j + ofs * (2 * i + 1)) = do_ssrlrnu_ ## E1(Vd->E2(j + ofs * i), \
+ imm, BIT / 2); \
+ } \
+ } \
+ *Vd = temp; \
+}
+
+void HELPER(vssrlrni_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
+{
+ int i;
+ Int128 mask;
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
+
+ mask = int128_sub(int128_lshift(int128_one(), 64), int128_one());
+
+ for (i = 0; i < oprsz / 16; i++) {
+ do_vssrlrni_q(Vd, Vj, imm, i, mask);
+ }
+}
+
+VSSRLRNUI(vssrlrni_bu_h, 16, B, H)
+VSSRLRNUI(vssrlrni_hu_w, 32, H, W)
+VSSRLRNUI(vssrlrni_wu_d, 64, W, D)
+
+#define VSSRARNUI(NAME, BIT, E1, E2) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.E1(j + ofs * 2 * i) = do_ssrarnu_ ## E1(Vj->E2(j + ofs * i), \
+ imm, BIT / 2); \
+ temp.E1(j + ofs * (2 * i + 1)) = do_ssrarnu_ ## E1(Vd->E2(j + ofs * i), \
+ imm, BIT / 2); \
+ } \
+ } \
+ *Vd = temp; \
+}
+
+static void do_vssrarni_du_q(VReg *Vd, VReg *Vj,
+ uint64_t imm, int idx, Int128 mask1, Int128 mask2)
+{
+ Int128 shft_res1, shft_res2, r1, r2;
+
+ if (imm == 0) {
+ shft_res1 = Vj->Q(idx);
+ shft_res2 = Vd->Q(idx);
+ } else {
+ r1 = int128_and(int128_rshift(Vj->Q(idx), (imm - 1)), int128_one());
+ r2 = int128_and(int128_rshift(Vd->Q(idx), (imm - 1)), int128_one());
+ shft_res1 = int128_add(int128_rshift(Vj->Q(idx), imm), r1);
+ shft_res2 = int128_add(int128_rshift(Vd->Q(idx), imm), r2);
+ }
+
+ if (int128_lt(Vj->Q(idx), int128_zero())) {
+ shft_res1 = int128_zero();
+ }
+ if (int128_lt(Vd->Q(idx), int128_zero())) {
+ shft_res2 = int128_zero();
+ }
+
+ if (int128_gt(shft_res1, mask1)) {
+ Vd->D(idx * 2) = int128_getlo(mask1);
+ } else if (int128_lt(shft_res1, int128_neg(mask2))) {
+ Vd->D(idx * 2) = int128_getlo(mask2);
+ } else {
+ Vd->D(idx * 2) = int128_getlo(shft_res1);
+ }
+
+ if (int128_gt(shft_res2, mask1)) {
+ Vd->D(idx * 2 + 1) = int128_getlo(mask1);
+ } else if (int128_lt(shft_res2, int128_neg(mask2))) {
+ Vd->D(idx * 2 + 1) = int128_getlo(mask2);
+ } else {
+ Vd->D(idx * 2 + 1) = int128_getlo(shft_res2);
+ }
+}
+
+void HELPER(vssrarni_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
+{
+ int i;
+ Int128 mask1, mask2;
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
+
+ mask1 = int128_sub(int128_lshift(int128_one(), 64), int128_one());
+ mask2 = int128_lshift(int128_one(), 64);
+
+ for (i = 0; i < oprsz / 16; i++) {
+ do_vssrarni_du_q(Vd, Vj, imm, i, mask1, mask2);
+ }
+}
+
+VSSRARNUI(vssrarni_bu_h, 16, B, H)
+VSSRARNUI(vssrarni_hu_w, 32, H, W)
+VSSRARNUI(vssrarni_wu_d, 64, W, D)
+
+#define DO_2OP(NAME, BIT, E, DO_OP) \
+void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+ Vd->E(i) = DO_OP(Vj->E(i)); \
+ } \
+}
+
+DO_2OP(vclo_b, 8, UB, DO_CLO_B)
+DO_2OP(vclo_h, 16, UH, DO_CLO_H)
+DO_2OP(vclo_w, 32, UW, DO_CLO_W)
+DO_2OP(vclo_d, 64, UD, DO_CLO_D)
+DO_2OP(vclz_b, 8, UB, DO_CLZ_B)
+DO_2OP(vclz_h, 16, UH, DO_CLZ_H)
+DO_2OP(vclz_w, 32, UW, DO_CLZ_W)
+DO_2OP(vclz_d, 64, UD, DO_CLZ_D)
+
+#define VPCNT(NAME, BIT, E, FN) \
+void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+ Vd->E(i) = FN(Vj->E(i)); \
+ } \
+}
+
+VPCNT(vpcnt_b, 8, UB, ctpop8)
+VPCNT(vpcnt_h, 16, UH, ctpop16)
+VPCNT(vpcnt_w, 32, UW, ctpop32)
+VPCNT(vpcnt_d, 64, UD, ctpop64)
+
+#define DO_BIT(NAME, BIT, E, DO_OP) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+ Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i) % BIT); \
+ } \
+}
+
+DO_BIT(vbitclr_b, 8, UB, DO_BITCLR)
+DO_BIT(vbitclr_h, 16, UH, DO_BITCLR)
+DO_BIT(vbitclr_w, 32, UW, DO_BITCLR)
+DO_BIT(vbitclr_d, 64, UD, DO_BITCLR)
+DO_BIT(vbitset_b, 8, UB, DO_BITSET)
+DO_BIT(vbitset_h, 16, UH, DO_BITSET)
+DO_BIT(vbitset_w, 32, UW, DO_BITSET)
+DO_BIT(vbitset_d, 64, UD, DO_BITSET)
+DO_BIT(vbitrev_b, 8, UB, DO_BITREV)
+DO_BIT(vbitrev_h, 16, UH, DO_BITREV)
+DO_BIT(vbitrev_w, 32, UW, DO_BITREV)
+DO_BIT(vbitrev_d, 64, UD, DO_BITREV)
+
+#define DO_BITI(NAME, BIT, E, DO_OP) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+ Vd->E(i) = DO_OP(Vj->E(i), imm); \
+ } \
+}
+
+DO_BITI(vbitclri_b, 8, UB, DO_BITCLR)
+DO_BITI(vbitclri_h, 16, UH, DO_BITCLR)
+DO_BITI(vbitclri_w, 32, UW, DO_BITCLR)
+DO_BITI(vbitclri_d, 64, UD, DO_BITCLR)
+DO_BITI(vbitseti_b, 8, UB, DO_BITSET)
+DO_BITI(vbitseti_h, 16, UH, DO_BITSET)
+DO_BITI(vbitseti_w, 32, UW, DO_BITSET)
+DO_BITI(vbitseti_d, 64, UD, DO_BITSET)
+DO_BITI(vbitrevi_b, 8, UB, DO_BITREV)
+DO_BITI(vbitrevi_h, 16, UH, DO_BITREV)
+DO_BITI(vbitrevi_w, 32, UW, DO_BITREV)
+DO_BITI(vbitrevi_d, 64, UD, DO_BITREV)
+
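+/*
+ * Per 128-bit lane: find the index of the first negative element of Vj
+ * (or the element count if there is none) and store it in the Vd element
+ * selected by the low bits of the first Vk element of that lane.
+ */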
+#define VFRSTP(NAME, BIT, MASK, E) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i, j, m, ofs; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ m = Vk->E(i * ofs) & MASK; \
+ for (j = 0; j < ofs; j++) { \
+ if (Vj->E(j + ofs * i) < 0) { \
+ break; \
+ } \
+ } \
+ Vd->E(m + i * ofs) = j; \
+ } \
+}
+
+VFRSTP(vfrstp_b, 8, 0xf, B)
+VFRSTP(vfrstp_h, 16, 0x7, H)
+
+#define VFRSTPI(NAME, BIT, E) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i, j, m, ofs; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ m = imm % ofs; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ if (Vj->E(j + ofs * i) < 0) { \
+ break; \
+ } \
+ } \
+ Vd->E(m + i * ofs) = j; \
+ } \
+}
+
+VFRSTPI(vfrstpi_b, 8, B)
+VFRSTPI(vfrstpi_h, 16, H)
+
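+/*
+ * Fold the accumulated softfloat exception flags (minus MASK) into FCSR0:
+ * record them in the cause field, raise EXCCODE_FPE if any of them is
+ * enabled, otherwise accumulate them into the flags field.
+ */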
+static void vec_update_fcsr0_mask(CPULoongArchState *env,
+ uintptr_t pc, int mask)
+{
+ int flags = get_float_exception_flags(&env->fp_status);
+
+ set_float_exception_flags(0, &env->fp_status);
+
+ flags &= ~mask;
+
+ if (flags) {
+ flags = ieee_ex_to_loongarch(flags);
+ UPDATE_FP_CAUSE(env->fcsr0, flags);
+ }
+
+ if (GET_FP_ENABLES(env->fcsr0) & flags) {
+ do_raise_exception(env, EXCCODE_FPE, pc);
+ } else {
+ UPDATE_FP_FLAGS(env->fcsr0, flags);
+ }
+}
+
+static void vec_update_fcsr0(CPULoongArchState *env, uintptr_t pc)
+{
+ vec_update_fcsr0_mask(env, pc, 0);
+}
+
+static inline void vec_clear_cause(CPULoongArchState *env)
+{
+ SET_FP_CAUSE(env->fcsr0, 0);
+}
+
+#define DO_3OP_F(NAME, BIT, E, FN) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, \
+ CPULoongArchState *env, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ vec_clear_cause(env); \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+ Vd->E(i) = FN(Vj->E(i), Vk->E(i), &env->fp_status); \
+ vec_update_fcsr0(env, GETPC()); \
+ } \
+}
+
+DO_3OP_F(vfadd_s, 32, UW, float32_add)
+DO_3OP_F(vfadd_d, 64, UD, float64_add)
+DO_3OP_F(vfsub_s, 32, UW, float32_sub)
+DO_3OP_F(vfsub_d, 64, UD, float64_sub)
+DO_3OP_F(vfmul_s, 32, UW, float32_mul)
+DO_3OP_F(vfmul_d, 64, UD, float64_mul)
+DO_3OP_F(vfdiv_s, 32, UW, float32_div)
+DO_3OP_F(vfdiv_d, 64, UD, float64_div)
+DO_3OP_F(vfmax_s, 32, UW, float32_maxnum)
+DO_3OP_F(vfmax_d, 64, UD, float64_maxnum)
+DO_3OP_F(vfmin_s, 32, UW, float32_minnum)
+DO_3OP_F(vfmin_d, 64, UD, float64_minnum)
+DO_3OP_F(vfmaxa_s, 32, UW, float32_maxnummag)
+DO_3OP_F(vfmaxa_d, 64, UD, float64_maxnummag)
+DO_3OP_F(vfmina_s, 32, UW, float32_minnummag)
+DO_3OP_F(vfmina_d, 64, UD, float64_minnummag)
+
+#define DO_4OP_F(NAME, BIT, E, FN, flags) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, void *va, \
+ CPULoongArchState *env, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ VReg *Va = (VReg *)va; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ vec_clear_cause(env); \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+ Vd->E(i) = FN(Vj->E(i), Vk->E(i), Va->E(i), flags, &env->fp_status); \
+ vec_update_fcsr0(env, GETPC()); \
+ } \
+}
+
+DO_4OP_F(vfmadd_s, 32, UW, float32_muladd, 0)
+DO_4OP_F(vfmadd_d, 64, UD, float64_muladd, 0)
+DO_4OP_F(vfmsub_s, 32, UW, float32_muladd, float_muladd_negate_c)
+DO_4OP_F(vfmsub_d, 64, UD, float64_muladd, float_muladd_negate_c)
+DO_4OP_F(vfnmadd_s, 32, UW, float32_muladd, float_muladd_negate_result)
+DO_4OP_F(vfnmadd_d, 64, UD, float64_muladd, float_muladd_negate_result)
+DO_4OP_F(vfnmsub_s, 32, UW, float32_muladd,
+ float_muladd_negate_c | float_muladd_negate_result)
+DO_4OP_F(vfnmsub_d, 64, UD, float64_muladd,
+ float_muladd_negate_c | float_muladd_negate_result)
+
+#define DO_2OP_F(NAME, BIT, E, FN) \
+void HELPER(NAME)(void *vd, void *vj, \
+ CPULoongArchState *env, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ vec_clear_cause(env); \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+ Vd->E(i) = FN(env, Vj->E(i)); \
+ } \
+}
+
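+/*
+ * Compute floor(log2(fj)) using round-down rounding; the inexact flag is
+ * suppressed when updating FCSR0.
+ */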
+#define FLOGB(BIT, T) \
+static T do_flogb_## BIT(CPULoongArchState *env, T fj) \
+{ \
+ T fp, fd; \
+ float_status *status = &env->fp_status; \
+ FloatRoundMode old_mode = get_float_rounding_mode(status); \
+ \
+ set_float_rounding_mode(float_round_down, status); \
+ fp = float ## BIT ##_log2(fj, status); \
+ fd = float ## BIT ##_round_to_int(fp, status); \
+ set_float_rounding_mode(old_mode, status); \
+ vec_update_fcsr0_mask(env, GETPC(), float_flag_inexact); \
+ return fd; \
+}
+
+FLOGB(32, uint32_t)
+FLOGB(64, uint64_t)
+
+#define FCLASS(NAME, BIT, E, FN) \
+void HELPER(NAME)(void *vd, void *vj, \
+ CPULoongArchState *env, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+ Vd->E(i) = FN(env, Vj->E(i)); \
+ } \
+}
+
+FCLASS(vfclass_s, 32, UW, helper_fclass_s)
+FCLASS(vfclass_d, 64, UD, helper_fclass_d)
+
+#define FSQRT(BIT, T) \
+static T do_fsqrt_## BIT(CPULoongArchState *env, T fj) \
+{ \
+ T fd; \
+ fd = float ## BIT ##_sqrt(fj, &env->fp_status); \
+ vec_update_fcsr0(env, GETPC()); \
+ return fd; \
+}
+
+FSQRT(32, uint32_t)
+FSQRT(64, uint64_t)
+
+#define FRECIP(BIT, T) \
+static T do_frecip_## BIT(CPULoongArchState *env, T fj) \
+{ \
+ T fd; \
+ fd = float ## BIT ##_div(float ## BIT ##_one, fj, &env->fp_status); \
+ vec_update_fcsr0(env, GETPC()); \
+ return fd; \
+}
+
+FRECIP(32, uint32_t)
+FRECIP(64, uint64_t)
+
+#define FRSQRT(BIT, T) \
+static T do_frsqrt_## BIT(CPULoongArchState *env, T fj) \
+{ \
+ T fd, fp; \
+ fp = float ## BIT ##_sqrt(fj, &env->fp_status); \
+ fd = float ## BIT ##_div(float ## BIT ##_one, fp, &env->fp_status); \
+ vec_update_fcsr0(env, GETPC()); \
+ return fd; \
+}
+
+FRSQRT(32, uint32_t)
+FRSQRT(64, uint64_t)
+
+DO_2OP_F(vflogb_s, 32, UW, do_flogb_32)
+DO_2OP_F(vflogb_d, 64, UD, do_flogb_64)
+DO_2OP_F(vfsqrt_s, 32, UW, do_fsqrt_32)
+DO_2OP_F(vfsqrt_d, 64, UD, do_fsqrt_64)
+DO_2OP_F(vfrecip_s, 32, UW, do_frecip_32)
+DO_2OP_F(vfrecip_d, 64, UD, do_frecip_64)
+DO_2OP_F(vfrsqrt_s, 32, UW, do_frsqrt_32)
+DO_2OP_F(vfrsqrt_d, 64, UD, do_frsqrt_64)
+
+static uint32_t float16_cvt_float32(uint16_t h, float_status *status)
+{
+ return float16_to_float32(h, true, status);
+}
+
+static uint64_t float32_cvt_float64(uint32_t s, float_status *status)
+{
+ return float32_to_float64(s, status);
+}
+
+static uint16_t float32_cvt_float16(uint32_t s, float_status *status)
+{
+ return float32_to_float16(s, true, status);
+}
+
+static uint32_t float64_cvt_float32(uint64_t d, float_status *status)
+{
+ return float64_to_float32(d, status);
+}
+
+void HELPER(vfcvtl_s_h)(void *vd, void *vj,
+ CPULoongArchState *env, uint32_t desc)
+{
+ int i, j, ofs;
+ VReg temp = {};
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
+
+ ofs = LSX_LEN / 32;
+ vec_clear_cause(env);
+ for (i = 0; i < oprsz / 16; i++) {
+ for (j = 0; j < ofs; j++) {
+ temp.UW(j + ofs * i) = float16_cvt_float32(Vj->UH(j + ofs * 2 * i),
+ &env->fp_status);
+ }
+ vec_update_fcsr0(env, GETPC());
+ }
+ *Vd = temp;
+}
+
+void HELPER(vfcvtl_d_s)(void *vd, void *vj,
+ CPULoongArchState *env, uint32_t desc)
+{
+ int i, j, ofs;
+ VReg temp = {};
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
+
+ ofs = LSX_LEN / 64;
+ vec_clear_cause(env);
+ for (i = 0; i < oprsz / 16; i++) {
+ for (j = 0; j < ofs; j++) {
+ temp.UD(j + ofs * i) = float32_cvt_float64(Vj->UW(j + ofs * 2 * i),
+ &env->fp_status);
+ }
+ vec_update_fcsr0(env, GETPC());
+ }
+ *Vd = temp;
+}
+
+void HELPER(vfcvth_s_h)(void *vd, void *vj,
+ CPULoongArchState *env, uint32_t desc)
+{
+ int i, j, ofs;
+ VReg temp = {};
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
+
+ ofs = LSX_LEN / 32;
+ vec_clear_cause(env);
+ for (i = 0; i < oprsz / 16; i++) {
+ for (j = 0; j < ofs; j++) {
+ temp.UW(j + ofs * i) = float16_cvt_float32(Vj->UH(j + ofs * (2 * i + 1)),
+ &env->fp_status);
+ }
+ vec_update_fcsr0(env, GETPC());
+ }
+ *Vd = temp;
+}
+
+void HELPER(vfcvth_d_s)(void *vd, void *vj,
+ CPULoongArchState *env, uint32_t desc)
+{
+ int i, j, ofs;
+ VReg temp = {};
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
+
+ ofs = LSX_LEN / 64;
+ vec_clear_cause(env);
+ for (i = 0; i < oprsz / 16; i++) {
+ for (j = 0; j < ofs; j++) {
+ temp.UD(j + ofs * i) = float32_cvt_float64(Vj->UW(j + ofs * (2 * i + 1)),
+ &env->fp_status);
+ }
+ vec_update_fcsr0(env, GETPC());
+ }
+ *Vd = temp;
+}
+
+void HELPER(vfcvt_h_s)(void *vd, void *vj, void *vk,
+ CPULoongArchState *env, uint32_t desc)
+{
+ int i, j, ofs;
+ VReg temp = {};
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);
+
+ ofs = LSX_LEN / 32;
+ vec_clear_cause(env);
+ for (i = 0; i < oprsz / 16; i++) {
+ for (j = 0; j < ofs; j++) {
+ temp.UH(j + ofs * (2 * i + 1)) = float32_cvt_float16(Vj->UW(j + ofs * i),
+ &env->fp_status);
+ temp.UH(j + ofs * 2 * i) = float32_cvt_float16(Vk->UW(j + ofs * i),
+ &env->fp_status);
+ }
+ vec_update_fcsr0(env, GETPC());
+ }
+ *Vd = temp;
+}
+
+void HELPER(vfcvt_s_d)(void *vd, void *vj, void *vk,
+ CPULoongArchState *env, uint32_t desc)
+{
+ int i, j, ofs;
+ VReg temp = {};
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);
+
+ ofs = LSX_LEN / 64;
+ vec_clear_cause(env);
+ for (i = 0; i < oprsz / 16; i++) {
+ for (j = 0; j < ofs; j++) {
+ temp.UW(j + ofs * (2 * i + 1)) = float64_cvt_float32(Vj->UD(j + ofs * i),
+ &env->fp_status);
+ temp.UW(j + ofs * 2 * i) = float64_cvt_float32(Vk->UD(j + ofs * i),
+ &env->fp_status);
+ }
+ vec_update_fcsr0(env, GETPC());
+ }
+ *Vd = temp;
+}
+
+void HELPER(vfrint_s)(void *vd, void *vj,
+ CPULoongArchState *env, uint32_t desc)
+{
+ int i;
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
+
+ vec_clear_cause(env);
+ for (i = 0; i < oprsz / 4; i++) {
+ Vd->W(i) = float32_round_to_int(Vj->UW(i), &env->fp_status);
+ vec_update_fcsr0(env, GETPC());
+ }
+}
+
+void HELPER(vfrint_d)(void *vd, void *vj,
+ CPULoongArchState *env, uint32_t desc)
+{
+ int i;
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
+
+ vec_clear_cause(env);
+ for (i = 0; i < oprsz / 8; i++) {
+ Vd->D(i) = float64_round_to_int(Vj->UD(i), &env->fp_status);
+ vec_update_fcsr0(env, GETPC());
+ }
+}
+
+#define FCVT_2OP(NAME, BIT, E, MODE) \
+void HELPER(NAME)(void *vd, void *vj, \
+ CPULoongArchState *env, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ vec_clear_cause(env); \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+ FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); \
+ set_float_rounding_mode(MODE, &env->fp_status); \
+ Vd->E(i) = float## BIT ## _round_to_int(Vj->E(i), &env->fp_status); \
+ set_float_rounding_mode(old_mode, &env->fp_status); \
+ vec_update_fcsr0(env, GETPC()); \
+ } \
+}
+
+FCVT_2OP(vfrintrne_s, 32, UW, float_round_nearest_even)
+FCVT_2OP(vfrintrne_d, 64, UD, float_round_nearest_even)
+FCVT_2OP(vfrintrz_s, 32, UW, float_round_to_zero)
+FCVT_2OP(vfrintrz_d, 64, UD, float_round_to_zero)
+FCVT_2OP(vfrintrp_s, 32, UW, float_round_up)
+FCVT_2OP(vfrintrp_d, 64, UD, float_round_up)
+FCVT_2OP(vfrintrm_s, 32, UW, float_round_down)
+FCVT_2OP(vfrintrm_d, 64, UD, float_round_down)
+
+#define FTINT(NAME, FMT1, FMT2, T1, T2, MODE) \
+static T2 do_ftint ## NAME(CPULoongArchState *env, T1 fj) \
+{ \
+ T2 fd; \
+ FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); \
+ \
+ set_float_rounding_mode(MODE, &env->fp_status); \
+ fd = do_## FMT1 ##_to_## FMT2(env, fj); \
+ set_float_rounding_mode(old_mode, &env->fp_status); \
+ return fd; \
+}
+
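+/*
+ * Float-to-integer conversion; a NaN input that raises the invalid flag
+ * converts to 0.
+ */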
+#define DO_FTINT(FMT1, FMT2, T1, T2) \
+static T2 do_## FMT1 ##_to_## FMT2(CPULoongArchState *env, T1 fj) \
+{ \
+ T2 fd; \
+ \
+ fd = FMT1 ##_to_## FMT2(fj, &env->fp_status); \
+ if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { \
+ if (FMT1 ##_is_any_nan(fj)) { \
+ fd = 0; \
+ } \
+ } \
+ vec_update_fcsr0(env, GETPC()); \
+ return fd; \
+}
+
+DO_FTINT(float32, int32, uint32_t, uint32_t)
+DO_FTINT(float64, int64, uint64_t, uint64_t)
+DO_FTINT(float32, uint32, uint32_t, uint32_t)
+DO_FTINT(float64, uint64, uint64_t, uint64_t)
+DO_FTINT(float64, int32, uint64_t, uint32_t)
+DO_FTINT(float32, int64, uint32_t, uint64_t)
+
+FTINT(rne_w_s, float32, int32, uint32_t, uint32_t, float_round_nearest_even)
+FTINT(rne_l_d, float64, int64, uint64_t, uint64_t, float_round_nearest_even)
+FTINT(rp_w_s, float32, int32, uint32_t, uint32_t, float_round_up)
+FTINT(rp_l_d, float64, int64, uint64_t, uint64_t, float_round_up)
+FTINT(rz_w_s, float32, int32, uint32_t, uint32_t, float_round_to_zero)
+FTINT(rz_l_d, float64, int64, uint64_t, uint64_t, float_round_to_zero)
+FTINT(rm_w_s, float32, int32, uint32_t, uint32_t, float_round_down)
+FTINT(rm_l_d, float64, int64, uint64_t, uint64_t, float_round_down)
+
+DO_2OP_F(vftintrne_w_s, 32, UW, do_ftintrne_w_s)
+DO_2OP_F(vftintrne_l_d, 64, UD, do_ftintrne_l_d)
+DO_2OP_F(vftintrp_w_s, 32, UW, do_ftintrp_w_s)
+DO_2OP_F(vftintrp_l_d, 64, UD, do_ftintrp_l_d)
+DO_2OP_F(vftintrz_w_s, 32, UW, do_ftintrz_w_s)
+DO_2OP_F(vftintrz_l_d, 64, UD, do_ftintrz_l_d)
+DO_2OP_F(vftintrm_w_s, 32, UW, do_ftintrm_w_s)
+DO_2OP_F(vftintrm_l_d, 64, UD, do_ftintrm_l_d)
+DO_2OP_F(vftint_w_s, 32, UW, do_float32_to_int32)
+DO_2OP_F(vftint_l_d, 64, UD, do_float64_to_int64)
+
+FTINT(rz_wu_s, float32, uint32, uint32_t, uint32_t, float_round_to_zero)
+FTINT(rz_lu_d, float64, uint64, uint64_t, uint64_t, float_round_to_zero)
+
+DO_2OP_F(vftintrz_wu_s, 32, UW, do_ftintrz_wu_s)
+DO_2OP_F(vftintrz_lu_d, 64, UD, do_ftintrz_lu_d)
+DO_2OP_F(vftint_wu_s, 32, UW, do_float32_to_uint32)
+DO_2OP_F(vftint_lu_d, 64, UD, do_float64_to_uint64)
+
+FTINT(rm_w_d, float64, int32, uint64_t, uint32_t, float_round_down)
+FTINT(rp_w_d, float64, int32, uint64_t, uint32_t, float_round_up)
+FTINT(rz_w_d, float64, int32, uint64_t, uint32_t, float_round_to_zero)
+FTINT(rne_w_d, float64, int32, uint64_t, uint32_t, float_round_nearest_even)
+
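+/*
+ * Narrowing double-to-word conversions: per 128-bit lane, the results
+ * from Vk are written to the low half of Vd and the results from Vj to
+ * the high half.
+ */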
+#define FTINT_W_D(NAME, FN) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, \
+ CPULoongArchState *env, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / 64; \
+ vec_clear_cause(env); \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.W(j + ofs * (2 * i + 1)) = FN(env, Vj->UD(j + ofs * i)); \
+ temp.W(j + ofs * 2 * i) = FN(env, Vk->UD(j + ofs * i)); \
+ } \
+ } \
+ *Vd = temp; \
+}
+
+FTINT_W_D(vftint_w_d, do_float64_to_int32)
+FTINT_W_D(vftintrm_w_d, do_ftintrm_w_d)
+FTINT_W_D(vftintrp_w_d, do_ftintrp_w_d)
+FTINT_W_D(vftintrz_w_d, do_ftintrz_w_d)
+FTINT_W_D(vftintrne_w_d, do_ftintrne_w_d)
+
+FTINT(rml_l_s, float32, int64, uint32_t, uint64_t, float_round_down)
+FTINT(rpl_l_s, float32, int64, uint32_t, uint64_t, float_round_up)
+FTINT(rzl_l_s, float32, int64, uint32_t, uint64_t, float_round_to_zero)
+FTINT(rnel_l_s, float32, int64, uint32_t, uint64_t, float_round_nearest_even)
+FTINT(rmh_l_s, float32, int64, uint32_t, uint64_t, float_round_down)
+FTINT(rph_l_s, float32, int64, uint32_t, uint64_t, float_round_up)
+FTINT(rzh_l_s, float32, int64, uint32_t, uint64_t, float_round_to_zero)
+FTINT(rneh_l_s, float32, int64, uint32_t, uint64_t, float_round_nearest_even)
+
+#define FTINTL_L_S(NAME, FN) \
+void HELPER(NAME)(void *vd, void *vj, \
+ CPULoongArchState *env, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+    VReg temp = {};                                           \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / 64; \
+ vec_clear_cause(env); \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.D(j + ofs * i) = FN(env, Vj->UW(j + ofs * 2 * i)); \
+ } \
+ } \
+ *Vd = temp; \
+}
+
+FTINTL_L_S(vftintl_l_s, do_float32_to_int64)
+FTINTL_L_S(vftintrml_l_s, do_ftintrml_l_s)
+FTINTL_L_S(vftintrpl_l_s, do_ftintrpl_l_s)
+FTINTL_L_S(vftintrzl_l_s, do_ftintrzl_l_s)
+FTINTL_L_S(vftintrnel_l_s, do_ftintrnel_l_s)
+
+#define FTINTH_L_S(NAME, FN) \
+void HELPER(NAME)(void *vd, void *vj, \
+ CPULoongArchState *env, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / 64; \
+ vec_clear_cause(env); \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.D(j + ofs * i) = FN(env, Vj->UW(j + ofs * (2 * i + 1))); \
+ } \
+ } \
+ *Vd = temp; \
+}
+
+FTINTH_L_S(vftinth_l_s, do_float32_to_int64)
+FTINTH_L_S(vftintrmh_l_s, do_ftintrmh_l_s)
+FTINTH_L_S(vftintrph_l_s, do_ftintrph_l_s)
+FTINTH_L_S(vftintrzh_l_s, do_ftintrzh_l_s)
+FTINTH_L_S(vftintrneh_l_s, do_ftintrneh_l_s)
+
+#define FFINT(NAME, FMT1, FMT2, T1, T2) \
+static T2 do_ffint_ ## NAME(CPULoongArchState *env, T1 fj) \
+{ \
+ T2 fd; \
+ \
+ fd = FMT1 ##_to_## FMT2(fj, &env->fp_status); \
+ vec_update_fcsr0(env, GETPC()); \
+ return fd; \
+}
+
+FFINT(s_w, int32, float32, int32_t, uint32_t)
+FFINT(d_l, int64, float64, int64_t, uint64_t)
+FFINT(s_wu, uint32, float32, uint32_t, uint32_t)
+FFINT(d_lu, uint64, float64, uint64_t, uint64_t)
+
+DO_2OP_F(vffint_s_w, 32, W, do_ffint_s_w)
+DO_2OP_F(vffint_d_l, 64, D, do_ffint_d_l)
+DO_2OP_F(vffint_s_wu, 32, UW, do_ffint_s_wu)
+DO_2OP_F(vffint_d_lu, 64, UD, do_ffint_d_lu)
+
+void HELPER(vffintl_d_w)(void *vd, void *vj,
+ CPULoongArchState *env, uint32_t desc)
+{
+ int i, j, ofs;
+ VReg temp = {};
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
+
+ ofs = LSX_LEN / 64;
+ vec_clear_cause(env);
+ for (i = 0; i < oprsz / 16; i++) {
+ for (j = 0; j < ofs; j++) {
+ temp.D(j + ofs * i) = int32_to_float64(Vj->W(j + ofs * 2 * i),
+ &env->fp_status);
+ }
+ vec_update_fcsr0(env, GETPC());
+ }
+ *Vd = temp;
+}
+
+void HELPER(vffinth_d_w)(void *vd, void *vj,
+ CPULoongArchState *env, uint32_t desc)
+{
+ int i, j, ofs;
+ VReg temp = {};
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
+
+ ofs = LSX_LEN / 64;
+ vec_clear_cause(env);
+    for (i = 0; i < oprsz / 16; i++) {
+ for (j = 0; j < ofs; j++) {
+ temp.D(j + ofs * i) = int32_to_float64(Vj->W(j + ofs * (2 * i + 1)),
+ &env->fp_status);
+ }
+ vec_update_fcsr0(env, GETPC());
+ }
+ *Vd = temp;
+}
+
+void HELPER(vffint_s_l)(void *vd, void *vj, void *vk,
+ CPULoongArchState *env, uint32_t desc)
+{
+ int i, j, ofs;
+ VReg temp = {};
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ VReg *Vk = (VReg *)vk;
+ int oprsz = simd_oprsz(desc);
+
+ ofs = LSX_LEN / 64;
+ vec_clear_cause(env);
+ for (i = 0; i < oprsz / 16; i++) {
+ for (j = 0; j < ofs; j++) {
+ temp.W(j + ofs * (2 * i + 1)) = int64_to_float32(Vj->D(j + ofs * i),
+ &env->fp_status);
+ temp.W(j + ofs * 2 * i) = int64_to_float32(Vk->D(j + ofs * i),
+ &env->fp_status);
+ }
+ vec_update_fcsr0(env, GETPC());
+ }
+ *Vd = temp;
+}
+
+#define VCMPI(NAME, BIT, E, DO_OP) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ typedef __typeof(Vd->E(0)) TD; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+ Vd->E(i) = DO_OP(Vj->E(i), (TD)imm); \
+ } \
+}
+
+VCMPI(vseqi_b, 8, B, VSEQ)
+VCMPI(vseqi_h, 16, H, VSEQ)
+VCMPI(vseqi_w, 32, W, VSEQ)
+VCMPI(vseqi_d, 64, D, VSEQ)
+VCMPI(vslei_b, 8, B, VSLE)
+VCMPI(vslei_h, 16, H, VSLE)
+VCMPI(vslei_w, 32, W, VSLE)
+VCMPI(vslei_d, 64, D, VSLE)
+VCMPI(vslei_bu, 8, UB, VSLE)
+VCMPI(vslei_hu, 16, UH, VSLE)
+VCMPI(vslei_wu, 32, UW, VSLE)
+VCMPI(vslei_du, 64, UD, VSLE)
+VCMPI(vslti_b, 8, B, VSLT)
+VCMPI(vslti_h, 16, H, VSLT)
+VCMPI(vslti_w, 32, W, VSLT)
+VCMPI(vslti_d, 64, D, VSLT)
+VCMPI(vslti_bu, 8, UB, VSLT)
+VCMPI(vslti_hu, 16, UH, VSLT)
+VCMPI(vslti_wu, 32, UW, VSLT)
+VCMPI(vslti_du, 64, UD, VSLT)
+
+static uint64_t vfcmp_common(CPULoongArchState *env,
+ FloatRelation cmp, uint32_t flags)
+{
+ uint64_t ret = 0;
+
+ switch (cmp) {
+ case float_relation_less:
+ ret = (flags & FCMP_LT);
+ break;
+ case float_relation_equal:
+ ret = (flags & FCMP_EQ);
+ break;
+ case float_relation_greater:
+ ret = (flags & FCMP_GT);
+ break;
+ case float_relation_unordered:
+ ret = (flags & FCMP_UN);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ if (ret) {
+ ret = -1;
+ }
+
+ return ret;
+}
+
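+/*
+ * The 'flags' argument encodes which FCMP_* relations the instruction
+ * tests; vfcmp_common() returns all ones when the computed relation is
+ * among them and zero otherwise.
+ */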
+#define VFCMP(NAME, BIT, E, FN) \
+void HELPER(NAME)(CPULoongArchState *env, uint32_t oprsz, \
+ uint32_t vd, uint32_t vj, uint32_t vk, uint32_t flags) \
+{ \
+ int i; \
+ VReg t; \
+ VReg *Vd = &(env->fpr[vd].vreg); \
+ VReg *Vj = &(env->fpr[vj].vreg); \
+ VReg *Vk = &(env->fpr[vk].vreg); \
+ \
+ vec_clear_cause(env); \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+ FloatRelation cmp; \
+ cmp = FN(Vj->E(i), Vk->E(i), &env->fp_status); \
+ t.E(i) = vfcmp_common(env, cmp, flags); \
+ vec_update_fcsr0(env, GETPC()); \
+ } \
+ *Vd = t; \
+}
+
+VFCMP(vfcmp_c_s, 32, UW, float32_compare_quiet)
+VFCMP(vfcmp_s_s, 32, UW, float32_compare)
+VFCMP(vfcmp_c_d, 64, UD, float64_compare_quiet)
+VFCMP(vfcmp_s_d, 64, UD, float64_compare)
+
+void HELPER(vbitseli_b)(void *vd, void *vj, uint64_t imm, uint32_t desc)
+{
+ int i;
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+
+ for (i = 0; i < simd_oprsz(desc); i++) {
+ Vd->B(i) = (~Vd->B(i) & Vj->B(i)) | (Vd->B(i) & imm);
+ }
+}
+
+/* Copy from target/arm/tcg/sve_helper.c */
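+/*
+ * Return nonzero if any element of (8 << esz) bits in the pair {m0, m1}
+ * equals n: (cmp - ones) & ~cmp sets the sign bit of every lane whose
+ * value is zero.  The callers below pass n == 0.
+ */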
+static inline bool do_match2(uint64_t n, uint64_t m0, uint64_t m1, int esz)
+{
+ int bits = 8 << esz;
+ uint64_t ones = dup_const(esz, 1);
+ uint64_t signs = ones << (bits - 1);
+ uint64_t cmp0, cmp1;
+
+ cmp1 = dup_const(esz, n);
+ cmp0 = cmp1 ^ m0;
+ cmp1 = cmp1 ^ m1;
+ cmp0 = (cmp0 - ones) & ~cmp0;
+ cmp1 = (cmp1 - ones) & ~cmp1;
+ return (cmp0 | cmp1) & signs;
+}
+
+#define SETANYEQZ(NAME, MO) \
+void HELPER(NAME)(CPULoongArchState *env, \
+ uint32_t oprsz, uint32_t cd, uint32_t vj) \
+{ \
+ VReg *Vj = &(env->fpr[vj].vreg); \
+ \
+ env->cf[cd & 0x7] = do_match2(0, Vj->D(0), Vj->D(1), MO); \
+ if (oprsz == 32) { \
+ env->cf[cd & 0x7] = env->cf[cd & 0x7] || \
+ do_match2(0, Vj->D(2), Vj->D(3), MO); \
+ } \
+}
+
+SETANYEQZ(vsetanyeqz_b, MO_8)
+SETANYEQZ(vsetanyeqz_h, MO_16)
+SETANYEQZ(vsetanyeqz_w, MO_32)
+SETANYEQZ(vsetanyeqz_d, MO_64)
+
+#define SETALLNEZ(NAME, MO) \
+void HELPER(NAME)(CPULoongArchState *env, \
+ uint32_t oprsz, uint32_t cd, uint32_t vj) \
+{ \
+ VReg *Vj = &(env->fpr[vj].vreg); \
+ \
+    env->cf[cd & 0x7] = !do_match2(0, Vj->D(0), Vj->D(1), MO);             \
+ if (oprsz == 32) { \
+ env->cf[cd & 0x7] = env->cf[cd & 0x7] && \
+ !do_match2(0, Vj->D(2), Vj->D(3), MO); \
+ } \
+}
+
+SETALLNEZ(vsetallnez_b, MO_8)
+SETALLNEZ(vsetallnez_h, MO_16)
+SETALLNEZ(vsetallnez_w, MO_32)
+SETALLNEZ(vsetallnez_d, MO_64)
+
+#define XVINSVE0(NAME, E, MASK) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ Vd->E(imm & MASK) = Vj->E(0); \
+}
+
+XVINSVE0(xvinsve0_w, W, 0x7)
+XVINSVE0(xvinsve0_d, D, 0x3)
+
+#define XVPICKVE(NAME, E, BIT, MASK) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ Vd->E(0) = Vj->E(imm & MASK); \
+ for (i = 1; i < oprsz / (BIT / 8); i++) { \
+ Vd->E(i) = 0; \
+ } \
+}
+
+XVPICKVE(xvpickve_w, W, 32, 0x7)
+XVPICKVE(xvpickve_d, D, 64, 0x3)
+
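+/*
+ * vpackev: pack the even-indexed source elements pairwise, Vk into the
+ * even result slots and Vj into the odd ones; vpackod below does the
+ * same with the odd-indexed source elements.
+ */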
+#define VPACKEV(NAME, BIT, E) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+ temp.E(2 * i + 1) = Vj->E(2 * i); \
+        temp.E(2 * i) = Vk->E(2 * i);                         \
+ } \
+ *Vd = temp; \
+}
+
+VPACKEV(vpackev_b, 16, B)
+VPACKEV(vpackev_h, 32, H)
+VPACKEV(vpackev_w, 64, W)
+VPACKEV(vpackev_d, 128, D)
+
+#define VPACKOD(NAME, BIT, E) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+ temp.E(2 * i + 1) = Vj->E(2 * i + 1); \
+ temp.E(2 * i) = Vk->E(2 * i + 1); \
+ } \
+ *Vd = temp; \
+}
+
+VPACKOD(vpackod_b, 16, B)
+VPACKOD(vpackod_h, 32, H)
+VPACKOD(vpackod_w, 64, W)
+VPACKOD(vpackod_d, 128, D)
+
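+/*
+ * vpickev/vpickod: per 128-bit lane, the even (resp. odd) indexed
+ * elements of Vk are gathered into the low half of Vd and those of Vj
+ * into the high half.
+ */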
+#define VPICKEV(NAME, BIT, E) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.E(j + ofs * (2 * i + 1)) = Vj->E(2 * (j + ofs * i)); \
+ temp.E(j + ofs * 2 * i) = Vk->E(2 * (j + ofs * i)); \
+ } \
+ } \
+ *Vd = temp; \
+}
+
+VPICKEV(vpickev_b, 16, B)
+VPICKEV(vpickev_h, 32, H)
+VPICKEV(vpickev_w, 64, W)
+VPICKEV(vpickev_d, 128, D)
+
+#define VPICKOD(NAME, BIT, E) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.E(j + ofs * (2 * i + 1)) = Vj->E(2 * (j + ofs * i) + 1); \
+ temp.E(j + ofs * 2 * i) = Vk->E(2 * (j + ofs * i) + 1); \
+ } \
+ } \
+ *Vd = temp; \
+}
+
+VPICKOD(vpickod_b, 16, B)
+VPICKOD(vpickod_h, 32, H)
+VPICKOD(vpickod_w, 64, W)
+VPICKOD(vpickod_d, 128, D)
+
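+/*
+ * vilvl/vilvh: interleave the elements from the low (resp. high) half of
+ * each 128-bit lane, with Vk supplying the even result slots and Vj the
+ * odd ones.
+ */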
+#define VILVL(NAME, BIT, E) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.E(2 * (j + ofs * i) + 1) = Vj->E(j + ofs * 2 * i); \
+ temp.E(2 * (j + ofs * i)) = Vk->E(j + ofs * 2 * i); \
+ } \
+ } \
+ *Vd = temp; \
+}
+
+VILVL(vilvl_b, 16, B)
+VILVL(vilvl_h, 32, H)
+VILVL(vilvl_w, 64, W)
+VILVL(vilvl_d, 128, D)
+
+#define VILVH(NAME, BIT, E) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i, j, ofs; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ ofs = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ for (j = 0; j < ofs; j++) { \
+ temp.E(2 * (j + ofs * i) + 1) = Vj->E(j + ofs * (2 * i + 1)); \
+ temp.E(2 * (j + ofs * i)) = Vk->E(j + ofs * (2 * i + 1)); \
+ } \
+ } \
+ *Vd = temp; \
+}
+
+VILVH(vilvh_b, 16, B)
+VILVH(vilvh_h, 32, H)
+VILVH(vilvh_w, 64, W)
+VILVH(vilvh_d, 128, D)
+
+void HELPER(vshuf_b)(void *vd, void *vj, void *vk, void *va, uint32_t desc)
+{
+ int i, j, m;
+ VReg temp = {};
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ VReg *Vk = (VReg *)vk;
+ VReg *Va = (VReg *)va;
+ int oprsz = simd_oprsz(desc);
+
+ m = LSX_LEN / 8;
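+    /*
+     * Each byte of Va selects, modulo 2 * m, an element from the
+     * concatenation of the current 128-bit lanes of Vk (indices below m)
+     * and Vj (indices m and above).
+     */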
+ for (i = 0; i < (oprsz / 16) * m; i++) {
+ j = i < m ? 0 : 1;
+ uint64_t k = (uint8_t)Va->B(i) % (2 * m);
+        temp.B(i) = k < m ? Vk->B(k + j * m) : Vj->B(k + (j - 1) * m);
+ }
+ *Vd = temp;
+}
+
+#define VSHUF(NAME, BIT, E) \
+void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
+{ \
+ int i, j, m; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ VReg *Vk = (VReg *)vk; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ m = LSX_LEN / BIT; \
+ for (i = 0; i < (oprsz / 16) * m; i++) { \
+ j = i < m ? 0 : 1; \
+ uint64_t k = ((uint8_t)Vd->E(i)) % (2 * m); \
+ temp.E(i) = k < m ? Vk->E(k + j * m) : Vj->E(k + (j - 1) * m); \
+ } \
+ *Vd = temp; \
+}
+
+VSHUF(vshuf_h, 16, H)
+VSHUF(vshuf_w, 32, W)
+VSHUF(vshuf_d, 64, D)
+
+#define VSHUF4I(NAME, BIT, E) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i, j, max; \
+ VReg temp = {}; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ max = LSX_LEN / BIT; \
+ for (i = 0; i < oprsz / (BIT / 8); i++) { \
+ j = i < max ? 1 : 2; \
+        temp.E(i) = Vj->E(SHF_POS(i - ((j - 1) * max), imm) + (j - 1) * max); \
+ } \
+ *Vd = temp; \
+}
+
+VSHUF4I(vshuf4i_b, 8, B)
+VSHUF4I(vshuf4i_h, 16, H)
+VSHUF4I(vshuf4i_w, 32, W)
+
+void HELPER(vshuf4i_d)(void *vd, void *vj, uint64_t imm, uint32_t desc)
+{
+ int i;
+ VReg temp = {};
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
+
+ for (i = 0; i < oprsz / 16; i++) {
+ temp.D(2 * i) = (imm & 2 ? Vj : Vd)->D((imm & 1) + 2 * i);
+ temp.D(2 * i + 1) = (imm & 8 ? Vj : Vd)->D(((imm >> 2) & 1) + 2 * i);
+ }
+ *Vd = temp;
+}
+
+void HELPER(vperm_w)(void *vd, void *vj, void *vk, uint32_t desc)
+{
+ int i, m;
+ VReg temp = {};
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ VReg *Vk = (VReg *)vk;
+
+ m = LASX_LEN / 32;
+    for (i = 0; i < m; i++) {
+ uint64_t k = (uint8_t)Vk->W(i) % 8;
+ temp.W(i) = Vj->W(k);
+ }
+ *Vd = temp;
+}
+
+void HELPER(vpermi_w)(void *vd, void *vj, uint64_t imm, uint32_t desc)
+{
+ int i;
+ VReg temp = {};
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+ int oprsz = simd_oprsz(desc);
+
+ for (i = 0; i < oprsz / 16; i++) {
+ temp.W(4 * i) = Vj->W((imm & 0x3) + 4 * i);
+ temp.W(4 * i + 1) = Vj->W(((imm >> 2) & 0x3) + 4 * i);
+ temp.W(4 * i + 2) = Vd->W(((imm >> 4) & 0x3) + 4 * i);
+ temp.W(4 * i + 3) = Vd->W(((imm >> 6) & 0x3) + 4 * i);
+ }
+ *Vd = temp;
+}
+
+void HELPER(vpermi_d)(void *vd, void *vj, uint64_t imm, uint32_t desc)
+{
+ VReg temp = {};
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+
+ temp.D(0) = Vj->D(imm & 0x3);
+ temp.D(1) = Vj->D((imm >> 2) & 0x3);
+ temp.D(2) = Vj->D((imm >> 4) & 0x3);
+ temp.D(3) = Vj->D((imm >> 6) & 0x3);
+ *Vd = temp;
+}
+
+void HELPER(vpermi_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
+{
+ int i;
+    VReg temp = {};
+ VReg *Vd = (VReg *)vd;
+ VReg *Vj = (VReg *)vj;
+
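+    /*
+     * Each nibble of imm selects one 128-bit half: bit 1 picks Vd over
+     * Vj, bit 0 picks which half of the chosen source register.
+     */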
+ for (i = 0; i < 2; i++, imm >>= 4) {
+        temp.Q(i) = (imm & 2 ? Vd : Vj)->Q(imm & 1);
+ }
+ *Vd = temp;
+}
+
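+/*
+ * vextrins: the upper four bits of imm (masked to the element count)
+ * select the destination element and the lower four the source element,
+ * applied within each 128-bit lane.
+ */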
+#define VEXTRINS(NAME, BIT, E, MASK) \
+void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
+{ \
+ int i, ins, extr, max; \
+ VReg *Vd = (VReg *)vd; \
+ VReg *Vj = (VReg *)vj; \
+ int oprsz = simd_oprsz(desc); \
+ \
+ max = LSX_LEN / BIT; \
+ ins = (imm >> 4) & MASK; \
+ extr = imm & MASK; \
+ for (i = 0; i < oprsz / 16; i++) { \
+ Vd->E(ins + i * max) = Vj->E(extr + i * max); \
+ } \
+}
+
+VEXTRINS(vextrins_b, 8, B, 0xf)
+VEXTRINS(vextrins_h, 16, H, 0x7)
+VEXTRINS(vextrins_w, 32, W, 0x3)
+VEXTRINS(vextrins_d, 64, D, 0x1)
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * QEMU LoongArch TLB helpers
- *
- * Copyright (c) 2021 Loongson Technology Corporation Limited
- *
- */
-
-#include "qemu/osdep.h"
-#include "qemu/guest-random.h"
-
-#include "cpu.h"
-#include "internals.h"
-#include "exec/helper-proto.h"
-#include "exec/exec-all.h"
-#include "exec/cpu_ldst.h"
-#include "exec/log.h"
-#include "cpu-csr.h"
-
-enum {
- TLBRET_MATCH = 0,
- TLBRET_BADADDR = 1,
- TLBRET_NOMATCH = 2,
- TLBRET_INVALID = 3,
- TLBRET_DIRTY = 4,
- TLBRET_RI = 5,
- TLBRET_XI = 6,
- TLBRET_PE = 7,
-};
-
-static int loongarch_map_tlb_entry(CPULoongArchState *env, hwaddr *physical,
- int *prot, target_ulong address,
- int access_type, int index, int mmu_idx)
-{
- LoongArchTLB *tlb = &env->tlb[index];
- uint64_t plv = mmu_idx;
- uint64_t tlb_entry, tlb_ppn;
- uint8_t tlb_ps, n, tlb_v, tlb_d, tlb_plv, tlb_nx, tlb_nr, tlb_rplv;
-
- if (index >= LOONGARCH_STLB) {
- tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS);
- } else {
- tlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS);
- }
- n = (address >> tlb_ps) & 0x1;/* Odd or even */
-
- tlb_entry = n ? tlb->tlb_entry1 : tlb->tlb_entry0;
- tlb_v = FIELD_EX64(tlb_entry, TLBENTRY, V);
- tlb_d = FIELD_EX64(tlb_entry, TLBENTRY, D);
- tlb_plv = FIELD_EX64(tlb_entry, TLBENTRY, PLV);
- if (is_la64(env)) {
- tlb_ppn = FIELD_EX64(tlb_entry, TLBENTRY_64, PPN);
- tlb_nx = FIELD_EX64(tlb_entry, TLBENTRY_64, NX);
- tlb_nr = FIELD_EX64(tlb_entry, TLBENTRY_64, NR);
- tlb_rplv = FIELD_EX64(tlb_entry, TLBENTRY_64, RPLV);
- } else {
- tlb_ppn = FIELD_EX64(tlb_entry, TLBENTRY_32, PPN);
- tlb_nx = 0;
- tlb_nr = 0;
- tlb_rplv = 0;
- }
-
- /* Remove sw bit between bit12 -- bit PS*/
- tlb_ppn = tlb_ppn & ~(((0x1UL << (tlb_ps - 12)) -1));
-
- /* Check access rights */
- if (!tlb_v) {
- return TLBRET_INVALID;
- }
-
- if (access_type == MMU_INST_FETCH && tlb_nx) {
- return TLBRET_XI;
- }
-
- if (access_type == MMU_DATA_LOAD && tlb_nr) {
- return TLBRET_RI;
- }
-
- if (((tlb_rplv == 0) && (plv > tlb_plv)) ||
- ((tlb_rplv == 1) && (plv != tlb_plv))) {
- return TLBRET_PE;
- }
-
- if ((access_type == MMU_DATA_STORE) && !tlb_d) {
- return TLBRET_DIRTY;
- }
-
- *physical = (tlb_ppn << R_TLBENTRY_64_PPN_SHIFT) |
- (address & MAKE_64BIT_MASK(0, tlb_ps));
- *prot = PAGE_READ;
- if (tlb_d) {
- *prot |= PAGE_WRITE;
- }
- if (!tlb_nx) {
- *prot |= PAGE_EXEC;
- }
- return TLBRET_MATCH;
-}
-
-/*
- * One tlb entry holds an adjacent odd/even pair, the vpn is the
- * content of the virtual page number divided by 2. So the
- * compare vpn is bit[47:15] for 16KiB page. while the vppn
- * field in tlb entry contains bit[47:13], so need adjust.
- * virt_vpn = vaddr[47:13]
- */
-static bool loongarch_tlb_search(CPULoongArchState *env, target_ulong vaddr,
- int *index)
-{
- LoongArchTLB *tlb;
- uint16_t csr_asid, tlb_asid, stlb_idx;
- uint8_t tlb_e, tlb_ps, tlb_g, stlb_ps;
- int i, compare_shift;
- uint64_t vpn, tlb_vppn;
-
- csr_asid = FIELD_EX64(env->CSR_ASID, CSR_ASID, ASID);
- stlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS);
- vpn = (vaddr & TARGET_VIRT_MASK) >> (stlb_ps + 1);
- stlb_idx = vpn & 0xff; /* VA[25:15] <==> TLBIDX.index for 16KiB Page */
- compare_shift = stlb_ps + 1 - R_TLB_MISC_VPPN_SHIFT;
-
- /* Search STLB */
- for (i = 0; i < 8; ++i) {
- tlb = &env->tlb[i * 256 + stlb_idx];
- tlb_e = FIELD_EX64(tlb->tlb_misc, TLB_MISC, E);
- if (tlb_e) {
- tlb_vppn = FIELD_EX64(tlb->tlb_misc, TLB_MISC, VPPN);
- tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID);
- tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G);
-
- if ((tlb_g == 1 || tlb_asid == csr_asid) &&
- (vpn == (tlb_vppn >> compare_shift))) {
- *index = i * 256 + stlb_idx;
- return true;
- }
- }
- }
-
- /* Search MTLB */
- for (i = LOONGARCH_STLB; i < LOONGARCH_TLB_MAX; ++i) {
- tlb = &env->tlb[i];
- tlb_e = FIELD_EX64(tlb->tlb_misc, TLB_MISC, E);
- if (tlb_e) {
- tlb_vppn = FIELD_EX64(tlb->tlb_misc, TLB_MISC, VPPN);
- tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS);
- tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID);
- tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G);
- compare_shift = tlb_ps + 1 - R_TLB_MISC_VPPN_SHIFT;
- vpn = (vaddr & TARGET_VIRT_MASK) >> (tlb_ps + 1);
- if ((tlb_g == 1 || tlb_asid == csr_asid) &&
- (vpn == (tlb_vppn >> compare_shift))) {
- *index = i;
- return true;
- }
- }
- }
- return false;
-}
-
-static int loongarch_map_address(CPULoongArchState *env, hwaddr *physical,
- int *prot, target_ulong address,
- MMUAccessType access_type, int mmu_idx)
-{
- int index, match;
-
- match = loongarch_tlb_search(env, address, &index);
- if (match) {
- return loongarch_map_tlb_entry(env, physical, prot,
- address, access_type, index, mmu_idx);
- }
-
- return TLBRET_NOMATCH;
-}
-
-static hwaddr dmw_va2pa(CPULoongArchState *env, target_ulong va,
- target_ulong dmw)
-{
- if (is_la64(env)) {
- return va & TARGET_VIRT_MASK;
- } else {
- uint32_t pseg = FIELD_EX32(dmw, CSR_DMW_32, PSEG);
- return (va & MAKE_64BIT_MASK(0, R_CSR_DMW_32_VSEG_SHIFT)) | \
- (pseg << R_CSR_DMW_32_VSEG_SHIFT);
- }
-}
-
-static int get_physical_address(CPULoongArchState *env, hwaddr *physical,
- int *prot, target_ulong address,
- MMUAccessType access_type, int mmu_idx)
-{
- int user_mode = mmu_idx == MMU_IDX_USER;
- int kernel_mode = mmu_idx == MMU_IDX_KERNEL;
- uint32_t plv, base_c, base_v;
- int64_t addr_high;
- uint8_t da = FIELD_EX64(env->CSR_CRMD, CSR_CRMD, DA);
- uint8_t pg = FIELD_EX64(env->CSR_CRMD, CSR_CRMD, PG);
-
- /* Check PG and DA */
- if (da & !pg) {
- *physical = address & TARGET_PHYS_MASK;
- *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
- return TLBRET_MATCH;
- }
-
- plv = kernel_mode | (user_mode << R_CSR_DMW_PLV3_SHIFT);
- if (is_la64(env)) {
- base_v = address >> R_CSR_DMW_64_VSEG_SHIFT;
- } else {
- base_v = address >> R_CSR_DMW_32_VSEG_SHIFT;
- }
- /* Check direct map window */
- for (int i = 0; i < 4; i++) {
- if (is_la64(env)) {
- base_c = FIELD_EX64(env->CSR_DMW[i], CSR_DMW_64, VSEG);
- } else {
- base_c = FIELD_EX64(env->CSR_DMW[i], CSR_DMW_32, VSEG);
- }
- if ((plv & env->CSR_DMW[i]) && (base_c == base_v)) {
- *physical = dmw_va2pa(env, address, env->CSR_DMW[i]);
- *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
- return TLBRET_MATCH;
- }
- }
-
- /* Check valid extension */
- addr_high = sextract64(address, TARGET_VIRT_ADDR_SPACE_BITS, 16);
- if (!(addr_high == 0 || addr_high == -1)) {
- return TLBRET_BADADDR;
- }
-
- /* Mapped address */
- return loongarch_map_address(env, physical, prot, address,
- access_type, mmu_idx);
-}
-
-hwaddr loongarch_cpu_get_phys_page_debug(CPUState *cs, vaddr addr)
-{
- LoongArchCPU *cpu = LOONGARCH_CPU(cs);
- CPULoongArchState *env = &cpu->env;
- hwaddr phys_addr;
- int prot;
-
- if (get_physical_address(env, &phys_addr, &prot, addr, MMU_DATA_LOAD,
- cpu_mmu_index(env, false)) != 0) {
- return -1;
- }
- return phys_addr;
-}
-
-static void raise_mmu_exception(CPULoongArchState *env, target_ulong address,
- MMUAccessType access_type, int tlb_error)
-{
- CPUState *cs = env_cpu(env);
-
- switch (tlb_error) {
- default:
- case TLBRET_BADADDR:
- cs->exception_index = access_type == MMU_INST_FETCH
- ? EXCCODE_ADEF : EXCCODE_ADEM;
- break;
- case TLBRET_NOMATCH:
- /* No TLB match for a mapped address */
- if (access_type == MMU_DATA_LOAD) {
- cs->exception_index = EXCCODE_PIL;
- } else if (access_type == MMU_DATA_STORE) {
- cs->exception_index = EXCCODE_PIS;
- } else if (access_type == MMU_INST_FETCH) {
- cs->exception_index = EXCCODE_PIF;
- }
- env->CSR_TLBRERA = FIELD_DP64(env->CSR_TLBRERA, CSR_TLBRERA, ISTLBR, 1);
- break;
- case TLBRET_INVALID:
- /* TLB match with no valid bit */
- if (access_type == MMU_DATA_LOAD) {
- cs->exception_index = EXCCODE_PIL;
- } else if (access_type == MMU_DATA_STORE) {
- cs->exception_index = EXCCODE_PIS;
- } else if (access_type == MMU_INST_FETCH) {
- cs->exception_index = EXCCODE_PIF;
- }
- break;
- case TLBRET_DIRTY:
- /* TLB match but 'D' bit is cleared */
- cs->exception_index = EXCCODE_PME;
- break;
- case TLBRET_XI:
- /* Execute-Inhibit Exception */
- cs->exception_index = EXCCODE_PNX;
- break;
- case TLBRET_RI:
- /* Read-Inhibit Exception */
- cs->exception_index = EXCCODE_PNR;
- break;
- case TLBRET_PE:
- /* Privileged Exception */
- cs->exception_index = EXCCODE_PPI;
- break;
- }
-
- if (tlb_error == TLBRET_NOMATCH) {
- env->CSR_TLBRBADV = address;
- if (is_la64(env)) {
- env->CSR_TLBREHI = FIELD_DP64(env->CSR_TLBREHI, CSR_TLBREHI_64,
- VPPN, extract64(address, 13, 35));
- } else {
- env->CSR_TLBREHI = FIELD_DP64(env->CSR_TLBREHI, CSR_TLBREHI_32,
- VPPN, extract64(address, 13, 19));
- }
- } else {
- if (!FIELD_EX64(env->CSR_DBG, CSR_DBG, DST)) {
- env->CSR_BADV = address;
- }
- env->CSR_TLBEHI = address & (TARGET_PAGE_MASK << 1);
- }
-}
-
-static void invalidate_tlb_entry(CPULoongArchState *env, int index)
-{
- target_ulong addr, mask, pagesize;
- uint8_t tlb_ps;
- LoongArchTLB *tlb = &env->tlb[index];
-
- int mmu_idx = cpu_mmu_index(env, false);
- uint8_t tlb_v0 = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, V);
- uint8_t tlb_v1 = FIELD_EX64(tlb->tlb_entry1, TLBENTRY, V);
- uint64_t tlb_vppn = FIELD_EX64(tlb->tlb_misc, TLB_MISC, VPPN);
-
- if (index >= LOONGARCH_STLB) {
- tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS);
- } else {
- tlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS);
- }
- pagesize = MAKE_64BIT_MASK(tlb_ps, 1);
- mask = MAKE_64BIT_MASK(0, tlb_ps + 1);
-
- if (tlb_v0) {
- addr = (tlb_vppn << R_TLB_MISC_VPPN_SHIFT) & ~mask; /* even */
- tlb_flush_range_by_mmuidx(env_cpu(env), addr, pagesize,
- mmu_idx, TARGET_LONG_BITS);
- }
-
- if (tlb_v1) {
- addr = (tlb_vppn << R_TLB_MISC_VPPN_SHIFT) & pagesize; /* odd */
- tlb_flush_range_by_mmuidx(env_cpu(env), addr, pagesize,
- mmu_idx, TARGET_LONG_BITS);
- }
-}
-
-static void invalidate_tlb(CPULoongArchState *env, int index)
-{
- LoongArchTLB *tlb;
- uint16_t csr_asid, tlb_asid, tlb_g;
-
- csr_asid = FIELD_EX64(env->CSR_ASID, CSR_ASID, ASID);
- tlb = &env->tlb[index];
- tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID);
- tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G);
- if (tlb_g == 0 && tlb_asid != csr_asid) {
- return;
- }
- invalidate_tlb_entry(env, index);
-}
-
-static void fill_tlb_entry(CPULoongArchState *env, int index)
-{
- LoongArchTLB *tlb = &env->tlb[index];
- uint64_t lo0, lo1, csr_vppn;
- uint16_t csr_asid;
- uint8_t csr_ps;
-
- if (FIELD_EX64(env->CSR_TLBRERA, CSR_TLBRERA, ISTLBR)) {
- csr_ps = FIELD_EX64(env->CSR_TLBREHI, CSR_TLBREHI, PS);
- if (is_la64(env)) {
- csr_vppn = FIELD_EX64(env->CSR_TLBREHI, CSR_TLBREHI_64, VPPN);
- } else {
- csr_vppn = FIELD_EX64(env->CSR_TLBREHI, CSR_TLBREHI_32, VPPN);
- }
- lo0 = env->CSR_TLBRELO0;
- lo1 = env->CSR_TLBRELO1;
- } else {
- csr_ps = FIELD_EX64(env->CSR_TLBIDX, CSR_TLBIDX, PS);
- if (is_la64(env)) {
- csr_vppn = FIELD_EX64(env->CSR_TLBEHI, CSR_TLBEHI_64, VPPN);
- } else {
- csr_vppn = FIELD_EX64(env->CSR_TLBEHI, CSR_TLBEHI_32, VPPN);
- }
- lo0 = env->CSR_TLBELO0;
- lo1 = env->CSR_TLBELO1;
- }
-
- if (csr_ps == 0) {
- qemu_log_mask(CPU_LOG_MMU, "page size is 0\n");
- }
-
- /* Only MTLB has the ps fields */
- if (index >= LOONGARCH_STLB) {
- tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, PS, csr_ps);
- }
-
- tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, VPPN, csr_vppn);
- tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, E, 1);
- csr_asid = FIELD_EX64(env->CSR_ASID, CSR_ASID, ASID);
- tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, ASID, csr_asid);
-
- tlb->tlb_entry0 = lo0;
- tlb->tlb_entry1 = lo1;
-}
-
-/* Return an random value between low and high */
-static uint32_t get_random_tlb(uint32_t low, uint32_t high)
-{
- uint32_t val;
-
- qemu_guest_getrandom_nofail(&val, sizeof(val));
- return val % (high - low + 1) + low;
-}
-
-void helper_tlbsrch(CPULoongArchState *env)
-{
- int index, match;
-
- if (FIELD_EX64(env->CSR_TLBRERA, CSR_TLBRERA, ISTLBR)) {
- match = loongarch_tlb_search(env, env->CSR_TLBREHI, &index);
- } else {
- match = loongarch_tlb_search(env, env->CSR_TLBEHI, &index);
- }
-
- if (match) {
- env->CSR_TLBIDX = FIELD_DP64(env->CSR_TLBIDX, CSR_TLBIDX, INDEX, index);
- env->CSR_TLBIDX = FIELD_DP64(env->CSR_TLBIDX, CSR_TLBIDX, NE, 0);
- return;
- }
-
- env->CSR_TLBIDX = FIELD_DP64(env->CSR_TLBIDX, CSR_TLBIDX, NE, 1);
-}
-
-void helper_tlbrd(CPULoongArchState *env)
-{
- LoongArchTLB *tlb;
- int index;
- uint8_t tlb_ps, tlb_e;
-
- index = FIELD_EX64(env->CSR_TLBIDX, CSR_TLBIDX, INDEX);
- tlb = &env->tlb[index];
-
- if (index >= LOONGARCH_STLB) {
- tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS);
- } else {
- tlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS);
- }
- tlb_e = FIELD_EX64(tlb->tlb_misc, TLB_MISC, E);
-
- if (!tlb_e) {
- /* Invalid TLB entry */
- env->CSR_TLBIDX = FIELD_DP64(env->CSR_TLBIDX, CSR_TLBIDX, NE, 1);
- env->CSR_ASID = FIELD_DP64(env->CSR_ASID, CSR_ASID, ASID, 0);
- env->CSR_TLBEHI = 0;
- env->CSR_TLBELO0 = 0;
- env->CSR_TLBELO1 = 0;
- env->CSR_TLBIDX = FIELD_DP64(env->CSR_TLBIDX, CSR_TLBIDX, PS, 0);
- } else {
- /* Valid TLB entry */
- env->CSR_TLBIDX = FIELD_DP64(env->CSR_TLBIDX, CSR_TLBIDX, NE, 0);
- env->CSR_TLBIDX = FIELD_DP64(env->CSR_TLBIDX, CSR_TLBIDX,
- PS, (tlb_ps & 0x3f));
- env->CSR_TLBEHI = FIELD_EX64(tlb->tlb_misc, TLB_MISC, VPPN) <<
- R_TLB_MISC_VPPN_SHIFT;
- env->CSR_TLBELO0 = tlb->tlb_entry0;
- env->CSR_TLBELO1 = tlb->tlb_entry1;
- }
-}
-
-void helper_tlbwr(CPULoongArchState *env)
-{
- int index = FIELD_EX64(env->CSR_TLBIDX, CSR_TLBIDX, INDEX);
-
- invalidate_tlb(env, index);
-
- if (FIELD_EX64(env->CSR_TLBIDX, CSR_TLBIDX, NE)) {
- env->tlb[index].tlb_misc = FIELD_DP64(env->tlb[index].tlb_misc,
- TLB_MISC, E, 0);
- return;
- }
-
- fill_tlb_entry(env, index);
-}
-
-void helper_tlbfill(CPULoongArchState *env)
-{
- uint64_t address, entryhi;
- int index, set, stlb_idx;
- uint16_t pagesize, stlb_ps;
-
- if (FIELD_EX64(env->CSR_TLBRERA, CSR_TLBRERA, ISTLBR)) {
- entryhi = env->CSR_TLBREHI;
- pagesize = FIELD_EX64(env->CSR_TLBREHI, CSR_TLBREHI, PS);
- } else {
- entryhi = env->CSR_TLBEHI;
- pagesize = FIELD_EX64(env->CSR_TLBIDX, CSR_TLBIDX, PS);
- }
-
- stlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS);
-
- if (pagesize == stlb_ps) {
- /* Only write into STLB bits [47:13] */
- address = entryhi & ~MAKE_64BIT_MASK(0, R_CSR_TLBEHI_64_VPPN_SHIFT);
-
- /* Choose one set ramdomly */
- set = get_random_tlb(0, 7);
-
- /* Index in one set */
- stlb_idx = (address >> (stlb_ps + 1)) & 0xff; /* [0,255] */
-
- index = set * 256 + stlb_idx;
- } else {
- /* Only write into MTLB */
- index = get_random_tlb(LOONGARCH_STLB, LOONGARCH_TLB_MAX - 1);
- }
-
- invalidate_tlb(env, index);
- fill_tlb_entry(env, index);
-}
-
-void helper_tlbclr(CPULoongArchState *env)
-{
- LoongArchTLB *tlb;
- int i, index;
- uint16_t csr_asid, tlb_asid, tlb_g;
-
- csr_asid = FIELD_EX64(env->CSR_ASID, CSR_ASID, ASID);
- index = FIELD_EX64(env->CSR_TLBIDX, CSR_TLBIDX, INDEX);
-
- if (index < LOONGARCH_STLB) {
- /* STLB. One line per operation */
- for (i = 0; i < 8; i++) {
- tlb = &env->tlb[i * 256 + (index % 256)];
- tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID);
- tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G);
- if (!tlb_g && tlb_asid == csr_asid) {
- tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, E, 0);
- }
- }
- } else if (index < LOONGARCH_TLB_MAX) {
- /* All MTLB entries */
- for (i = LOONGARCH_STLB; i < LOONGARCH_TLB_MAX; i++) {
- tlb = &env->tlb[i];
- tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID);
- tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G);
- if (!tlb_g && tlb_asid == csr_asid) {
- tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, E, 0);
- }
- }
- }
-
- tlb_flush(env_cpu(env));
-}
-
-void helper_tlbflush(CPULoongArchState *env)
-{
- int i, index;
-
- index = FIELD_EX64(env->CSR_TLBIDX, CSR_TLBIDX, INDEX);
-
- if (index < LOONGARCH_STLB) {
- /* STLB. One line per operation */
- for (i = 0; i < 8; i++) {
- int s_idx = i * 256 + (index % 256);
- env->tlb[s_idx].tlb_misc = FIELD_DP64(env->tlb[s_idx].tlb_misc,
- TLB_MISC, E, 0);
- }
- } else if (index < LOONGARCH_TLB_MAX) {
- /* All MTLB entries */
- for (i = LOONGARCH_STLB; i < LOONGARCH_TLB_MAX; i++) {
- env->tlb[i].tlb_misc = FIELD_DP64(env->tlb[i].tlb_misc,
- TLB_MISC, E, 0);
- }
- }
-
- tlb_flush(env_cpu(env));
-}
-
-void helper_invtlb_all(CPULoongArchState *env)
-{
- for (int i = 0; i < LOONGARCH_TLB_MAX; i++) {
- env->tlb[i].tlb_misc = FIELD_DP64(env->tlb[i].tlb_misc,
- TLB_MISC, E, 0);
- }
- tlb_flush(env_cpu(env));
-}
-
-void helper_invtlb_all_g(CPULoongArchState *env, uint32_t g)
-{
- for (int i = 0; i < LOONGARCH_TLB_MAX; i++) {
- LoongArchTLB *tlb = &env->tlb[i];
- uint8_t tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G);
-
- if (tlb_g == g) {
- tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, E, 0);
- }
- }
- tlb_flush(env_cpu(env));
-}
-
-void helper_invtlb_all_asid(CPULoongArchState *env, target_ulong info)
-{
- uint16_t asid = info & R_CSR_ASID_ASID_MASK;
-
- for (int i = 0; i < LOONGARCH_TLB_MAX; i++) {
- LoongArchTLB *tlb = &env->tlb[i];
- uint8_t tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G);
- uint16_t tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID);
-
- if (!tlb_g && (tlb_asid == asid)) {
- tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, E, 0);
- }
- }
- tlb_flush(env_cpu(env));
-}
-
-void helper_invtlb_page_asid(CPULoongArchState *env, target_ulong info,
- target_ulong addr)
-{
- uint16_t asid = info & 0x3ff;
-
- for (int i = 0; i < LOONGARCH_TLB_MAX; i++) {
- LoongArchTLB *tlb = &env->tlb[i];
- uint8_t tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G);
- uint16_t tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID);
- uint64_t vpn, tlb_vppn;
- uint8_t tlb_ps, compare_shift;
-
- if (i >= LOONGARCH_STLB) {
- tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS);
- } else {
- tlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS);
- }
- tlb_vppn = FIELD_EX64(tlb->tlb_misc, TLB_MISC, VPPN);
- vpn = (addr & TARGET_VIRT_MASK) >> (tlb_ps + 1);
- compare_shift = tlb_ps + 1 - R_TLB_MISC_VPPN_SHIFT;
-
- if (!tlb_g && (tlb_asid == asid) &&
- (vpn == (tlb_vppn >> compare_shift))) {
- tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, E, 0);
- }
- }
- tlb_flush(env_cpu(env));
-}
-
-void helper_invtlb_page_asid_or_g(CPULoongArchState *env,
- target_ulong info, target_ulong addr)
-{
- uint16_t asid = info & 0x3ff;
-
- for (int i = 0; i < LOONGARCH_TLB_MAX; i++) {
- LoongArchTLB *tlb = &env->tlb[i];
- uint8_t tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G);
- uint16_t tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID);
- uint64_t vpn, tlb_vppn;
- uint8_t tlb_ps, compare_shift;
-
- if (i >= LOONGARCH_STLB) {
- tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS);
- } else {
- tlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS);
- }
- tlb_vppn = FIELD_EX64(tlb->tlb_misc, TLB_MISC, VPPN);
- vpn = (addr & TARGET_VIRT_MASK) >> (tlb_ps + 1);
- compare_shift = tlb_ps + 1 - R_TLB_MISC_VPPN_SHIFT;
-
- if ((tlb_g || (tlb_asid == asid)) &&
- (vpn == (tlb_vppn >> compare_shift))) {
- tlb->tlb_misc = FIELD_DP64(tlb->tlb_misc, TLB_MISC, E, 0);
- }
- }
- tlb_flush(env_cpu(env));
-}
-
-bool loongarch_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
- MMUAccessType access_type, int mmu_idx,
- bool probe, uintptr_t retaddr)
-{
- LoongArchCPU *cpu = LOONGARCH_CPU(cs);
- CPULoongArchState *env = &cpu->env;
- hwaddr physical;
- int prot;
- int ret;
-
- /* Data access */
- ret = get_physical_address(env, &physical, &prot, address,
- access_type, mmu_idx);
-
- if (ret == TLBRET_MATCH) {
- tlb_set_page(cs, address & TARGET_PAGE_MASK,
- physical & TARGET_PAGE_MASK, prot,
- mmu_idx, TARGET_PAGE_SIZE);
- qemu_log_mask(CPU_LOG_MMU,
- "%s address=%" VADDR_PRIx " physical " HWADDR_FMT_plx
- " prot %d\n", __func__, address, physical, prot);
- return true;
- } else {
- qemu_log_mask(CPU_LOG_MMU,
- "%s address=%" VADDR_PRIx " ret %d\n", __func__, address,
- ret);
- }
- if (probe) {
- return false;
- }
- raise_mmu_exception(env, address, access_type, ret);
- cpu_loop_exit_restore(cs, retaddr);
-}
-
-target_ulong helper_lddir(CPULoongArchState *env, target_ulong base,
- target_ulong level, uint32_t mem_idx)
-{
- CPUState *cs = env_cpu(env);
- target_ulong badvaddr, index, phys, ret;
- int shift;
- uint64_t dir_base, dir_width;
- bool huge = (base >> LOONGARCH_PAGE_HUGE_SHIFT) & 0x1;
-
- badvaddr = env->CSR_TLBRBADV;
- base = base & TARGET_PHYS_MASK;
-
- /* 0:64bit, 1:128bit, 2:192bit, 3:256bit */
- shift = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, PTEWIDTH);
- shift = (shift + 1) * 3;
-
- if (huge) {
- return base;
- }
- switch (level) {
- case 1:
- dir_base = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, DIR1_BASE);
- dir_width = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, DIR1_WIDTH);
- break;
- case 2:
- dir_base = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, DIR2_BASE);
- dir_width = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, DIR2_WIDTH);
- break;
- case 3:
- dir_base = FIELD_EX64(env->CSR_PWCH, CSR_PWCH, DIR3_BASE);
- dir_width = FIELD_EX64(env->CSR_PWCH, CSR_PWCH, DIR3_WIDTH);
- break;
- case 4:
- dir_base = FIELD_EX64(env->CSR_PWCH, CSR_PWCH, DIR4_BASE);
- dir_width = FIELD_EX64(env->CSR_PWCH, CSR_PWCH, DIR4_WIDTH);
- break;
- default:
- do_raise_exception(env, EXCCODE_INE, GETPC());
- return 0;
- }
- index = (badvaddr >> dir_base) & ((1 << dir_width) - 1);
- phys = base | index << shift;
- ret = ldq_phys(cs->as, phys) & TARGET_PHYS_MASK;
- return ret;
-}
-
-void helper_ldpte(CPULoongArchState *env, target_ulong base, target_ulong odd,
- uint32_t mem_idx)
-{
- CPUState *cs = env_cpu(env);
- target_ulong phys, tmp0, ptindex, ptoffset0, ptoffset1, ps, badv;
- int shift;
- bool huge = (base >> LOONGARCH_PAGE_HUGE_SHIFT) & 0x1;
- uint64_t ptbase = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, PTBASE);
- uint64_t ptwidth = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, PTWIDTH);
-
- base = base & TARGET_PHYS_MASK;
-
- if (huge) {
- /* Huge Page. base is paddr */
- tmp0 = base ^ (1 << LOONGARCH_PAGE_HUGE_SHIFT);
- /* Move Global bit */
- tmp0 = ((tmp0 & (1 << LOONGARCH_HGLOBAL_SHIFT)) >>
- LOONGARCH_HGLOBAL_SHIFT) << R_TLBENTRY_G_SHIFT |
- (tmp0 & (~(1 << LOONGARCH_HGLOBAL_SHIFT)));
- ps = ptbase + ptwidth - 1;
- if (odd) {
- tmp0 += MAKE_64BIT_MASK(ps, 1);
- }
- } else {
- /* 0:64bit, 1:128bit, 2:192bit, 3:256bit */
- shift = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, PTEWIDTH);
- shift = (shift + 1) * 3;
- badv = env->CSR_TLBRBADV;
-
- ptindex = (badv >> ptbase) & ((1 << ptwidth) - 1);
- ptindex = ptindex & ~0x1; /* clear bit 0 */
- ptoffset0 = ptindex << shift;
- ptoffset1 = (ptindex + 1) << shift;
-
- phys = base | (odd ? ptoffset1 : ptoffset0);
- tmp0 = ldq_phys(cs->as, phys) & TARGET_PHYS_MASK;
- ps = ptbase;
- }
-
- if (odd) {
- env->CSR_TLBRELO1 = tmp0;
- } else {
- env->CSR_TLBRELO0 = tmp0;
- }
- env->CSR_TLBREHI = FIELD_DP64(env->CSR_TLBREHI, CSR_TLBREHI, PS, ps);
-}
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * LoongArch emulation for QEMU - main translation routines.
- *
- * Copyright (c) 2021 Loongson Technology Corporation Limited
- */
-
-#include "qemu/osdep.h"
-#include "cpu.h"
-#include "tcg/tcg-op.h"
-#include "tcg/tcg-op-gvec.h"
-#include "exec/translation-block.h"
-#include "exec/translator.h"
-#include "exec/helper-proto.h"
-#include "exec/helper-gen.h"
-#include "exec/log.h"
-#include "qemu/qemu-print.h"
-#include "fpu/softfloat.h"
-#include "translate.h"
-#include "internals.h"
-#include "vec.h"
-
-/* Global register indices */
-TCGv cpu_gpr[32], cpu_pc;
-static TCGv cpu_lladdr, cpu_llval;
-
-#define HELPER_H "helper.h"
-#include "exec/helper-info.c.inc"
-#undef HELPER_H
-
-#define DISAS_STOP DISAS_TARGET_0
-#define DISAS_EXIT DISAS_TARGET_1
-#define DISAS_EXIT_UPDATE DISAS_TARGET_2
-
-static inline int vec_full_offset(int regno)
-{
- return offsetof(CPULoongArchState, fpr[regno]);
-}
-
-static inline int vec_reg_offset(int regno, int index, MemOp mop)
-{
- const uint8_t size = 1 << mop;
- int offs = index * size;
-
- if (HOST_BIG_ENDIAN && size < 8 ) {
- offs ^= (8 - size);
- }
-
- return offs + vec_full_offset(regno);
-}
-
-static inline void get_vreg64(TCGv_i64 dest, int regno, int index)
-{
- tcg_gen_ld_i64(dest, tcg_env,
- offsetof(CPULoongArchState, fpr[regno].vreg.D(index)));
-}
-
-static inline void set_vreg64(TCGv_i64 src, int regno, int index)
-{
- tcg_gen_st_i64(src, tcg_env,
- offsetof(CPULoongArchState, fpr[regno].vreg.D(index)));
-}
-
-static inline int plus_1(DisasContext *ctx, int x)
-{
- return x + 1;
-}
-
-static inline int shl_1(DisasContext *ctx, int x)
-{
- return x << 1;
-}
-
-static inline int shl_2(DisasContext *ctx, int x)
-{
- return x << 2;
-}
-
-static inline int shl_3(DisasContext *ctx, int x)
-{
- return x << 3;
-}
-
-/*
- * LoongArch the upper 32 bits are undefined ("can be any value").
- * QEMU chooses to nanbox, because it is most likely to show guest bugs early.
- */
-static void gen_nanbox_s(TCGv_i64 out, TCGv_i64 in)
-{
- tcg_gen_ori_i64(out, in, MAKE_64BIT_MASK(32, 32));
-}
-
-void generate_exception(DisasContext *ctx, int excp)
-{
- tcg_gen_movi_tl(cpu_pc, ctx->base.pc_next);
- gen_helper_raise_exception(tcg_env, tcg_constant_i32(excp));
- ctx->base.is_jmp = DISAS_NORETURN;
-}
-
-static inline void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest)
-{
- if (ctx->va32) {
- dest = (uint32_t) dest;
- }
-
- if (translator_use_goto_tb(&ctx->base, dest)) {
- tcg_gen_goto_tb(n);
- tcg_gen_movi_tl(cpu_pc, dest);
- tcg_gen_exit_tb(ctx->base.tb, n);
- } else {
- tcg_gen_movi_tl(cpu_pc, dest);
- tcg_gen_lookup_and_goto_ptr();
- }
-}
-
-static void loongarch_tr_init_disas_context(DisasContextBase *dcbase,
- CPUState *cs)
-{
- int64_t bound;
- CPULoongArchState *env = cpu_env(cs);
- DisasContext *ctx = container_of(dcbase, DisasContext, base);
-
- ctx->page_start = ctx->base.pc_first & TARGET_PAGE_MASK;
- ctx->plv = ctx->base.tb->flags & HW_FLAGS_PLV_MASK;
- if (ctx->base.tb->flags & HW_FLAGS_CRMD_PG) {
- ctx->mem_idx = ctx->plv;
- } else {
- ctx->mem_idx = MMU_IDX_DA;
- }
-
- /* Bound the number of insns to execute to those left on the page. */
- bound = -(ctx->base.pc_first | TARGET_PAGE_MASK) / 4;
- ctx->base.max_insns = MIN(ctx->base.max_insns, bound);
-
- if (FIELD_EX64(env->cpucfg[2], CPUCFG2, LSX)) {
- ctx->vl = LSX_LEN;
- }
-
- if (FIELD_EX64(env->cpucfg[2], CPUCFG2, LASX)) {
- ctx->vl = LASX_LEN;
- }
-
- ctx->la64 = is_la64(env);
- ctx->va32 = (ctx->base.tb->flags & HW_FLAGS_VA32) != 0;
-
- ctx->zero = tcg_constant_tl(0);
-
- ctx->cpucfg1 = env->cpucfg[1];
- ctx->cpucfg2 = env->cpucfg[2];
-}
-
-static void loongarch_tr_tb_start(DisasContextBase *dcbase, CPUState *cs)
-{
-}
-
-static void loongarch_tr_insn_start(DisasContextBase *dcbase, CPUState *cs)
-{
- DisasContext *ctx = container_of(dcbase, DisasContext, base);
-
- tcg_gen_insn_start(ctx->base.pc_next);
-}
-
-/*
- * Wrappers for getting reg values.
- *
- * The $zero register does not have cpu_gpr[0] allocated -- we supply the
- * constant zero as a source, and an uninitialized sink as destination.
- *
- * Further, we may provide an extension for word operations.
- */
-static TCGv gpr_src(DisasContext *ctx, int reg_num, DisasExtend src_ext)
-{
- TCGv t;
-
- if (reg_num == 0) {
- return ctx->zero;
- }
-
- switch (src_ext) {
- case EXT_NONE:
- return cpu_gpr[reg_num];
- case EXT_SIGN:
- t = tcg_temp_new();
- tcg_gen_ext32s_tl(t, cpu_gpr[reg_num]);
- return t;
- case EXT_ZERO:
- t = tcg_temp_new();
- tcg_gen_ext32u_tl(t, cpu_gpr[reg_num]);
- return t;
- }
- g_assert_not_reached();
-}
-
-static TCGv gpr_dst(DisasContext *ctx, int reg_num, DisasExtend dst_ext)
-{
- if (reg_num == 0 || dst_ext) {
- return tcg_temp_new();
- }
- return cpu_gpr[reg_num];
-}
-
-static void gen_set_gpr(int reg_num, TCGv t, DisasExtend dst_ext)
-{
- if (reg_num != 0) {
- switch (dst_ext) {
- case EXT_NONE:
- tcg_gen_mov_tl(cpu_gpr[reg_num], t);
- break;
- case EXT_SIGN:
- tcg_gen_ext32s_tl(cpu_gpr[reg_num], t);
- break;
- case EXT_ZERO:
- tcg_gen_ext32u_tl(cpu_gpr[reg_num], t);
- break;
- default:
- g_assert_not_reached();
- }
- }
-}
-
-static TCGv get_fpr(DisasContext *ctx, int reg_num)
-{
- TCGv t = tcg_temp_new();
- tcg_gen_ld_i64(t, tcg_env,
- offsetof(CPULoongArchState, fpr[reg_num].vreg.D(0)));
- return t;
-}
-
-static void set_fpr(int reg_num, TCGv val)
-{
- tcg_gen_st_i64(val, tcg_env,
- offsetof(CPULoongArchState, fpr[reg_num].vreg.D(0)));
-}
-
-static TCGv make_address_x(DisasContext *ctx, TCGv base, TCGv addend)
-{
- TCGv temp = NULL;
-
- if (addend || ctx->va32) {
- temp = tcg_temp_new();
- }
- if (addend) {
- tcg_gen_add_tl(temp, base, addend);
- base = temp;
- }
- if (ctx->va32) {
- tcg_gen_ext32u_tl(temp, base);
- base = temp;
- }
- return base;
-}
-
-static TCGv make_address_i(DisasContext *ctx, TCGv base, target_long ofs)
-{
- TCGv addend = ofs ? tcg_constant_tl(ofs) : NULL;
- return make_address_x(ctx, base, addend);
-}
-
-static uint64_t make_address_pc(DisasContext *ctx, uint64_t addr)
-{
- if (ctx->va32) {
- addr = (int32_t)addr;
- }
- return addr;
-}
-
-#include "decode-insns.c.inc"
-#include "insn_trans/trans_arith.c.inc"
-#include "insn_trans/trans_shift.c.inc"
-#include "insn_trans/trans_bit.c.inc"
-#include "insn_trans/trans_memory.c.inc"
-#include "insn_trans/trans_atomic.c.inc"
-#include "insn_trans/trans_extra.c.inc"
-#include "insn_trans/trans_farith.c.inc"
-#include "insn_trans/trans_fcmp.c.inc"
-#include "insn_trans/trans_fcnv.c.inc"
-#include "insn_trans/trans_fmov.c.inc"
-#include "insn_trans/trans_fmemory.c.inc"
-#include "insn_trans/trans_branch.c.inc"
-#include "insn_trans/trans_privileged.c.inc"
-#include "insn_trans/trans_vec.c.inc"
-
-static void loongarch_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
-{
- CPULoongArchState *env = cpu_env(cs);
- DisasContext *ctx = container_of(dcbase, DisasContext, base);
-
- ctx->opcode = translator_ldl(env, &ctx->base, ctx->base.pc_next);
-
- if (!decode(ctx, ctx->opcode)) {
- qemu_log_mask(LOG_UNIMP, "Error: unknown opcode. "
- TARGET_FMT_lx ": 0x%x\n",
- ctx->base.pc_next, ctx->opcode);
- generate_exception(ctx, EXCCODE_INE);
- }
-
- ctx->base.pc_next += 4;
-
- if (ctx->va32) {
- ctx->base.pc_next = (uint32_t)ctx->base.pc_next;
- }
-}
-
-static void loongarch_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
-{
- DisasContext *ctx = container_of(dcbase, DisasContext, base);
-
- switch (ctx->base.is_jmp) {
- case DISAS_STOP:
- tcg_gen_movi_tl(cpu_pc, ctx->base.pc_next);
- tcg_gen_lookup_and_goto_ptr();
- break;
- case DISAS_TOO_MANY:
- gen_goto_tb(ctx, 0, ctx->base.pc_next);
- break;
- case DISAS_NORETURN:
- break;
- case DISAS_EXIT_UPDATE:
- tcg_gen_movi_tl(cpu_pc, ctx->base.pc_next);
- QEMU_FALLTHROUGH;
- case DISAS_EXIT:
- tcg_gen_exit_tb(NULL, 0);
- break;
- default:
- g_assert_not_reached();
- }
-}
-
-static void loongarch_tr_disas_log(const DisasContextBase *dcbase,
- CPUState *cpu, FILE *logfile)
-{
- qemu_log("IN: %s\n", lookup_symbol(dcbase->pc_first));
- target_disas(logfile, cpu, dcbase->pc_first, dcbase->tb->size);
-}
-
-static const TranslatorOps loongarch_tr_ops = {
- .init_disas_context = loongarch_tr_init_disas_context,
- .tb_start = loongarch_tr_tb_start,
- .insn_start = loongarch_tr_insn_start,
- .translate_insn = loongarch_tr_translate_insn,
- .tb_stop = loongarch_tr_tb_stop,
- .disas_log = loongarch_tr_disas_log,
-};
-
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
- target_ulong pc, void *host_pc)
-{
- DisasContext ctx;
-
- translator_loop(cs, tb, max_insns, pc, host_pc,
- &loongarch_tr_ops, &ctx.base);
-}
-
-void loongarch_translate_init(void)
-{
- int i;
-
- cpu_gpr[0] = NULL;
- for (i = 1; i < 32; i++) {
- cpu_gpr[i] = tcg_global_mem_new(tcg_env,
- offsetof(CPULoongArchState, gpr[i]),
- regnames[i]);
- }
-
- cpu_pc = tcg_global_mem_new(tcg_env, offsetof(CPULoongArchState, pc), "pc");
- cpu_lladdr = tcg_global_mem_new(tcg_env,
- offsetof(CPULoongArchState, lladdr), "lladdr");
- cpu_llval = tcg_global_mem_new(tcg_env,
- offsetof(CPULoongArchState, llval), "llval");
-}
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * QEMU LoongArch vector helper functions.
- *
- * Copyright (c) 2022-2023 Loongson Technology Corporation Limited
- */
-
-#include "qemu/osdep.h"
-#include "cpu.h"
-#include "exec/exec-all.h"
-#include "exec/helper-proto.h"
-#include "fpu/softfloat.h"
-#include "internals.h"
-#include "tcg/tcg.h"
-#include "vec.h"
-#include "tcg/tcg-gvec-desc.h"
-
-#define DO_ODD_EVEN(NAME, BIT, E1, E2, DO_OP) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- typedef __typeof(Vd->E1(0)) TD; \
- int oprsz = simd_oprsz(desc); \
- \
- for (i = 0; i < oprsz / (BIT / 8); i++) { \
- Vd->E1(i) = DO_OP((TD)Vj->E2(2 * i + 1), (TD)Vk->E2(2 * i)); \
- } \
-}
-
-DO_ODD_EVEN(vhaddw_h_b, 16, H, B, DO_ADD)
-DO_ODD_EVEN(vhaddw_w_h, 32, W, H, DO_ADD)
-DO_ODD_EVEN(vhaddw_d_w, 64, D, W, DO_ADD)
-
-void HELPER(vhaddw_q_d)(void *vd, void *vj, void *vk, uint32_t desc)
-{
- int i;
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- VReg *Vk = (VReg *)vk;
- int oprsz = simd_oprsz(desc);
-
- for (i = 0; i < oprsz / 16 ; i++) {
- Vd->Q(i) = int128_add(int128_makes64(Vj->D(2 * i + 1)),
- int128_makes64(Vk->D(2 * i)));
- }
-}
-
-DO_ODD_EVEN(vhsubw_h_b, 16, H, B, DO_SUB)
-DO_ODD_EVEN(vhsubw_w_h, 32, W, H, DO_SUB)
-DO_ODD_EVEN(vhsubw_d_w, 64, D, W, DO_SUB)
-
-void HELPER(vhsubw_q_d)(void *vd, void *vj, void *vk, uint32_t desc)
-{
- int i;
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- VReg *Vk = (VReg *)vk;
- int oprsz = simd_oprsz(desc);
-
- for (i = 0; i < oprsz / 16; i++) {
- Vd->Q(i) = int128_sub(int128_makes64(Vj->D(2 * i + 1)),
- int128_makes64(Vk->D(2 * i)));
- }
-}
-
-DO_ODD_EVEN(vhaddw_hu_bu, 16, UH, UB, DO_ADD)
-DO_ODD_EVEN(vhaddw_wu_hu, 32, UW, UH, DO_ADD)
-DO_ODD_EVEN(vhaddw_du_wu, 64, UD, UW, DO_ADD)
-
-void HELPER(vhaddw_qu_du)(void *vd, void *vj, void *vk, uint32_t desc)
-{
- int i;
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- VReg *Vk = (VReg *)vk;
- int oprsz = simd_oprsz(desc);
-
- for (i = 0; i < oprsz / 16; i ++) {
- Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i + 1)),
- int128_make64(Vk->UD(2 * i)));
- }
-}
-
-DO_ODD_EVEN(vhsubw_hu_bu, 16, UH, UB, DO_SUB)
-DO_ODD_EVEN(vhsubw_wu_hu, 32, UW, UH, DO_SUB)
-DO_ODD_EVEN(vhsubw_du_wu, 64, UD, UW, DO_SUB)
-
-void HELPER(vhsubw_qu_du)(void *vd, void *vj, void *vk, uint32_t desc)
-{
- int i;
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- VReg *Vk = (VReg *)vk;
- int oprsz = simd_oprsz(desc);
-
- for (i = 0; i < oprsz / 16; i++) {
- Vd->Q(i) = int128_sub(int128_make64(Vj->UD(2 * i + 1)),
- int128_make64(Vk->UD(2 * i)));
- }
-}
-
-#define DO_EVEN(NAME, BIT, E1, E2, DO_OP) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- typedef __typeof(Vd->E1(0)) TD; \
- int oprsz = simd_oprsz(desc); \
- \
- for (i = 0; i < oprsz / (BIT / 8); i++) { \
- Vd->E1(i) = DO_OP((TD)Vj->E2(2 * i) ,(TD)Vk->E2(2 * i)); \
- } \
-}
-
-#define DO_ODD(NAME, BIT, E1, E2, DO_OP) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- typedef __typeof(Vd->E1(0)) TD; \
- int oprsz = simd_oprsz(desc); \
- \
- for (i = 0; i < oprsz / (BIT / 8); i++) { \
- Vd->E1(i) = DO_OP((TD)Vj->E2(2 * i + 1), (TD)Vk->E2(2 * i + 1)); \
- } \
-}
-
-void HELPER(vaddwev_q_d)(void *vd, void *vj, void *vk, uint32_t desc)
-{
- int i;
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- VReg *Vk = (VReg *)vk;
- int oprsz = simd_oprsz(desc);
-
- for (i = 0; i < oprsz / 16; i++) {
- Vd->Q(i) = int128_add(int128_makes64(Vj->D(2 * i)),
- int128_makes64(Vk->D(2 * i)));
- }
-}
-
-DO_EVEN(vaddwev_h_b, 16, H, B, DO_ADD)
-DO_EVEN(vaddwev_w_h, 32, W, H, DO_ADD)
-DO_EVEN(vaddwev_d_w, 64, D, W, DO_ADD)
-
-void HELPER(vaddwod_q_d)(void *vd, void *vj, void *vk, uint32_t desc)
-{
- int i;
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- VReg *Vk = (VReg *)vk;
- int oprsz = simd_oprsz(desc);
-
- for (i = 0; i < oprsz / 16; i++) {
- Vd->Q(i) = int128_add(int128_makes64(Vj->D(2 * i +1)),
- int128_makes64(Vk->D(2 * i +1)));
- }
-}
-
-DO_ODD(vaddwod_h_b, 16, H, B, DO_ADD)
-DO_ODD(vaddwod_w_h, 32, W, H, DO_ADD)
-DO_ODD(vaddwod_d_w, 64, D, W, DO_ADD)
-
-void HELPER(vsubwev_q_d)(void *vd, void *vj, void *vk, uint32_t desc)
-{
- int i;
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- VReg *Vk = (VReg *)vk;
- int oprsz = simd_oprsz(desc);
-
- for (i = 0; i < oprsz / 16; i++) {
- Vd->Q(i) = int128_sub(int128_makes64(Vj->D(2 * i)),
- int128_makes64(Vk->D(2 * i)));
- }
-}
-
-DO_EVEN(vsubwev_h_b, 16, H, B, DO_SUB)
-DO_EVEN(vsubwev_w_h, 32, W, H, DO_SUB)
-DO_EVEN(vsubwev_d_w, 64, D, W, DO_SUB)
-
-void HELPER(vsubwod_q_d)(void *vd, void *vj, void *vk, uint32_t desc)
-{
- int i;
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- VReg *Vk = (VReg *)vk;
- int oprsz = simd_oprsz(desc);
-
- for (i = 0; i < oprsz / 16; i++) {
- Vd->Q(i) = int128_sub(int128_makes64(Vj->D(2 * i + 1)),
- int128_makes64(Vk->D(2 * i + 1)));
- }
-}
-
-DO_ODD(vsubwod_h_b, 16, H, B, DO_SUB)
-DO_ODD(vsubwod_w_h, 32, W, H, DO_SUB)
-DO_ODD(vsubwod_d_w, 64, D, W, DO_SUB)
-
-void HELPER(vaddwev_q_du)(void *vd, void *vj, void *vk, uint32_t desc)
-{
- int i;
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- VReg *Vk = (VReg *)vk;
- int oprsz = simd_oprsz(desc);
-
- for (i = 0; i < oprsz / 16; i++) {
- Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i)),
- int128_make64(Vk->UD(2 * i)));
- }
-}
-
-DO_EVEN(vaddwev_h_bu, 16, UH, UB, DO_ADD)
-DO_EVEN(vaddwev_w_hu, 32, UW, UH, DO_ADD)
-DO_EVEN(vaddwev_d_wu, 64, UD, UW, DO_ADD)
-
-void HELPER(vaddwod_q_du)(void *vd, void *vj, void *vk, uint32_t desc)
-{
- int i;
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- VReg *Vk = (VReg *)vk;
- int oprsz = simd_oprsz(desc);
-
- for (i = 0; i < oprsz / 16; i++) {
- Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i + 1)),
- int128_make64(Vk->UD(2 * i + 1)));
- }
-}
-
-DO_ODD(vaddwod_h_bu, 16, UH, UB, DO_ADD)
-DO_ODD(vaddwod_w_hu, 32, UW, UH, DO_ADD)
-DO_ODD(vaddwod_d_wu, 64, UD, UW, DO_ADD)
-
-void HELPER(vsubwev_q_du)(void *vd, void *vj, void *vk, uint32_t desc)
-{
- int i;
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- VReg *Vk = (VReg *)vk;
- int oprsz = simd_oprsz(desc);
-
- for (i = 0; i < oprsz / 16; i++) {
- Vd->Q(i) = int128_sub(int128_make64(Vj->UD(2 * i)),
- int128_make64(Vk->UD(2 * i)));
- }
-}
-
-DO_EVEN(vsubwev_h_bu, 16, UH, UB, DO_SUB)
-DO_EVEN(vsubwev_w_hu, 32, UW, UH, DO_SUB)
-DO_EVEN(vsubwev_d_wu, 64, UD, UW, DO_SUB)
-
-void HELPER(vsubwod_q_du)(void *vd, void *vj, void *vk, uint32_t desc)
-{
- int i;
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- VReg *Vk = (VReg *)vk;
- int oprsz = simd_oprsz(desc);
-
- for (i = 0; i < oprsz / 16; i++) {
- Vd->Q(i) = int128_sub(int128_make64(Vj->UD(2 * i + 1)),
- int128_make64(Vk->UD(2 * i + 1)));
- }
-}
-
-DO_ODD(vsubwod_h_bu, 16, UH, UB, DO_SUB)
-DO_ODD(vsubwod_w_hu, 32, UW, UH, DO_SUB)
-DO_ODD(vsubwod_d_wu, 64, UD, UW, DO_SUB)
-
-#define DO_EVEN_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- typedef __typeof(Vd->ES1(0)) TDS; \
- typedef __typeof(Vd->EU1(0)) TDU; \
- int oprsz = simd_oprsz(desc); \
- \
- for (i = 0; i < oprsz / (BIT / 8); i++) { \
- Vd->ES1(i) = DO_OP((TDU)Vj->EU2(2 * i), (TDS)Vk->ES2(2 * i)); \
- } \
-}
-
-#define DO_ODD_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- typedef __typeof(Vd->ES1(0)) TDS; \
- typedef __typeof(Vd->EU1(0)) TDU; \
- int oprsz = simd_oprsz(desc); \
- \
- for (i = 0; i < oprsz / (BIT / 8); i++) { \
- Vd->ES1(i) = DO_OP((TDU)Vj->EU2(2 * i + 1), (TDS)Vk->ES2(2 * i + 1)); \
- } \
-}
-
-void HELPER(vaddwev_q_du_d)(void *vd, void *vj, void *vk, uint32_t desc)
-{
- int i;
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- VReg *Vk = (VReg *)vk;
- int oprsz = simd_oprsz(desc);
-
- for (i = 0; i < oprsz / 16; i++) {
- Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i)),
- int128_makes64(Vk->D(2 * i)));
- }
-}
-
-DO_EVEN_U_S(vaddwev_h_bu_b, 16, H, UH, B, UB, DO_ADD)
-DO_EVEN_U_S(vaddwev_w_hu_h, 32, W, UW, H, UH, DO_ADD)
-DO_EVEN_U_S(vaddwev_d_wu_w, 64, D, UD, W, UW, DO_ADD)
-
-void HELPER(vaddwod_q_du_d)(void *vd, void *vj, void *vk, uint32_t desc)
-{
- int i;
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- VReg *Vk = (VReg *)vk;
- int oprsz = simd_oprsz(desc);
-
- for (i = 0; i < oprsz / 16; i++) {
- Vd->Q(i) = int128_add(int128_make64(Vj->UD(2 * i + 1)),
- int128_makes64(Vk->D(2 * i + 1)));
- }
-}
-
-DO_ODD_U_S(vaddwod_h_bu_b, 16, H, UH, B, UB, DO_ADD)
-DO_ODD_U_S(vaddwod_w_hu_h, 32, W, UW, H, UH, DO_ADD)
-DO_ODD_U_S(vaddwod_d_wu_w, 64, D, UD, W, UW, DO_ADD)
-
-#define DO_3OP(NAME, BIT, E, DO_OP) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- int oprsz = simd_oprsz(desc); \
- \
- for (i = 0; i < oprsz / (BIT / 8); i++) { \
- Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)); \
- } \
-}
-
-DO_3OP(vavg_b, 8, B, DO_VAVG)
-DO_3OP(vavg_h, 16, H, DO_VAVG)
-DO_3OP(vavg_w, 32, W, DO_VAVG)
-DO_3OP(vavg_d, 64, D, DO_VAVG)
-DO_3OP(vavgr_b, 8, B, DO_VAVGR)
-DO_3OP(vavgr_h, 16, H, DO_VAVGR)
-DO_3OP(vavgr_w, 32, W, DO_VAVGR)
-DO_3OP(vavgr_d, 64, D, DO_VAVGR)
-DO_3OP(vavg_bu, 8, UB, DO_VAVG)
-DO_3OP(vavg_hu, 16, UH, DO_VAVG)
-DO_3OP(vavg_wu, 32, UW, DO_VAVG)
-DO_3OP(vavg_du, 64, UD, DO_VAVG)
-DO_3OP(vavgr_bu, 8, UB, DO_VAVGR)
-DO_3OP(vavgr_hu, 16, UH, DO_VAVGR)
-DO_3OP(vavgr_wu, 32, UW, DO_VAVGR)
-DO_3OP(vavgr_du, 64, UD, DO_VAVGR)
-
-DO_3OP(vabsd_b, 8, B, DO_VABSD)
-DO_3OP(vabsd_h, 16, H, DO_VABSD)
-DO_3OP(vabsd_w, 32, W, DO_VABSD)
-DO_3OP(vabsd_d, 64, D, DO_VABSD)
-DO_3OP(vabsd_bu, 8, UB, DO_VABSD)
-DO_3OP(vabsd_hu, 16, UH, DO_VABSD)
-DO_3OP(vabsd_wu, 32, UW, DO_VABSD)
-DO_3OP(vabsd_du, 64, UD, DO_VABSD)
-
-#define DO_VADDA(NAME, BIT, E) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- int oprsz = simd_oprsz(desc); \
- \
- for (i = 0; i < oprsz / (BIT / 8); i++) { \
- Vd->E(i) = DO_VABS(Vj->E(i)) + DO_VABS(Vk->E(i)); \
- } \
-}
-
-DO_VADDA(vadda_b, 8, B)
-DO_VADDA(vadda_h, 16, H)
-DO_VADDA(vadda_w, 32, W)
-DO_VADDA(vadda_d, 64, D)
-
-#define VMINMAXI(NAME, BIT, E, DO_OP) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- typedef __typeof(Vd->E(0)) TD; \
- int oprsz = simd_oprsz(desc); \
- \
- for (i = 0; i < oprsz / (BIT / 8); i++) { \
- Vd->E(i) = DO_OP(Vj->E(i), (TD)imm); \
- } \
-}
-
-VMINMAXI(vmini_b, 8, B, DO_MIN)
-VMINMAXI(vmini_h, 16, H, DO_MIN)
-VMINMAXI(vmini_w, 32, W, DO_MIN)
-VMINMAXI(vmini_d, 64, D, DO_MIN)
-VMINMAXI(vmaxi_b, 8, B, DO_MAX)
-VMINMAXI(vmaxi_h, 16, H, DO_MAX)
-VMINMAXI(vmaxi_w, 32, W, DO_MAX)
-VMINMAXI(vmaxi_d, 64, D, DO_MAX)
-VMINMAXI(vmini_bu, 8, UB, DO_MIN)
-VMINMAXI(vmini_hu, 16, UH, DO_MIN)
-VMINMAXI(vmini_wu, 32, UW, DO_MIN)
-VMINMAXI(vmini_du, 64, UD, DO_MIN)
-VMINMAXI(vmaxi_bu, 8, UB, DO_MAX)
-VMINMAXI(vmaxi_hu, 16, UH, DO_MAX)
-VMINMAXI(vmaxi_wu, 32, UW, DO_MAX)
-VMINMAXI(vmaxi_du, 64, UD, DO_MAX)
-
-#define DO_VMUH(NAME, BIT, E1, E2, DO_OP) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- typedef __typeof(Vd->E1(0)) T; \
- int oprsz = simd_oprsz(desc); \
- \
- for (i = 0; i < oprsz / (BIT / 8); i++) { \
- Vd->E2(i) = ((T)Vj->E2(i)) * ((T)Vk->E2(i)) >> BIT; \
- } \
-}
-
-void HELPER(vmuh_d)(void *vd, void *vj, void *vk, uint32_t desc)
-{
- int i;
- uint64_t l, h;
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- VReg *Vk = (VReg *)vk;
- int oprsz = simd_oprsz(desc);
-
- for (i = 0; i < oprsz / 8; i++) {
- muls64(&l, &h, Vj->D(i), Vk->D(i));
- Vd->D(i) = h;
- }
-}
-
-DO_VMUH(vmuh_b, 8, H, B, DO_MUH)
-DO_VMUH(vmuh_h, 16, W, H, DO_MUH)
-DO_VMUH(vmuh_w, 32, D, W, DO_MUH)
-
-void HELPER(vmuh_du)(void *vd, void *vj, void *vk, uint32_t desc)
-{
- int i;
- uint64_t l, h;
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- VReg *Vk = (VReg *)vk;
- int oprsz = simd_oprsz(desc);
-
- for (i = 0; i < oprsz / 8; i++) {
- mulu64(&l, &h, Vj->D(i), Vk->D(i));
- Vd->D(i) = h;
- }
-}
-
-DO_VMUH(vmuh_bu, 8, UH, UB, DO_MUH)
-DO_VMUH(vmuh_hu, 16, UW, UH, DO_MUH)
-DO_VMUH(vmuh_wu, 32, UD, UW, DO_MUH)
-
-DO_EVEN(vmulwev_h_b, 16, H, B, DO_MUL)
-DO_EVEN(vmulwev_w_h, 32, W, H, DO_MUL)
-DO_EVEN(vmulwev_d_w, 64, D, W, DO_MUL)
-
-DO_ODD(vmulwod_h_b, 16, H, B, DO_MUL)
-DO_ODD(vmulwod_w_h, 32, W, H, DO_MUL)
-DO_ODD(vmulwod_d_w, 64, D, W, DO_MUL)
-
-DO_EVEN(vmulwev_h_bu, 16, UH, UB, DO_MUL)
-DO_EVEN(vmulwev_w_hu, 32, UW, UH, DO_MUL)
-DO_EVEN(vmulwev_d_wu, 64, UD, UW, DO_MUL)
-
-DO_ODD(vmulwod_h_bu, 16, UH, UB, DO_MUL)
-DO_ODD(vmulwod_w_hu, 32, UW, UH, DO_MUL)
-DO_ODD(vmulwod_d_wu, 64, UD, UW, DO_MUL)
-
-DO_EVEN_U_S(vmulwev_h_bu_b, 16, H, UH, B, UB, DO_MUL)
-DO_EVEN_U_S(vmulwev_w_hu_h, 32, W, UW, H, UH, DO_MUL)
-DO_EVEN_U_S(vmulwev_d_wu_w, 64, D, UD, W, UW, DO_MUL)
-
-DO_ODD_U_S(vmulwod_h_bu_b, 16, H, UH, B, UB, DO_MUL)
-DO_ODD_U_S(vmulwod_w_hu_h, 32, W, UW, H, UH, DO_MUL)
-DO_ODD_U_S(vmulwod_d_wu_w, 64, D, UD, W, UW, DO_MUL)
-
-#define VMADDSUB(NAME, BIT, E, DO_OP) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- int oprsz = simd_oprsz(desc); \
- \
- for (i = 0; i < oprsz / (BIT / 8); i++) { \
- Vd->E(i) = DO_OP(Vd->E(i), Vj->E(i), Vk->E(i)); \
- } \
-}
-
-VMADDSUB(vmadd_b, 8, B, DO_MADD)
-VMADDSUB(vmadd_h, 16, H, DO_MADD)
-VMADDSUB(vmadd_w, 32, W, DO_MADD)
-VMADDSUB(vmadd_d, 64, D, DO_MADD)
-VMADDSUB(vmsub_b, 8, B, DO_MSUB)
-VMADDSUB(vmsub_h, 16, H, DO_MSUB)
-VMADDSUB(vmsub_w, 32, W, DO_MSUB)
-VMADDSUB(vmsub_d, 64, D, DO_MSUB)
-
-#define VMADDWEV(NAME, BIT, E1, E2, DO_OP) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- typedef __typeof(Vd->E1(0)) TD; \
- int oprsz = simd_oprsz(desc); \
- \
- for (i = 0; i < oprsz / (BIT / 8); i++) { \
- Vd->E1(i) += DO_OP((TD)Vj->E2(2 * i), (TD)Vk->E2(2 * i)); \
- } \
-}
-
-VMADDWEV(vmaddwev_h_b, 16, H, B, DO_MUL)
-VMADDWEV(vmaddwev_w_h, 32, W, H, DO_MUL)
-VMADDWEV(vmaddwev_d_w, 64, D, W, DO_MUL)
-VMADDWEV(vmaddwev_h_bu, 16, UH, UB, DO_MUL)
-VMADDWEV(vmaddwev_w_hu, 32, UW, UH, DO_MUL)
-VMADDWEV(vmaddwev_d_wu, 64, UD, UW, DO_MUL)
-
-#define VMADDWOD(NAME, BIT, E1, E2, DO_OP) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- typedef __typeof(Vd->E1(0)) TD; \
- int oprsz = simd_oprsz(desc); \
- \
- for (i = 0; i < oprsz / (BIT / 8); i++) { \
- Vd->E1(i) += DO_OP((TD)Vj->E2(2 * i + 1), \
- (TD)Vk->E2(2 * i + 1)); \
- } \
-}
-
-VMADDWOD(vmaddwod_h_b, 16, H, B, DO_MUL)
-VMADDWOD(vmaddwod_w_h, 32, W, H, DO_MUL)
-VMADDWOD(vmaddwod_d_w, 64, D, W, DO_MUL)
-VMADDWOD(vmaddwod_h_bu, 16, UH, UB, DO_MUL)
-VMADDWOD(vmaddwod_w_hu, 32, UW, UH, DO_MUL)
-VMADDWOD(vmaddwod_d_wu, 64, UD, UW, DO_MUL)
-
-#define VMADDWEV_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- typedef __typeof(Vd->ES1(0)) TS1; \
- typedef __typeof(Vd->EU1(0)) TU1; \
- int oprsz = simd_oprsz(desc); \
- \
- for (i = 0; i < oprsz / (BIT / 8); i++) { \
- Vd->ES1(i) += DO_OP((TU1)Vj->EU2(2 * i), \
- (TS1)Vk->ES2(2 * i)); \
- } \
-}
-
-VMADDWEV_U_S(vmaddwev_h_bu_b, 16, H, UH, B, UB, DO_MUL)
-VMADDWEV_U_S(vmaddwev_w_hu_h, 32, W, UW, H, UH, DO_MUL)
-VMADDWEV_U_S(vmaddwev_d_wu_w, 64, D, UD, W, UW, DO_MUL)
-
-#define VMADDWOD_U_S(NAME, BIT, ES1, EU1, ES2, EU2, DO_OP) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- typedef __typeof(Vd->ES1(0)) TS1; \
- typedef __typeof(Vd->EU1(0)) TU1; \
- int oprsz = simd_oprsz(desc); \
- \
- for (i = 0; i < oprsz / (BIT / 8); i++) { \
- Vd->ES1(i) += DO_OP((TU1)Vj->EU2(2 * i + 1), \
- (TS1)Vk->ES2(2 * i + 1)); \
- } \
-}
-
-VMADDWOD_U_S(vmaddwod_h_bu_b, 16, H, UH, B, UB, DO_MUL)
-VMADDWOD_U_S(vmaddwod_w_hu_h, 32, W, UW, H, UH, DO_MUL)
-VMADDWOD_U_S(vmaddwod_d_wu_w, 64, D, UD, W, UW, DO_MUL)
-
-#define VDIV(NAME, BIT, E, DO_OP) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- int oprsz = simd_oprsz(desc); \
- \
- for (i = 0; i < oprsz / (BIT / 8); i++) { \
- Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i)); \
- } \
-}
-
-VDIV(vdiv_b, 8, B, DO_DIV)
-VDIV(vdiv_h, 16, H, DO_DIV)
-VDIV(vdiv_w, 32, W, DO_DIV)
-VDIV(vdiv_d, 64, D, DO_DIV)
-VDIV(vdiv_bu, 8, UB, DO_DIVU)
-VDIV(vdiv_hu, 16, UH, DO_DIVU)
-VDIV(vdiv_wu, 32, UW, DO_DIVU)
-VDIV(vdiv_du, 64, UD, DO_DIVU)
-VDIV(vmod_b, 8, B, DO_REM)
-VDIV(vmod_h, 16, H, DO_REM)
-VDIV(vmod_w, 32, W, DO_REM)
-VDIV(vmod_d, 64, D, DO_REM)
-VDIV(vmod_bu, 8, UB, DO_REMU)
-VDIV(vmod_hu, 16, UH, DO_REMU)
-VDIV(vmod_wu, 32, UW, DO_REMU)
-VDIV(vmod_du, 64, UD, DO_REMU)
-
-#define VSAT_S(NAME, BIT, E) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t max, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- typedef __typeof(Vd->E(0)) TD; \
- int oprsz = simd_oprsz(desc); \
- \
- for (i = 0; i < oprsz / (BIT / 8); i++) { \
- Vd->E(i) = Vj->E(i) > (TD)max ? (TD)max : \
- Vj->E(i) < (TD)~max ? (TD)~max : Vj->E(i); \
- } \
-}
-
-VSAT_S(vsat_b, 8, B)
-VSAT_S(vsat_h, 16, H)
-VSAT_S(vsat_w, 32, W)
-VSAT_S(vsat_d, 64, D)
-
-#define VSAT_U(NAME, BIT, E) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t max, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- typedef __typeof(Vd->E(0)) TD; \
- int oprsz = simd_oprsz(desc); \
- \
- for (i = 0; i < oprsz / (BIT / 8); i++) { \
- Vd->E(i) = Vj->E(i) > (TD)max ? (TD)max : Vj->E(i); \
- } \
-}
-
-VSAT_U(vsat_bu, 8, UB)
-VSAT_U(vsat_hu, 16, UH)
-VSAT_U(vsat_wu, 32, UW)
-VSAT_U(vsat_du, 64, UD)
-
-#define VEXTH(NAME, BIT, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \
-{ \
- int i, j, ofs; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- int oprsz = simd_oprsz(desc); \
- \
- ofs = LSX_LEN / BIT; \
- for (i = 0; i < oprsz / 16; i++) { \
- for (j = 0; j < ofs; j++) { \
- Vd->E1(j + i * ofs) = Vj->E2(j + ofs + ofs * 2 * i); \
- } \
- } \
-}
-
-void HELPER(vexth_q_d)(void *vd, void *vj, uint32_t desc)
-{
- int i;
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- int oprsz = simd_oprsz(desc);
-
- for (i = 0; i < oprsz / 16; i++) {
- Vd->Q(i) = int128_makes64(Vj->D(2 * i + 1));
- }
-}
-
-void HELPER(vexth_qu_du)(void *vd, void *vj, uint32_t desc)
-{
- int i;
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- int oprsz = simd_oprsz(desc);
-
- for (i = 0; i < oprsz / 16; i++) {
- Vd->Q(i) = int128_make64(Vj->UD(2 * i + 1));
- }
-}
-
-VEXTH(vexth_h_b, 16, H, B)
-VEXTH(vexth_w_h, 32, W, H)
-VEXTH(vexth_d_w, 64, D, W)
-VEXTH(vexth_hu_bu, 16, UH, UB)
-VEXTH(vexth_wu_hu, 32, UW, UH)
-VEXTH(vexth_du_wu, 64, UD, UW)
-
-#define VEXT2XV(NAME, BIT, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \
-{ \
- int i; \
- VReg temp = {}; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- int oprsz = simd_oprsz(desc); \
- \
- for (i = 0; i < oprsz / (BIT / 8); i++) { \
- temp.E1(i) = Vj->E2(i); \
- } \
- *Vd = temp; \
-}
-
-VEXT2XV(vext2xv_h_b, 16, H, B)
-VEXT2XV(vext2xv_w_b, 32, W, B)
-VEXT2XV(vext2xv_d_b, 64, D, B)
-VEXT2XV(vext2xv_w_h, 32, W, H)
-VEXT2XV(vext2xv_d_h, 64, D, H)
-VEXT2XV(vext2xv_d_w, 64, D, W)
-VEXT2XV(vext2xv_hu_bu, 16, UH, UB)
-VEXT2XV(vext2xv_wu_bu, 32, UW, UB)
-VEXT2XV(vext2xv_du_bu, 64, UD, UB)
-VEXT2XV(vext2xv_wu_hu, 32, UW, UH)
-VEXT2XV(vext2xv_du_hu, 64, UD, UH)
-VEXT2XV(vext2xv_du_wu, 64, UD, UW)
-
-DO_3OP(vsigncov_b, 8, B, DO_SIGNCOV)
-DO_3OP(vsigncov_h, 16, H, DO_SIGNCOV)
-DO_3OP(vsigncov_w, 32, W, DO_SIGNCOV)
-DO_3OP(vsigncov_d, 64, D, DO_SIGNCOV)
-
-static uint64_t do_vmskltz_b(int64_t val)
-{
- uint64_t m = 0x8080808080808080ULL;
- uint64_t c = val & m;
- c |= c << 7;
- c |= c << 14;
- c |= c << 28;
- return c >> 56;
-}
-
-void HELPER(vmskltz_b)(void *vd, void *vj, uint32_t desc)
-{
- int i;
- uint16_t temp = 0;
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- int oprsz = simd_oprsz(desc);
-
- for (i = 0; i < oprsz / 16; i++) {
- temp = 0;
- temp = do_vmskltz_b(Vj->D(2 * i));
- temp |= (do_vmskltz_b(Vj->D(2 * i + 1)) << 8);
- Vd->D(2 * i) = temp;
- Vd->D(2 * i + 1) = 0;
- }
-}
-
-static uint64_t do_vmskltz_h(int64_t val)
-{
- uint64_t m = 0x8000800080008000ULL;
- uint64_t c = val & m;
- c |= c << 15;
- c |= c << 30;
- return c >> 60;
-}
-
-void HELPER(vmskltz_h)(void *vd, void *vj, uint32_t desc)
-{
- int i;
- uint16_t temp = 0;
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- int oprsz = simd_oprsz(desc);
-
- for (i = 0; i < oprsz / 16; i++) {
- temp = 0;
- temp = do_vmskltz_h(Vj->D(2 * i));
- temp |= (do_vmskltz_h(Vj->D(2 * i + 1)) << 4);
- Vd->D(2 * i) = temp;
- Vd->D(2 * i + 1) = 0;
- }
-}
-
-static uint64_t do_vmskltz_w(int64_t val)
-{
- uint64_t m = 0x8000000080000000ULL;
- uint64_t c = val & m;
- c |= c << 31;
- return c >> 62;
-}
-
-void HELPER(vmskltz_w)(void *vd, void *vj, uint32_t desc)
-{
- int i;
- uint16_t temp = 0;
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- int oprsz = simd_oprsz(desc);
-
- for (i = 0; i < oprsz / 16; i++) {
- temp = 0;
- temp = do_vmskltz_w(Vj->D(2 * i));
- temp |= (do_vmskltz_w(Vj->D(2 * i + 1)) << 2);
- Vd->D(2 * i) = temp;
- Vd->D(2 * i + 1) = 0;
- }
-}
-
-static uint64_t do_vmskltz_d(int64_t val)
-{
- return (uint64_t)val >> 63;
-}
-
-void HELPER(vmskltz_d)(void *vd, void *vj, uint32_t desc)
-{
- int i;
- uint16_t temp = 0;
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- int oprsz = simd_oprsz(desc);
-
- for (i = 0; i < oprsz / 16; i++) {
- temp = 0;
- temp = do_vmskltz_d(Vj->D(2 * i));
- temp |= (do_vmskltz_d(Vj->D(2 * i + 1)) << 1);
- Vd->D(2 * i) = temp;
- Vd->D(2 * i + 1) = 0;
- }
-}
-
-void HELPER(vmskgez_b)(void *vd, void *vj, uint32_t desc)
-{
- int i;
- uint16_t temp = 0;
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- int oprsz = simd_oprsz(desc);
-
- for (i = 0; i < oprsz / 16; i++) {
- temp = 0;
- temp = do_vmskltz_b(Vj->D(2 * i));
- temp |= (do_vmskltz_b(Vj->D(2 * i + 1)) << 8);
- Vd->D(2 * i) = (uint16_t)(~temp);
- Vd->D(2 * i + 1) = 0;
- }
-}
-
-static uint64_t do_vmskez_b(uint64_t a)
-{
- uint64_t m = 0x7f7f7f7f7f7f7f7fULL;
- uint64_t c = ~(((a & m) + m) | a | m);
- c |= c << 7;
- c |= c << 14;
- c |= c << 28;
- return c >> 56;
-}
-
-void HELPER(vmsknz_b)(void *vd, void *vj, uint32_t desc)
-{
- int i;
- uint16_t temp = 0;
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- int oprsz = simd_oprsz(desc);
-
- for (i = 0; i < oprsz / 16; i++) {
- temp = 0;
- temp = do_vmskez_b(Vj->D(2 * i));
- temp |= (do_vmskez_b(Vj->D(2 * i + 1)) << 8);
- Vd->D(2 * i) = (uint16_t)(~temp);
- Vd->D(2 * i + 1) = 0;
- }
-}
-
-void HELPER(vnori_b)(void *vd, void *vj, uint64_t imm, uint32_t desc)
-{
- int i;
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
-
- for (i = 0; i < simd_oprsz(desc); i++) {
- Vd->B(i) = ~(Vj->B(i) | (uint8_t)imm);
- }
-}
-
-#define VSLLWIL(NAME, BIT, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
-{ \
- int i, j, ofs; \
- VReg temp = {}; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- int oprsz = simd_oprsz(desc); \
- typedef __typeof(temp.E1(0)) TD; \
- \
- ofs = LSX_LEN / BIT; \
- for (i = 0; i < oprsz / 16; i++) { \
- for (j = 0; j < ofs; j++) { \
- temp.E1(j + ofs * i) = (TD)Vj->E2(j + ofs * 2 * i) << (imm % BIT); \
- } \
- } \
- *Vd = temp; \
-}
-
-void HELPER(vextl_q_d)(void *vd, void *vj, uint32_t desc)
-{
- int i;
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- int oprsz = simd_oprsz(desc);
-
- for (i = 0; i < oprsz / 16; i++) {
- Vd->Q(i) = int128_makes64(Vj->D(2 * i));
- }
-}
-
-void HELPER(vextl_qu_du)(void *vd, void *vj, uint32_t desc)
-{
- int i;
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- int oprsz = simd_oprsz(desc);
-
- for (i = 0; i < oprsz / 16; i++) {
- Vd->Q(i) = int128_make64(Vj->UD(2 * i));
- }
-}
-
-VSLLWIL(vsllwil_h_b, 16, H, B)
-VSLLWIL(vsllwil_w_h, 32, W, H)
-VSLLWIL(vsllwil_d_w, 64, D, W)
-VSLLWIL(vsllwil_hu_bu, 16, UH, UB)
-VSLLWIL(vsllwil_wu_hu, 32, UW, UH)
-VSLLWIL(vsllwil_du_wu, 64, UD, UW)
-
-#define do_vsrlr(E, T) \
-static T do_vsrlr_ ##E(T s1, int sh) \
-{ \
- if (sh == 0) { \
- return s1; \
- } else { \
- return (s1 >> sh) + ((s1 >> (sh - 1)) & 0x1); \
- } \
-}
-
-do_vsrlr(B, uint8_t)
-do_vsrlr(H, uint16_t)
-do_vsrlr(W, uint32_t)
-do_vsrlr(D, uint64_t)
-
-#define VSRLR(NAME, BIT, T, E) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- int oprsz = simd_oprsz(desc); \
- \
- for (i = 0; i < oprsz / (BIT / 8); i++) { \
- Vd->E(i) = do_vsrlr_ ## E(Vj->E(i), ((T)Vk->E(i)) % BIT); \
- } \
-}
-
-VSRLR(vsrlr_b, 8, uint8_t, B)
-VSRLR(vsrlr_h, 16, uint16_t, H)
-VSRLR(vsrlr_w, 32, uint32_t, W)
-VSRLR(vsrlr_d, 64, uint64_t, D)
-
-#define VSRLRI(NAME, BIT, E) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- int oprsz = simd_oprsz(desc); \
- \
- for (i = 0; i < oprsz / (BIT / 8); i++) { \
- Vd->E(i) = do_vsrlr_ ## E(Vj->E(i), imm); \
- } \
-}
-
-VSRLRI(vsrlri_b, 8, B)
-VSRLRI(vsrlri_h, 16, H)
-VSRLRI(vsrlri_w, 32, W)
-VSRLRI(vsrlri_d, 64, D)
-
-#define do_vsrar(E, T) \
-static T do_vsrar_ ##E(T s1, int sh) \
-{ \
- if (sh == 0) { \
- return s1; \
- } else { \
- return (s1 >> sh) + ((s1 >> (sh - 1)) & 0x1); \
- } \
-}
-
-do_vsrar(B, int8_t)
-do_vsrar(H, int16_t)
-do_vsrar(W, int32_t)
-do_vsrar(D, int64_t)
-
-#define VSRAR(NAME, BIT, T, E) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- int oprsz = simd_oprsz(desc); \
- \
- for (i = 0; i < oprsz / (BIT / 8); i++) { \
- Vd->E(i) = do_vsrar_ ## E(Vj->E(i), ((T)Vk->E(i)) % BIT); \
- } \
-}
-
-VSRAR(vsrar_b, 8, uint8_t, B)
-VSRAR(vsrar_h, 16, uint16_t, H)
-VSRAR(vsrar_w, 32, uint32_t, W)
-VSRAR(vsrar_d, 64, uint64_t, D)
-
-#define VSRARI(NAME, BIT, E) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- int oprsz = simd_oprsz(desc); \
- \
- for (i = 0; i < oprsz / (BIT / 8); i++) { \
- Vd->E(i) = do_vsrar_ ## E(Vj->E(i), imm); \
- } \
-}
-
-VSRARI(vsrari_b, 8, B)
-VSRARI(vsrari_h, 16, H)
-VSRARI(vsrari_w, 32, W)
-VSRARI(vsrari_d, 64, D)
-
-#define VSRLN(NAME, BIT, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i, j, ofs; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- int oprsz = simd_oprsz(desc); \
- \
- ofs = LSX_LEN / BIT; \
- for (i = 0; i < oprsz / 16; i++) { \
- for (j = 0; j < ofs; j++) { \
- Vd->E1(j + ofs * 2 * i) = R_SHIFT(Vj->E2(j + ofs * i), \
- Vk->E2(j + ofs * i) % BIT); \
- } \
- Vd->D(2 * i + 1) = 0; \
- } \
-}
-
-VSRLN(vsrln_b_h, 16, B, UH)
-VSRLN(vsrln_h_w, 32, H, UW)
-VSRLN(vsrln_w_d, 64, W, UD)
-
-#define VSRAN(NAME, BIT, E1, E2, E3) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i, j, ofs; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- int oprsz = simd_oprsz(desc); \
- \
- ofs = LSX_LEN / BIT; \
- for (i = 0; i < oprsz / 16; i++) { \
- for (j = 0; j < ofs; j++) { \
- Vd->E1(j + ofs * 2 * i) = R_SHIFT(Vj->E2(j + ofs * i), \
- Vk->E3(j + ofs * i) % BIT); \
- } \
- Vd->D(2 * i + 1) = 0; \
- } \
-}
-
-VSRAN(vsran_b_h, 16, B, H, UH)
-VSRAN(vsran_h_w, 32, H, W, UW)
-VSRAN(vsran_w_d, 64, W, D, UD)
-
-#define VSRLNI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
-{ \
- int i, j, ofs; \
- VReg temp = {}; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- int oprsz = simd_oprsz(desc); \
- \
- ofs = LSX_LEN / BIT; \
- for (i = 0; i < oprsz / 16; i++) { \
- for (j = 0; j < ofs; j++) { \
- temp.E1(j + ofs * 2 * i) = R_SHIFT(Vj->E2(j + ofs * i), imm); \
- temp.E1(j + ofs * (2 * i + 1)) = R_SHIFT(Vd->E2(j + ofs * i), \
- imm); \
- } \
- } \
- *Vd = temp; \
-}
-
-void HELPER(vsrlni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
-{
- int i;
- VReg temp = {};
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
-
- for (i = 0; i < 2; i++) {
- temp.D(2 * i) = int128_getlo(int128_urshift(Vj->Q(i), imm % 128));
- temp.D(2 * i + 1) = int128_getlo(int128_urshift(Vd->Q(i), imm % 128));
- }
- *Vd = temp;
-}
-
-VSRLNI(vsrlni_b_h, 16, B, UH)
-VSRLNI(vsrlni_h_w, 32, H, UW)
-VSRLNI(vsrlni_w_d, 64, W, UD)
-
-#define VSRANI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
-{ \
- int i, j, ofs; \
- VReg temp = {}; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- int oprsz = simd_oprsz(desc); \
- \
- ofs = LSX_LEN / BIT; \
- for (i = 0; i < oprsz / 16; i++) { \
- for (j = 0; j < ofs; j++) { \
- temp.E1(j + ofs * 2 * i) = R_SHIFT(Vj->E2(j + ofs * i), imm); \
- temp.E1(j + ofs * (2 * i + 1)) = R_SHIFT(Vd->E2(j + ofs * i), \
- imm); \
- } \
- } \
- *Vd = temp; \
-}
-
-void HELPER(vsrani_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
-{
- int i;
- VReg temp = {};
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
-
- for (i = 0; i < 2; i++) {
- temp.D(2 * i) = int128_getlo(int128_rshift(Vj->Q(i), imm % 128));
- temp.D(2 * i + 1) = int128_getlo(int128_rshift(Vd->Q(i), imm % 128));
- }
- *Vd = temp;
-}
-
-VSRANI(vsrani_b_h, 16, B, H)
-VSRANI(vsrani_h_w, 32, H, W)
-VSRANI(vsrani_w_d, 64, W, D)
-
-#define VSRLRN(NAME, BIT, E1, E2, E3) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i, j, ofs; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- int oprsz = simd_oprsz(desc); \
- \
- ofs = LSX_LEN / BIT; \
- for (i = 0; i < oprsz / 16; i++) { \
- for (j = 0; j < ofs; j++) { \
- Vd->E1(j + ofs * 2 * i) = do_vsrlr_ ##E2(Vj->E2(j + ofs * i), \
- Vk->E3(j + ofs * i) % BIT); \
- } \
- Vd->D(2 * i + 1) = 0; \
- } \
-}
-
-VSRLRN(vsrlrn_b_h, 16, B, H, UH)
-VSRLRN(vsrlrn_h_w, 32, H, W, UW)
-VSRLRN(vsrlrn_w_d, 64, W, D, UD)
-
-#define VSRARN(NAME, BIT, E1, E2, E3) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i, j, ofs; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- int oprsz = simd_oprsz(desc); \
- \
- ofs = LSX_LEN / BIT; \
- for (i = 0; i < oprsz / 16; i++) { \
- for (j = 0; j < ofs; j++) { \
- Vd->E1(j + ofs * 2 * i) = do_vsrar_ ## E2(Vj->E2(j + ofs * i), \
- Vk->E3(j + ofs * i) % BIT); \
- } \
- Vd->D(2 * i + 1) = 0; \
- } \
-}
-
-VSRARN(vsrarn_b_h, 16, B, H, UH)
-VSRARN(vsrarn_h_w, 32, H, W, UW)
-VSRARN(vsrarn_w_d, 64, W, D, UD)
-
-#define VSRLRNI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
-{ \
- int i, j, ofs; \
- VReg temp = {}; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- int oprsz = simd_oprsz(desc); \
- \
- ofs = LSX_LEN / BIT; \
- for (i = 0; i < oprsz / 16; i++) { \
- for (j = 0; j < ofs; j++) { \
- temp.E1(j + ofs * 2 * i) = do_vsrlr_ ## E2(Vj->E2(j + ofs * i), imm); \
- temp.E1(j + ofs * (2 * i + 1)) = do_vsrlr_ ## E2(Vd->E2(j + ofs * i), \
- imm); \
- } \
- } \
- *Vd = temp; \
-}
-
-void HELPER(vsrlrni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
-{
- int i;
- VReg temp = {};
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- Int128 r[4];
- int oprsz = simd_oprsz(desc);
-
- for (i = 0; i < oprsz / 16; i++) {
- if (imm == 0) {
- temp.D(2 * i) = int128_getlo(Vj->Q(i));
- temp.D(2 * i + 1) = int128_getlo(Vd->Q(i));
- } else {
- r[2 * i] = int128_and(int128_urshift(Vj->Q(i), (imm - 1)),
- int128_one());
- r[2 * i + 1] = int128_and(int128_urshift(Vd->Q(i), (imm - 1)),
- int128_one());
- temp.D(2 * i) = int128_getlo(int128_add(int128_urshift(Vj->Q(i),
- imm), r[2 * i]));
- temp.D(2 * i + 1) = int128_getlo(int128_add(int128_urshift(Vd->Q(i),
- imm), r[2 * i + 1]));
- }
- }
- *Vd = temp;
-}
-
-VSRLRNI(vsrlrni_b_h, 16, B, H)
-VSRLRNI(vsrlrni_h_w, 32, H, W)
-VSRLRNI(vsrlrni_w_d, 64, W, D)
-
-#define VSRARNI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
-{ \
- int i, j, ofs; \
- VReg temp = {}; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- int oprsz = simd_oprsz(desc); \
- \
- ofs = LSX_LEN / BIT; \
- for (i = 0; i < oprsz / 16; i++) { \
- for (j = 0; j < ofs; j++) { \
- temp.E1(j + ofs * 2 * i) = do_vsrar_ ## E2(Vj->E2(j + ofs * i), imm); \
- temp.E1(j + ofs * (2 * i + 1)) = do_vsrar_ ## E2(Vd->E2(j + ofs * i), \
- imm); \
- } \
- } \
- *Vd = temp; \
-}
-
-void HELPER(vsrarni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
-{
- int i;
- VReg temp = {};
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- Int128 r[4];
- int oprsz = simd_oprsz(desc);
-
- for (i = 0; i < oprsz / 16; i++) {
- if (imm == 0) {
- temp.D(2 * i) = int128_getlo(Vj->Q(i));
- temp.D(2 * i + 1) = int128_getlo(Vd->Q(i));
- } else {
- r[2 * i] = int128_and(int128_rshift(Vj->Q(i), (imm - 1)),
- int128_one());
- r[2 * i + 1] = int128_and(int128_rshift(Vd->Q(i), (imm - 1)),
- int128_one());
- temp.D(2 * i) = int128_getlo(int128_add(int128_rshift(Vj->Q(i),
- imm), r[2 * i]));
- temp.D(2 * i + 1) = int128_getlo(int128_add(int128_rshift(Vd->Q(i),
- imm), r[2 * i + 1]));
- }
- }
- *Vd = temp;
-}
-
-VSRARNI(vsrarni_b_h, 16, B, H)
-VSRARNI(vsrarni_h_w, 32, H, W)
-VSRARNI(vsrarni_w_d, 64, W, D)
-
-#define SSRLNS(NAME, T1, T2, T3) \
-static T1 do_ssrlns_ ## NAME(T2 e2, int sa, int sh) \
-{ \
- T1 shft_res; \
- if (sa == 0) { \
- shft_res = e2; \
- } else { \
- shft_res = (((T1)e2) >> sa); \
- } \
- T3 mask; \
- mask = (1ull << sh) - 1; \
- if (shft_res > mask) { \
- return mask; \
- } else { \
- return shft_res; \
- } \
-}
-
-SSRLNS(B, uint16_t, int16_t, uint8_t)
-SSRLNS(H, uint32_t, int32_t, uint16_t)
-SSRLNS(W, uint64_t, int64_t, uint32_t)
-
-#define VSSRLN(NAME, BIT, E1, E2, E3) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i, j, ofs; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- int oprsz = simd_oprsz(desc); \
- \
- ofs = LSX_LEN / BIT; \
- for (i = 0; i < oprsz / 16; i++) { \
- for (j = 0; j < ofs; j++) { \
- Vd->E1(j + ofs * 2 * i) = do_ssrlns_ ## E1(Vj->E2(j + ofs * i), \
- Vk->E3(j + ofs * i) % BIT, \
- BIT / 2 - 1); \
- } \
- Vd->D(2 * i + 1) = 0; \
- } \
-}
-
-VSSRLN(vssrln_b_h, 16, B, H, UH)
-VSSRLN(vssrln_h_w, 32, H, W, UW)
-VSSRLN(vssrln_w_d, 64, W, D, UD)
-
-#define SSRANS(E, T1, T2) \
-static T1 do_ssrans_ ## E(T1 e2, int sa, int sh) \
-{ \
- T1 shft_res; \
- if (sa == 0) { \
- shft_res = e2; \
- } else { \
- shft_res = e2 >> sa; \
- } \
- T2 mask; \
- mask = (1ll << sh) - 1; \
- if (shft_res > mask) { \
- return mask; \
- } else if (shft_res < -(mask + 1)) { \
- return ~mask; \
- } else { \
- return shft_res; \
- } \
-}
-
-SSRANS(B, int16_t, int8_t)
-SSRANS(H, int32_t, int16_t)
-SSRANS(W, int64_t, int32_t)
-
-#define VSSRAN(NAME, BIT, E1, E2, E3) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i, j, ofs; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- int oprsz = simd_oprsz(desc); \
- \
- ofs = LSX_LEN / BIT; \
- for (i = 0; i < oprsz / 16; i++) { \
- for (j = 0; j < ofs; j++) { \
- Vd->E1(j + ofs * 2 * i) = do_ssrans_ ## E1(Vj->E2(j + ofs * i), \
- Vk->E3(j + ofs * i) % BIT, \
- BIT / 2 - 1); \
- } \
- Vd->D(2 * i + 1) = 0; \
- } \
-}
-
-VSSRAN(vssran_b_h, 16, B, H, UH)
-VSSRAN(vssran_h_w, 32, H, W, UW)
-VSSRAN(vssran_w_d, 64, W, D, UD)
-
-#define SSRLNU(E, T1, T2, T3) \
-static T1 do_ssrlnu_ ## E(T3 e2, int sa, int sh) \
-{ \
- T1 shft_res; \
- if (sa == 0) { \
- shft_res = e2; \
- } else { \
- shft_res = (((T1)e2) >> sa); \
- } \
- T2 mask; \
- mask = (1ull << sh) - 1; \
- if (shft_res > mask) { \
- return mask; \
- } else { \
- return shft_res; \
- } \
-}
-
-SSRLNU(B, uint16_t, uint8_t, int16_t)
-SSRLNU(H, uint32_t, uint16_t, int32_t)
-SSRLNU(W, uint64_t, uint32_t, int64_t)
-
-#define VSSRLNU(NAME, BIT, E1, E2, E3) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i, j, ofs; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- int oprsz = simd_oprsz(desc); \
- \
- ofs = LSX_LEN / BIT; \
- for (i = 0; i < oprsz / 16; i++) { \
- for (j = 0; j < ofs; j++) { \
- Vd->E1(j + ofs * 2 * i) = do_ssrlnu_ ## E1(Vj->E2(j + ofs * i), \
- Vk->E3(j + ofs * i) % BIT, \
- BIT / 2); \
- } \
- Vd->D(2 * i + 1) = 0; \
- } \
-}
-
-VSSRLNU(vssrln_bu_h, 16, B, H, UH)
-VSSRLNU(vssrln_hu_w, 32, H, W, UW)
-VSSRLNU(vssrln_wu_d, 64, W, D, UD)
-
-#define SSRANU(E, T1, T2, T3) \
-static T1 do_ssranu_ ## E(T3 e2, int sa, int sh) \
-{ \
- T1 shft_res; \
- if (sa == 0) { \
- shft_res = e2; \
- } else { \
- shft_res = e2 >> sa; \
- } \
- if (e2 < 0) { \
- shft_res = 0; \
- } \
- T2 mask; \
- mask = (1ull << sh) - 1; \
- if (shft_res > mask) { \
- return mask; \
- } else { \
- return shft_res; \
- } \
-}
-
-SSRANU(B, uint16_t, uint8_t, int16_t)
-SSRANU(H, uint32_t, uint16_t, int32_t)
-SSRANU(W, uint64_t, uint32_t, int64_t)
-
-#define VSSRANU(NAME, BIT, E1, E2, E3) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i, j, ofs; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- int oprsz = simd_oprsz(desc); \
- \
- ofs = LSX_LEN / BIT; \
- for (i = 0; i < oprsz / 16; i++) { \
- for (j = 0; j < ofs; j++) { \
- Vd->E1(j + ofs * 2 * i) = do_ssranu_ ## E1(Vj->E2(j + ofs * i), \
- Vk->E3(j + ofs * i) % BIT, \
- BIT / 2); \
- } \
- Vd->D(2 * i + 1) = 0; \
- } \
-}
-
-VSSRANU(vssran_bu_h, 16, B, H, UH)
-VSSRANU(vssran_hu_w, 32, H, W, UW)
-VSSRANU(vssran_wu_d, 64, W, D, UD)
-
-#define VSSRLNI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
-{ \
- int i, j, ofs; \
- VReg temp = {}; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- int oprsz = simd_oprsz(desc); \
- \
- ofs = LSX_LEN / BIT; \
- for (i = 0; i < oprsz / 16; i++) { \
- for (j = 0; j < ofs; j++) { \
- temp.E1(j + ofs * 2 * i) = do_ssrlns_ ## E1(Vj->E2(j + ofs * i), \
- imm, BIT / 2 - 1); \
- temp.E1(j + ofs * (2 * i + 1)) = do_ssrlns_ ## E1(Vd->E2(j + ofs * i), \
- imm, BIT / 2 - 1); \
- } \
- } \
- *Vd = temp; \
-}
-
-static void do_vssrlni_q(VReg *Vd, VReg *Vj,
- uint64_t imm, int idx, Int128 mask)
-{
- Int128 shft_res1, shft_res2;
-
- if (imm == 0) {
- shft_res1 = Vj->Q(idx);
- shft_res2 = Vd->Q(idx);
- } else {
- shft_res1 = int128_urshift(Vj->Q(idx), imm);
- shft_res2 = int128_urshift(Vd->Q(idx), imm);
- }
-
- if (int128_ult(mask, shft_res1)) {
- Vd->D(idx * 2) = int128_getlo(mask);
- } else {
- Vd->D(idx * 2) = int128_getlo(shft_res1);
- }
-
- if (int128_ult(mask, shft_res2)) {
- Vd->D(idx * 2 + 1) = int128_getlo(mask);
- } else {
- Vd->D(idx * 2 + 1) = int128_getlo(shft_res2);
- }
-}
-
-void HELPER(vssrlni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
-{
- int i;
- Int128 mask;
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- int oprsz = simd_oprsz(desc);
-
- mask = int128_sub(int128_lshift(int128_one(), 63), int128_one());
-
- for (i = 0; i < oprsz / 16; i++) {
- do_vssrlni_q(Vd, Vj, imm, i, mask);
- }
-}
-
-VSSRLNI(vssrlni_b_h, 16, B, H)
-VSSRLNI(vssrlni_h_w, 32, H, W)
-VSSRLNI(vssrlni_w_d, 64, W, D)
-
-#define VSSRANI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
-{ \
- int i, j, ofs; \
- VReg temp = {}; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- int oprsz = simd_oprsz(desc); \
- \
- ofs = LSX_LEN / BIT; \
- for (i = 0; i < oprsz / 16; i++) { \
- for (j = 0; j < ofs; j++) { \
- temp.E1(j + ofs * 2 * i) = do_ssrans_ ## E1(Vj->E2(j + ofs * i), \
- imm, BIT / 2 - 1); \
- temp.E1(j + ofs * (2 * i + 1)) = do_ssrans_ ## E1(Vd->E2(j + ofs * i), \
- imm, BIT / 2 - 1); \
- } \
- } \
- *Vd = temp; \
-}
-
-static void do_vssrani_d_q(VReg *Vd, VReg *Vj,
- uint64_t imm, int idx, Int128 mask, Int128 min)
-{
- Int128 shft_res1, shft_res2;
-
- if (imm == 0) {
- shft_res1 = Vj->Q(idx);
- shft_res2 = Vd->Q(idx);
- } else {
- shft_res1 = int128_rshift(Vj->Q(idx), imm);
- shft_res2 = int128_rshift(Vd->Q(idx), imm);
- }
-
- if (int128_gt(shft_res1, mask)) {
- Vd->D(idx * 2) = int128_getlo(mask);
- } else if (int128_lt(shft_res1, int128_neg(min))) {
- Vd->D(idx * 2) = int128_getlo(min);
- } else {
- Vd->D(idx * 2) = int128_getlo(shft_res1);
- }
-
- if (int128_gt(shft_res2, mask)) {
- Vd->D(idx * 2 + 1) = int128_getlo(mask);
- } else if (int128_lt(shft_res2, int128_neg(min))) {
- Vd->D(idx * 2 + 1) = int128_getlo(min);
- } else {
- Vd->D(idx * 2 + 1) = int128_getlo(shft_res2);
- }
-}
-
-void HELPER(vssrani_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
-{
- int i;
- Int128 mask, min;
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- int oprsz = simd_oprsz(desc);
-
- mask = int128_sub(int128_lshift(int128_one(), 63), int128_one());
- min = int128_lshift(int128_one(), 63);
-
- for (i = 0; i < oprsz / 16; i++) {
- do_vssrani_d_q(Vd, Vj, imm, i, mask, min);
- }
-}
-
-VSSRANI(vssrani_b_h, 16, B, H)
-VSSRANI(vssrani_h_w, 32, H, W)
-VSSRANI(vssrani_w_d, 64, W, D)
-
-#define VSSRLNUI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
-{ \
- int i, j, ofs; \
- VReg temp = {}; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- int oprsz = simd_oprsz(desc); \
- \
- ofs = LSX_LEN / BIT; \
- for (i = 0; i < oprsz / 16; i++) { \
- for (j = 0; j < ofs; j++) { \
- temp.E1(j + ofs * 2 * i) = do_ssrlnu_ ## E1(Vj->E2(j + ofs * i), \
- imm, BIT / 2); \
- temp.E1(j + ofs * (2 * i + 1)) = do_ssrlnu_ ## E1(Vd->E2(j + ofs * i), \
- imm, BIT / 2); \
- } \
- } \
- *Vd = temp; \
-}
-
-void HELPER(vssrlni_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
-{
- int i;
- Int128 mask;
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- int oprsz = simd_oprsz(desc);
-
- mask = int128_sub(int128_lshift(int128_one(), 64), int128_one());
-
- for (i = 0; i < oprsz / 16; i++) {
- do_vssrlni_q(Vd, Vj, imm, i, mask);
- }
-}
-
-VSSRLNUI(vssrlni_bu_h, 16, B, H)
-VSSRLNUI(vssrlni_hu_w, 32, H, W)
-VSSRLNUI(vssrlni_wu_d, 64, W, D)
-
-#define VSSRANUI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
-{ \
- int i, j, ofs; \
- VReg temp = {}; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- int oprsz = simd_oprsz(desc); \
- \
- ofs = LSX_LEN / BIT; \
- for (i = 0; i < oprsz / 16; i++) { \
- for (j = 0; j < ofs; j++) { \
- temp.E1(j + ofs * 2 * i) = do_ssranu_ ## E1(Vj->E2(j + ofs * i), \
- imm, BIT / 2); \
- temp.E1(j + ofs * (2 * i + 1)) = do_ssranu_ ## E1(Vd->E2(j + ofs * i), \
- imm, BIT / 2); \
- } \
- } \
- *Vd = temp; \
-}
-
-static void do_vssrani_du_q(VReg *Vd, VReg *Vj,
- uint64_t imm, int idx, Int128 mask)
-{
- Int128 shft_res1, shft_res2;
-
- if (imm == 0) {
- shft_res1 = Vj->Q(idx);
- shft_res2 = Vd->Q(idx);
- } else {
- shft_res1 = int128_rshift(Vj->Q(idx), imm);
- shft_res2 = int128_rshift(Vd->Q(idx), imm);
- }
-
- if (int128_lt(Vj->Q(idx), int128_zero())) {
- shft_res1 = int128_zero();
- }
-
- if (int128_lt(Vd->Q(idx), int128_zero())) {
- shft_res2 = int128_zero();
- }
- if (int128_ult(mask, shft_res1)) {
- Vd->D(idx * 2) = int128_getlo(mask);
- } else {
- Vd->D(idx * 2) = int128_getlo(shft_res1);
- }
-
- if (int128_ult(mask, shft_res2)) {
- Vd->D(idx * 2 + 1) = int128_getlo(mask);
- } else {
- Vd->D(idx * 2 + 1) = int128_getlo(shft_res2);
- }
-}
-
-void HELPER(vssrani_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
-{
- int i;
- Int128 mask;
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- int oprsz = simd_oprsz(desc);
-
- mask = int128_sub(int128_lshift(int128_one(), 64), int128_one());
-
- for (i = 0; i < oprsz / 16; i++) {
- do_vssrani_du_q(Vd, Vj, imm, i, mask);
- }
-}
-
-VSSRANUI(vssrani_bu_h, 16, B, H)
-VSSRANUI(vssrani_hu_w, 32, H, W)
-VSSRANUI(vssrani_wu_d, 64, W, D)
-
-#define SSRLRNS(E1, E2, T1, T2, T3) \
-static T1 do_ssrlrns_ ## E1(T2 e2, int sa, int sh) \
-{ \
- T1 shft_res; \
- \
- shft_res = do_vsrlr_ ## E2(e2, sa); \
- T1 mask; \
- mask = (1ull << sh) - 1; \
- if (shft_res > mask) { \
- return mask; \
- } else { \
- return shft_res; \
- } \
-}
-
-SSRLRNS(B, H, uint16_t, int16_t, uint8_t)
-SSRLRNS(H, W, uint32_t, int32_t, uint16_t)
-SSRLRNS(W, D, uint64_t, int64_t, uint32_t)
-
-#define VSSRLRN(NAME, BIT, E1, E2, E3) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i, j, ofs; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- int oprsz = simd_oprsz(desc); \
- \
- ofs = LSX_LEN / BIT; \
- for (i = 0; i < oprsz / 16; i++) { \
- for (j = 0; j < ofs; j++) { \
- Vd->E1(j + ofs * 2 * i) = do_ssrlrns_ ## E1(Vj->E2(j + ofs * i), \
- Vk->E3(j + ofs * i) % BIT, \
- BIT / 2 - 1); \
- } \
- Vd->D(2 * i + 1) = 0; \
- } \
-}
-
-VSSRLRN(vssrlrn_b_h, 16, B, H, UH)
-VSSRLRN(vssrlrn_h_w, 32, H, W, UW)
-VSSRLRN(vssrlrn_w_d, 64, W, D, UD)
-
-#define SSRARNS(E1, E2, T1, T2) \
-static T1 do_ssrarns_ ## E1(T1 e2, int sa, int sh) \
-{ \
- T1 shft_res; \
- \
- shft_res = do_vsrar_ ## E2(e2, sa); \
- T2 mask; \
- mask = (1ll << sh) - 1; \
- if (shft_res > mask) { \
- return mask; \
- } else if (shft_res < -(mask + 1)) { \
- return ~mask; \
- } else { \
- return shft_res; \
- } \
-}
-
-SSRARNS(B, H, int16_t, int8_t)
-SSRARNS(H, W, int32_t, int16_t)
-SSRARNS(W, D, int64_t, int32_t)
-
-#define VSSRARN(NAME, BIT, E1, E2, E3) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i, j, ofs; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- int oprsz = simd_oprsz(desc); \
- \
- ofs = LSX_LEN / BIT; \
- for (i = 0; i < oprsz / 16; i++) { \
- for (j = 0; j < ofs; j++) { \
- Vd->E1(j + ofs * 2 * i) = do_ssrarns_ ## E1(Vj->E2(j + ofs * i), \
- Vk->E3(j + ofs * i) % BIT, \
- BIT / 2 - 1); \
- } \
- Vd->D(2 * i + 1) = 0; \
- } \
-}
-
-VSSRARN(vssrarn_b_h, 16, B, H, UH)
-VSSRARN(vssrarn_h_w, 32, H, W, UW)
-VSSRARN(vssrarn_w_d, 64, W, D, UD)
-
-#define SSRLRNU(E1, E2, T1, T2, T3) \
-static T1 do_ssrlrnu_ ## E1(T3 e2, int sa, int sh) \
-{ \
- T1 shft_res; \
- \
- shft_res = do_vsrlr_ ## E2(e2, sa); \
- \
- T2 mask; \
- mask = (1ull << sh) - 1; \
- if (shft_res > mask) { \
- return mask; \
- } else { \
- return shft_res; \
- } \
-}
-
-SSRLRNU(B, H, uint16_t, uint8_t, int16_t)
-SSRLRNU(H, W, uint32_t, uint16_t, int32_t)
-SSRLRNU(W, D, uint64_t, uint32_t, int64_t)
-
-#define VSSRLRNU(NAME, BIT, E1, E2, E3) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i, j, ofs; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- int oprsz = simd_oprsz(desc); \
- \
- ofs = LSX_LEN / BIT; \
- for (i = 0; i < oprsz / 16; i++) { \
- for (j = 0; j < ofs; j++) { \
- Vd->E1(j + ofs * 2 * i) = do_ssrlrnu_ ## E1(Vj->E2(j + ofs * i), \
- Vk->E3(j + ofs * i) % BIT, \
- BIT / 2); \
- } \
- Vd->D(2 * i + 1) = 0; \
- } \
-}
-
-VSSRLRNU(vssrlrn_bu_h, 16, B, H, UH)
-VSSRLRNU(vssrlrn_hu_w, 32, H, W, UW)
-VSSRLRNU(vssrlrn_wu_d, 64, W, D, UD)
-
-#define SSRARNU(E1, E2, T1, T2, T3) \
-static T1 do_ssrarnu_ ## E1(T3 e2, int sa, int sh) \
-{ \
- T1 shft_res; \
- \
- if (e2 < 0) { \
- shft_res = 0; \
- } else { \
- shft_res = do_vsrar_ ## E2(e2, sa); \
- } \
- T2 mask; \
- mask = (1ull << sh) - 1; \
- if (shft_res > mask) { \
- return mask; \
- } else { \
- return shft_res; \
- } \
-}
-
-SSRARNU(B, H, uint16_t, uint8_t, int16_t)
-SSRARNU(H, W, uint32_t, uint16_t, int32_t)
-SSRARNU(W, D, uint64_t, uint32_t, int64_t)
-
-#define VSSRARNU(NAME, BIT, E1, E2, E3) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i, j, ofs; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- int oprsz = simd_oprsz(desc); \
- \
- ofs = LSX_LEN / BIT; \
- for (i = 0; i < oprsz / 16; i++) { \
- for (j = 0; j < ofs; j++) { \
- Vd->E1(j + ofs * 2 * i) = do_ssrarnu_ ## E1(Vj->E2(j + ofs * i), \
- Vk->E3(j + ofs * i) % BIT, \
- BIT / 2); \
- } \
- Vd->D(2 * i + 1) = 0; \
- } \
-}
-
-VSSRARNU(vssrarn_bu_h, 16, B, H, UH)
-VSSRARNU(vssrarn_hu_w, 32, H, W, UW)
-VSSRARNU(vssrarn_wu_d, 64, W, D, UD)
-
-#define VSSRLRNI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
-{ \
- int i, j, ofs; \
- VReg temp = {}; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- int oprsz = simd_oprsz(desc); \
- \
- ofs = LSX_LEN / BIT; \
- for (i = 0; i < oprsz / 16; i++) { \
- for (j = 0; j < ofs; j++) { \
- temp.E1(j + ofs * 2 * i) = do_ssrlrns_ ## E1(Vj->E2(j + ofs * i), \
- imm, BIT / 2 - 1); \
- temp.E1(j + ofs * (2 * i + 1)) = do_ssrlrns_ ## E1(Vd->E2(j + ofs * i), \
- imm, BIT / 2 - 1); \
- } \
- } \
- *Vd = temp; \
-}
-
-static void do_vssrlrni_q(VReg *Vd, VReg *Vj,
- uint64_t imm, int idx, Int128 mask)
-{
- Int128 shft_res1, shft_res2, r1, r2;
- if (imm == 0) {
- shft_res1 = Vj->Q(idx);
- shft_res2 = Vd->Q(idx);
- } else {
- r1 = int128_and(int128_urshift(Vj->Q(idx), (imm - 1)), int128_one());
- r2 = int128_and(int128_urshift(Vd->Q(idx), (imm - 1)), int128_one());
- shft_res1 = (int128_add(int128_urshift(Vj->Q(idx), imm), r1));
- shft_res2 = (int128_add(int128_urshift(Vd->Q(idx), imm), r2));
- }
-
- if (int128_ult(mask, shft_res1)) {
- Vd->D(idx * 2) = int128_getlo(mask);
- } else {
- Vd->D(idx * 2) = int128_getlo(shft_res1);
- }
-
- if (int128_ult(mask, shft_res2)) {
- Vd->D(idx * 2 + 1) = int128_getlo(mask);
- } else {
- Vd->D(idx * 2 + 1) = int128_getlo(shft_res2);
- }
-}
-
-void HELPER(vssrlrni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
-{
- int i;
- Int128 mask;
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- int oprsz = simd_oprsz(desc);
-
- mask = int128_sub(int128_lshift(int128_one(), 63), int128_one());
-
- for (i = 0; i < oprsz / 16; i++) {
- do_vssrlrni_q(Vd, Vj, imm, i, mask);
- }
-}
-
-VSSRLRNI(vssrlrni_b_h, 16, B, H)
-VSSRLRNI(vssrlrni_h_w, 32, H, W)
-VSSRLRNI(vssrlrni_w_d, 64, W, D)
-
-#define VSSRARNI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
-{ \
- int i, j, ofs; \
- VReg temp = {}; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- int oprsz = simd_oprsz(desc); \
- \
- ofs = LSX_LEN / BIT; \
- for (i = 0; i < oprsz / 16; i++) { \
- for (j = 0; j < ofs; j++) { \
- temp.E1(j + ofs * 2 * i) = do_ssrarns_ ## E1(Vj->E2(j + ofs * i), \
- imm, BIT / 2 - 1); \
- temp.E1(j + ofs * (2 * i + 1)) = do_ssrarns_ ## E1(Vd->E2(j + ofs * i), \
- imm, BIT / 2 - 1); \
- } \
- } \
- *Vd = temp; \
-}
-
-static void do_vssrarni_d_q(VReg *Vd, VReg *Vj,
- uint64_t imm, int idx, Int128 mask1, Int128 mask2)
-{
- Int128 shft_res1, shft_res2, r1, r2;
-
- if (imm == 0) {
- shft_res1 = Vj->Q(idx);
- shft_res2 = Vd->Q(idx);
- } else {
- r1 = int128_and(int128_rshift(Vj->Q(idx), (imm - 1)), int128_one());
- r2 = int128_and(int128_rshift(Vd->Q(idx), (imm - 1)), int128_one());
- shft_res1 = int128_add(int128_rshift(Vj->Q(idx), imm), r1);
- shft_res2 = int128_add(int128_rshift(Vd->Q(idx), imm), r2);
- }
- if (int128_gt(shft_res1, mask1)) {
- Vd->D(idx * 2) = int128_getlo(mask1);
- } else if (int128_lt(shft_res1, int128_neg(mask2))) {
- Vd->D(idx * 2) = int128_getlo(mask2);
- } else {
- Vd->D(idx * 2) = int128_getlo(shft_res1);
- }
-
- if (int128_gt(shft_res2, mask1)) {
- Vd->D(idx * 2 + 1) = int128_getlo(mask1);
- } else if (int128_lt(shft_res2, int128_neg(mask2))) {
- Vd->D(idx * 2 + 1) = int128_getlo(mask2);
- } else {
- Vd->D(idx * 2 + 1) = int128_getlo(shft_res2);
- }
-}
-
-void HELPER(vssrarni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
-{
- int i;
- Int128 mask1, mask2;
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- int oprsz = simd_oprsz(desc);
-
- mask1 = int128_sub(int128_lshift(int128_one(), 63), int128_one());
- mask2 = int128_lshift(int128_one(), 63);
-
- for (i = 0; i < oprsz / 16; i++) {
- do_vssrarni_d_q(Vd, Vj, imm, i, mask1, mask2);
- }
-}
-
-VSSRARNI(vssrarni_b_h, 16, B, H)
-VSSRARNI(vssrarni_h_w, 32, H, W)
-VSSRARNI(vssrarni_w_d, 64, W, D)
-
-#define VSSRLRNUI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
-{ \
- int i, j, ofs; \
- VReg temp = {}; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- int oprsz = simd_oprsz(desc); \
- \
- ofs = LSX_LEN / BIT; \
- for (i = 0; i < oprsz / 16; i++) { \
- for (j = 0; j < ofs; j++) { \
- temp.E1(j + ofs * 2 * i) = do_ssrlrnu_ ## E1(Vj->E2(j + ofs * i), \
- imm, BIT / 2); \
- temp.E1(j + ofs * (2 * i + 1)) = do_ssrlrnu_ ## E1(Vd->E2(j + ofs * i), \
- imm, BIT / 2); \
- } \
- } \
- *Vd = temp; \
-}
-
-void HELPER(vssrlrni_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
-{
- int i;
- Int128 mask;
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- int oprsz = simd_oprsz(desc);
-
- mask = int128_sub(int128_lshift(int128_one(), 64), int128_one());
-
- for (i = 0; i < oprsz / 16; i++) {
- do_vssrlrni_q(Vd, Vj, imm, i, mask);
- }
-}
-
-VSSRLRNUI(vssrlrni_bu_h, 16, B, H)
-VSSRLRNUI(vssrlrni_hu_w, 32, H, W)
-VSSRLRNUI(vssrlrni_wu_d, 64, W, D)
-
-#define VSSRARNUI(NAME, BIT, E1, E2) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
-{ \
- int i, j, ofs; \
- VReg temp = {}; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- int oprsz = simd_oprsz(desc); \
- \
- ofs = LSX_LEN / BIT; \
- for (i = 0; i < oprsz / 16; i++) { \
- for (j = 0; j < ofs; j++) { \
- temp.E1(j + ofs * 2 * i) = do_ssrarnu_ ## E1(Vj->E2(j + ofs * i), \
- imm, BIT / 2); \
- temp.E1(j + ofs * (2 * i + 1)) = do_ssrarnu_ ## E1(Vd->E2(j + ofs * i), \
- imm, BIT / 2); \
- } \
- } \
- *Vd = temp; \
-}
-
-static void do_vssrarni_du_q(VReg *Vd, VReg *Vj,
- uint64_t imm, int idx, Int128 mask1, Int128 mask2)
-{
- Int128 shft_res1, shft_res2, r1, r2;
-
- if (imm == 0) {
- shft_res1 = Vj->Q(idx);
- shft_res2 = Vd->Q(idx);
- } else {
- r1 = int128_and(int128_rshift(Vj->Q(idx), (imm - 1)), int128_one());
- r2 = int128_and(int128_rshift(Vd->Q(idx), (imm - 1)), int128_one());
- shft_res1 = int128_add(int128_rshift(Vj->Q(idx), imm), r1);
- shft_res2 = int128_add(int128_rshift(Vd->Q(idx), imm), r2);
- }
-
- if (int128_lt(Vj->Q(idx), int128_zero())) {
- shft_res1 = int128_zero();
- }
- if (int128_lt(Vd->Q(idx), int128_zero())) {
- shft_res2 = int128_zero();
- }
-
- if (int128_gt(shft_res1, mask1)) {
- Vd->D(idx * 2) = int128_getlo(mask1);
- } else if (int128_lt(shft_res1, int128_neg(mask2))) {
- Vd->D(idx * 2) = int128_getlo(mask2);
- } else {
- Vd->D(idx * 2) = int128_getlo(shft_res1);
- }
-
- if (int128_gt(shft_res2, mask1)) {
- Vd->D(idx * 2 + 1) = int128_getlo(mask1);
- } else if (int128_lt(shft_res2, int128_neg(mask2))) {
- Vd->D(idx * 2 + 1) = int128_getlo(mask2);
- } else {
- Vd->D(idx * 2 + 1) = int128_getlo(shft_res2);
- }
-}
-
-void HELPER(vssrarni_du_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
-{
- int i;
- Int128 mask1, mask2;
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- int oprsz = simd_oprsz(desc);
-
- mask1 = int128_sub(int128_lshift(int128_one(), 64), int128_one());
- mask2 = int128_lshift(int128_one(), 64);
-
- for (i = 0; i < oprsz / 16; i++) {
- do_vssrarni_du_q(Vd, Vj, imm, i, mask1, mask2);
- }
-}
-
-VSSRARNUI(vssrarni_bu_h, 16, B, H)
-VSSRARNUI(vssrarni_hu_w, 32, H, W)
-VSSRARNUI(vssrarni_wu_d, 64, W, D)
-
-#define DO_2OP(NAME, BIT, E, DO_OP) \
-void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- int oprsz = simd_oprsz(desc); \
- \
- for (i = 0; i < oprsz / (BIT / 8); i++) \
- { \
- Vd->E(i) = DO_OP(Vj->E(i)); \
- } \
-}
-
-DO_2OP(vclo_b, 8, UB, DO_CLO_B)
-DO_2OP(vclo_h, 16, UH, DO_CLO_H)
-DO_2OP(vclo_w, 32, UW, DO_CLO_W)
-DO_2OP(vclo_d, 64, UD, DO_CLO_D)
-DO_2OP(vclz_b, 8, UB, DO_CLZ_B)
-DO_2OP(vclz_h, 16, UH, DO_CLZ_H)
-DO_2OP(vclz_w, 32, UW, DO_CLZ_W)
-DO_2OP(vclz_d, 64, UD, DO_CLZ_D)
-
-#define VPCNT(NAME, BIT, E, FN) \
-void HELPER(NAME)(void *vd, void *vj, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- int oprsz = simd_oprsz(desc); \
- \
- for (i = 0; i < oprsz / (BIT / 8); i++) \
- { \
- Vd->E(i) = FN(Vj->E(i)); \
- } \
-}
-
-VPCNT(vpcnt_b, 8, UB, ctpop8)
-VPCNT(vpcnt_h, 16, UH, ctpop16)
-VPCNT(vpcnt_w, 32, UW, ctpop32)
-VPCNT(vpcnt_d, 64, UD, ctpop64)
-
-#define DO_BIT(NAME, BIT, E, DO_OP) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- int oprsz = simd_oprsz(desc); \
- \
- for (i = 0; i < oprsz / (BIT / 8); i++) { \
- Vd->E(i) = DO_OP(Vj->E(i), Vk->E(i) % BIT); \
- } \
-}
-
-DO_BIT(vbitclr_b, 8, UB, DO_BITCLR)
-DO_BIT(vbitclr_h, 16, UH, DO_BITCLR)
-DO_BIT(vbitclr_w, 32, UW, DO_BITCLR)
-DO_BIT(vbitclr_d, 64, UD, DO_BITCLR)
-DO_BIT(vbitset_b, 8, UB, DO_BITSET)
-DO_BIT(vbitset_h, 16, UH, DO_BITSET)
-DO_BIT(vbitset_w, 32, UW, DO_BITSET)
-DO_BIT(vbitset_d, 64, UD, DO_BITSET)
-DO_BIT(vbitrev_b, 8, UB, DO_BITREV)
-DO_BIT(vbitrev_h, 16, UH, DO_BITREV)
-DO_BIT(vbitrev_w, 32, UW, DO_BITREV)
-DO_BIT(vbitrev_d, 64, UD, DO_BITREV)
-
-#define DO_BITI(NAME, BIT, E, DO_OP) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- int oprsz = simd_oprsz(desc); \
- \
- for (i = 0; i < oprsz / (BIT / 8); i++) { \
- Vd->E(i) = DO_OP(Vj->E(i), imm); \
- } \
-}
-
-DO_BITI(vbitclri_b, 8, UB, DO_BITCLR)
-DO_BITI(vbitclri_h, 16, UH, DO_BITCLR)
-DO_BITI(vbitclri_w, 32, UW, DO_BITCLR)
-DO_BITI(vbitclri_d, 64, UD, DO_BITCLR)
-DO_BITI(vbitseti_b, 8, UB, DO_BITSET)
-DO_BITI(vbitseti_h, 16, UH, DO_BITSET)
-DO_BITI(vbitseti_w, 32, UW, DO_BITSET)
-DO_BITI(vbitseti_d, 64, UD, DO_BITSET)
-DO_BITI(vbitrevi_b, 8, UB, DO_BITREV)
-DO_BITI(vbitrevi_h, 16, UH, DO_BITREV)
-DO_BITI(vbitrevi_w, 32, UW, DO_BITREV)
-DO_BITI(vbitrevi_d, 64, UD, DO_BITREV)
-
-#define VFRSTP(NAME, BIT, MASK, E) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i, j, m, ofs; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- int oprsz = simd_oprsz(desc); \
- \
- ofs = LSX_LEN / BIT; \
- for (i = 0; i < oprsz / 16; i++) { \
- m = Vk->E(i * ofs) & MASK; \
- for (j = 0; j < ofs; j++) { \
- if (Vj->E(j + ofs * i) < 0) { \
- break; \
- } \
- } \
- Vd->E(m + i * ofs) = j; \
- } \
-}
-
-VFRSTP(vfrstp_b, 8, 0xf, B)
-VFRSTP(vfrstp_h, 16, 0x7, H)
-
-#define VFRSTPI(NAME, BIT, E) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
-{ \
- int i, j, m, ofs; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- int oprsz = simd_oprsz(desc); \
- \
- ofs = LSX_LEN / BIT; \
- m = imm % ofs; \
- for (i = 0; i < oprsz / 16; i++) { \
- for (j = 0; j < ofs; j++) { \
- if (Vj->E(j + ofs * i) < 0) { \
- break; \
- } \
- } \
- Vd->E(m + i * ofs) = j; \
- } \
-}
-
-VFRSTPI(vfrstpi_b, 8, B)
-VFRSTPI(vfrstpi_h, 16, H)
-
-static void vec_update_fcsr0_mask(CPULoongArchState *env,
- uintptr_t pc, int mask)
-{
- int flags = get_float_exception_flags(&env->fp_status);
-
- set_float_exception_flags(0, &env->fp_status);
-
- flags &= ~mask;
-
- if (flags) {
- flags = ieee_ex_to_loongarch(flags);
- UPDATE_FP_CAUSE(env->fcsr0, flags);
- }
-
- if (GET_FP_ENABLES(env->fcsr0) & flags) {
- do_raise_exception(env, EXCCODE_FPE, pc);
- } else {
- UPDATE_FP_FLAGS(env->fcsr0, flags);
- }
-}
-
-static void vec_update_fcsr0(CPULoongArchState *env, uintptr_t pc)
-{
- vec_update_fcsr0_mask(env, pc, 0);
-}
-
-static inline void vec_clear_cause(CPULoongArchState *env)
-{
- SET_FP_CAUSE(env->fcsr0, 0);
-}
-
-#define DO_3OP_F(NAME, BIT, E, FN) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, \
- CPULoongArchState *env, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- int oprsz = simd_oprsz(desc); \
- \
- vec_clear_cause(env); \
- for (i = 0; i < oprsz / (BIT / 8); i++) { \
- Vd->E(i) = FN(Vj->E(i), Vk->E(i), &env->fp_status); \
- vec_update_fcsr0(env, GETPC()); \
- } \
-}
-
-DO_3OP_F(vfadd_s, 32, UW, float32_add)
-DO_3OP_F(vfadd_d, 64, UD, float64_add)
-DO_3OP_F(vfsub_s, 32, UW, float32_sub)
-DO_3OP_F(vfsub_d, 64, UD, float64_sub)
-DO_3OP_F(vfmul_s, 32, UW, float32_mul)
-DO_3OP_F(vfmul_d, 64, UD, float64_mul)
-DO_3OP_F(vfdiv_s, 32, UW, float32_div)
-DO_3OP_F(vfdiv_d, 64, UD, float64_div)
-DO_3OP_F(vfmax_s, 32, UW, float32_maxnum)
-DO_3OP_F(vfmax_d, 64, UD, float64_maxnum)
-DO_3OP_F(vfmin_s, 32, UW, float32_minnum)
-DO_3OP_F(vfmin_d, 64, UD, float64_minnum)
-DO_3OP_F(vfmaxa_s, 32, UW, float32_maxnummag)
-DO_3OP_F(vfmaxa_d, 64, UD, float64_maxnummag)
-DO_3OP_F(vfmina_s, 32, UW, float32_minnummag)
-DO_3OP_F(vfmina_d, 64, UD, float64_minnummag)
-
-#define DO_4OP_F(NAME, BIT, E, FN, flags) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, void *va, \
- CPULoongArchState *env, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- VReg *Va = (VReg *)va; \
- int oprsz = simd_oprsz(desc); \
- \
- vec_clear_cause(env); \
- for (i = 0; i < oprsz / (BIT / 8); i++) { \
- Vd->E(i) = FN(Vj->E(i), Vk->E(i), Va->E(i), flags, &env->fp_status); \
- vec_update_fcsr0(env, GETPC()); \
- } \
-}
-
-DO_4OP_F(vfmadd_s, 32, UW, float32_muladd, 0)
-DO_4OP_F(vfmadd_d, 64, UD, float64_muladd, 0)
-DO_4OP_F(vfmsub_s, 32, UW, float32_muladd, float_muladd_negate_c)
-DO_4OP_F(vfmsub_d, 64, UD, float64_muladd, float_muladd_negate_c)
-DO_4OP_F(vfnmadd_s, 32, UW, float32_muladd, float_muladd_negate_result)
-DO_4OP_F(vfnmadd_d, 64, UD, float64_muladd, float_muladd_negate_result)
-DO_4OP_F(vfnmsub_s, 32, UW, float32_muladd,
- float_muladd_negate_c | float_muladd_negate_result)
-DO_4OP_F(vfnmsub_d, 64, UD, float64_muladd,
- float_muladd_negate_c | float_muladd_negate_result)
-
-#define DO_2OP_F(NAME, BIT, E, FN) \
-void HELPER(NAME)(void *vd, void *vj, \
- CPULoongArchState *env, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- int oprsz = simd_oprsz(desc); \
- \
- vec_clear_cause(env); \
- for (i = 0; i < oprsz / (BIT / 8); i++) { \
- Vd->E(i) = FN(env, Vj->E(i)); \
- } \
-}
-
-#define FLOGB(BIT, T) \
-static T do_flogb_## BIT(CPULoongArchState *env, T fj) \
-{ \
- T fp, fd; \
- float_status *status = &env->fp_status; \
- FloatRoundMode old_mode = get_float_rounding_mode(status); \
- \
- set_float_rounding_mode(float_round_down, status); \
- fp = float ## BIT ##_log2(fj, status); \
- fd = float ## BIT ##_round_to_int(fp, status); \
- set_float_rounding_mode(old_mode, status); \
- vec_update_fcsr0_mask(env, GETPC(), float_flag_inexact); \
- return fd; \
-}
-
-FLOGB(32, uint32_t)
-FLOGB(64, uint64_t)
-
-#define FCLASS(NAME, BIT, E, FN) \
-void HELPER(NAME)(void *vd, void *vj, \
- CPULoongArchState *env, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- int oprsz = simd_oprsz(desc); \
- \
- for (i = 0; i < oprsz / (BIT / 8); i++) { \
- Vd->E(i) = FN(env, Vj->E(i)); \
- } \
-}
-
-FCLASS(vfclass_s, 32, UW, helper_fclass_s)
-FCLASS(vfclass_d, 64, UD, helper_fclass_d)
-
-#define FSQRT(BIT, T) \
-static T do_fsqrt_## BIT(CPULoongArchState *env, T fj) \
-{ \
- T fd; \
- fd = float ## BIT ##_sqrt(fj, &env->fp_status); \
- vec_update_fcsr0(env, GETPC()); \
- return fd; \
-}
-
-FSQRT(32, uint32_t)
-FSQRT(64, uint64_t)
-
-#define FRECIP(BIT, T) \
-static T do_frecip_## BIT(CPULoongArchState *env, T fj) \
-{ \
- T fd; \
- fd = float ## BIT ##_div(float ## BIT ##_one, fj, &env->fp_status); \
- vec_update_fcsr0(env, GETPC()); \
- return fd; \
-}
-
-FRECIP(32, uint32_t)
-FRECIP(64, uint64_t)
-
-#define FRSQRT(BIT, T) \
-static T do_frsqrt_## BIT(CPULoongArchState *env, T fj) \
-{ \
- T fd, fp; \
- fp = float ## BIT ##_sqrt(fj, &env->fp_status); \
- fd = float ## BIT ##_div(float ## BIT ##_one, fp, &env->fp_status); \
- vec_update_fcsr0(env, GETPC()); \
- return fd; \
-}
-
-FRSQRT(32, uint32_t)
-FRSQRT(64, uint64_t)
-
-DO_2OP_F(vflogb_s, 32, UW, do_flogb_32)
-DO_2OP_F(vflogb_d, 64, UD, do_flogb_64)
-DO_2OP_F(vfsqrt_s, 32, UW, do_fsqrt_32)
-DO_2OP_F(vfsqrt_d, 64, UD, do_fsqrt_64)
-DO_2OP_F(vfrecip_s, 32, UW, do_frecip_32)
-DO_2OP_F(vfrecip_d, 64, UD, do_frecip_64)
-DO_2OP_F(vfrsqrt_s, 32, UW, do_frsqrt_32)
-DO_2OP_F(vfrsqrt_d, 64, UD, do_frsqrt_64)
-
-static uint32_t float16_cvt_float32(uint16_t h, float_status *status)
-{
- return float16_to_float32(h, true, status);
-}
-static uint64_t float32_cvt_float64(uint32_t s, float_status *status)
-{
- return float32_to_float64(s, status);
-}
-
-static uint16_t float32_cvt_float16(uint32_t s, float_status *status)
-{
- return float32_to_float16(s, true, status);
-}
-static uint32_t float64_cvt_float32(uint64_t d, float_status *status)
-{
- return float64_to_float32(d, status);
-}
-
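-/*
- * vfcvtl/vfcvth widen the low/high half of each 128-bit lane of Vj;
- * vfcvt narrows Vj into the high half and Vk into the low half of each lane.
- */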
-void HELPER(vfcvtl_s_h)(void *vd, void *vj,
- CPULoongArchState *env, uint32_t desc)
-{
- int i, j, ofs;
- VReg temp = {};
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- int oprsz = simd_oprsz(desc);
-
- ofs = LSX_LEN / 32;
- vec_clear_cause(env);
- for (i = 0; i < oprsz / 16; i++) {
- for (j = 0; j < ofs; j++) {
-            temp.UW(j + ofs * i) = float16_cvt_float32(Vj->UH(j + ofs * 2 * i),
-                                                       &env->fp_status);
- }
- vec_update_fcsr0(env, GETPC());
- }
- *Vd = temp;
-}
-
-void HELPER(vfcvtl_d_s)(void *vd, void *vj,
- CPULoongArchState *env, uint32_t desc)
-{
- int i, j, ofs;
- VReg temp = {};
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- int oprsz = simd_oprsz(desc);
-
- ofs = LSX_LEN / 64;
- vec_clear_cause(env);
- for (i = 0; i < oprsz / 16; i++) {
- for (j = 0; j < ofs; j++) {
- temp.UD(j + ofs * i) = float32_cvt_float64(Vj->UW(j + ofs * 2 * i),
- &env->fp_status);
- }
- vec_update_fcsr0(env, GETPC());
- }
- *Vd = temp;
-}
-
-void HELPER(vfcvth_s_h)(void *vd, void *vj,
- CPULoongArchState *env, uint32_t desc)
-{
- int i, j, ofs;
- VReg temp = {};
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- int oprsz = simd_oprsz(desc);
-
- ofs = LSX_LEN / 32;
- vec_clear_cause(env);
- for (i = 0; i < oprsz / 16; i++) {
- for (j = 0; j < ofs; j++) {
- temp.UW(j + ofs * i) = float16_cvt_float32(Vj->UH(j + ofs * (2 * i + 1)),
- &env->fp_status);
- }
- vec_update_fcsr0(env, GETPC());
- }
- *Vd = temp;
-}
-
-void HELPER(vfcvth_d_s)(void *vd, void *vj,
- CPULoongArchState *env, uint32_t desc)
-{
- int i, j, ofs;
- VReg temp = {};
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- int oprsz = simd_oprsz(desc);
-
- ofs = LSX_LEN / 64;
- vec_clear_cause(env);
- for (i = 0; i < oprsz / 16; i++) {
- for (j = 0; j < ofs; j++) {
- temp.UD(j + ofs * i) = float32_cvt_float64(Vj->UW(j + ofs * (2 * i + 1)),
- &env->fp_status);
- }
- vec_update_fcsr0(env, GETPC());
- }
- *Vd = temp;
-}
-
-void HELPER(vfcvt_h_s)(void *vd, void *vj, void *vk,
- CPULoongArchState *env, uint32_t desc)
-{
- int i, j, ofs;
- VReg temp = {};
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- VReg *Vk = (VReg *)vk;
- int oprsz = simd_oprsz(desc);
-
- ofs = LSX_LEN / 32;
- vec_clear_cause(env);
-    for (i = 0; i < oprsz / 16; i++) {
- for (j = 0; j < ofs; j++) {
- temp.UH(j + ofs * (2 * i + 1)) = float32_cvt_float16(Vj->UW(j + ofs * i),
- &env->fp_status);
- temp.UH(j + ofs * 2 * i) = float32_cvt_float16(Vk->UW(j + ofs * i),
- &env->fp_status);
- }
- vec_update_fcsr0(env, GETPC());
- }
- *Vd = temp;
-}
-
-void HELPER(vfcvt_s_d)(void *vd, void *vj, void *vk,
- CPULoongArchState *env, uint32_t desc)
-{
- int i, j, ofs;
- VReg temp = {};
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- VReg *Vk = (VReg *)vk;
- int oprsz = simd_oprsz(desc);
-
- ofs = LSX_LEN / 64;
- vec_clear_cause(env);
-    for (i = 0; i < oprsz / 16; i++) {
- for (j = 0; j < ofs; j++) {
- temp.UW(j + ofs * (2 * i + 1)) = float64_cvt_float32(Vj->UD(j + ofs * i),
- &env->fp_status);
- temp.UW(j + ofs * 2 * i) = float64_cvt_float32(Vk->UD(j + ofs * i),
- &env->fp_status);
- }
- vec_update_fcsr0(env, GETPC());
- }
- *Vd = temp;
-}
-
-void HELPER(vfrint_s)(void *vd, void *vj,
- CPULoongArchState *env, uint32_t desc)
-{
- int i;
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- int oprsz = simd_oprsz(desc);
-
- vec_clear_cause(env);
- for (i = 0; i < oprsz / 4; i++) {
- Vd->W(i) = float32_round_to_int(Vj->UW(i), &env->fp_status);
- vec_update_fcsr0(env, GETPC());
- }
-}
-
-void HELPER(vfrint_d)(void *vd, void *vj,
- CPULoongArchState *env, uint32_t desc)
-{
- int i;
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- int oprsz = simd_oprsz(desc);
-
- vec_clear_cause(env);
- for (i = 0; i < oprsz / 8; i++) {
- Vd->D(i) = float64_round_to_int(Vj->UD(i), &env->fp_status);
- vec_update_fcsr0(env, GETPC());
- }
-}
-
-#define FCVT_2OP(NAME, BIT, E, MODE) \
-void HELPER(NAME)(void *vd, void *vj, \
- CPULoongArchState *env, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- int oprsz = simd_oprsz(desc); \
- \
- vec_clear_cause(env); \
- for (i = 0; i < oprsz / (BIT / 8); i++) { \
- FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); \
- set_float_rounding_mode(MODE, &env->fp_status); \
- Vd->E(i) = float## BIT ## _round_to_int(Vj->E(i), &env->fp_status); \
- set_float_rounding_mode(old_mode, &env->fp_status); \
- vec_update_fcsr0(env, GETPC()); \
- } \
-}
-
-FCVT_2OP(vfrintrne_s, 32, UW, float_round_nearest_even)
-FCVT_2OP(vfrintrne_d, 64, UD, float_round_nearest_even)
-FCVT_2OP(vfrintrz_s, 32, UW, float_round_to_zero)
-FCVT_2OP(vfrintrz_d, 64, UD, float_round_to_zero)
-FCVT_2OP(vfrintrp_s, 32, UW, float_round_up)
-FCVT_2OP(vfrintrp_d, 64, UD, float_round_up)
-FCVT_2OP(vfrintrm_s, 32, UW, float_round_down)
-FCVT_2OP(vfrintrm_d, 64, UD, float_round_down)
-
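-/*
- * FP to integer conversions: DO_FTINT converts one element and forces the
- * result to 0 when an invalid exception is raised for a NaN input; FTINT
- * wraps the conversion with an explicit rounding mode.
- */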
-#define FTINT(NAME, FMT1, FMT2, T1, T2, MODE) \
-static T2 do_ftint ## NAME(CPULoongArchState *env, T1 fj) \
-{ \
- T2 fd; \
- FloatRoundMode old_mode = get_float_rounding_mode(&env->fp_status); \
- \
- set_float_rounding_mode(MODE, &env->fp_status); \
- fd = do_## FMT1 ##_to_## FMT2(env, fj); \
- set_float_rounding_mode(old_mode, &env->fp_status); \
- return fd; \
-}
-
-#define DO_FTINT(FMT1, FMT2, T1, T2) \
-static T2 do_## FMT1 ##_to_## FMT2(CPULoongArchState *env, T1 fj) \
-{ \
- T2 fd; \
- \
- fd = FMT1 ##_to_## FMT2(fj, &env->fp_status); \
- if (get_float_exception_flags(&env->fp_status) & (float_flag_invalid)) { \
- if (FMT1 ##_is_any_nan(fj)) { \
- fd = 0; \
- } \
- } \
- vec_update_fcsr0(env, GETPC()); \
- return fd; \
-}
-
-DO_FTINT(float32, int32, uint32_t, uint32_t)
-DO_FTINT(float64, int64, uint64_t, uint64_t)
-DO_FTINT(float32, uint32, uint32_t, uint32_t)
-DO_FTINT(float64, uint64, uint64_t, uint64_t)
-DO_FTINT(float64, int32, uint64_t, uint32_t)
-DO_FTINT(float32, int64, uint32_t, uint64_t)
-
-FTINT(rne_w_s, float32, int32, uint32_t, uint32_t, float_round_nearest_even)
-FTINT(rne_l_d, float64, int64, uint64_t, uint64_t, float_round_nearest_even)
-FTINT(rp_w_s, float32, int32, uint32_t, uint32_t, float_round_up)
-FTINT(rp_l_d, float64, int64, uint64_t, uint64_t, float_round_up)
-FTINT(rz_w_s, float32, int32, uint32_t, uint32_t, float_round_to_zero)
-FTINT(rz_l_d, float64, int64, uint64_t, uint64_t, float_round_to_zero)
-FTINT(rm_w_s, float32, int32, uint32_t, uint32_t, float_round_down)
-FTINT(rm_l_d, float64, int64, uint64_t, uint64_t, float_round_down)
-
-DO_2OP_F(vftintrne_w_s, 32, UW, do_ftintrne_w_s)
-DO_2OP_F(vftintrne_l_d, 64, UD, do_ftintrne_l_d)
-DO_2OP_F(vftintrp_w_s, 32, UW, do_ftintrp_w_s)
-DO_2OP_F(vftintrp_l_d, 64, UD, do_ftintrp_l_d)
-DO_2OP_F(vftintrz_w_s, 32, UW, do_ftintrz_w_s)
-DO_2OP_F(vftintrz_l_d, 64, UD, do_ftintrz_l_d)
-DO_2OP_F(vftintrm_w_s, 32, UW, do_ftintrm_w_s)
-DO_2OP_F(vftintrm_l_d, 64, UD, do_ftintrm_l_d)
-DO_2OP_F(vftint_w_s, 32, UW, do_float32_to_int32)
-DO_2OP_F(vftint_l_d, 64, UD, do_float64_to_int64)
-
-FTINT(rz_wu_s, float32, uint32, uint32_t, uint32_t, float_round_to_zero)
-FTINT(rz_lu_d, float64, uint64, uint64_t, uint64_t, float_round_to_zero)
-
-DO_2OP_F(vftintrz_wu_s, 32, UW, do_ftintrz_wu_s)
-DO_2OP_F(vftintrz_lu_d, 64, UD, do_ftintrz_lu_d)
-DO_2OP_F(vftint_wu_s, 32, UW, do_float32_to_uint32)
-DO_2OP_F(vftint_lu_d, 64, UD, do_float64_to_uint64)
-
-FTINT(rm_w_d, float64, int32, uint64_t, uint32_t, float_round_down)
-FTINT(rp_w_d, float64, int32, uint64_t, uint32_t, float_round_up)
-FTINT(rz_w_d, float64, int32, uint64_t, uint32_t, float_round_to_zero)
-FTINT(rne_w_d, float64, int32, uint64_t, uint32_t, float_round_nearest_even)
-
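-/*
- * Narrowing double-to-word conversions: within each 128-bit lane, results
- * from Vj fill the high half and results from Vk fill the low half.
- */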
-#define FTINT_W_D(NAME, FN) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, \
- CPULoongArchState *env, uint32_t desc) \
-{ \
- int i, j, ofs; \
- VReg temp = {}; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- int oprsz = simd_oprsz(desc); \
- \
- ofs = LSX_LEN / 64; \
- vec_clear_cause(env); \
- for (i = 0; i < oprsz / 16; i++) { \
- for (j = 0; j < ofs; j++) { \
- temp.W(j + ofs * (2 * i + 1)) = FN(env, Vj->UD(j + ofs * i)); \
- temp.W(j + ofs * 2 * i) = FN(env, Vk->UD(j + ofs * i)); \
- } \
- } \
- *Vd = temp; \
-}
-
-FTINT_W_D(vftint_w_d, do_float64_to_int32)
-FTINT_W_D(vftintrm_w_d, do_ftintrm_w_d)
-FTINT_W_D(vftintrp_w_d, do_ftintrp_w_d)
-FTINT_W_D(vftintrz_w_d, do_ftintrz_w_d)
-FTINT_W_D(vftintrne_w_d, do_ftintrne_w_d)
-
-FTINT(rml_l_s, float32, int64, uint32_t, uint64_t, float_round_down)
-FTINT(rpl_l_s, float32, int64, uint32_t, uint64_t, float_round_up)
-FTINT(rzl_l_s, float32, int64, uint32_t, uint64_t, float_round_to_zero)
-FTINT(rnel_l_s, float32, int64, uint32_t, uint64_t, float_round_nearest_even)
-FTINT(rmh_l_s, float32, int64, uint32_t, uint64_t, float_round_down)
-FTINT(rph_l_s, float32, int64, uint32_t, uint64_t, float_round_up)
-FTINT(rzh_l_s, float32, int64, uint32_t, uint64_t, float_round_to_zero)
-FTINT(rneh_l_s, float32, int64, uint32_t, uint64_t, float_round_nearest_even)
-
-#define FTINTL_L_S(NAME, FN) \
-void HELPER(NAME)(void *vd, void *vj, \
- CPULoongArchState *env, uint32_t desc) \
-{ \
- int i, j, ofs; \
-    VReg temp = {};                                               \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- int oprsz = simd_oprsz(desc); \
- \
- ofs = LSX_LEN / 64; \
- vec_clear_cause(env); \
- for (i = 0; i < oprsz / 16; i++) { \
- for (j = 0; j < ofs; j++) { \
- temp.D(j + ofs * i) = FN(env, Vj->UW(j + ofs * 2 * i)); \
- } \
- } \
- *Vd = temp; \
-}
-
-FTINTL_L_S(vftintl_l_s, do_float32_to_int64)
-FTINTL_L_S(vftintrml_l_s, do_ftintrml_l_s)
-FTINTL_L_S(vftintrpl_l_s, do_ftintrpl_l_s)
-FTINTL_L_S(vftintrzl_l_s, do_ftintrzl_l_s)
-FTINTL_L_S(vftintrnel_l_s, do_ftintrnel_l_s)
-
-#define FTINTH_L_S(NAME, FN) \
-void HELPER(NAME)(void *vd, void *vj, \
- CPULoongArchState *env, uint32_t desc) \
-{ \
- int i, j, ofs; \
- VReg temp = {}; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- int oprsz = simd_oprsz(desc); \
- \
- ofs = LSX_LEN / 64; \
- vec_clear_cause(env); \
- for (i = 0; i < oprsz / 16; i++) { \
- for (j = 0; j < ofs; j++) { \
- temp.D(j + ofs * i) = FN(env, Vj->UW(j + ofs * (2 * i + 1))); \
- } \
- } \
- *Vd = temp; \
-}
-
-FTINTH_L_S(vftinth_l_s, do_float32_to_int64)
-FTINTH_L_S(vftintrmh_l_s, do_ftintrmh_l_s)
-FTINTH_L_S(vftintrph_l_s, do_ftintrph_l_s)
-FTINTH_L_S(vftintrzh_l_s, do_ftintrzh_l_s)
-FTINTH_L_S(vftintrneh_l_s, do_ftintrneh_l_s)
-
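-/* Integer to FP conversions, updating FCSR0 per element. */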
-#define FFINT(NAME, FMT1, FMT2, T1, T2) \
-static T2 do_ffint_ ## NAME(CPULoongArchState *env, T1 fj) \
-{ \
- T2 fd; \
- \
- fd = FMT1 ##_to_## FMT2(fj, &env->fp_status); \
- vec_update_fcsr0(env, GETPC()); \
- return fd; \
-}
-
-FFINT(s_w, int32, float32, int32_t, uint32_t)
-FFINT(d_l, int64, float64, int64_t, uint64_t)
-FFINT(s_wu, uint32, float32, uint32_t, uint32_t)
-FFINT(d_lu, uint64, float64, uint64_t, uint64_t)
-
-DO_2OP_F(vffint_s_w, 32, W, do_ffint_s_w)
-DO_2OP_F(vffint_d_l, 64, D, do_ffint_d_l)
-DO_2OP_F(vffint_s_wu, 32, UW, do_ffint_s_wu)
-DO_2OP_F(vffint_d_lu, 64, UD, do_ffint_d_lu)
-
-void HELPER(vffintl_d_w)(void *vd, void *vj,
- CPULoongArchState *env, uint32_t desc)
-{
- int i, j, ofs;
- VReg temp = {};
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- int oprsz = simd_oprsz(desc);
-
- ofs = LSX_LEN / 64;
- vec_clear_cause(env);
- for (i = 0; i < oprsz / 16; i++) {
- for (j = 0; j < ofs; j++) {
- temp.D(j + ofs * i) = int32_to_float64(Vj->W(j + ofs * 2 * i),
- &env->fp_status);
- }
- vec_update_fcsr0(env, GETPC());
- }
- *Vd = temp;
-}
-
-void HELPER(vffinth_d_w)(void *vd, void *vj,
- CPULoongArchState *env, uint32_t desc)
-{
- int i, j, ofs;
- VReg temp = {};
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- int oprsz = simd_oprsz(desc);
-
- ofs = LSX_LEN / 64;
- vec_clear_cause(env);
-    for (i = 0; i < oprsz / 16; i++) {
- for (j = 0; j < ofs; j++) {
- temp.D(j + ofs * i) = int32_to_float64(Vj->W(j + ofs * (2 * i + 1)),
- &env->fp_status);
- }
- vec_update_fcsr0(env, GETPC());
- }
- *Vd = temp;
-}
-
-void HELPER(vffint_s_l)(void *vd, void *vj, void *vk,
- CPULoongArchState *env, uint32_t desc)
-{
- int i, j, ofs;
- VReg temp = {};
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- VReg *Vk = (VReg *)vk;
- int oprsz = simd_oprsz(desc);
-
- ofs = LSX_LEN / 64;
- vec_clear_cause(env);
- for (i = 0; i < oprsz / 16; i++) {
- for (j = 0; j < ofs; j++) {
- temp.W(j + ofs * (2 * i + 1)) = int64_to_float32(Vj->D(j + ofs * i),
- &env->fp_status);
- temp.W(j + ofs * 2 * i) = int64_to_float32(Vk->D(j + ofs * i),
- &env->fp_status);
- }
- vec_update_fcsr0(env, GETPC());
- }
- *Vd = temp;
-}
-
-#define VCMPI(NAME, BIT, E, DO_OP) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- typedef __typeof(Vd->E(0)) TD; \
- int oprsz = simd_oprsz(desc); \
- \
- for (i = 0; i < oprsz / (BIT / 8); i++) { \
- Vd->E(i) = DO_OP(Vj->E(i), (TD)imm); \
- } \
-}
-
-VCMPI(vseqi_b, 8, B, VSEQ)
-VCMPI(vseqi_h, 16, H, VSEQ)
-VCMPI(vseqi_w, 32, W, VSEQ)
-VCMPI(vseqi_d, 64, D, VSEQ)
-VCMPI(vslei_b, 8, B, VSLE)
-VCMPI(vslei_h, 16, H, VSLE)
-VCMPI(vslei_w, 32, W, VSLE)
-VCMPI(vslei_d, 64, D, VSLE)
-VCMPI(vslei_bu, 8, UB, VSLE)
-VCMPI(vslei_hu, 16, UH, VSLE)
-VCMPI(vslei_wu, 32, UW, VSLE)
-VCMPI(vslei_du, 64, UD, VSLE)
-VCMPI(vslti_b, 8, B, VSLT)
-VCMPI(vslti_h, 16, H, VSLT)
-VCMPI(vslti_w, 32, W, VSLT)
-VCMPI(vslti_d, 64, D, VSLT)
-VCMPI(vslti_bu, 8, UB, VSLT)
-VCMPI(vslti_hu, 16, UH, VSLT)
-VCMPI(vslti_wu, 32, UW, VSLT)
-VCMPI(vslti_du, 64, UD, VSLT)
-
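-/* Map a softfloat comparison result to an all-ones/all-zeros element mask. */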
-static uint64_t vfcmp_common(CPULoongArchState *env,
- FloatRelation cmp, uint32_t flags)
-{
- uint64_t ret = 0;
-
- switch (cmp) {
- case float_relation_less:
- ret = (flags & FCMP_LT);
- break;
- case float_relation_equal:
- ret = (flags & FCMP_EQ);
- break;
- case float_relation_greater:
- ret = (flags & FCMP_GT);
- break;
- case float_relation_unordered:
- ret = (flags & FCMP_UN);
- break;
- default:
- g_assert_not_reached();
- }
-
- if (ret) {
- ret = -1;
- }
-
- return ret;
-}
-
-#define VFCMP(NAME, BIT, E, FN) \
-void HELPER(NAME)(CPULoongArchState *env, uint32_t oprsz, \
- uint32_t vd, uint32_t vj, uint32_t vk, uint32_t flags) \
-{ \
- int i; \
- VReg t; \
- VReg *Vd = &(env->fpr[vd].vreg); \
- VReg *Vj = &(env->fpr[vj].vreg); \
- VReg *Vk = &(env->fpr[vk].vreg); \
- \
- vec_clear_cause(env); \
- for (i = 0; i < oprsz / (BIT / 8); i++) { \
- FloatRelation cmp; \
- cmp = FN(Vj->E(i), Vk->E(i), &env->fp_status); \
- t.E(i) = vfcmp_common(env, cmp, flags); \
- vec_update_fcsr0(env, GETPC()); \
- } \
- *Vd = t; \
-}
-
-VFCMP(vfcmp_c_s, 32, UW, float32_compare_quiet)
-VFCMP(vfcmp_s_s, 32, UW, float32_compare)
-VFCMP(vfcmp_c_d, 64, UD, float64_compare_quiet)
-VFCMP(vfcmp_s_d, 64, UD, float64_compare)
-
-void HELPER(vbitseli_b)(void *vd, void *vj, uint64_t imm, uint32_t desc)
-{
- int i;
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
-
- for (i = 0; i < simd_oprsz(desc); i++) {
- Vd->B(i) = (~Vd->B(i) & Vj->B(i)) | (Vd->B(i) & imm);
- }
-}
-
-/* Copy from target/arm/tcg/sve_helper.c */
-static inline bool do_match2(uint64_t n, uint64_t m0, uint64_t m1, int esz)
-{
- int bits = 8 << esz;
- uint64_t ones = dup_const(esz, 1);
- uint64_t signs = ones << (bits - 1);
- uint64_t cmp0, cmp1;
-
- cmp1 = dup_const(esz, n);
- cmp0 = cmp1 ^ m0;
- cmp1 = cmp1 ^ m1;
- cmp0 = (cmp0 - ones) & ~cmp0;
- cmp1 = (cmp1 - ones) & ~cmp1;
- return (cmp0 | cmp1) & signs;
-}
-
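-/* vsetanyeqz: set condition flag cd when any element of Vj is zero. */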
-#define SETANYEQZ(NAME, MO) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t oprsz, uint32_t cd, uint32_t vj) \
-{ \
- VReg *Vj = &(env->fpr[vj].vreg); \
- \
- env->cf[cd & 0x7] = do_match2(0, Vj->D(0), Vj->D(1), MO); \
- if (oprsz == 32) { \
- env->cf[cd & 0x7] = env->cf[cd & 0x7] || \
- do_match2(0, Vj->D(2), Vj->D(3), MO); \
- } \
-}
-
-SETANYEQZ(vsetanyeqz_b, MO_8)
-SETANYEQZ(vsetanyeqz_h, MO_16)
-SETANYEQZ(vsetanyeqz_w, MO_32)
-SETANYEQZ(vsetanyeqz_d, MO_64)
-
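-/* vsetallnez: set condition flag cd when every element of Vj is non-zero. */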
-#define SETALLNEZ(NAME, MO) \
-void HELPER(NAME)(CPULoongArchState *env, \
- uint32_t oprsz, uint32_t cd, uint32_t vj) \
-{ \
- VReg *Vj = &(env->fpr[vj].vreg); \
- \
-    env->cf[cd & 0x7] = !do_match2(0, Vj->D(0), Vj->D(1), MO);     \
- if (oprsz == 32) { \
- env->cf[cd & 0x7] = env->cf[cd & 0x7] && \
- !do_match2(0, Vj->D(2), Vj->D(3), MO); \
- } \
-}
-
-SETALLNEZ(vsetallnez_b, MO_8)
-SETALLNEZ(vsetallnez_h, MO_16)
-SETALLNEZ(vsetallnez_w, MO_32)
-SETALLNEZ(vsetallnez_d, MO_64)
-
-#define XVINSVE0(NAME, E, MASK) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
-{ \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- Vd->E(imm & MASK) = Vj->E(0); \
-}
-
-XVINSVE0(xvinsve0_w, W, 0x7)
-XVINSVE0(xvinsve0_d, D, 0x3)
-
-#define XVPICKVE(NAME, E, BIT, MASK) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
-{ \
- int i; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- int oprsz = simd_oprsz(desc); \
- \
- Vd->E(0) = Vj->E(imm & MASK); \
- for (i = 1; i < oprsz / (BIT / 8); i++) { \
- Vd->E(i) = 0; \
- } \
-}
-
-XVPICKVE(xvpickve_w, W, 32, 0x7)
-XVPICKVE(xvpickve_d, D, 64, 0x3)
-
-#define VPACKEV(NAME, BIT, E) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i; \
- VReg temp = {}; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- int oprsz = simd_oprsz(desc); \
- \
- for (i = 0; i < oprsz / (BIT / 8); i++) { \
- temp.E(2 * i + 1) = Vj->E(2 * i); \
-        temp.E(2 * i) = Vk->E(2 * i);                            \
- } \
- *Vd = temp; \
-}
-
-VPACKEV(vpackev_b, 16, B)
-VPACKEV(vpackev_h, 32, H)
-VPACKEV(vpackev_w, 64, W)
-VPACKEV(vpackev_d, 128, D)
-
-#define VPACKOD(NAME, BIT, E) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i; \
- VReg temp = {}; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- int oprsz = simd_oprsz(desc); \
- \
- for (i = 0; i < oprsz / (BIT / 8); i++) { \
- temp.E(2 * i + 1) = Vj->E(2 * i + 1); \
- temp.E(2 * i) = Vk->E(2 * i + 1); \
- } \
- *Vd = temp; \
-}
-
-VPACKOD(vpackod_b, 16, B)
-VPACKOD(vpackod_h, 32, H)
-VPACKOD(vpackod_w, 64, W)
-VPACKOD(vpackod_d, 128, D)
-
-#define VPICKEV(NAME, BIT, E) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i, j, ofs; \
- VReg temp = {}; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- int oprsz = simd_oprsz(desc); \
- \
- ofs = LSX_LEN / BIT; \
- for (i = 0; i < oprsz / 16; i++) { \
- for (j = 0; j < ofs; j++) { \
- temp.E(j + ofs * (2 * i + 1)) = Vj->E(2 * (j + ofs * i)); \
- temp.E(j + ofs * 2 * i) = Vk->E(2 * (j + ofs * i)); \
- } \
- } \
- *Vd = temp; \
-}
-
-VPICKEV(vpickev_b, 16, B)
-VPICKEV(vpickev_h, 32, H)
-VPICKEV(vpickev_w, 64, W)
-VPICKEV(vpickev_d, 128, D)
-
-#define VPICKOD(NAME, BIT, E) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i, j, ofs; \
- VReg temp = {}; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- int oprsz = simd_oprsz(desc); \
- \
- ofs = LSX_LEN / BIT; \
- for (i = 0; i < oprsz / 16; i++) { \
- for (j = 0; j < ofs; j++) { \
- temp.E(j + ofs * (2 * i + 1)) = Vj->E(2 * (j + ofs * i) + 1); \
- temp.E(j + ofs * 2 * i) = Vk->E(2 * (j + ofs * i) + 1); \
- } \
- } \
- *Vd = temp; \
-}
-
-VPICKOD(vpickod_b, 16, B)
-VPICKOD(vpickod_h, 32, H)
-VPICKOD(vpickod_w, 64, W)
-VPICKOD(vpickod_d, 128, D)
-
-#define VILVL(NAME, BIT, E) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i, j, ofs; \
- VReg temp = {}; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- int oprsz = simd_oprsz(desc); \
- \
- ofs = LSX_LEN / BIT; \
- for (i = 0; i < oprsz / 16; i++) { \
- for (j = 0; j < ofs; j++) { \
- temp.E(2 * (j + ofs * i) + 1) = Vj->E(j + ofs * 2 * i); \
- temp.E(2 * (j + ofs * i)) = Vk->E(j + ofs * 2 * i); \
- } \
- } \
- *Vd = temp; \
-}
-
-VILVL(vilvl_b, 16, B)
-VILVL(vilvl_h, 32, H)
-VILVL(vilvl_w, 64, W)
-VILVL(vilvl_d, 128, D)
-
-#define VILVH(NAME, BIT, E) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i, j, ofs; \
- VReg temp = {}; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- int oprsz = simd_oprsz(desc); \
- \
- ofs = LSX_LEN / BIT; \
- for (i = 0; i < oprsz / 16; i++) { \
- for (j = 0; j < ofs; j++) { \
- temp.E(2 * (j + ofs * i) + 1) = Vj->E(j + ofs * (2 * i + 1)); \
- temp.E(2 * (j + ofs * i)) = Vk->E(j + ofs * (2 * i + 1)); \
- } \
- } \
- *Vd = temp; \
-}
-
-VILVH(vilvh_b, 16, B)
-VILVH(vilvh_h, 32, H)
-VILVH(vilvh_w, 64, W)
-VILVH(vilvh_d, 128, D)
-
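-/*
- * vshuf.b: within each 128-bit lane, each index byte in Va (mod 32) selects
- * a byte from the concatenation of Vj (indices 16-31) and Vk (indices 0-15).
- */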
-void HELPER(vshuf_b)(void *vd, void *vj, void *vk, void *va, uint32_t desc)
-{
- int i, j, m;
- VReg temp = {};
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- VReg *Vk = (VReg *)vk;
- VReg *Va = (VReg *)va;
- int oprsz = simd_oprsz(desc);
-
- m = LSX_LEN / 8;
- for (i = 0; i < (oprsz / 16) * m; i++) {
- j = i < m ? 0 : 1;
- uint64_t k = (uint8_t)Va->B(i) % (2 * m);
-        temp.B(i) = k < m ? Vk->B(k + j * m) : Vj->B(k + (j - 1) * m);
- }
- *Vd = temp;
-}
-
-#define VSHUF(NAME, BIT, E) \
-void HELPER(NAME)(void *vd, void *vj, void *vk, uint32_t desc) \
-{ \
- int i, j, m; \
- VReg temp = {}; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- VReg *Vk = (VReg *)vk; \
- int oprsz = simd_oprsz(desc); \
- \
- m = LSX_LEN / BIT; \
- for (i = 0; i < (oprsz / 16) * m; i++) { \
- j = i < m ? 0 : 1; \
- uint64_t k = ((uint8_t)Vd->E(i)) % (2 * m); \
- temp.E(i) = k < m ? Vk->E(k + j * m) : Vj->E(k + (j - 1) * m); \
- } \
- *Vd = temp; \
-}
-
-VSHUF(vshuf_h, 16, H)
-VSHUF(vshuf_w, 32, W)
-VSHUF(vshuf_d, 64, D)
-
-#define VSHUF4I(NAME, BIT, E) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
-{ \
- int i, j, max; \
- VReg temp = {}; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- int oprsz = simd_oprsz(desc); \
- \
- max = LSX_LEN / BIT; \
- for (i = 0; i < oprsz / (BIT / 8); i++) { \
- j = i < max ? 1 : 2; \
-        temp.E(i) = Vj->E(SHF_POS(i - ((j - 1) * max), imm) + (j - 1) * max); \
- } \
- *Vd = temp; \
-}
-
-VSHUF4I(vshuf4i_b, 8, B)
-VSHUF4I(vshuf4i_h, 16, H)
-VSHUF4I(vshuf4i_w, 32, W)
-
-void HELPER(vshuf4i_d)(void *vd, void *vj, uint64_t imm, uint32_t desc)
-{
- int i;
- VReg temp = {};
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- int oprsz = simd_oprsz(desc);
-
- for (i = 0; i < oprsz / 16; i++) {
- temp.D(2 * i) = (imm & 2 ? Vj : Vd)->D((imm & 1) + 2 * i);
- temp.D(2 * i + 1) = (imm & 8 ? Vj : Vd)->D(((imm >> 2) & 1) + 2 * i);
- }
- *Vd = temp;
-}
-
-void HELPER(vperm_w)(void *vd, void *vj, void *vk, uint32_t desc)
-{
- int i, m;
- VReg temp = {};
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- VReg *Vk = (VReg *)vk;
-
- m = LASX_LEN / 32;
-    for (i = 0; i < m; i++) {
- uint64_t k = (uint8_t)Vk->W(i) % 8;
- temp.W(i) = Vj->W(k);
- }
- *Vd = temp;
-}
-
-void HELPER(vpermi_w)(void *vd, void *vj, uint64_t imm, uint32_t desc)
-{
- int i;
- VReg temp = {};
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
- int oprsz = simd_oprsz(desc);
-
- for (i = 0; i < oprsz / 16; i++) {
- temp.W(4 * i) = Vj->W((imm & 0x3) + 4 * i);
- temp.W(4 * i + 1) = Vj->W(((imm >> 2) & 0x3) + 4 * i);
- temp.W(4 * i + 2) = Vd->W(((imm >> 4) & 0x3) + 4 * i);
- temp.W(4 * i + 3) = Vd->W(((imm >> 6) & 0x3) + 4 * i);
- }
- *Vd = temp;
-}
-
-void HELPER(vpermi_d)(void *vd, void *vj, uint64_t imm, uint32_t desc)
-{
- VReg temp = {};
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
-
- temp.D(0) = Vj->D(imm & 0x3);
- temp.D(1) = Vj->D((imm >> 2) & 0x3);
- temp.D(2) = Vj->D((imm >> 4) & 0x3);
- temp.D(3) = Vj->D((imm >> 6) & 0x3);
- *Vd = temp;
-}
-
-void HELPER(vpermi_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
-{
- int i;
- VReg temp;
- VReg *Vd = (VReg *)vd;
- VReg *Vj = (VReg *)vj;
-
- for (i = 0; i < 2; i++, imm >>= 4) {
-        temp.Q(i) = (imm & 2 ? Vd : Vj)->Q(imm & 1);
- }
- *Vd = temp;
-}
-
-#define VEXTRINS(NAME, BIT, E, MASK) \
-void HELPER(NAME)(void *vd, void *vj, uint64_t imm, uint32_t desc) \
-{ \
- int i, ins, extr, max; \
- VReg *Vd = (VReg *)vd; \
- VReg *Vj = (VReg *)vj; \
- int oprsz = simd_oprsz(desc); \
- \
- max = LSX_LEN / BIT; \
- ins = (imm >> 4) & MASK; \
- extr = imm & MASK; \
- for (i = 0; i < oprsz / 16; i++) { \
- Vd->E(ins + i * max) = Vj->E(extr + i * max); \
- } \
-}
-
-VEXTRINS(vextrins_b, 8, B, 0xf)
-VEXTRINS(vextrins_h, 16, H, 0x7)
-VEXTRINS(vextrins_w, 32, W, 0x3)
-VEXTRINS(vextrins_d, 64, D, 0x1)