From: Philippe Mathieu-Daudé
Date: Mon, 25 Mar 2024 14:30:33 +0000 (+0100)
Subject: accel/tcg: Rename load-extract/store-insert headers using .h.inc suffix
X-Git-Url: http://git.maquefel.me/?a=commitdiff_plain;h=e4751d340a49b117b90a411b179b8c892cf43d85;p=qemu.git

accel/tcg: Rename load-extract/store-insert headers using .h.inc suffix

Since commit 139c1837db ("meson: rename included C source files to
.c.inc"), QEMU's standard procedure for included C files is to use the
*.c.inc suffix.

Besides, since commit 6a0057aa22 ("docs/devel: make a statement about
includes") this is documented in the Coding Style:

  If you do use template header files they should be named with
  the ``.c.inc`` or ``.h.inc`` suffix to make it clear they are
  being included for expansion.

Therefore rename 'store-insert-al16.h' as 'store-insert-al16.h.inc'
and 'load-extract-al16-al8.h' as 'load-extract-al16-al8.h.inc'.

Signed-off-by: Philippe Mathieu-Daudé
Acked-by: Richard Henderson
Message-Id: <20240424173333.96148-3-philmd@linaro.org>
---

diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc
index 97dae70d53..134da3c1da 100644
--- a/accel/tcg/ldst_atomicity.c.inc
+++ b/accel/tcg/ldst_atomicity.c.inc
@@ -9,8 +9,8 @@
  * See the COPYING file in the top-level directory.
  */
 
-#include "host/load-extract-al16-al8.h"
-#include "host/store-insert-al16.h"
+#include "host/load-extract-al16-al8.h.inc"
+#include "host/store-insert-al16.h.inc"
 
 #ifdef CONFIG_ATOMIC64
 # define HAVE_al8 true
diff --git a/host/include/aarch64/host/load-extract-al16-al8.h b/host/include/aarch64/host/load-extract-al16-al8.h
deleted file mode 100644
index bd677c5e26..0000000000
--- a/host/include/aarch64/host/load-extract-al16-al8.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * SPDX-License-Identifier: GPL-2.0-or-later
- * Atomic extract 64 from 128-bit, AArch64 version.
- *
- * Copyright (C) 2023 Linaro, Ltd.
- */
-
-#ifndef AARCH64_LOAD_EXTRACT_AL16_AL8_H
-#define AARCH64_LOAD_EXTRACT_AL16_AL8_H
-
-#include "host/cpuinfo.h"
-#include "tcg/debug-assert.h"
-
-/**
- * load_atom_extract_al16_or_al8:
- * @pv: host address
- * @s: object size in bytes, @s <= 8.
- *
- * Load @s bytes from @pv, when pv % s != 0. If [p, p+s-1] does not
- * cross an 16-byte boundary then the access must be 16-byte atomic,
- * otherwise the access must be 8-byte atomic.
- */
-static inline uint64_t load_atom_extract_al16_or_al8(void *pv, int s)
-{
-    uintptr_t pi = (uintptr_t)pv;
-    __int128_t *ptr_align = (__int128_t *)(pi & ~7);
-    int shr = (pi & 7) * 8;
-    uint64_t l, h;
-
-    /*
-     * With FEAT_LSE2, LDP is single-copy atomic if 16-byte aligned
-     * and single-copy atomic on the parts if 8-byte aligned.
-     * All we need do is align the pointer mod 8.
-     */
-    tcg_debug_assert(HAVE_ATOMIC128_RO);
-    asm("ldp %0, %1, %2" : "=r"(l), "=r"(h) : "m"(*ptr_align));
-    return (l >> shr) | (h << (-shr & 63));
-}
-
-#endif /* AARCH64_LOAD_EXTRACT_AL16_AL8_H */
diff --git a/host/include/aarch64/host/load-extract-al16-al8.h.inc b/host/include/aarch64/host/load-extract-al16-al8.h.inc
new file mode 100644
index 0000000000..bd677c5e26
--- /dev/null
+++ b/host/include/aarch64/host/load-extract-al16-al8.h.inc
@@ -0,0 +1,40 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Atomic extract 64 from 128-bit, AArch64 version.
+ *
+ * Copyright (C) 2023 Linaro, Ltd.
+ */
+
+#ifndef AARCH64_LOAD_EXTRACT_AL16_AL8_H
+#define AARCH64_LOAD_EXTRACT_AL16_AL8_H
+
+#include "host/cpuinfo.h"
+#include "tcg/debug-assert.h"
+
+/**
+ * load_atom_extract_al16_or_al8:
+ * @pv: host address
+ * @s: object size in bytes, @s <= 8.
+ *
+ * Load @s bytes from @pv, when pv % s != 0. If [p, p+s-1] does not
+ * cross an 16-byte boundary then the access must be 16-byte atomic,
+ * otherwise the access must be 8-byte atomic.
+ */
+static inline uint64_t load_atom_extract_al16_or_al8(void *pv, int s)
+{
+    uintptr_t pi = (uintptr_t)pv;
+    __int128_t *ptr_align = (__int128_t *)(pi & ~7);
+    int shr = (pi & 7) * 8;
+    uint64_t l, h;
+
+    /*
+     * With FEAT_LSE2, LDP is single-copy atomic if 16-byte aligned
+     * and single-copy atomic on the parts if 8-byte aligned.
+     * All we need do is align the pointer mod 8.
+     */
+    tcg_debug_assert(HAVE_ATOMIC128_RO);
+    asm("ldp %0, %1, %2" : "=r"(l), "=r"(h) : "m"(*ptr_align));
+    return (l >> shr) | (h << (-shr & 63));
+}
+
+#endif /* AARCH64_LOAD_EXTRACT_AL16_AL8_H */
diff --git a/host/include/aarch64/host/store-insert-al16.h b/host/include/aarch64/host/store-insert-al16.h
deleted file mode 100644
index 1943155bc6..0000000000
--- a/host/include/aarch64/host/store-insert-al16.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * SPDX-License-Identifier: GPL-2.0-or-later
- * Atomic store insert into 128-bit, AArch64 version.
- *
- * Copyright (C) 2023 Linaro, Ltd.
- */
-
-#ifndef AARCH64_STORE_INSERT_AL16_H
-#define AARCH64_STORE_INSERT_AL16_H
-
-/**
- * store_atom_insert_al16:
- * @p: host address
- * @val: shifted value to store
- * @msk: mask for value to store
- *
- * Atomically store @val to @p masked by @msk.
- */
-static inline void ATTRIBUTE_ATOMIC128_OPT
-store_atom_insert_al16(Int128 *ps, Int128 val, Int128 msk)
-{
-    /*
-     * GCC only implements __sync* primitives for int128 on aarch64.
-     * We can do better without the barriers, and integrating the
-     * arithmetic into the load-exclusive/store-conditional pair.
-     */
-    uint64_t tl, th, vl, vh, ml, mh;
-    uint32_t fail;
-
-    qemu_build_assert(!HOST_BIG_ENDIAN);
-    vl = int128_getlo(val);
-    vh = int128_gethi(val);
-    ml = int128_getlo(msk);
-    mh = int128_gethi(msk);
-
-    asm("0: ldxp %[l], %[h], %[mem]\n\t"
-        "bic %[l], %[l], %[ml]\n\t"
-        "bic %[h], %[h], %[mh]\n\t"
-        "orr %[l], %[l], %[vl]\n\t"
-        "orr %[h], %[h], %[vh]\n\t"
-        "stxp %w[f], %[l], %[h], %[mem]\n\t"
-        "cbnz %w[f], 0b\n"
-        : [mem] "+Q"(*ps), [f] "=&r"(fail), [l] "=&r"(tl), [h] "=&r"(th)
-        : [vl] "r"(vl), [vh] "r"(vh), [ml] "r"(ml), [mh] "r"(mh));
-}
-
-#endif /* AARCH64_STORE_INSERT_AL16_H */
diff --git a/host/include/aarch64/host/store-insert-al16.h.inc b/host/include/aarch64/host/store-insert-al16.h.inc
new file mode 100644
index 0000000000..1943155bc6
--- /dev/null
+++ b/host/include/aarch64/host/store-insert-al16.h.inc
@@ -0,0 +1,47 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Atomic store insert into 128-bit, AArch64 version.
+ *
+ * Copyright (C) 2023 Linaro, Ltd.
+ */
+
+#ifndef AARCH64_STORE_INSERT_AL16_H
+#define AARCH64_STORE_INSERT_AL16_H
+
+/**
+ * store_atom_insert_al16:
+ * @p: host address
+ * @val: shifted value to store
+ * @msk: mask for value to store
+ *
+ * Atomically store @val to @p masked by @msk.
+ */
+static inline void ATTRIBUTE_ATOMIC128_OPT
+store_atom_insert_al16(Int128 *ps, Int128 val, Int128 msk)
+{
+    /*
+     * GCC only implements __sync* primitives for int128 on aarch64.
+     * We can do better without the barriers, and integrating the
+     * arithmetic into the load-exclusive/store-conditional pair.
+     */
+    uint64_t tl, th, vl, vh, ml, mh;
+    uint32_t fail;
+
+    qemu_build_assert(!HOST_BIG_ENDIAN);
+    vl = int128_getlo(val);
+    vh = int128_gethi(val);
+    ml = int128_getlo(msk);
+    mh = int128_gethi(msk);
+
+    asm("0: ldxp %[l], %[h], %[mem]\n\t"
+        "bic %[l], %[l], %[ml]\n\t"
+        "bic %[h], %[h], %[mh]\n\t"
+        "orr %[l], %[l], %[vl]\n\t"
+        "orr %[h], %[h], %[vh]\n\t"
+        "stxp %w[f], %[l], %[h], %[mem]\n\t"
+        "cbnz %w[f], 0b\n"
+        : [mem] "+Q"(*ps), [f] "=&r"(fail), [l] "=&r"(tl), [h] "=&r"(th)
+        : [vl] "r"(vl), [vh] "r"(vh), [ml] "r"(ml), [mh] "r"(mh));
+}
+
+#endif /* AARCH64_STORE_INSERT_AL16_H */
diff --git a/host/include/generic/host/load-extract-al16-al8.h b/host/include/generic/host/load-extract-al16-al8.h
deleted file mode 100644
index d95556130f..0000000000
--- a/host/include/generic/host/load-extract-al16-al8.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * SPDX-License-Identifier: GPL-2.0-or-later
- * Atomic extract 64 from 128-bit, generic version.
- *
- * Copyright (C) 2023 Linaro, Ltd.
- */
-
-#ifndef HOST_LOAD_EXTRACT_AL16_AL8_H
-#define HOST_LOAD_EXTRACT_AL16_AL8_H
-
-/**
- * load_atom_extract_al16_or_al8:
- * @pv: host address
- * @s: object size in bytes, @s <= 8.
- *
- * Load @s bytes from @pv, when pv % s != 0. If [p, p+s-1] does not
- * cross an 16-byte boundary then the access must be 16-byte atomic,
- * otherwise the access must be 8-byte atomic.
- */
-static inline uint64_t ATTRIBUTE_ATOMIC128_OPT
-load_atom_extract_al16_or_al8(void *pv, int s)
-{
-    uintptr_t pi = (uintptr_t)pv;
-    int o = pi & 7;
-    int shr = (HOST_BIG_ENDIAN ? 16 - s - o : o) * 8;
-    Int128 r;
-
-    pv = (void *)(pi & ~7);
-    if (pi & 8) {
-        uint64_t *p8 = __builtin_assume_aligned(pv, 16, 8);
-        uint64_t a = qatomic_read__nocheck(p8);
-        uint64_t b = qatomic_read__nocheck(p8 + 1);
-
-        if (HOST_BIG_ENDIAN) {
-            r = int128_make128(b, a);
-        } else {
-            r = int128_make128(a, b);
-        }
-    } else {
-        r = atomic16_read_ro(pv);
-    }
-    return int128_getlo(int128_urshift(r, shr));
-}
-
-#endif /* HOST_LOAD_EXTRACT_AL16_AL8_H */
diff --git a/host/include/generic/host/load-extract-al16-al8.h.inc b/host/include/generic/host/load-extract-al16-al8.h.inc
new file mode 100644
index 0000000000..d95556130f
--- /dev/null
+++ b/host/include/generic/host/load-extract-al16-al8.h.inc
@@ -0,0 +1,45 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Atomic extract 64 from 128-bit, generic version.
+ *
+ * Copyright (C) 2023 Linaro, Ltd.
+ */
+
+#ifndef HOST_LOAD_EXTRACT_AL16_AL8_H
+#define HOST_LOAD_EXTRACT_AL16_AL8_H
+
+/**
+ * load_atom_extract_al16_or_al8:
+ * @pv: host address
+ * @s: object size in bytes, @s <= 8.
+ *
+ * Load @s bytes from @pv, when pv % s != 0. If [p, p+s-1] does not
+ * cross an 16-byte boundary then the access must be 16-byte atomic,
+ * otherwise the access must be 8-byte atomic.
+ */
+static inline uint64_t ATTRIBUTE_ATOMIC128_OPT
+load_atom_extract_al16_or_al8(void *pv, int s)
+{
+    uintptr_t pi = (uintptr_t)pv;
+    int o = pi & 7;
+    int shr = (HOST_BIG_ENDIAN ? 16 - s - o : o) * 8;
+    Int128 r;
+
+    pv = (void *)(pi & ~7);
+    if (pi & 8) {
+        uint64_t *p8 = __builtin_assume_aligned(pv, 16, 8);
+        uint64_t a = qatomic_read__nocheck(p8);
+        uint64_t b = qatomic_read__nocheck(p8 + 1);
+
+        if (HOST_BIG_ENDIAN) {
+            r = int128_make128(b, a);
+        } else {
+            r = int128_make128(a, b);
+        }
+    } else {
+        r = atomic16_read_ro(pv);
+    }
+    return int128_getlo(int128_urshift(r, shr));
+}
+
+#endif /* HOST_LOAD_EXTRACT_AL16_AL8_H */
diff --git a/host/include/generic/host/store-insert-al16.h b/host/include/generic/host/store-insert-al16.h
deleted file mode 100644
index 4a1662183d..0000000000
--- a/host/include/generic/host/store-insert-al16.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * SPDX-License-Identifier: GPL-2.0-or-later
- * Atomic store insert into 128-bit, generic version.
- *
- * Copyright (C) 2023 Linaro, Ltd.
- */
-
-#ifndef HOST_STORE_INSERT_AL16_H
-#define HOST_STORE_INSERT_AL16_H
-
-/**
- * store_atom_insert_al16:
- * @p: host address
- * @val: shifted value to store
- * @msk: mask for value to store
- *
- * Atomically store @val to @p masked by @msk.
- */
-static inline void ATTRIBUTE_ATOMIC128_OPT
-store_atom_insert_al16(Int128 *ps, Int128 val, Int128 msk)
-{
-#if defined(CONFIG_ATOMIC128)
-    __uint128_t *pu;
-    Int128Alias old, new;
-
-    /* With CONFIG_ATOMIC128, we can avoid the memory barriers. */
-    pu = __builtin_assume_aligned(ps, 16);
-    old.u = *pu;
-    msk = int128_not(msk);
-    do {
-        new.s = int128_and(old.s, msk);
-        new.s = int128_or(new.s, val);
-    } while (!__atomic_compare_exchange_n(pu, &old.u, new.u, true,
-                                          __ATOMIC_RELAXED, __ATOMIC_RELAXED));
-#else
-    Int128 old, new, cmp;
-
-    ps = __builtin_assume_aligned(ps, 16);
-    old = *ps;
-    msk = int128_not(msk);
-    do {
-        cmp = old;
-        new = int128_and(old, msk);
-        new = int128_or(new, val);
-        old = atomic16_cmpxchg(ps, cmp, new);
-    } while (int128_ne(cmp, old));
-#endif
-}
-
-#endif /* HOST_STORE_INSERT_AL16_H */
diff --git a/host/include/generic/host/store-insert-al16.h.inc b/host/include/generic/host/store-insert-al16.h.inc
new file mode 100644
index 0000000000..4a1662183d
--- /dev/null
+++ b/host/include/generic/host/store-insert-al16.h.inc
@@ -0,0 +1,50 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Atomic store insert into 128-bit, generic version.
+ *
+ * Copyright (C) 2023 Linaro, Ltd.
+ */
+
+#ifndef HOST_STORE_INSERT_AL16_H
+#define HOST_STORE_INSERT_AL16_H
+
+/**
+ * store_atom_insert_al16:
+ * @p: host address
+ * @val: shifted value to store
+ * @msk: mask for value to store
+ *
+ * Atomically store @val to @p masked by @msk.
+ */
+static inline void ATTRIBUTE_ATOMIC128_OPT
+store_atom_insert_al16(Int128 *ps, Int128 val, Int128 msk)
+{
+#if defined(CONFIG_ATOMIC128)
+    __uint128_t *pu;
+    Int128Alias old, new;
+
+    /* With CONFIG_ATOMIC128, we can avoid the memory barriers. */
+    pu = __builtin_assume_aligned(ps, 16);
+    old.u = *pu;
+    msk = int128_not(msk);
+    do {
+        new.s = int128_and(old.s, msk);
+        new.s = int128_or(new.s, val);
+    } while (!__atomic_compare_exchange_n(pu, &old.u, new.u, true,
+                                          __ATOMIC_RELAXED, __ATOMIC_RELAXED));
+#else
+    Int128 old, new, cmp;
+
+    ps = __builtin_assume_aligned(ps, 16);
+    old = *ps;
+    msk = int128_not(msk);
+    do {
+        cmp = old;
+        new = int128_and(old, msk);
+        new = int128_or(new, val);
+        old = atomic16_cmpxchg(ps, cmp, new);
+    } while (int128_ne(cmp, old));
+#endif
+}
+
+#endif /* HOST_STORE_INSERT_AL16_H */
diff --git a/host/include/loongarch64/host/load-extract-al16-al8.h b/host/include/loongarch64/host/load-extract-al16-al8.h
deleted file mode 100644
index d1fb59d8af..0000000000
--- a/host/include/loongarch64/host/load-extract-al16-al8.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * SPDX-License-Identifier: GPL-2.0-or-later
- * Atomic extract 64 from 128-bit, LoongArch version.
- *
- * Copyright (C) 2023 Linaro, Ltd.
- */
-
-#ifndef LOONGARCH_LOAD_EXTRACT_AL16_AL8_H
-#define LOONGARCH_LOAD_EXTRACT_AL16_AL8_H
-
-#include "host/cpuinfo.h"
-#include "tcg/debug-assert.h"
-
-/**
- * load_atom_extract_al16_or_al8:
- * @pv: host address
- * @s: object size in bytes, @s <= 8.
- *
- * Load @s bytes from @pv, when pv % s != 0. If [p, p+s-1] does not
- * cross an 16-byte boundary then the access must be 16-byte atomic,
- * otherwise the access must be 8-byte atomic.
- */
-static inline uint64_t load_atom_extract_al16_or_al8(void *pv, int s)
-{
-    uintptr_t pi = (uintptr_t)pv;
-    Int128 *ptr_align = (Int128 *)(pi & ~7);
-    int shr = (pi & 7) * 8;
-    uint64_t l, h;
-
-    tcg_debug_assert(HAVE_ATOMIC128_RO);
-    asm("vld $vr0, %2, 0\n\t"
-        "vpickve2gr.d %0, $vr0, 0\n\t"
-        "vpickve2gr.d %1, $vr0, 1"
-        : "=r"(l), "=r"(h) : "r"(ptr_align), "m"(*ptr_align) : "f0");
-
-    return (l >> shr) | (h << (-shr & 63));
-}
-
-#endif /* LOONGARCH_LOAD_EXTRACT_AL16_AL8_H */
diff --git a/host/include/loongarch64/host/load-extract-al16-al8.h.inc b/host/include/loongarch64/host/load-extract-al16-al8.h.inc
new file mode 100644
index 0000000000..d1fb59d8af
--- /dev/null
+++ b/host/include/loongarch64/host/load-extract-al16-al8.h.inc
@@ -0,0 +1,39 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Atomic extract 64 from 128-bit, LoongArch version.
+ *
+ * Copyright (C) 2023 Linaro, Ltd.
+ */
+
+#ifndef LOONGARCH_LOAD_EXTRACT_AL16_AL8_H
+#define LOONGARCH_LOAD_EXTRACT_AL16_AL8_H
+
+#include "host/cpuinfo.h"
+#include "tcg/debug-assert.h"
+
+/**
+ * load_atom_extract_al16_or_al8:
+ * @pv: host address
+ * @s: object size in bytes, @s <= 8.
+ *
+ * Load @s bytes from @pv, when pv % s != 0. If [p, p+s-1] does not
+ * cross an 16-byte boundary then the access must be 16-byte atomic,
+ * otherwise the access must be 8-byte atomic.
+ */
+static inline uint64_t load_atom_extract_al16_or_al8(void *pv, int s)
+{
+    uintptr_t pi = (uintptr_t)pv;
+    Int128 *ptr_align = (Int128 *)(pi & ~7);
+    int shr = (pi & 7) * 8;
+    uint64_t l, h;
+
+    tcg_debug_assert(HAVE_ATOMIC128_RO);
+    asm("vld $vr0, %2, 0\n\t"
+        "vpickve2gr.d %0, $vr0, 0\n\t"
+        "vpickve2gr.d %1, $vr0, 1"
+        : "=r"(l), "=r"(h) : "r"(ptr_align), "m"(*ptr_align) : "f0");
+
+    return (l >> shr) | (h << (-shr & 63));
+}
+
+#endif /* LOONGARCH_LOAD_EXTRACT_AL16_AL8_H */
diff --git a/host/include/loongarch64/host/store-insert-al16.h b/host/include/loongarch64/host/store-insert-al16.h
deleted file mode 100644
index 919fd8d744..0000000000
--- a/host/include/loongarch64/host/store-insert-al16.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/*
- * SPDX-License-Identifier: GPL-2.0-or-later
- * Atomic store insert into 128-bit, LoongArch version.
- */
-
-#ifndef LOONGARCH_STORE_INSERT_AL16_H
-#define LOONGARCH_STORE_INSERT_AL16_H
-
-void store_atom_insert_al16(Int128 *ps, Int128 val, Int128 msk)
-    QEMU_ERROR("unsupported atomic");
-
-#endif /* LOONGARCH_STORE_INSERT_AL16_H */
diff --git a/host/include/loongarch64/host/store-insert-al16.h.inc b/host/include/loongarch64/host/store-insert-al16.h.inc
new file mode 100644
index 0000000000..919fd8d744
--- /dev/null
+++ b/host/include/loongarch64/host/store-insert-al16.h.inc
@@ -0,0 +1,12 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Atomic store insert into 128-bit, LoongArch version.
+ */
+
+#ifndef LOONGARCH_STORE_INSERT_AL16_H
+#define LOONGARCH_STORE_INSERT_AL16_H
+
+void store_atom_insert_al16(Int128 *ps, Int128 val, Int128 msk)
+    QEMU_ERROR("unsupported atomic");
+
+#endif /* LOONGARCH_STORE_INSERT_AL16_H */
diff --git a/host/include/x86_64/host/load-extract-al16-al8.h b/host/include/x86_64/host/load-extract-al16-al8.h
deleted file mode 100644
index baa506b7b5..0000000000
--- a/host/include/x86_64/host/load-extract-al16-al8.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * SPDX-License-Identifier: GPL-2.0-or-later
- * Atomic extract 64 from 128-bit, x86_64 version.
- *
- * Copyright (C) 2023 Linaro, Ltd.
- */
-
-#ifndef X86_64_LOAD_EXTRACT_AL16_AL8_H
-#define X86_64_LOAD_EXTRACT_AL16_AL8_H
-
-#ifdef CONFIG_INT128_TYPE
-#include "host/atomic128-ldst.h"
-
-/**
- * load_atom_extract_al16_or_al8:
- * @pv: host address
- * @s: object size in bytes, @s <= 8.
- *
- * Load @s bytes from @pv, when pv % s != 0. If [p, p+s-1] does not
- * cross an 16-byte boundary then the access must be 16-byte atomic,
- * otherwise the access must be 8-byte atomic.
- */
-static inline uint64_t ATTRIBUTE_ATOMIC128_OPT
-load_atom_extract_al16_or_al8(void *pv, int s)
-{
-    uintptr_t pi = (uintptr_t)pv;
-    __int128_t *ptr_align = (__int128_t *)(pi & ~7);
-    int shr = (pi & 7) * 8;
-    X86Int128Union r;
-
-    /*
-     * ptr_align % 16 is now only 0 or 8.
-     * If the host supports atomic loads with VMOVDQU, then always use that,
-     * making the branch highly predictable. Otherwise we must use VMOVDQA
-     * when ptr_align % 16 == 0 for 16-byte atomicity.
-     */
-    if ((cpuinfo & CPUINFO_ATOMIC_VMOVDQU) || (pi & 8)) {
-        asm("vmovdqu %1, %0" : "=x" (r.v) : "m" (*ptr_align));
-    } else {
-        asm("vmovdqa %1, %0" : "=x" (r.v) : "m" (*ptr_align));
-    }
-    return int128_getlo(int128_urshift(r.s, shr));
-}
-#else
-/* Fallback definition that must be optimized away, or error. */
-uint64_t QEMU_ERROR("unsupported atomic")
-    load_atom_extract_al16_or_al8(void *pv, int s);
-#endif
-
-#endif /* X86_64_LOAD_EXTRACT_AL16_AL8_H */
diff --git a/host/include/x86_64/host/load-extract-al16-al8.h.inc b/host/include/x86_64/host/load-extract-al16-al8.h.inc
new file mode 100644
index 0000000000..baa506b7b5
--- /dev/null
+++ b/host/include/x86_64/host/load-extract-al16-al8.h.inc
@@ -0,0 +1,50 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Atomic extract 64 from 128-bit, x86_64 version.
+ *
+ * Copyright (C) 2023 Linaro, Ltd.
+ */
+
+#ifndef X86_64_LOAD_EXTRACT_AL16_AL8_H
+#define X86_64_LOAD_EXTRACT_AL16_AL8_H
+
+#ifdef CONFIG_INT128_TYPE
+#include "host/atomic128-ldst.h"
+
+/**
+ * load_atom_extract_al16_or_al8:
+ * @pv: host address
+ * @s: object size in bytes, @s <= 8.
+ *
+ * Load @s bytes from @pv, when pv % s != 0. If [p, p+s-1] does not
+ * cross an 16-byte boundary then the access must be 16-byte atomic,
+ * otherwise the access must be 8-byte atomic.
+ */
+static inline uint64_t ATTRIBUTE_ATOMIC128_OPT
+load_atom_extract_al16_or_al8(void *pv, int s)
+{
+    uintptr_t pi = (uintptr_t)pv;
+    __int128_t *ptr_align = (__int128_t *)(pi & ~7);
+    int shr = (pi & 7) * 8;
+    X86Int128Union r;
+
+    /*
+     * ptr_align % 16 is now only 0 or 8.
+     * If the host supports atomic loads with VMOVDQU, then always use that,
+     * making the branch highly predictable. Otherwise we must use VMOVDQA
+     * when ptr_align % 16 == 0 for 16-byte atomicity.
+     */
+    if ((cpuinfo & CPUINFO_ATOMIC_VMOVDQU) || (pi & 8)) {
+        asm("vmovdqu %1, %0" : "=x" (r.v) : "m" (*ptr_align));
+    } else {
+        asm("vmovdqa %1, %0" : "=x" (r.v) : "m" (*ptr_align));
+    }
+    return int128_getlo(int128_urshift(r.s, shr));
+}
+#else
+/* Fallback definition that must be optimized away, or error. */
+uint64_t QEMU_ERROR("unsupported atomic")
+    load_atom_extract_al16_or_al8(void *pv, int s);
+#endif
+
+#endif /* X86_64_LOAD_EXTRACT_AL16_AL8_H */