--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Selection between LSE and LL/SC atomics.
+ *
+ * Copyright (C) 2018 ARM Ltd.
+ * Author: Andrew Murray <andrew.murray@arm.com>
+ */
+
+#ifndef __ASM_ATOMIC_ARCH_H
+#define __ASM_ATOMIC_ARCH_H
+
+
+#include <linux/jump_label.h>
+
+#include <asm/cpucaps.h>
+#include <asm/atomic_ll_sc.h>
+#include <asm/atomic_lse.h>
+
+extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS];
+extern struct static_key_false arm64_const_caps_ready;
+
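+/*
+ * Choose between the LSE and LL/SC implementations at runtime. Both
+ * static keys default to false, so the LL/SC atomics are used until the
+ * CPU capabilities have been finalised and ARM64_HAS_LSE_ATOMICS detected.
+ */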
+static inline bool system_uses_lse_atomics(void)
+{
+       return (IS_ENABLED(CONFIG_ARM64_LSE_ATOMICS) &&
+               IS_ENABLED(CONFIG_AS_LSE) &&
+               static_branch_likely(&arm64_const_caps_ready)) &&
+               static_branch_likely(&cpu_hwcap_keys[ARM64_HAS_LSE_ATOMICS]);
+}
+
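+/*
+ * Dispatch to the __lse_*() or __ll_sc_*() implementation of the named
+ * operation, forwarding the arguments unchanged.
+ */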
+#define __lse_ll_sc_body(op, ...)                                      \
+({                                                                     \
+       system_uses_lse_atomics() ?                                     \
+               __lse_##op(__VA_ARGS__) :                               \
+               __ll_sc_##op(__VA_ARGS__);                              \
+})
+
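+/* Generate the arch_atomic_*() wrappers for the non-value-returning ops. */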
+#define ATOMIC_OP(op)                                                  \
+static inline void arch_##op(int i, atomic_t *v)                       \
+{                                                                      \
+       __lse_ll_sc_body(op, i, v);                                     \
+}
+
+ATOMIC_OP(atomic_andnot)
+ATOMIC_OP(atomic_or)
+ATOMIC_OP(atomic_xor)
+ATOMIC_OP(atomic_add)
+ATOMIC_OP(atomic_and)
+ATOMIC_OP(atomic_sub)
+
+
+#define ATOMIC_FETCH_OP(name, op)                                      \
+static inline int arch_##op##name(int i, atomic_t *v)                  \
+{                                                                      \
+       return __lse_ll_sc_body(op##name, i, v);                        \
+}
+
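+/* Instantiate each value-returning op in all four memory-ordering variants. */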
+#define ATOMIC_FETCH_OPS(op)                                           \
+       ATOMIC_FETCH_OP(_relaxed, op)                                   \
+       ATOMIC_FETCH_OP(_acquire, op)                                   \
+       ATOMIC_FETCH_OP(_release, op)                                   \
+       ATOMIC_FETCH_OP(        , op)
+
+ATOMIC_FETCH_OPS(atomic_fetch_andnot)
+ATOMIC_FETCH_OPS(atomic_fetch_or)
+ATOMIC_FETCH_OPS(atomic_fetch_xor)
+ATOMIC_FETCH_OPS(atomic_fetch_add)
+ATOMIC_FETCH_OPS(atomic_fetch_and)
+ATOMIC_FETCH_OPS(atomic_fetch_sub)
+ATOMIC_FETCH_OPS(atomic_add_return)
+ATOMIC_FETCH_OPS(atomic_sub_return)
+
+
+#define ATOMIC64_OP(op)                                                        \
+static inline void arch_##op(long i, atomic64_t *v)                    \
+{                                                                      \
+       __lse_ll_sc_body(op, i, v);                                     \
+}
+
+ATOMIC64_OP(atomic64_andnot)
+ATOMIC64_OP(atomic64_or)
+ATOMIC64_OP(atomic64_xor)
+ATOMIC64_OP(atomic64_add)
+ATOMIC64_OP(atomic64_and)
+ATOMIC64_OP(atomic64_sub)
+
+
+#define ATOMIC64_FETCH_OP(name, op)                                    \
+static inline long arch_##op##name(long i, atomic64_t *v)              \
+{                                                                      \
+       return __lse_ll_sc_body(op##name, i, v);                        \
+}
+
+#define ATOMIC64_FETCH_OPS(op)                                         \
+       ATOMIC64_FETCH_OP(_relaxed, op)                                 \
+       ATOMIC64_FETCH_OP(_acquire, op)                                 \
+       ATOMIC64_FETCH_OP(_release, op)                                 \
+       ATOMIC64_FETCH_OP(        , op)
+
+ATOMIC64_FETCH_OPS(atomic64_fetch_andnot)
+ATOMIC64_FETCH_OPS(atomic64_fetch_or)
+ATOMIC64_FETCH_OPS(atomic64_fetch_xor)
+ATOMIC64_FETCH_OPS(atomic64_fetch_add)
+ATOMIC64_FETCH_OPS(atomic64_fetch_and)
+ATOMIC64_FETCH_OPS(atomic64_fetch_sub)
+ATOMIC64_FETCH_OPS(atomic64_add_return)
+ATOMIC64_FETCH_OPS(atomic64_sub_return)
+
+
+static inline long arch_atomic64_dec_if_positive(atomic64_t *v)
+{
+       return __lse_ll_sc_body(atomic64_dec_if_positive, v);
+}
+
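+/*
+ * The cmpxchg wrappers are generated per access size (8/16/32/64 bits) and
+ * per ordering variant (relaxed, acquire, release, full barrier).
+ */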
+#define __CMPXCHG_CASE(name, sz)                                       \
+static inline u##sz __cmpxchg_case_##name##sz(volatile void *ptr,      \
+                                             u##sz old,                \
+                                             u##sz new)                \
+{                                                                      \
+       return __lse_ll_sc_body(_cmpxchg_case_##name##sz,               \
+                               ptr, old, new);                         \
+}
+
+__CMPXCHG_CASE(    ,  8)
+__CMPXCHG_CASE(    , 16)
+__CMPXCHG_CASE(    , 32)
+__CMPXCHG_CASE(    , 64)
+__CMPXCHG_CASE(acq_,  8)
+__CMPXCHG_CASE(acq_, 16)
+__CMPXCHG_CASE(acq_, 32)
+__CMPXCHG_CASE(acq_, 64)
+__CMPXCHG_CASE(rel_,  8)
+__CMPXCHG_CASE(rel_, 16)
+__CMPXCHG_CASE(rel_, 32)
+__CMPXCHG_CASE(rel_, 64)
+__CMPXCHG_CASE(mb_,  8)
+__CMPXCHG_CASE(mb_, 16)
+__CMPXCHG_CASE(mb_, 32)
+__CMPXCHG_CASE(mb_, 64)
+
+
+#define __CMPXCHG_DBL(name)                                            \
+static inline long __cmpxchg_double##name(unsigned long old1,          \
+                                        unsigned long old2,            \
+                                        unsigned long new1,            \
+                                        unsigned long new2,            \
+                                        volatile void *ptr)            \
+{                                                                      \
+       return __lse_ll_sc_body(_cmpxchg_double##name,                  \
+                               old1, old2, new1, new2, ptr);           \
+}
+
+__CMPXCHG_DBL(   )
+__CMPXCHG_DBL(_mb)
+
+#endif /* __ASM_ATOMIC_ARCH_H */
 
 #ifndef __ASM_ATOMIC_LL_SC_H
 #define __ASM_ATOMIC_LL_SC_H
 
-#ifndef __ARM64_IN_ATOMIC_IMPL
-#error "please don't include this file directly"
+#if IS_ENABLED(CONFIG_ARM64_LSE_ATOMICS) && IS_ENABLED(CONFIG_AS_LSE)
+#define __LL_SC_FALLBACK(asm_ops)                                      \
+"      b       3f\n"                                                   \
+"      .subsection     1\n"                                            \
+"3:\n"                                                                 \
+asm_ops "\n"                                                           \
+"      b       4f\n"                                                   \
+"      .previous\n"                                                    \
+"4:\n"
+#else
+#define __LL_SC_FALLBACK(asm_ops) asm_ops
 #endif
 
 /*
  * AArch64 UP and SMP safe atomic ops.  We use load exclusive and
  * store exclusive to ensure that these are atomic.  We may loop
  * to ensure that the update happens.
- *
- * NOTE: these functions do *not* follow the PCS and must explicitly
- * save any clobbered registers other than x0 (regardless of return
- * value).  This is achieved through -fcall-saved-* compiler flags for
- * this file, which unfortunately don't work on a per-function basis
- * (the optimize attribute silently ignores these options).
  */
 
 #define ATOMIC_OP(op, asm_op, constraint)                              \
-__LL_SC_INLINE void                                                    \
-__LL_SC_PREFIX(arch_atomic_##op(int i, atomic_t *v))                   \
+static inline void                                                     \
+__ll_sc_atomic_##op(int i, atomic_t *v)                                        \
 {                                                                      \
        unsigned long tmp;                                              \
        int result;                                                     \
                                                                        \
        asm volatile("// atomic_" #op "\n"                              \
+       __LL_SC_FALLBACK(                                               \
 "      prfm    pstl1strm, %2\n"                                        \
 "1:    ldxr    %w0, %2\n"                                              \
 "      " #asm_op "     %w0, %w0, %w3\n"                                \
 "      stxr    %w1, %w0, %2\n"                                         \
-"      cbnz    %w1, 1b"                                                \
+"      cbnz    %w1, 1b\n")                                             \
        : "=&r" (result), "=&r" (tmp), "+Q" (v->counter)                \
        : #constraint "r" (i));                                         \
-}                                                                      \
-__LL_SC_EXPORT(arch_atomic_##op);
+}
 
 #define ATOMIC_OP_RETURN(name, mb, acq, rel, cl, op, asm_op, constraint)\
-__LL_SC_INLINE int                                                     \
-__LL_SC_PREFIX(arch_atomic_##op##_return##name(int i, atomic_t *v))    \
+static inline int                                                      \
+__ll_sc_atomic_##op##_return##name(int i, atomic_t *v)                 \
 {                                                                      \
        unsigned long tmp;                                              \
        int result;                                                     \
                                                                        \
        asm volatile("// atomic_" #op "_return" #name "\n"              \
+       __LL_SC_FALLBACK(                                               \
 "      prfm    pstl1strm, %2\n"                                        \
 "1:    ld" #acq "xr    %w0, %2\n"                                      \
 "      " #asm_op "     %w0, %w0, %w3\n"                                \
 "      st" #rel "xr    %w1, %w0, %2\n"                                 \
 "      cbnz    %w1, 1b\n"                                              \
-"      " #mb                                                           \
+"      " #mb )                                                         \
        : "=&r" (result), "=&r" (tmp), "+Q" (v->counter)                \
        : #constraint "r" (i)                                           \
        : cl);                                                          \
                                                                        \
        return result;                                                  \
-}                                                                      \
-__LL_SC_EXPORT(arch_atomic_##op##_return##name);
+}
 
-#define ATOMIC_FETCH_OP(name, mb, acq, rel, cl, op, asm_op, constraint)        \
-__LL_SC_INLINE int                                                     \
-__LL_SC_PREFIX(arch_atomic_fetch_##op##name(int i, atomic_t *v))       \
+#define ATOMIC_FETCH_OP(name, mb, acq, rel, cl, op, asm_op, constraint) \
+static inline int                                                      \
+__ll_sc_atomic_fetch_##op##name(int i, atomic_t *v)                    \
 {                                                                      \
        unsigned long tmp;                                              \
        int val, result;                                                \
                                                                        \
        asm volatile("// atomic_fetch_" #op #name "\n"                  \
+       __LL_SC_FALLBACK(                                               \
 "      prfm    pstl1strm, %3\n"                                        \
 "1:    ld" #acq "xr    %w0, %3\n"                                      \
 "      " #asm_op "     %w1, %w0, %w4\n"                                \
 "      st" #rel "xr    %w2, %w1, %3\n"                                 \
 "      cbnz    %w2, 1b\n"                                              \
-"      " #mb                                                           \
+"      " #mb )                                                         \
        : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter)   \
        : #constraint "r" (i)                                           \
        : cl);                                                          \
                                                                        \
        return result;                                                  \
-}                                                                      \
-__LL_SC_EXPORT(arch_atomic_fetch_##op##name);
+}
 
 #define ATOMIC_OPS(...)                                                        \
        ATOMIC_OP(__VA_ARGS__)                                          \
 #undef ATOMIC_OP
 
 #define ATOMIC64_OP(op, asm_op, constraint)                            \
-__LL_SC_INLINE void                                                    \
-__LL_SC_PREFIX(arch_atomic64_##op(s64 i, atomic64_t *v))               \
+static inline void                                                     \
+__ll_sc_atomic64_##op(s64 i, atomic64_t *v)                            \
 {                                                                      \
        s64 result;                                                     \
        unsigned long tmp;                                              \
                                                                        \
        asm volatile("// atomic64_" #op "\n"                            \
+       __LL_SC_FALLBACK(                                               \
 "      prfm    pstl1strm, %2\n"                                        \
 "1:    ldxr    %0, %2\n"                                               \
 "      " #asm_op "     %0, %0, %3\n"                                   \
 "      stxr    %w1, %0, %2\n"                                          \
-"      cbnz    %w1, 1b"                                                \
+"      cbnz    %w1, 1b")                                               \
        : "=&r" (result), "=&r" (tmp), "+Q" (v->counter)                \
        : #constraint "r" (i));                                         \
-}                                                                      \
-__LL_SC_EXPORT(arch_atomic64_##op);
+}
 
 #define ATOMIC64_OP_RETURN(name, mb, acq, rel, cl, op, asm_op, constraint)\
-__LL_SC_INLINE s64                                                     \
-__LL_SC_PREFIX(arch_atomic64_##op##_return##name(s64 i, atomic64_t *v))\
+static inline long                                                     \
+__ll_sc_atomic64_##op##_return##name(s64 i, atomic64_t *v)             \
 {                                                                      \
        s64 result;                                                     \
        unsigned long tmp;                                              \
                                                                        \
        asm volatile("// atomic64_" #op "_return" #name "\n"            \
+       __LL_SC_FALLBACK(                                               \
 "      prfm    pstl1strm, %2\n"                                        \
 "1:    ld" #acq "xr    %0, %2\n"                                       \
 "      " #asm_op "     %0, %0, %3\n"                                   \
 "      st" #rel "xr    %w1, %0, %2\n"                                  \
 "      cbnz    %w1, 1b\n"                                              \
-"      " #mb                                                           \
+"      " #mb )                                                         \
        : "=&r" (result), "=&r" (tmp), "+Q" (v->counter)                \
        : #constraint "r" (i)                                           \
        : cl);                                                          \
                                                                        \
        return result;                                                  \
-}                                                                      \
-__LL_SC_EXPORT(arch_atomic64_##op##_return##name);
+}
 
 #define ATOMIC64_FETCH_OP(name, mb, acq, rel, cl, op, asm_op, constraint)\
-__LL_SC_INLINE s64                                                     \
-__LL_SC_PREFIX(arch_atomic64_fetch_##op##name(s64 i, atomic64_t *v))   \
+static inline long                                                     \
+__ll_sc_atomic64_fetch_##op##name(s64 i, atomic64_t *v)                \
 {                                                                      \
        s64 result, val;                                                \
        unsigned long tmp;                                              \
                                                                        \
        asm volatile("// atomic64_fetch_" #op #name "\n"                \
+       __LL_SC_FALLBACK(                                               \
 "      prfm    pstl1strm, %3\n"                                        \
 "1:    ld" #acq "xr    %0, %3\n"                                       \
 "      " #asm_op "     %1, %0, %4\n"                                   \
 "      st" #rel "xr    %w2, %1, %3\n"                                  \
 "      cbnz    %w2, 1b\n"                                              \
-"      " #mb                                                           \
+"      " #mb )                                                         \
        : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter)   \
        : #constraint "r" (i)                                           \
        : cl);                                                          \
                                                                        \
        return result;                                                  \
-}                                                                      \
-__LL_SC_EXPORT(arch_atomic64_fetch_##op##name);
+}
 
 #define ATOMIC64_OPS(...)                                              \
        ATOMIC64_OP(__VA_ARGS__)                                        \
 #undef ATOMIC64_OP_RETURN
 #undef ATOMIC64_OP
 
-__LL_SC_INLINE s64
-__LL_SC_PREFIX(arch_atomic64_dec_if_positive(atomic64_t *v))
+static inline s64
+__ll_sc_atomic64_dec_if_positive(atomic64_t *v)
 {
        s64 result;
        unsigned long tmp;
 
        asm volatile("// atomic64_dec_if_positive\n"
+       __LL_SC_FALLBACK(
 "      prfm    pstl1strm, %2\n"
 "1:    ldxr    %0, %2\n"
 "      subs    %0, %0, #1\n"
 "      stlxr   %w1, %0, %2\n"
 "      cbnz    %w1, 1b\n"
 "      dmb     ish\n"
-"2:"
+"2:")
        : "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
        :
        : "cc", "memory");
 
        return result;
 }
-__LL_SC_EXPORT(arch_atomic64_dec_if_positive);
 
 #define __CMPXCHG_CASE(w, sfx, name, sz, mb, acq, rel, cl, constraint) \
-__LL_SC_INLINE u##sz                                                   \
-__LL_SC_PREFIX(__cmpxchg_case_##name##sz(volatile void *ptr,           \
+static inline u##sz                                                    \
+__ll_sc__cmpxchg_case_##name##sz(volatile void *ptr,                   \
                                         unsigned long old,             \
-                                        u##sz new))                    \
+                                        u##sz new)                     \
 {                                                                      \
        unsigned long tmp;                                              \
        u##sz oldval;                                                   \
                old = (u##sz)old;                                       \
                                                                        \
        asm volatile(                                                   \
+       __LL_SC_FALLBACK(                                               \
        "       prfm    pstl1strm, %[v]\n"                              \
        "1:     ld" #acq "xr" #sfx "\t%" #w "[oldval], %[v]\n"          \
        "       eor     %" #w "[tmp], %" #w "[oldval], %" #w "[old]\n"  \
        "       st" #rel "xr" #sfx "\t%w[tmp], %" #w "[new], %[v]\n"    \
        "       cbnz    %w[tmp], 1b\n"                                  \
        "       " #mb "\n"                                              \
-       "2:"                                                            \
+       "2:")                                                           \
        : [tmp] "=&r" (tmp), [oldval] "=&r" (oldval),                   \
          [v] "+Q" (*(u##sz *)ptr)                                      \
        : [old] #constraint "r" (old), [new] "r" (new)                  \
        : cl);                                                          \
                                                                        \
        return oldval;                                                  \
-}                                                                      \
-__LL_SC_EXPORT(__cmpxchg_case_##name##sz);
+}
 
 /*
  * Earlier versions of GCC (no later than 8.1.0) appear to incorrectly
 #undef __CMPXCHG_CASE
 
 #define __CMPXCHG_DBL(name, mb, rel, cl)                               \
-__LL_SC_INLINE long                                                    \
-__LL_SC_PREFIX(__cmpxchg_double##name(unsigned long old1,              \
+static inline long                                                     \
+__ll_sc__cmpxchg_double##name(unsigned long old1,                      \
                                      unsigned long old2,               \
                                      unsigned long new1,               \
                                      unsigned long new2,               \
-                                     volatile void *ptr))              \
+                                     volatile void *ptr)               \
 {                                                                      \
        unsigned long tmp, ret;                                         \
                                                                        \
        asm volatile("// __cmpxchg_double" #name "\n"                   \
+       __LL_SC_FALLBACK(                                               \
        "       prfm    pstl1strm, %2\n"                                \
        "1:     ldxp    %0, %1, %2\n"                                   \
        "       eor     %0, %0, %3\n"                                   \
        "       st" #rel "xp    %w0, %5, %6, %2\n"                      \
        "       cbnz    %w0, 1b\n"                                      \
        "       " #mb "\n"                                              \
-       "2:"                                                            \
+       "2:")                                                           \
        : "=&r" (tmp), "=&r" (ret), "+Q" (*(unsigned long *)ptr)        \
        : "r" (old1), "r" (old2), "r" (new1), "r" (new2)                \
        : cl);                                                          \
                                                                        \
        return ret;                                                     \
-}                                                                      \
-__LL_SC_EXPORT(__cmpxchg_double##name);
+}
 
 __CMPXCHG_DBL(   ,        ,  ,         )
 __CMPXCHG_DBL(_mb, dmb ish, l, "memory")
 
 #ifndef __ASM_ATOMIC_LSE_H
 #define __ASM_ATOMIC_LSE_H
 
-#ifndef __ARM64_IN_ATOMIC_IMPL
-#error "please don't include this file directly"
-#endif
-
-#define __LL_SC_ATOMIC(op)     __LL_SC_CALL(arch_atomic_##op)
 #define ATOMIC_OP(op, asm_op)                                          \
-static inline void arch_atomic_##op(int i, atomic_t *v)                        \
+static inline void __lse_atomic_##op(int i, atomic_t *v)                       \
 {                                                                      \
-       register int w0 asm ("w0") = i;                                 \
-       register atomic_t *x1 asm ("x1") = v;                           \
-                                                                       \
-       asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(op),          \
-"      " #asm_op "     %w[i], %[v]\n")                                 \
-       : [i] "+r" (w0), [v] "+Q" (v->counter)                          \
-       : "r" (x1)                                                      \
-       : __LL_SC_CLOBBERS);                                            \
+       asm volatile(                                                   \
+"      " #asm_op "     %w[i], %[v]\n"                                  \
+       : [i] "+r" (i), [v] "+Q" (v->counter)                           \
+       : "r" (v));                                                     \
 }
 
 ATOMIC_OP(andnot, stclr)
 #undef ATOMIC_OP
 
 #define ATOMIC_FETCH_OP(name, mb, op, asm_op, cl...)                   \
-static inline int arch_atomic_fetch_##op##name(int i, atomic_t *v)     \
+static inline int __lse_atomic_fetch_##op##name(int i, atomic_t *v)    \
 {                                                                      \
-       register int w0 asm ("w0") = i;                                 \
-       register atomic_t *x1 asm ("x1") = v;                           \
-                                                                       \
-       asm volatile(ARM64_LSE_ATOMIC_INSN(                             \
-       /* LL/SC */                                                     \
-       __LL_SC_ATOMIC(fetch_##op##name),                               \
-       /* LSE atomics */                                               \
-"      " #asm_op #mb " %w[i], %w[i], %[v]")                            \
-       : [i] "+r" (w0), [v] "+Q" (v->counter)                          \
-       : "r" (x1)                                                      \
-       : __LL_SC_CLOBBERS, ##cl);                                      \
+       asm volatile(                                                   \
+"      " #asm_op #mb " %w[i], %w[i], %[v]"                             \
+       : [i] "+r" (i), [v] "+Q" (v->counter)                           \
+       : "r" (v)                                                       \
+       : cl);                                                          \
                                                                        \
-       return w0;                                                      \
+       return i;                                                       \
 }
 
 #define ATOMIC_FETCH_OPS(op, asm_op)                                   \
 #undef ATOMIC_FETCH_OPS
 
 #define ATOMIC_OP_ADD_RETURN(name, mb, cl...)                          \
-static inline int arch_atomic_add_return##name(int i, atomic_t *v)     \
+static inline int __lse_atomic_add_return##name(int i, atomic_t *v)    \
 {                                                                      \
-       register int w0 asm ("w0") = i;                                 \
-       register atomic_t *x1 asm ("x1") = v;                           \
-                                                                       \
-       asm volatile(ARM64_LSE_ATOMIC_INSN(                             \
-       /* LL/SC */                                                     \
-       __LL_SC_ATOMIC(add_return##name)                                \
-       __nops(1),                                                      \
-       /* LSE atomics */                                               \
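+       /* LDADD returns the old value in w30, hence the "x30" clobber. */ \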
+       asm volatile(                                                   \
        "       ldadd" #mb "    %w[i], w30, %[v]\n"                     \
-       "       add     %w[i], %w[i], w30")                             \
-       : [i] "+r" (w0), [v] "+Q" (v->counter)                          \
-       : "r" (x1)                                                      \
-       : __LL_SC_CLOBBERS, ##cl);                                      \
+       "       add     %w[i], %w[i], w30"                              \
+       : [i] "+r" (i), [v] "+Q" (v->counter)                           \
+       : "r" (v)                                                       \
+       : "x30", ##cl);                                                 \
                                                                        \
-       return w0;                                                      \
+       return i;                                                       \
 }
 
 ATOMIC_OP_ADD_RETURN(_relaxed,   )
 
 #undef ATOMIC_OP_ADD_RETURN
 
-static inline void arch_atomic_and(int i, atomic_t *v)
+static inline void __lse_atomic_and(int i, atomic_t *v)
 {
-       register int w0 asm ("w0") = i;
-       register atomic_t *x1 asm ("x1") = v;
-
-       asm volatile(ARM64_LSE_ATOMIC_INSN(
-       /* LL/SC */
-       __LL_SC_ATOMIC(and)
-       __nops(1),
-       /* LSE atomics */
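+       /*
+        * The LSE ISA has no atomic AND, so clear the complemented bits
+        * instead (MVN + STCLR).
+        */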
+       asm volatile(
        "       mvn     %w[i], %w[i]\n"
-       "       stclr   %w[i], %[v]")
-       : [i] "+&r" (w0), [v] "+Q" (v->counter)
-       : "r" (x1)
-       : __LL_SC_CLOBBERS);
+       "       stclr   %w[i], %[v]"
+       : [i] "+&r" (i), [v] "+Q" (v->counter)
+       : "r" (v));
 }
 
 #define ATOMIC_FETCH_OP_AND(name, mb, cl...)                           \
-static inline int arch_atomic_fetch_and##name(int i, atomic_t *v)      \
+static inline int __lse_atomic_fetch_and##name(int i, atomic_t *v)     \
 {                                                                      \
-       register int w0 asm ("w0") = i;                                 \
-       register atomic_t *x1 asm ("x1") = v;                           \
-                                                                       \
-       asm volatile(ARM64_LSE_ATOMIC_INSN(                             \
-       /* LL/SC */                                                     \
-       __LL_SC_ATOMIC(fetch_and##name)                                 \
-       __nops(1),                                                      \
-       /* LSE atomics */                                               \
+       asm volatile(                                                   \
        "       mvn     %w[i], %w[i]\n"                                 \
-       "       ldclr" #mb "    %w[i], %w[i], %[v]")                    \
-       : [i] "+&r" (w0), [v] "+Q" (v->counter)                         \
-       : "r" (x1)                                                      \
-       : __LL_SC_CLOBBERS, ##cl);                                      \
+       "       ldclr" #mb "    %w[i], %w[i], %[v]"                     \
+       : [i] "+&r" (i), [v] "+Q" (v->counter)                          \
+       : "r" (v)                                                       \
+       : cl);                                                          \
                                                                        \
-       return w0;                                                      \
+       return i;                                                       \
 }
 
 ATOMIC_FETCH_OP_AND(_relaxed,   )
 
 #undef ATOMIC_FETCH_OP_AND
 
-static inline void arch_atomic_sub(int i, atomic_t *v)
+static inline void __lse_atomic_sub(int i, atomic_t *v)
 {
-       register int w0 asm ("w0") = i;
-       register atomic_t *x1 asm ("x1") = v;
-
-       asm volatile(ARM64_LSE_ATOMIC_INSN(
-       /* LL/SC */
-       __LL_SC_ATOMIC(sub)
-       __nops(1),
-       /* LSE atomics */
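+       /* There is no STSUB: negate the operand and use STADD instead. */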
+       asm volatile(
        "       neg     %w[i], %w[i]\n"
-       "       stadd   %w[i], %[v]")
-       : [i] "+&r" (w0), [v] "+Q" (v->counter)
-       : "r" (x1)
-       : __LL_SC_CLOBBERS);
+       "       stadd   %w[i], %[v]"
+       : [i] "+&r" (i), [v] "+Q" (v->counter)
+       : "r" (v));
 }
 
 #define ATOMIC_OP_SUB_RETURN(name, mb, cl...)                          \
-static inline int arch_atomic_sub_return##name(int i, atomic_t *v)     \
+static inline int __lse_atomic_sub_return##name(int i, atomic_t *v)    \
 {                                                                      \
-       register int w0 asm ("w0") = i;                                 \
-       register atomic_t *x1 asm ("x1") = v;                           \
-                                                                       \
-       asm volatile(ARM64_LSE_ATOMIC_INSN(                             \
-       /* LL/SC */                                                     \
-       __LL_SC_ATOMIC(sub_return##name)                                \
-       __nops(2),                                                      \
-       /* LSE atomics */                                               \
+       asm volatile(                                                   \
        "       neg     %w[i], %w[i]\n"                                 \
        "       ldadd" #mb "    %w[i], w30, %[v]\n"                     \
-       "       add     %w[i], %w[i], w30")                             \
-       : [i] "+&r" (w0), [v] "+Q" (v->counter)                         \
-       : "r" (x1)                                                      \
-       : __LL_SC_CLOBBERS , ##cl);                                     \
+       "       add     %w[i], %w[i], w30"                              \
+       : [i] "+&r" (i), [v] "+Q" (v->counter)                          \
+       : "r" (v)                                                       \
+       : "x30", ##cl);                                                 \
                                                                        \
-       return w0;                                                      \
+       return i;                                                       \
 }
 
 ATOMIC_OP_SUB_RETURN(_relaxed,   )
 #undef ATOMIC_OP_SUB_RETURN
 
 #define ATOMIC_FETCH_OP_SUB(name, mb, cl...)                           \
-static inline int arch_atomic_fetch_sub##name(int i, atomic_t *v)      \
+static inline int __lse_atomic_fetch_sub##name(int i, atomic_t *v)     \
 {                                                                      \
-       register int w0 asm ("w0") = i;                                 \
-       register atomic_t *x1 asm ("x1") = v;                           \
-                                                                       \
-       asm volatile(ARM64_LSE_ATOMIC_INSN(                             \
-       /* LL/SC */                                                     \
-       __LL_SC_ATOMIC(fetch_sub##name)                                 \
-       __nops(1),                                                      \
-       /* LSE atomics */                                               \
+       asm volatile(                                                   \
        "       neg     %w[i], %w[i]\n"                                 \
-       "       ldadd" #mb "    %w[i], %w[i], %[v]")                    \
-       : [i] "+&r" (w0), [v] "+Q" (v->counter)                         \
-       : "r" (x1)                                                      \
-       : __LL_SC_CLOBBERS, ##cl);                                      \
+       "       ldadd" #mb "    %w[i], %w[i], %[v]"                     \
+       : [i] "+&r" (i), [v] "+Q" (v->counter)                          \
+       : "r" (v)                                                       \
+       : cl);                                                          \
                                                                        \
-       return w0;                                                      \
+       return i;                                                       \
 }
 
 ATOMIC_FETCH_OP_SUB(_relaxed,   )
 ATOMIC_FETCH_OP_SUB(        , al, "memory")
 
 #undef ATOMIC_FETCH_OP_SUB
-#undef __LL_SC_ATOMIC
 
-#define __LL_SC_ATOMIC64(op)   __LL_SC_CALL(arch_atomic64_##op)
 #define ATOMIC64_OP(op, asm_op)                                                \
-static inline void arch_atomic64_##op(s64 i, atomic64_t *v)            \
+static inline void __lse_atomic64_##op(s64 i, atomic64_t *v)           \
 {                                                                      \
-       register s64 x0 asm ("x0") = i;                                 \
-       register atomic64_t *x1 asm ("x1") = v;                         \
-                                                                       \
-       asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(op),        \
-"      " #asm_op "     %[i], %[v]\n")                                  \
-       : [i] "+r" (x0), [v] "+Q" (v->counter)                          \
-       : "r" (x1)                                                      \
-       : __LL_SC_CLOBBERS);                                            \
+       asm volatile(                                                   \
+"      " #asm_op "     %[i], %[v]\n"                                   \
+       : [i] "+r" (i), [v] "+Q" (v->counter)                           \
+       : "r" (v));                                                     \
 }
 
 ATOMIC64_OP(andnot, stclr)
 #undef ATOMIC64_OP
 
 #define ATOMIC64_FETCH_OP(name, mb, op, asm_op, cl...)                 \
-static inline s64 arch_atomic64_fetch_##op##name(s64 i, atomic64_t *v) \
+static inline long __lse_atomic64_fetch_##op##name(s64 i, atomic64_t *v)\
 {                                                                      \
-       register s64 x0 asm ("x0") = i;                                 \
-       register atomic64_t *x1 asm ("x1") = v;                         \
-                                                                       \
-       asm volatile(ARM64_LSE_ATOMIC_INSN(                             \
-       /* LL/SC */                                                     \
-       __LL_SC_ATOMIC64(fetch_##op##name),                             \
-       /* LSE atomics */                                               \
-"      " #asm_op #mb " %[i], %[i], %[v]")                              \
-       : [i] "+r" (x0), [v] "+Q" (v->counter)                          \
-       : "r" (x1)                                                      \
-       : __LL_SC_CLOBBERS, ##cl);                                      \
+       asm volatile(                                                   \
+"      " #asm_op #mb " %[i], %[i], %[v]"                               \
+       : [i] "+r" (i), [v] "+Q" (v->counter)                           \
+       : "r" (v)                                                       \
+       : cl);                                                          \
                                                                        \
-       return x0;                                                      \
+       return i;                                                       \
 }
 
 #define ATOMIC64_FETCH_OPS(op, asm_op)                                 \
 #undef ATOMIC64_FETCH_OPS
 
 #define ATOMIC64_OP_ADD_RETURN(name, mb, cl...)                                \
-static inline s64 arch_atomic64_add_return##name(s64 i, atomic64_t *v) \
+static inline long __lse_atomic64_add_return##name(s64 i, atomic64_t *v)\
 {                                                                      \
-       register s64 x0 asm ("x0") = i;                                 \
-       register atomic64_t *x1 asm ("x1") = v;                         \
-                                                                       \
-       asm volatile(ARM64_LSE_ATOMIC_INSN(                             \
-       /* LL/SC */                                                     \
-       __LL_SC_ATOMIC64(add_return##name)                              \
-       __nops(1),                                                      \
-       /* LSE atomics */                                               \
+       asm volatile(                                                   \
        "       ldadd" #mb "    %[i], x30, %[v]\n"                      \
-       "       add     %[i], %[i], x30")                               \
-       : [i] "+r" (x0), [v] "+Q" (v->counter)                          \
-       : "r" (x1)                                                      \
-       : __LL_SC_CLOBBERS, ##cl);                                      \
+       "       add     %[i], %[i], x30"                                \
+       : [i] "+r" (i), [v] "+Q" (v->counter)                           \
+       : "r" (v)                                                       \
+       : "x30", ##cl);                                                 \
                                                                        \
-       return x0;                                                      \
+       return i;                                                       \
 }
 
 ATOMIC64_OP_ADD_RETURN(_relaxed,   )
 
 #undef ATOMIC64_OP_ADD_RETURN
 
-static inline void arch_atomic64_and(s64 i, atomic64_t *v)
+static inline void __lse_atomic64_and(s64 i, atomic64_t *v)
 {
-       register s64 x0 asm ("x0") = i;
-       register atomic64_t *x1 asm ("x1") = v;
-
-       asm volatile(ARM64_LSE_ATOMIC_INSN(
-       /* LL/SC */
-       __LL_SC_ATOMIC64(and)
-       __nops(1),
-       /* LSE atomics */
+       asm volatile(
        "       mvn     %[i], %[i]\n"
-       "       stclr   %[i], %[v]")
-       : [i] "+&r" (x0), [v] "+Q" (v->counter)
-       : "r" (x1)
-       : __LL_SC_CLOBBERS);
+       "       stclr   %[i], %[v]"
+       : [i] "+&r" (i), [v] "+Q" (v->counter)
+       : "r" (v));
 }
 
 #define ATOMIC64_FETCH_OP_AND(name, mb, cl...)                         \
-static inline s64 arch_atomic64_fetch_and##name(s64 i, atomic64_t *v)  \
+static inline long __lse_atomic64_fetch_and##name(s64 i, atomic64_t *v)        \
 {                                                                      \
-       register s64 x0 asm ("x0") = i;                                 \
-       register atomic64_t *x1 asm ("x1") = v;                         \
-                                                                       \
-       asm volatile(ARM64_LSE_ATOMIC_INSN(                             \
-       /* LL/SC */                                                     \
-       __LL_SC_ATOMIC64(fetch_and##name)                               \
-       __nops(1),                                                      \
-       /* LSE atomics */                                               \
+       asm volatile(                                                   \
        "       mvn     %[i], %[i]\n"                                   \
-       "       ldclr" #mb "    %[i], %[i], %[v]")                      \
-       : [i] "+&r" (x0), [v] "+Q" (v->counter)                         \
-       : "r" (x1)                                                      \
-       : __LL_SC_CLOBBERS, ##cl);                                      \
+       "       ldclr" #mb "    %[i], %[i], %[v]"                       \
+       : [i] "+&r" (i), [v] "+Q" (v->counter)                          \
+       : "r" (v)                                                       \
+       : cl);                                                          \
                                                                        \
-       return x0;                                                      \
+       return i;                                                       \
 }
 
 ATOMIC64_FETCH_OP_AND(_relaxed,   )
 
 #undef ATOMIC64_FETCH_OP_AND
 
-static inline void arch_atomic64_sub(s64 i, atomic64_t *v)
+static inline void __lse_atomic64_sub(s64 i, atomic64_t *v)
 {
-       register s64 x0 asm ("x0") = i;
-       register atomic64_t *x1 asm ("x1") = v;
-
-       asm volatile(ARM64_LSE_ATOMIC_INSN(
-       /* LL/SC */
-       __LL_SC_ATOMIC64(sub)
-       __nops(1),
-       /* LSE atomics */
+       asm volatile(
        "       neg     %[i], %[i]\n"
-       "       stadd   %[i], %[v]")
-       : [i] "+&r" (x0), [v] "+Q" (v->counter)
-       : "r" (x1)
-       : __LL_SC_CLOBBERS);
+       "       stadd   %[i], %[v]"
+       : [i] "+&r" (i), [v] "+Q" (v->counter)
+       : "r" (v));
 }
 
 #define ATOMIC64_OP_SUB_RETURN(name, mb, cl...)                                \
-static inline s64 arch_atomic64_sub_return##name(s64 i, atomic64_t *v) \
+static inline long __lse_atomic64_sub_return##name(s64 i, atomic64_t *v)       \
 {                                                                      \
-       register s64 x0 asm ("x0") = i;                                 \
-       register atomic64_t *x1 asm ("x1") = v;                         \
-                                                                       \
-       asm volatile(ARM64_LSE_ATOMIC_INSN(                             \
-       /* LL/SC */                                                     \
-       __LL_SC_ATOMIC64(sub_return##name)                              \
-       __nops(2),                                                      \
-       /* LSE atomics */                                               \
+       asm volatile(                                                   \
        "       neg     %[i], %[i]\n"                                   \
        "       ldadd" #mb "    %[i], x30, %[v]\n"                      \
-       "       add     %[i], %[i], x30")                               \
-       : [i] "+&r" (x0), [v] "+Q" (v->counter)                         \
-       : "r" (x1)                                                      \
-       : __LL_SC_CLOBBERS, ##cl);                                      \
+       "       add     %[i], %[i], x30"                                \
+       : [i] "+&r" (i), [v] "+Q" (v->counter)                          \
+       : "r" (v)                                                       \
+       : "x30", ##cl);                                                 \
                                                                        \
-       return x0;                                                      \
+       return i;                                                       \
 }
 
 ATOMIC64_OP_SUB_RETURN(_relaxed,   )
 #undef ATOMIC64_OP_SUB_RETURN
 
 #define ATOMIC64_FETCH_OP_SUB(name, mb, cl...)                         \
-static inline s64 arch_atomic64_fetch_sub##name(s64 i, atomic64_t *v)  \
+static inline long __lse_atomic64_fetch_sub##name(s64 i, atomic64_t *v)        \
 {                                                                      \
-       register s64 x0 asm ("x0") = i;                                 \
-       register atomic64_t *x1 asm ("x1") = v;                         \
-                                                                       \
-       asm volatile(ARM64_LSE_ATOMIC_INSN(                             \
-       /* LL/SC */                                                     \
-       __LL_SC_ATOMIC64(fetch_sub##name)                               \
-       __nops(1),                                                      \
-       /* LSE atomics */                                               \
+       asm volatile(                                                   \
        "       neg     %[i], %[i]\n"                                   \
-       "       ldadd" #mb "    %[i], %[i], %[v]")                      \
-       : [i] "+&r" (x0), [v] "+Q" (v->counter)                         \
-       : "r" (x1)                                                      \
-       : __LL_SC_CLOBBERS, ##cl);                                      \
+       "       ldadd" #mb "    %[i], %[i], %[v]"                       \
+       : [i] "+&r" (i), [v] "+Q" (v->counter)                          \
+       : "r" (v)                                                       \
+       : cl);                                                          \
                                                                        \
-       return x0;                                                      \
+       return i;                                                       \
 }
 
 ATOMIC64_FETCH_OP_SUB(_relaxed,   )
 
 #undef ATOMIC64_FETCH_OP_SUB
 
-static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
+static inline s64 __lse_atomic64_dec_if_positive(atomic64_t *v)
 {
-       register long x0 asm ("x0") = (long)v;
-
-       asm volatile(ARM64_LSE_ATOMIC_INSN(
-       /* LL/SC */
-       __LL_SC_ATOMIC64(dec_if_positive)
-       __nops(6),
-       /* LSE atomics */
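+       /*
+        * x30 (LR) is used as a scratch register here, so it must be listed
+        * in the clobbers now that this code is inlined into the caller.
+        */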
+       asm volatile(
        "1:     ldr     x30, %[v]\n"
        "       subs    %[ret], x30, #1\n"
        "       b.lt    2f\n"
        "       sub     x30, x30, #1\n"
        "       sub     x30, x30, %[ret]\n"
        "       cbnz    x30, 1b\n"
-       "2:")
-       : [ret] "+&r" (x0), [v] "+Q" (v->counter)
+       "2:"
+       : [ret] "+&r" (v), [v] "+Q" (v->counter)
        :
-       : __LL_SC_CLOBBERS, "cc", "memory");
+       : "x30", "cc", "memory");
 
-       return x0;
+       return (long)v;
 }
 
-#undef __LL_SC_ATOMIC64
-
-#define __LL_SC_CMPXCHG(op)    __LL_SC_CALL(__cmpxchg_case_##op)
-
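+/*
+ * CAS overwrites its comparison register with the value read from memory,
+ * so stage "old" in w30/x30 and copy the result out to [ret].
+ */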
 #define __CMPXCHG_CASE(w, sfx, name, sz, mb, cl...)                    \
-static inline u##sz __cmpxchg_case_##name##sz(volatile void *ptr,      \
+static inline u##sz __lse__cmpxchg_case_##name##sz(volatile void *ptr, \
                                              u##sz old,                \
                                              u##sz new)                \
 {                                                                      \
        register u##sz x1 asm ("x1") = old;                             \
        register u##sz x2 asm ("x2") = new;                             \
                                                                        \
-       asm volatile(ARM64_LSE_ATOMIC_INSN(                             \
-       /* LL/SC */                                                     \
-       __LL_SC_CMPXCHG(name##sz)                                       \
-       __nops(2),                                                      \
-       /* LSE atomics */                                               \
+       asm volatile(                                                   \
        "       mov     " #w "30, %" #w "[old]\n"                       \
        "       cas" #mb #sfx "\t" #w "30, %" #w "[new], %[v]\n"        \
-       "       mov     %" #w "[ret], " #w "30")                        \
+       "       mov     %" #w "[ret], " #w "30"                         \
        : [ret] "+r" (x0), [v] "+Q" (*(unsigned long *)ptr)             \
        : [old] "r" (x1), [new] "r" (x2)                                \
-       : __LL_SC_CLOBBERS, ##cl);                                      \
+       : "x30", ##cl);                                                 \
                                                                        \
        return x0;                                                      \
 }
 __CMPXCHG_CASE(w,  ,  mb_, 32, al, "memory")
 __CMPXCHG_CASE(x,  ,  mb_, 64, al, "memory")
 
-#undef __LL_SC_CMPXCHG
 #undef __CMPXCHG_CASE
 
-#define __LL_SC_CMPXCHG_DBL(op)        __LL_SC_CALL(__cmpxchg_double##op)
-
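+/*
+ * CASP requires the compare and new values in consecutive register pairs
+ * starting at an even register, hence the explicit x0-x4 bindings. The
+ * EOR/EOR/ORR sequence leaves zero in [old1] only if both words matched.
+ */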
 #define __CMPXCHG_DBL(name, mb, cl...)                                 \
-static inline long __cmpxchg_double##name(unsigned long old1,          \
+static inline long __lse__cmpxchg_double##name(unsigned long old1,     \
                                         unsigned long old2,            \
                                         unsigned long new1,            \
                                         unsigned long new2,            \
        register unsigned long x3 asm ("x3") = new2;                    \
        register unsigned long x4 asm ("x4") = (unsigned long)ptr;      \
                                                                        \
-       asm volatile(ARM64_LSE_ATOMIC_INSN(                             \
-       /* LL/SC */                                                     \
-       __LL_SC_CMPXCHG_DBL(name)                                       \
-       __nops(3),                                                      \
-       /* LSE atomics */                                               \
+       asm volatile(                                                   \
        "       casp" #mb "\t%[old1], %[old2], %[new1], %[new2], %[v]\n"\
        "       eor     %[old1], %[old1], %[oldval1]\n"                 \
        "       eor     %[old2], %[old2], %[oldval2]\n"                 \
-       "       orr     %[old1], %[old1], %[old2]")                     \
+       "       orr     %[old1], %[old1], %[old2]"                      \
        : [old1] "+&r" (x0), [old2] "+&r" (x1),                         \
          [v] "+Q" (*(unsigned long *)ptr)                              \
        : [new1] "r" (x2), [new2] "r" (x3), [ptr] "r" (x4),             \
          [oldval1] "r" (oldval1), [oldval2] "r" (oldval2)              \
-       : __LL_SC_CLOBBERS, ##cl);                                      \
+       : cl);                                                          \
                                                                        \
        return x0;                                                      \
 }
 __CMPXCHG_DBL(   ,   )
 __CMPXCHG_DBL(_mb, al, "memory")
 
-#undef __LL_SC_CMPXCHG_DBL
 #undef __CMPXCHG_DBL
 
 #endif /* __ASM_ATOMIC_LSE_H */