* (the optimize attribute silently ignores these options).
  */
 
-#define ATOMIC_OP(op, asm_op)                                          \
+#define ATOMIC_OP(op, asm_op, constraint)                              \
 __LL_SC_INLINE void                                                    \
 __LL_SC_PREFIX(arch_atomic_##op(int i, atomic_t *v))                   \
 {                                                                      \
 "      stxr    %w1, %w0, %2\n"                                         \
 "      cbnz    %w1, 1b"                                                \
        : "=&r" (result), "=&r" (tmp), "+Q" (v->counter)                \
-       : "Ir" (i));                                                    \
+       : #constraint "r" (i));                                         \
 }                                                                      \
 __LL_SC_EXPORT(arch_atomic_##op);
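
The constraint plumbing works by C string-literal concatenation: #constraint
stringizes the new macro argument, so the operand list becomes "I" "r" (i),
which the compiler sees as the old "Ir" (i); an empty argument degenerates to
plain "r" (i). As a hand-expanded sketch of ATOMIC_OP(add, add, I) - the
declarations and the top of the asm body are elided from the hunk above and
reconstructed here from context:

    __LL_SC_INLINE void
    __LL_SC_PREFIX(arch_atomic_add(int i, atomic_t *v))
    {
            unsigned long tmp;
            int result;

            asm volatile("// atomic_add\n"
            "       prfm    pstl1strm, %2\n"
            "1:     ldxr    %w0, %2\n"
            "       add     %w0, %w0, %w3\n"
            "       stxr    %w1, %w0, %2\n"
            "       cbnz    %w1, 1b"
            : "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
            : "I" "r" (i));         /* concatenates to the old "Ir" */
    }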
 
-#define ATOMIC_OP_RETURN(name, mb, acq, rel, cl, op, asm_op)           \
+#define ATOMIC_OP_RETURN(name, mb, acq, rel, cl, op, asm_op, constraint)\
 __LL_SC_INLINE int                                                     \
 __LL_SC_PREFIX(arch_atomic_##op##_return##name(int i, atomic_t *v))    \
 {                                                                      \
 "      cbnz    %w1, 1b\n"                                              \
 "      " #mb                                                           \
        : "=&r" (result), "=&r" (tmp), "+Q" (v->counter)                \
-       : "Ir" (i)                                                      \
+       : #constraint "r" (i)                                           \
        : cl);                                                          \
                                                                        \
        return result;                                                  \
 }                                                                      \
 __LL_SC_EXPORT(arch_atomic_##op##_return##name);
 
-#define ATOMIC_FETCH_OP(name, mb, acq, rel, cl, op, asm_op)            \
+#define ATOMIC_FETCH_OP(name, mb, acq, rel, cl, op, asm_op, constraint)        \
 __LL_SC_INLINE int                                                     \
 __LL_SC_PREFIX(arch_atomic_fetch_##op##name(int i, atomic_t *v))       \
 {                                                                      \
 "      cbnz    %w2, 1b\n"                                              \
 "      " #mb                                                           \
        : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter)   \
-       : "Ir" (i)                                                      \
+       : #constraint "r" (i)                                           \
        : cl);                                                          \
                                                                        \
        return result;                                                  \
        ATOMIC_FETCH_OP (_acquire,        , a,  , "memory", __VA_ARGS__)\
        ATOMIC_FETCH_OP (_release,        ,  , l, "memory", __VA_ARGS__)
 
-ATOMIC_OPS(add, add)
-ATOMIC_OPS(sub, sub)
+ATOMIC_OPS(add, add, I)
+ATOMIC_OPS(sub, sub, J)
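
The split follows the A64 immediate encodings: 'I' matches constants that are
valid as an add immediate (a 12-bit value, optionally shifted left by 12),
while 'J' matches constants whose negation is such a value, i.e. those usable
with sub. Anything matching neither letter simply falls back to the "r"
alternative. A standalone illustration of that matching (not from the patch;
assumes an aarch64 compiler):

    #include <stdio.h>

    /* 42 satisfies 'I', so the literal is substituted directly and the
     * compiler emits "add w0, w0, 42". */
    static inline int add_imm(int x)
    {
            asm("add %w0, %w0, %w1" : "+r" (x) : "Ir" (42));
            return x;
    }

    /* A value known only at run time can never match 'I'; it is forced
     * into a register and the same template emits "add w0, w0, w<n>". */
    static inline int add_reg(int x, int y)
    {
            asm("add %w0, %w0, %w1" : "+r" (x) : "Ir" (y));
            return x;
    }

    int main(void)
    {
            printf("%d %d\n", add_imm(0), add_reg(0, 42));
            return 0;
    }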
 
 #undef ATOMIC_OPS
 #define ATOMIC_OPS(...)                                                        \
        ATOMIC_FETCH_OP (_acquire,        , a,  , "memory", __VA_ARGS__)\
        ATOMIC_FETCH_OP (_release,        ,  , l, "memory", __VA_ARGS__)
 
-ATOMIC_OPS(and, and)
-ATOMIC_OPS(andnot, bic)
-ATOMIC_OPS(or, orr)
-ATOMIC_OPS(xor, eor)
+ATOMIC_OPS(and, and, )
+ATOMIC_OPS(andnot, bic, )
+ATOMIC_OPS(or, orr, )
+ATOMIC_OPS(xor, eor, )
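
The bitwise ops are where the old blanket "Ir" could actually break the
build: A64 logical instructions take bitmask immediates (a rotated run of set
bits), a set that only partly overlaps the add/sub range, so GCC was free to
substitute a constant that GAS then refuses. The natural constraint would be
'K' (valid 32-bit bitmask immediate), but because of the compiler bug
described before the __CMPXCHG_CASE instantiations further down, the 32-bit
ops take no immediate constraint at all and the operand always lives in a
register. A hypothetical reproducer of the old breakage (not part of the
patch; build with an aarch64 gcc):

    /* 4093 (0xffd = 0b111111111101) satisfies 'I' (it is <= 4095) but
     * its set bits are not contiguous, so it has no bitmask encoding;
     * GCC substitutes the literal and GAS rejects "and w0, w0, 4093". */
    void bad_and(unsigned int *p)
    {
            unsigned int v = *p;

            asm("and %w0, %w0, %w1" : "+r" (v) : "Ir" (4093));
            *p = v;
    }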
 
 #undef ATOMIC_OPS
 #undef ATOMIC_FETCH_OP
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-#define ATOMIC64_OP(op, asm_op)                                                \
+#define ATOMIC64_OP(op, asm_op, constraint)                            \
 __LL_SC_INLINE void                                                    \
 __LL_SC_PREFIX(arch_atomic64_##op(s64 i, atomic64_t *v))               \
 {                                                                      \
 "      stxr    %w1, %0, %2\n"                                          \
 "      cbnz    %w1, 1b"                                                \
        : "=&r" (result), "=&r" (tmp), "+Q" (v->counter)                \
-       : "Ir" (i));                                                    \
+       : #constraint "r" (i));                                         \
 }                                                                      \
 __LL_SC_EXPORT(arch_atomic64_##op);
 
-#define ATOMIC64_OP_RETURN(name, mb, acq, rel, cl, op, asm_op)         \
+#define ATOMIC64_OP_RETURN(name, mb, acq, rel, cl, op, asm_op, constraint)\
 __LL_SC_INLINE s64                                                     \
 __LL_SC_PREFIX(arch_atomic64_##op##_return##name(s64 i, atomic64_t *v))\
 {                                                                      \
 "      cbnz    %w1, 1b\n"                                              \
 "      " #mb                                                           \
        : "=&r" (result), "=&r" (tmp), "+Q" (v->counter)                \
-       : "Ir" (i)                                                      \
+       : #constraint "r" (i)                                           \
        : cl);                                                          \
                                                                        \
        return result;                                                  \
 }                                                                      \
 __LL_SC_EXPORT(arch_atomic64_##op##_return##name);
 
-#define ATOMIC64_FETCH_OP(name, mb, acq, rel, cl, op, asm_op)          \
+#define ATOMIC64_FETCH_OP(name, mb, acq, rel, cl, op, asm_op, constraint)\
 __LL_SC_INLINE s64                                                     \
 __LL_SC_PREFIX(arch_atomic64_fetch_##op##name(s64 i, atomic64_t *v))   \
 {                                                                      \
 "      cbnz    %w2, 1b\n"                                              \
 "      " #mb                                                           \
        : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter)   \
-       : "Ir" (i)                                                      \
+       : #constraint "r" (i)                                           \
        : cl);                                                          \
                                                                        \
        return result;                                                  \
        ATOMIC64_FETCH_OP (_acquire,, a,  , "memory", __VA_ARGS__)      \
        ATOMIC64_FETCH_OP (_release,,  , l, "memory", __VA_ARGS__)
 
-ATOMIC64_OPS(add, add)
-ATOMIC64_OPS(sub, sub)
+ATOMIC64_OPS(add, add, I)
+ATOMIC64_OPS(sub, sub, J)
 
 #undef ATOMIC64_OPS
 #define ATOMIC64_OPS(...)                                              \
        ATOMIC64_FETCH_OP (_acquire,, a,  , "memory", __VA_ARGS__)      \
        ATOMIC64_FETCH_OP (_release,,  , l, "memory", __VA_ARGS__)
 
-ATOMIC64_OPS(and, and)
-ATOMIC64_OPS(andnot, bic)
-ATOMIC64_OPS(or, orr)
-ATOMIC64_OPS(xor, eor)
+ATOMIC64_OPS(and, and, L)
+ATOMIC64_OPS(andnot, bic, )
+ATOMIC64_OPS(or, orr, L)
+ATOMIC64_OPS(xor, eor, L)
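
Unlike their 32-bit counterparts, the 64-bit logical ops keep an immediate
alternative: 'L' (valid 64-bit bitmask immediate) is not affected by the GCC
bug that rules out 'K'. The odd one out at both widths is andnot: A64 has no
BIC (immediate) encoding, so its operand can only ever be a register, which
is exactly what the empty constraint enforces. A small sketch of both points
(not from the patch; assumes an aarch64 compiler):

    /* 255 is a run of eight set bits and therefore a valid 64-bit
     * bitmask immediate; 'L' matches and "orr x0, x0, 255" is emitted. */
    static inline void set_low_byte(unsigned long *p)
    {
            asm("orr %x0, %x0, %x1" : "+r" (*p) : "Lr" (255UL));
    }

    /* BIC has no immediate form, so the mask must be pinned to a
     * register, mirroring the empty constraint used for andnot above. */
    static inline void clear_mask(unsigned long *p, unsigned long mask)
    {
            asm("bic %x0, %x0, %x1" : "+r" (*p) : "r" (mask));
    }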
 
 #undef ATOMIC64_OPS
 #undef ATOMIC64_FETCH_OP
 }
 __LL_SC_EXPORT(arch_atomic64_dec_if_positive);
 
-#define __CMPXCHG_CASE(w, sfx, name, sz, mb, acq, rel, cl)             \
+#define __CMPXCHG_CASE(w, sfx, name, sz, mb, acq, rel, cl, constraint) \
 __LL_SC_INLINE u##sz                                                   \
 __LL_SC_PREFIX(__cmpxchg_case_##name##sz(volatile void *ptr,           \
                                         unsigned long old,             \
        "2:"                                                            \
        : [tmp] "=&r" (tmp), [oldval] "=&r" (oldval),                   \
          [v] "+Q" (*(u##sz *)ptr)                                      \
-       : [old] "Kr" (old), [new] "r" (new)                             \
+       : [old] #constraint "r" (old), [new] "r" (new)                  \
        : cl);                                                          \
                                                                        \
        return oldval;                                                  \
 }                                                                      \
 __LL_SC_EXPORT(__cmpxchg_case_##name##sz);
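
__cmpxchg_case previously pinned old to "Kr" for every size, which was wrong
twice over: the comparison in the elided part of the body is performed with
an eor, so the logical bitmask rules decide which constants are encodable,
and a 32-bit 'K' says nothing about what a 64-bit eor accepts. The new
parameter lets the 64-bit instantiations below use 'L' instead. A minimal
sketch of the 64-bit case (not from the patch; assumes an aarch64 compiler):

    /* 255 is a valid 64-bit bitmask immediate, so with "Lr" the
     * compiler may emit "eor x0, x0, 255" rather than burning a
     * scratch register on the constant. */
    static inline unsigned long flip_low_byte(unsigned long x)
    {
            asm("eor %x0, %x0, %x1" : "+r" (x) : "Lr" (255UL));
            return x;
    }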
 
-__CMPXCHG_CASE(w, b,     ,  8,        ,  ,  ,         )
-__CMPXCHG_CASE(w, h,     , 16,        ,  ,  ,         )
-__CMPXCHG_CASE(w,  ,     , 32,        ,  ,  ,         )
-__CMPXCHG_CASE( ,  ,     , 64,        ,  ,  ,         )
-__CMPXCHG_CASE(w, b, acq_,  8,        , a,  , "memory")
-__CMPXCHG_CASE(w, h, acq_, 16,        , a,  , "memory")
-__CMPXCHG_CASE(w,  , acq_, 32,        , a,  , "memory")
-__CMPXCHG_CASE( ,  , acq_, 64,        , a,  , "memory")
-__CMPXCHG_CASE(w, b, rel_,  8,        ,  , l, "memory")
-__CMPXCHG_CASE(w, h, rel_, 16,        ,  , l, "memory")
-__CMPXCHG_CASE(w,  , rel_, 32,        ,  , l, "memory")
-__CMPXCHG_CASE( ,  , rel_, 64,        ,  , l, "memory")
-__CMPXCHG_CASE(w, b,  mb_,  8, dmb ish,  , l, "memory")
-__CMPXCHG_CASE(w, h,  mb_, 16, dmb ish,  , l, "memory")
-__CMPXCHG_CASE(w,  ,  mb_, 32, dmb ish,  , l, "memory")
-__CMPXCHG_CASE( ,  ,  mb_, 64, dmb ish,  , l, "memory")
+/*
+ * GCC versions up to and including 8.1.0 appear to mishandle the 'K'
+ * constraint for the value 4294967295 (0xffffffff): the constant is
+ * accepted at compile time but the resulting immediate is then
+ * rejected by GAS at assembly time. We therefore use no constraint
+ * for 32-bit operations.
+ */
+__CMPXCHG_CASE(w, b,     ,  8,        ,  ,  ,         , )
+__CMPXCHG_CASE(w, h,     , 16,        ,  ,  ,         , )
+__CMPXCHG_CASE(w,  ,     , 32,        ,  ,  ,         , )
+__CMPXCHG_CASE( ,  ,     , 64,        ,  ,  ,         , L)
+__CMPXCHG_CASE(w, b, acq_,  8,        , a,  , "memory", )
+__CMPXCHG_CASE(w, h, acq_, 16,        , a,  , "memory", )
+__CMPXCHG_CASE(w,  , acq_, 32,        , a,  , "memory", )
+__CMPXCHG_CASE( ,  , acq_, 64,        , a,  , "memory", L)
+__CMPXCHG_CASE(w, b, rel_,  8,        ,  , l, "memory", )
+__CMPXCHG_CASE(w, h, rel_, 16,        ,  , l, "memory", )
+__CMPXCHG_CASE(w,  , rel_, 32,        ,  , l, "memory", )
+__CMPXCHG_CASE( ,  , rel_, 64,        ,  , l, "memory", L)
+__CMPXCHG_CASE(w, b,  mb_,  8, dmb ish,  , l, "memory", )
+__CMPXCHG_CASE(w, h,  mb_, 16, dmb ish,  , l, "memory", )
+__CMPXCHG_CASE(w,  ,  mb_, 32, dmb ish,  , l, "memory", )
+__CMPXCHG_CASE( ,  ,  mb_, 64, dmb ish,  , l, "memory", L)
 
 #undef __CMPXCHG_CASE
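
The toolchain bug behind the comment above reduces to a short reproducer
(hypothetical, not part of the patch): affected GCC releases let 4294967295
satisfy 'K' even though all-ones has no bitmask encoding, so the failure
surfaces in GAS rather than in the compiler. A fixed compiler rejects the
'K' match and quietly falls back to the "r" alternative.

    /* On GCC no later than 8.1.0 this may assemble to
     * "and w0, w0, 4294967295", which GAS rejects (0xffffffff is not a
     * valid 32-bit bitmask immediate); fixed compilers put the constant
     * in a register instead. */
    unsigned int k_constraint_bug(unsigned int x)
    {
            asm("and %w0, %w0, %w1" : "+r" (x) : "Kr" (0xffffffffu));
            return x;
    }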