futex: Add sys_futex_wait()
author: peterz@infradead.org <peterz@infradead.org>
Thu, 21 Sep 2023 10:45:12 +0000 (12:45 +0200)
committer: Peter Zijlstra <peterz@infradead.org>
Thu, 21 Sep 2023 17:22:08 +0000 (19:22 +0200)
To complement sys_futex_waitv()/wake(), add sys_futex_wait(). This
syscall implements what was previously known as FUTEX_WAIT_BITSET
except it uses 'unsigned long' for the value and bitmask arguments,
takes timespec and clockid_t arguments for the absolute timeout and
uses FUTEX2 flags.

The 'unsigned long' allows FUTEX2_SIZE_U64 on 64bit platforms.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Link: https://lore.kernel.org/r/20230921105248.164324363@noisy.programming.kicks-ass.net
24 files changed:
arch/alpha/kernel/syscalls/syscall.tbl
arch/arm/tools/syscall.tbl
arch/arm64/include/asm/unistd.h
arch/arm64/include/asm/unistd32.h
arch/ia64/kernel/syscalls/syscall.tbl
arch/m68k/kernel/syscalls/syscall.tbl
arch/microblaze/kernel/syscalls/syscall.tbl
arch/mips/kernel/syscalls/syscall_n32.tbl
arch/mips/kernel/syscalls/syscall_n64.tbl
arch/mips/kernel/syscalls/syscall_o32.tbl
arch/parisc/kernel/syscalls/syscall.tbl
arch/powerpc/kernel/syscalls/syscall.tbl
arch/s390/kernel/syscalls/syscall.tbl
arch/sh/kernel/syscalls/syscall.tbl
arch/sparc/kernel/syscalls/syscall.tbl
arch/x86/entry/syscalls/syscall_32.tbl
arch/x86/entry/syscalls/syscall_64.tbl
arch/xtensa/kernel/syscalls/syscall.tbl
include/linux/syscalls.h
include/uapi/asm-generic/unistd.h
kernel/futex/futex.h
kernel/futex/syscalls.c
kernel/futex/waitwake.c
kernel/sys_ni.c

index 3b86519d68e430346855f45ea18fdd45696cc108..c49f12fd264e362db98fc60891edbfc60faedfbc 100644 (file)
 561    common  cachestat                       sys_cachestat
 562    common  fchmodat2                       sys_fchmodat2
 563    common  futex_wake                      sys_futex_wake
+564    common  futex_wait                      sys_futex_wait
index 714abeb1e6fa3925ece07b707ed3d67d6c1156d7..a6cf56277327023047841dea70d94c5fac44c4fc 100644 (file)
 451    common  cachestat                       sys_cachestat
 452    common  fchmodat2                       sys_fchmodat2
 454    common  futex_wake                      sys_futex_wake
+455    common  futex_wait                      sys_futex_wait
index 63a8a9c4abc16741221e80e7db5b489b1557b98e..f33190f17ebb6173c5335282100db7bd47624d2d 100644 (file)
@@ -39,7 +39,7 @@
 #define __ARM_NR_compat_set_tls                (__ARM_NR_COMPAT_BASE + 5)
 #define __ARM_NR_COMPAT_END            (__ARM_NR_COMPAT_BASE + 0x800)
 
-#define __NR_compat_syscalls           455
+#define __NR_compat_syscalls           456
 #endif
 
 #define __ARCH_WANT_SYS_CLONE
index 68974683737bffce97b993a0bfe933fe64acd19e..6e7d37282ba1150d9fc8326b7b350c311ca2675d 100644 (file)
@@ -913,6 +913,8 @@ __SYSCALL(__NR_cachestat, sys_cachestat)
 __SYSCALL(__NR_fchmodat2, sys_fchmodat2)
 #define __NR_futex_wake 454
 __SYSCALL(__NR_futex_wake, sys_futex_wake)
+#define __NR_futex_wait 455
+__SYSCALL(__NR_futex_wait, sys_futex_wait)
 
 /*
  * Please add new compat syscalls above this comment and update
index cd50247508e62696951702b91087329b0d3f7094..4043f0c55170e0e2e33dda402e3a07f57b200d0a 100644 (file)
 451    common  cachestat                       sys_cachestat
 452    common  fchmodat2                       sys_fchmodat2
 454    common  futex_wake                      sys_futex_wake
+455    common  futex_wait                      sys_futex_wait
index 21eb35c693e12ebab49e59b803723cd14997bda3..24841674acc53b431f5addc4e13c0b0f1c019c29 100644 (file)
 451    common  cachestat                       sys_cachestat
 452    common  fchmodat2                       sys_fchmodat2
 454    common  futex_wake                      sys_futex_wake
+455    common  futex_wait                      sys_futex_wait
index 3a4e8513a8e16dc82943729172561ea96aa8ea88..f03927ab0220a79137cb831785b599aff0c5c0ac 100644 (file)
 451    common  cachestat                       sys_cachestat
 452    common  fchmodat2                       sys_fchmodat2
 454    common  futex_wake                      sys_futex_wake
+455    common  futex_wait                      sys_futex_wait
index 6883ea3b830da6864423471274dd800aa9c3b84f..dbb5edfb667b3017f5208205b2be0d7aa1877785 100644 (file)
 451    n32     cachestat                       sys_cachestat
 452    n32     fchmodat2                       sys_fchmodat2
 454    n32     futex_wake                      sys_futex_wake
+455    n32     futex_wait                      sys_futex_wait
index 48bc0fb4e3dc20a74a839ed03e9a6985119d3679..faff8dfd298337c974dd14a9bb4616c8d5883482 100644 (file)
 451    n64     cachestat                       sys_cachestat
 452    n64     fchmodat2                       sys_fchmodat2
 454    n64     futex_wake                      sys_futex_wake
+455    n64     futex_wait                      sys_futex_wait
index a92625f5bad83a544206bb69224b6c8af84baa4a..542f75605b3e44771d8dfd21b03ff359df6ffbe1 100644 (file)
 451    o32     cachestat                       sys_cachestat
 452    o32     fchmodat2                       sys_fchmodat2
 454    o32     futex_wake                      sys_futex_wake
+455    o32     futex_wait                      sys_futex_wait
index 57faa9786ffe7421ddb56374e13810fe05961917..8e50e89551f7b405362785f1b9a3982e92d58bfa 100644 (file)
 451    common  cachestat                       sys_cachestat
 452    common  fchmodat2                       sys_fchmodat2
 454    common  futex_wake                      sys_futex_wake
+455    common  futex_wait                      sys_futex_wait
index e6c6ed6b30ee6afb1cfed7abedc0bfa2c4adf26a..ad33a9993a6acb61a3a4c5db574bfdcf4177bab8 100644 (file)
 451    common  cachestat                       sys_cachestat
 452    common  fchmodat2                       sys_fchmodat2
 454    common  futex_wake                      sys_futex_wake
+455    common  futex_wait                      sys_futex_wait
index 754720154dc1231b1d4a34a3b6a04030d3172318..418853fd2a6be156fe3992837192482d7a286458 100644 (file)
 451  common    cachestat               sys_cachestat                   sys_cachestat
 452  common    fchmodat2               sys_fchmodat2                   sys_fchmodat2
 454  common    futex_wake              sys_futex_wake                  sys_futex_wake
+455  common    futex_wait              sys_futex_wait                  sys_futex_wait
index 902a997e7ec6ecb39a68aca3554f75c889ba3fff..8ef9557d27796471bb688c1de1190580247db07f 100644 (file)
 451    common  cachestat                       sys_cachestat
 452    common  fchmodat2                       sys_fchmodat2
 454    common  futex_wake                      sys_futex_wake
+455    common  futex_wait                      sys_futex_wait
index 8a1f887c8be66143925d5112c45f49b031bfd93c..df59a9d5f109ff3ff1fb1ca9e07622bdf84d93a9 100644 (file)
 451    common  cachestat                       sys_cachestat
 452    common  fchmodat2                       sys_fchmodat2
 454    common  futex_wake                      sys_futex_wake
+455    common  futex_wait                      sys_futex_wait
index 9e81323979b00f9d4485179c4d82cb0c8a137480..0f6616822bd5010a373a48716b0d325f4e0f498b 100644 (file)
 451    i386    cachestat               sys_cachestat
 452    i386    fchmodat2               sys_fchmodat2
 454    i386    futex_wake              sys_futex_wake
+455    i386    futex_wait              sys_futex_wait
index d10a6003a7c9f1e0adc8c908afa45524fdaccda6..ddf6288823ad06ad4727b9fd2f357a9bbc336261 100644 (file)
 452    common  fchmodat2               sys_fchmodat2
 453    64      map_shadow_stack        sys_map_shadow_stack
 454    common  futex_wake              sys_futex_wake
+455    common  futex_wait              sys_futex_wait
 
 #
 # Due to a historical design error, certain syscalls are numbered differently
index 4e511bfd4b8f5fd56a05758b879da2d21a0f4267..ac278dbce2eeef80a7c878da1d9515afaeb9b9fd 100644 (file)
 451    common  cachestat                       sys_cachestat
 452    common  fchmodat2                       sys_fchmodat2
 454    common  futex_wake                      sys_futex_wake
+455    common  futex_wait                      sys_futex_wait
index e174ed86da1da47baae01d709b675a7a9d124c2f..11f3fdd1ee0378730386e19f301ec6da6764378d 100644 (file)
@@ -552,6 +552,10 @@ asmlinkage long sys_futex_waitv(struct futex_waitv *waiters,
 
 asmlinkage long sys_futex_wake(void __user *uaddr, unsigned long mask, int nr, unsigned int flags);
 
+asmlinkage long sys_futex_wait(void __user *uaddr, unsigned long val, unsigned long mask,
+                              unsigned int flags, struct __kernel_timespec __user *timespec,
+                              clockid_t clockid);
+
 asmlinkage long sys_nanosleep(struct __kernel_timespec __user *rqtp,
                              struct __kernel_timespec __user *rmtp);
 asmlinkage long sys_nanosleep_time32(struct old_timespec32 __user *rqtp,
index f5454e6f4c6ff3e02b35b7bdb7d4a2027159448b..f6553bd5d213b2e26e3fa26c5846e91c94c0f9c7 100644 (file)
@@ -824,9 +824,11 @@ __SYSCALL(__NR_cachestat, sys_cachestat)
 __SYSCALL(__NR_fchmodat2, sys_fchmodat2)
 #define __NR_futex_wake 454
 __SYSCALL(__NR_futex_wake, sys_futex_wake)
+#define __NR_futex_wait 455
+__SYSCALL(__NR_futex_wait, sys_futex_wait)
 
 #undef __NR_syscalls
-#define __NR_syscalls 455
+#define __NR_syscalls 456
 
 /*
  * 32 bit systems traditionally used different
index 0e7821a944a27d53d939c18c20d5b144ad286456..e74888a7d71db2052691001e5e29121e6cf09b48 100644 (file)
@@ -332,6 +332,9 @@ extern int futex_requeue(u32 __user *uaddr1, unsigned int flags,
                         u32 __user *uaddr2, int nr_wake, int nr_requeue,
                         u32 *cmpval, int requeue_pi);
 
+extern int __futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
+                       struct hrtimer_sleeper *to, u32 bitset);
+
 extern int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
                      ktime_t *abs_time, u32 bitset);
 
index 47398926765e4ef99969c6ee0f712b07e8519b79..e4c8ec713787044beecb8be780677bd95d7fce69 100644 (file)
@@ -221,6 +221,46 @@ static int futex_parse_waitv(struct futex_vector *futexv,
        return 0;
 }
 
+static int futex2_setup_timeout(struct __kernel_timespec __user *timeout,
+                               clockid_t clockid, struct hrtimer_sleeper *to)
+{
+       int flag_clkid = 0, flag_init = 0;
+       struct timespec64 ts;
+       ktime_t time;
+       int ret;
+
+       if (!timeout)
+               return 0;
+
+       if (clockid == CLOCK_REALTIME) {
+               flag_clkid = FLAGS_CLOCKRT;
+               flag_init = FUTEX_CLOCK_REALTIME;
+       }
+
+       if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC)
+               return -EINVAL;
+
+       if (get_timespec64(&ts, timeout))
+               return -EFAULT;
+
+       /*
+        * Since there's no opcode for futex_waitv, use
+        * FUTEX_WAIT_BITSET that uses absolute timeout as well
+        */
+       ret = futex_init_timeout(FUTEX_WAIT_BITSET, flag_init, &ts, &time);
+       if (ret)
+               return ret;
+
+       futex_setup_timer(&time, to, flag_clkid, 0);
+       return 0;
+}
+
+static inline void futex2_destroy_timeout(struct hrtimer_sleeper *to)
+{
+       hrtimer_cancel(&to->timer);
+       destroy_hrtimer_on_stack(&to->timer);
+}
+
 /**
  * sys_futex_waitv - Wait on a list of futexes
  * @waiters:    List of futexes to wait on
@@ -250,8 +290,6 @@ SYSCALL_DEFINE5(futex_waitv, struct futex_waitv __user *, waiters,
 {
        struct hrtimer_sleeper to;
        struct futex_vector *futexv;
-       struct timespec64 ts;
-       ktime_t time;
        int ret;
 
        /* This syscall supports no flags for now */
@@ -261,30 +299,8 @@ SYSCALL_DEFINE5(futex_waitv, struct futex_waitv __user *, waiters,
        if (!nr_futexes || nr_futexes > FUTEX_WAITV_MAX || !waiters)
                return -EINVAL;
 
-       if (timeout) {
-               int flag_clkid = 0, flag_init = 0;
-
-               if (clockid == CLOCK_REALTIME) {
-                       flag_clkid = FLAGS_CLOCKRT;
-                       flag_init = FUTEX_CLOCK_REALTIME;
-               }
-
-               if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC)
-                       return -EINVAL;
-
-               if (get_timespec64(&ts, timeout))
-                       return -EFAULT;
-
-               /*
-                * Since there's no opcode for futex_waitv, use
-                * FUTEX_WAIT_BITSET that uses absolute timeout as well
-                */
-               ret = futex_init_timeout(FUTEX_WAIT_BITSET, flag_init, &ts, &time);
-               if (ret)
-                       return ret;
-
-               futex_setup_timer(&time, &to, flag_clkid, 0);
-       }
+       if (timeout && (ret = futex2_setup_timeout(timeout, clockid, &to)))
+               return ret;
 
        futexv = kcalloc(nr_futexes, sizeof(*futexv), GFP_KERNEL);
        if (!futexv) {
@@ -299,10 +315,8 @@ SYSCALL_DEFINE5(futex_waitv, struct futex_waitv __user *, waiters,
        kfree(futexv);
 
 destroy_timer:
-       if (timeout) {
-               hrtimer_cancel(&to.timer);
-               destroy_hrtimer_on_stack(&to.timer);
-       }
+       if (timeout)
+               futex2_destroy_timeout(&to);
        return ret;
 }
 
@@ -336,6 +350,52 @@ SYSCALL_DEFINE4(futex_wake,
        return futex_wake(uaddr, FLAGS_STRICT | flags, nr, mask);
 }
 
+/*
+ * sys_futex_wait - Wait on a futex
+ * @uaddr:     Address of the futex to wait on
+ * @val:       Value of @uaddr
+ * @mask:      bitmask
+ * @flags:     FUTEX2 flags
+ * @timeout:   Optional absolute timeout
+ * @clockid:   Clock to be used for the timeout, realtime or monotonic
+ *
+ * Identical to the traditional FUTEX_WAIT_BITSET op, except it is part of the
+ * futex2 family of calls.
+ */
+
+SYSCALL_DEFINE6(futex_wait,
+               void __user *, uaddr,
+               unsigned long, val,
+               unsigned long, mask,
+               unsigned int, flags,
+               struct __kernel_timespec __user *, timeout,
+               clockid_t, clockid)
+{
+       struct hrtimer_sleeper to;
+       int ret;
+
+       if (flags & ~FUTEX2_VALID_MASK)
+               return -EINVAL;
+
+       flags = futex2_to_flags(flags);
+       if (!futex_flags_valid(flags))
+               return -EINVAL;
+
+       if (!futex_validate_input(flags, val) ||
+           !futex_validate_input(flags, mask))
+               return -EINVAL;
+
+       if (timeout && (ret = futex2_setup_timeout(timeout, clockid, &to)))
+               return ret;
+
+       ret = __futex_wait(uaddr, flags, val, timeout ? &to : NULL, mask);
+
+       if (timeout)
+               futex2_destroy_timeout(&to);
+
+       return ret;
+}
+
 #ifdef CONFIG_COMPAT
 COMPAT_SYSCALL_DEFINE2(set_robust_list,
                struct compat_robust_list_head __user *, head,
index ceb05b876597476fe011d886915b8bbc76d6b20d..b109a0810a2c688308b9376b66c62be5a8a6eb9a 100644 (file)
@@ -632,20 +632,18 @@ retry_private:
        return ret;
 }
 
-int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, ktime_t *abs_time, u32 bitset)
+int __futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
+                struct hrtimer_sleeper *to, u32 bitset)
 {
-       struct hrtimer_sleeper timeout, *to;
-       struct restart_block *restart;
-       struct futex_hash_bucket *hb;
        struct futex_q q = futex_q_init;
+       struct futex_hash_bucket *hb;
        int ret;
 
        if (!bitset)
                return -EINVAL;
+
        q.bitset = bitset;
 
-       to = futex_setup_timer(abs_time, &timeout, flags,
-                              current->timer_slack_ns);
 retry:
        /*
         * Prepare to wait on uaddr. On success, it holds hb->lock and q
@@ -653,18 +651,17 @@ retry:
         */
        ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
        if (ret)
-               goto out;
+               return ret;
 
        /* futex_queue and wait for wakeup, timeout, or a signal. */
        futex_wait_queue(hb, &q, to);
 
        /* If we were woken (and unqueued), we succeeded, whatever. */
-       ret = 0;
        if (!futex_unqueue(&q))
-               goto out;
-       ret = -ETIMEDOUT;
+               return 0;
+
        if (to && !to->task)
-               goto out;
+               return -ETIMEDOUT;
 
        /*
         * We expect signal_pending(current), but we might be the
@@ -673,24 +670,38 @@ retry:
        if (!signal_pending(current))
                goto retry;
 
-       ret = -ERESTARTSYS;
-       if (!abs_time)
-               goto out;
+       return -ERESTARTSYS;
+}
 
-       restart = &current->restart_block;
-       restart->futex.uaddr = uaddr;
-       restart->futex.val = val;
-       restart->futex.time = *abs_time;
-       restart->futex.bitset = bitset;
-       restart->futex.flags = flags | FLAGS_HAS_TIMEOUT;
+int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, ktime_t *abs_time, u32 bitset)
+{
+       struct hrtimer_sleeper timeout, *to;
+       struct restart_block *restart;
+       int ret;
+
+       to = futex_setup_timer(abs_time, &timeout, flags,
+                              current->timer_slack_ns);
+
+       ret = __futex_wait(uaddr, flags, val, to, bitset);
+
+       /* No timeout, nothing to clean up. */
+       if (!to)
+               return ret;
+
+       hrtimer_cancel(&to->timer);
+       destroy_hrtimer_on_stack(&to->timer);
 
-       ret = set_restart_fn(restart, futex_wait_restart);
+       if (ret == -ERESTARTSYS) {
+               restart = &current->restart_block;
+               restart->futex.uaddr = uaddr;
+               restart->futex.val = val;
+               restart->futex.time = *abs_time;
+               restart->futex.bitset = bitset;
+               restart->futex.flags = flags | FLAGS_HAS_TIMEOUT;
 
-out:
-       if (to) {
-               hrtimer_cancel(&to->timer);
-               destroy_hrtimer_on_stack(&to->timer);
+               return set_restart_fn(restart, futex_wait_restart);
        }
+
        return ret;
 }
 
index 983c0583c627c9baf1f90161587bb1ee1291c4f8..13df391194e2b3fb4fc7ef340a4f27a46a85bf46 100644 (file)
@@ -88,6 +88,7 @@ COND_SYSCALL(get_robust_list);
 COND_SYSCALL_COMPAT(get_robust_list);
 COND_SYSCALL(futex_waitv);
 COND_SYSCALL(futex_wake);
+COND_SYSCALL(futex_wait);
 COND_SYSCALL(kexec_load);
 COND_SYSCALL_COMPAT(kexec_load);
 COND_SYSCALL(init_module);