local_bh_disable();
 
                fpsimd_save();
-               set_thread_flag(TIF_FOREIGN_FPSTATE);
        }
 
        fpsimd_flush_task_state(task);
        local_bh_disable();
 
        fpsimd_save();
-       fpsimd_to_sve(current);
 
        /* Force ret_to_user to reload the registers: */
        fpsimd_flush_task_state(current);
-       set_thread_flag(TIF_FOREIGN_FPSTATE);
 
+       fpsimd_to_sve(current);
        if (test_and_set_thread_flag(TIF_SVE))
                WARN_ON(1); /* SVE access shouldn't have trapped */
 
 
        local_bh_disable();
 
+       fpsimd_flush_task_state(current);
        memset(&current->thread.uw.fpsimd_state, 0,
               sizeof(current->thread.uw.fpsimd_state));
-       fpsimd_flush_task_state(current);
 
        if (system_supports_sve()) {
                clear_thread_flag(TIF_SVE);
                        current->thread.sve_vl_onexec = 0;
        }
 
-       set_thread_flag(TIF_FOREIGN_FPSTATE);
-
        local_bh_enable();
 }
 
 
 /*
  * Invalidate live CPU copies of task t's FPSIMD state
+ *
+ * This is done by writing NR_CPUS to t->thread.fpsimd_cpu and then
+ * setting TIF_FOREIGN_FPSTATE on t, in that order.
+ *
+ * This function may be called with preemption enabled.  The first
+ * barrier() ensures that the assignment to fpsimd_cpu is visible to
+ * any preemption/softirq that could race with set_tsk_thread_flag(),
+ * so that TIF_FOREIGN_FPSTATE cannot be spuriously re-cleared.
+ *
+ * The final barrier() ensures that TIF_FOREIGN_FPSTATE is seen set by
+ * any subsequent code.
  */
 void fpsimd_flush_task_state(struct task_struct *t)
 {
        t->thread.fpsimd_cpu = NR_CPUS;
+
+       barrier();
+       set_tsk_thread_flag(t, TIF_FOREIGN_FPSTATE);
+
+       barrier();
 }
 
+/*
+ * Invalidate any task's FPSIMD state that is present on this cpu.
+ * This function must be called with softirqs disabled.
+ */
 void fpsimd_flush_cpu_state(void)
 {
        __this_cpu_write(fpsimd_last_state.st, NULL);