#ifndef __ASSEMBLY__
 
+#include <linux/cpumask.h>
 #include <linux/linkage.h>
 #include <linux/irqflags.h>
 #include <asm/cpu.h>
  * Give up the time slice of the virtual PU.
  */
 #define cpu_relax_yield cpu_relax_yield
-void cpu_relax_yield(void);
+void cpu_relax_yield(const struct cpumask *cpumask); /* cpumask: set of CPUs considered as yield targets */
 
 #define cpu_relax() barrier()
 
 
 };
 
 static DEFINE_PER_CPU(struct cpu_info, cpu_info);
+static DEFINE_PER_CPU(int, cpu_relax_retry); /* per-CPU count of cpu_relax_yield() calls since the counter was last reset */
 
 static bool machine_has_cpu_mhz;
 
                on_each_cpu(update_cpu_mhz, NULL, 0);
 }
 
-void notrace cpu_relax_yield(void)
+void notrace cpu_relax_yield(const struct cpumask *cpumask) /* rate-limited directed yield to one CPU taken from cpumask */
 {
-       if (!smp_cpu_mtid && MACHINE_HAS_DIAG44) {
-               diag_stat_inc(DIAG_STAT_X044);
-               asm volatile("diag 0,0,0x44");
+       int cpu, this_cpu;
+
+       this_cpu = smp_processor_id();
+       if (__this_cpu_inc_return(cpu_relax_retry) >= spin_retry) { /* act only once the per-CPU call counter reaches spin_retry (defined outside this view) */
+               __this_cpu_write(cpu_relax_retry, 0); /* restart the count for the next round */
+               cpu = cpumask_next_wrap(this_cpu, cpumask, this_cpu, false); /* presumably the next CPU in cpumask after this_cpu, wrapping around -- confirm against cpumask_next_wrap() */
+               if (cpu >= nr_cpu_ids) /* no candidate CPU was returned */
+                       return;
+               if (arch_vcpu_is_preempted(cpu)) /* yield only when the chosen CPU is reported as preempted */
+                       smp_yield_cpu(cpu);
         }
-       barrier();
 }
 EXPORT_SYMBOL(cpu_relax_yield);
 
 
                diag_stat_inc_norecursion(DIAG_STAT_X09C);
                asm volatile("diag %0,0,0x9c"
                             : : "d" (pcpu_devices[cpu].address));
-       } else if (MACHINE_HAS_DIAG44) {
+       } else if (MACHINE_HAS_DIAG44 && !smp_cpu_mtid) {
                diag_stat_inc_norecursion(DIAG_STAT_X044);
                asm volatile("diag 0,0,0x44");
        }
 
 #endif
 
 #ifndef cpu_relax_yield
-#define cpu_relax_yield() cpu_relax()
+#define cpu_relax_yield(cpumask) cpu_relax() /* fallback when no override is defined: the cpumask argument is ignored */
 #endif
 
 extern int yield_to(struct task_struct *p, bool preempt);
 
        struct multi_stop_data *msdata = data;
        enum multi_stop_state curstate = MULTI_STOP_NONE;
        int cpu = smp_processor_id(), err = 0;
+       const struct cpumask *cpumask;
        unsigned long flags;
        bool is_active;
 
         */
        local_save_flags(flags);
 
-       if (!msdata->active_cpus)
-               is_active = cpu == cpumask_first(cpu_online_mask);
-       else
-               is_active = cpumask_test_cpu(cpu, msdata->active_cpus);
+       if (!msdata->active_cpus) {
+               cpumask = cpu_online_mask;
+               is_active = cpu == cpumask_first(cpumask);
+       } else {
+               cpumask = msdata->active_cpus;
+               is_active = cpumask_test_cpu(cpu, cpumask);
+       }
 
        /* Simple state machine */
        do {
                /* Chill out and ensure we re-read multi_stop_state. */
-               cpu_relax_yield();
+               cpu_relax_yield(cpumask);
                if (msdata->state != curstate) {
                        curstate = msdata->state;
                        switch (curstate) {