The synchronization of the AP with the control CPU is an SMP boot problem
and has nothing to do with cpu_init().
Open code cpu_init_secondary() in start_secondary() and move
wait_for_master_cpu() into the SMP boot code.
No functional change.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Michael Kelley <mikelley@microsoft.com>
Tested-by: Oleksandr Natalenko <oleksandr@natalenko.name>
Tested-by: Helge Deller <deller@gmx.de> # parisc
Tested-by: Guilherme G. Piccoli <gpiccoli@igalia.com> # Steam Deck
Link: https://lore.kernel.org/r/20230512205255.981999763@linutronix.de
 extern void load_direct_gdt(int);
 extern void load_fixmap_gdt(int);
 extern void cpu_init(void);
-extern void cpu_init_secondary(void);
 extern void cpu_init_exception_handling(void);
 extern void cr4_init(void);
 
 
 #define dbg_restore_debug_regs()
 #endif /* ! CONFIG_KGDB */
 
-static void wait_for_master_cpu(int cpu)
-{
-#ifdef CONFIG_SMP
-       /*
-        * wait for ACK from master CPU before continuing
-        * with AP initialization
-        */
-       WARN_ON(cpumask_test_and_set_cpu(cpu, cpu_initialized_mask));
-       while (!cpumask_test_cpu(cpu, cpu_callout_mask))
-               cpu_relax();
-#endif
-}
-
 static inline void setup_getcpu(int cpu)
 {
        unsigned long cpudata = vdso_encode_cpunode(cpu, early_cpu_to_node(cpu));
        struct task_struct *cur = current;
        int cpu = raw_smp_processor_id();
 
-       wait_for_master_cpu(cpu);
-
        ucode_cpu_init(cpu);
 
 #ifdef CONFIG_NUMA
        load_fixmap_gdt(cpu);
 }
 
-#ifdef CONFIG_SMP
-void cpu_init_secondary(void)
-{
-       /*
-        * Relies on the BP having set-up the IDT tables, which are loaded
-        * on this CPU in cpu_init_exception_handling().
-        */
-       cpu_init_exception_handling();
-       cpu_init();
-}
-#endif
-
 #ifdef CONFIG_MICROCODE_LATE_LOADING
 /**
  * store_cpu_caps() - Store a snapshot of CPU capabilities
 
        cpu_data(smp_processor_id()).loops_per_jiffy = loops_per_jiffy;
 }
 
+static void wait_for_master_cpu(int cpu)
+{
+       /*
+        * Wait for release by control CPU before continuing with AP
+        * initialization.
+        *
+        * Mark @cpu in cpu_initialized_mask (warning if the bit was
+        * already set), then spin with cpu_relax() until @cpu's bit
+        * shows up in cpu_callout_mask.
+        */
+       WARN_ON(cpumask_test_and_set_cpu(cpu, cpu_initialized_mask));
+       while (!cpumask_test_cpu(cpu, cpu_callout_mask))
+               cpu_relax();
+}
+
 /*
  * Activate a secondary processor.
  */
        load_cr3(swapper_pg_dir);
        __flush_tlb_all();
 #endif
+       cpu_init_exception_handling();
+
        /*
-        * Sync point with wait_cpu_initialized(). Before proceeding through
-        * cpu_init(), the AP will call wait_for_master_cpu() which sets its
-        * own bit in cpu_initialized_mask and then waits for the BSP to set
-        * its bit in cpu_callout_mask to release it.
+        * Sync point with wait_cpu_initialized(). Sets AP in
+        * cpu_initialized_mask and then waits for the control CPU
+        * to release it.
         */
-       cpu_init_secondary();
+       wait_for_master_cpu(raw_smp_processor_id());
+
+       cpu_init();
        rcu_cpu_starting(raw_smp_processor_id());
        x86_cpuinit.early_percpu_clock_init();