cpu_smt_control != CPU_SMT_NOT_SUPPORTED;
 }
 EXPORT_SYMBOL_GPL(cpu_smt_possible);
+
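+/*
+ * SMT aware bringup helpers. When the topology supports SMT, the primary
+ * threads are brought up before their siblings.
+ */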
+static inline bool cpuhp_smt_aware(void)
+{
+       return topology_smt_supported();
+}
+
+static inline const struct cpumask *cpuhp_get_primary_thread_mask(void)
+{
+       return cpu_primary_thread_mask;
+}
 #else
 static inline bool cpu_smt_allowed(unsigned int cpu) { return true; }
+static inline bool cpuhp_smt_aware(void) { return false; }
+static inline const struct cpumask *cpuhp_get_primary_thread_mask(void)
+{
+       return cpu_present_mask;
+}
 #endif
 
 static inline enum cpuhp_state
        return 0;
 }
 
-void __init bringup_nonboot_cpus(unsigned int setup_max_cpus)
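+/*
+ * Bring at most @ncpus CPUs in @mask up to the @target state, cleaning up
+ * after a partially failed bringup.
+ */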
+static void __init cpuhp_bringup_mask(const struct cpumask *mask, unsigned int ncpus,
+                                     enum cpuhp_state target)
 {
        unsigned int cpu;
 
-       for_each_present_cpu(cpu) {
-               if (num_online_cpus() >= setup_max_cpus)
-                       break;
-               if (!cpu_online(cpu))
-                       cpu_up(cpu, CPUHP_ONLINE);
+       for_each_cpu(cpu, mask) {
+               struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+
+               if (cpu_up(cpu, target) && can_rollback_cpu(st)) {
+                       /*
+                        * If this failed then cpu_up() might have only
+                        * rolled back to CPUHP_BP_KICK_AP for the final
+                        * online. Clean it up. NOOP if already rolled back.
+                        */
+                       WARN_ON(cpuhp_invoke_callback_range(false, cpu, st, CPUHP_OFFLINE));
+               }
+
+               if (!--ncpus)
+                       break;
        }
 }
 
+#ifdef CONFIG_HOTPLUG_PARALLEL
+static bool __cpuhp_parallel_bringup __ro_after_init = true;
+
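+/*
+ * Command line parameter "cpuhp.parallel". Parallel bringup is enabled by
+ * default. Booting with "cpuhp.parallel=0" falls back to the fully
+ * serialized bringup even if the architecture supports parallel bringup.
+ */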
+static int __init parallel_bringup_parse_param(char *arg)
+{
+       return kstrtobool(arg, &__cpuhp_parallel_bringup);
+}
+early_param("cpuhp.parallel", parallel_bringup_parse_param);
+
+/*
+ * On architectures which have enabled parallel bringup, this invokes all BP
+ * prepare states for each of the to-be-onlined APs first. The last state
+ * sends the startup IPI to the APs. The APs proceed through the low level
+ * bringup code in parallel and then wait for the control CPU to release
+ * them one by one for the final onlining procedure.
+ *
+ * This avoids waiting for each AP to respond to the startup IPI in
+ * CPUHP_BRINGUP_CPU.
+ */
+static bool __init cpuhp_bringup_cpus_parallel(unsigned int ncpus)
+{
+       const struct cpumask *mask = cpu_present_mask;
+
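+       /*
+        * The architecture has to opt in via arch_cpuhp_init_parallel_bringup()
+        * and can veto parallel bringup at boot time.
+        */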
+       if (__cpuhp_parallel_bringup)
+               __cpuhp_parallel_bringup = arch_cpuhp_init_parallel_bringup();
+       if (!__cpuhp_parallel_bringup)
+               return false;
+
+       if (cpuhp_smt_aware()) {
+               const struct cpumask *pmask = cpuhp_get_primary_thread_mask();
+               static struct cpumask tmp_mask __initdata;
+
+               /*
+                * For various reasons, X86 requires that SMT siblings are not
+                * brought up while the primary thread does a microcode update.
+                * Bring the primary threads up first.
+                */
+               cpumask_and(&tmp_mask, mask, pmask);
+               cpuhp_bringup_mask(&tmp_mask, ncpus, CPUHP_BP_KICK_AP);
+               cpuhp_bringup_mask(&tmp_mask, ncpus, CPUHP_ONLINE);
+               /* Account for the online CPUs */
+               ncpus -= num_online_cpus();
+               if (!ncpus)
+                       return true;
+               /* Create the mask for secondary CPUs */
+               cpumask_andnot(&tmp_mask, mask, pmask);
+               mask = &tmp_mask;
+       }
+
+       /* Bring the not-yet started CPUs up */
+       cpuhp_bringup_mask(mask, ncpus, CPUHP_BP_KICK_AP);
+       cpuhp_bringup_mask(mask, ncpus, CPUHP_ONLINE);
+       return true;
+}
+#else
+static inline bool cpuhp_bringup_cpus_parallel(unsigned int ncpus) { return false; }
+#endif /* CONFIG_HOTPLUG_PARALLEL */
+
+void __init bringup_nonboot_cpus(unsigned int setup_max_cpus)
+{
+       /* Try parallel bringup optimization if enabled */
+       if (cpuhp_bringup_cpus_parallel(setup_max_cpus))
+               return;
+
+       /* Full per CPU serialized bringup */
+       cpuhp_bringup_mask(cpu_present_mask, setup_max_cpus, CPUHP_ONLINE);
+}
+
 #ifdef CONFIG_PM_SLEEP_SMP
 static cpumask_var_t frozen_cpus;