static struct irq_chip lapic_controller;
 static struct irq_matrix *vector_matrix;
 #ifdef CONFIG_SMP
-static DEFINE_PER_CPU(struct hlist_head, cleanup_list);
+
+static void vector_cleanup_callback(struct timer_list *tmr);
+
+struct vector_cleanup {
+       struct hlist_head       head;   /* apic_chip_data entries, linked via ->clist, awaiting cleanup */
+       struct timer_list       timer;  /* deferred cleanup timer; callback recovers this struct from it */
+};
+
+static DEFINE_PER_CPU(struct vector_cleanup, vector_cleanup) = {
+       .head   = HLIST_HEAD_INIT,      /* no cleanups queued initially */
+       .timer  = __TIMER_INITIALIZER(vector_cleanup_callback, TIMER_PINNED),
+};
 #endif
 
 void lock_vector_lock(void)
                this_cpu_write(vector_irq[vector], __setup_vector_irq(vector));
 }
 
+static void __vector_cleanup(struct vector_cleanup *cl, bool check_irr);
+
 void lapic_offline(void)
 {
+       struct vector_cleanup *cl = this_cpu_ptr(&vector_cleanup);
+
        lock_vector_lock();
+
+       /*
+        * In case the vector cleanup timer has not expired: process this
+        * CPU's cleanup list right away. check_irr is passed as false, so
+        * no entry is held back for a later retry.
+        */
+       __vector_cleanup(cl, false);
+
        irq_matrix_offline(vector_matrix);
+       WARN_ON_ONCE(try_to_del_timer_sync(&cl->timer) < 0);    /* warn if the timer cannot be deleted */
+       WARN_ON_ONCE(!hlist_empty(&cl->head));  /* list is expected to be empty after the cleanup above */
+
        unlock_vector_lock();
 }
 
        apicd->move_in_progress = 0;
 }
 
-DEFINE_IDTENTRY_SYSVEC(sysvec_irq_move_cleanup)
+static void __vector_cleanup(struct vector_cleanup *cl, bool check_irr)
 {
-       struct hlist_head *clhead = this_cpu_ptr(&cleanup_list);
        struct apic_chip_data *apicd;
        struct hlist_node *tmp;
+       bool rearm = false;     /* set once any entry has to stay queued for a retry */
 
-       ack_APIC_irq();
-       /* Prevent vectors vanishing under us */
-       raw_spin_lock(&vector_lock);
+       lockdep_assert_held(&vector_lock);      /* the caller must already hold vector_lock */
 
-       hlist_for_each_entry_safe(apicd, tmp, clhead, clist) {
+       hlist_for_each_entry_safe(apicd, tmp, &cl->head, clist) {
                unsigned int irr, vector = apicd->prev_vector;
 
                /*
                 * Paranoia: Check if the vector that needs to be cleaned
-                * up is registered at the APICs IRR. If so, then this is
-                * not the best time to clean it up. Clean it up in the
-                * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR
-                * to this CPU. IRQ_MOVE_CLEANUP_VECTOR is the lowest
-                * priority external vector, so on return from this
-                * interrupt the device interrupt will happen first.
+                * up is registered at the APIC's IRR. That's clearly a
+                * hardware issue if the vector arrived on the old target
+                * _after_ interrupts were disabled by vector_cleanup_callback().
+                * Keep @apicd on the list and schedule the timer again to
+                * give the CPU a chance to handle the pending interrupt.
+                *
+                * Do not check IRR when called from lapic_offline(), because
+                * fixup_irqs() was just called to scan IRR for set bits and
+                * forward them to new destination CPUs via IPIs.
+                *
+                * The bit tested is (vector % 32) of the IRR register at
+                * APIC_IRR + (vector / 32) * 0x10. With @check_irr false the
+                * read is skipped and irr is 0, so the entry is always freed.
                 */
-               irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
+               irr = check_irr ? apic_read(APIC_IRR + (vector / 32 * 0x10)) : 0;
                if (irr & (1U << (vector % 32))) {
-                       apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
+                       pr_warn_once("Moved interrupt pending in old target APIC %u\n", apicd->irq);
+                       rearm = true;   /* entry is not freed; retry from the timer */
                        continue;
                }
                free_moved_vector(apicd);
        }
 
-       raw_spin_unlock(&vector_lock);
+       /*
+        * Must happen under vector_lock to make the timer_pending() check
+        * in __vector_schedule_cleanup() race free against the rearm here.
+        * The retry is scheduled for the next jiffy.
+        */
+       if (rearm)
+               mod_timer(&cl->timer, jiffies + 1);
+}
+
+static void vector_cleanup_callback(struct timer_list *tmr)
+{
+       struct vector_cleanup *cl = container_of(tmr, typeof(*cl), timer);
+
+       /*
+        * Prevent vectors vanishing under us. @tmr is the timer embedded
+        * in @cl. This is the timer path, so check_irr is passed as true:
+        * entries whose old vector is still pending in the IRR stay queued
+        * and the timer is rearmed by __vector_cleanup().
+        */
+       raw_spin_lock_irq(&vector_lock);
+       __vector_cleanup(cl, true);
+       raw_spin_unlock_irq(&vector_lock);
 }
 
 static void __vector_schedule_cleanup(struct apic_chip_data *apicd)
 {
-       unsigned int cpu;
+       unsigned int cpu = apicd->prev_cpu;
 
        raw_spin_lock(&vector_lock);
        apicd->move_in_progress = 0;
-       cpu = apicd->prev_cpu;
        if (cpu_online(cpu)) {
-               hlist_add_head(&apicd->clist, per_cpu_ptr(&cleanup_list, cpu));
-               apic->send_IPI(cpu, IRQ_MOVE_CLEANUP_VECTOR);
+               struct vector_cleanup *cl = per_cpu_ptr(&vector_cleanup, cpu);
+
+               hlist_add_head(&apicd->clist, &cl->head);
+
+               /*
+                * The lockless timer_pending() check is safe here. If it
+                * returns true, then the callback will observe this new
+                * apic data in the hlist as everything is serialized by
+                * vector lock.
+                *
+                * If it returns false then the timer is either not armed
+                * or the other CPU executes the callback, which again
+                * would be blocked on vector lock. Rearming it in the
+                * latter case makes it fire for nothing.
+                *
+                * This is also safe against the callback rearming the timer
+                * because that's serialized via vector lock too.
+                */
+               if (!timer_pending(&cl->timer)) {
+                       cl->timer.expires = jiffies + 1;
+                       add_timer_on(&cl->timer, cpu);
+               }
        } else {
                apicd->prev_vector = 0;
        }