* @list: List entry, to attach to the padata lists.
  * @pd: Pointer to the internal control structure.
  * @cb_cpu: Callback cpu for serialization.
+ * @cpu: Cpu for parallelization.
  * @seq_nr: Sequence number of the parallelized data object.
  * @info: Used to pass information from the parallel to the serial function.
  * @parallel: Parallel execution function.
        struct list_head        list;
        struct parallel_data    *pd;
        int                     cb_cpu;
+       int                     cpu;
        int                     info;
        void                    (*parallel)(struct padata_priv *padata);
        void                    (*serial)(struct padata_priv *padata);
 
        padata->cb_cpu = cb_cpu;
 
        target_cpu = padata_cpu_hash(pd);
+       padata->cpu = target_cpu;
        queue = per_cpu_ptr(pd->pqueue, target_cpu);
 
        spin_lock(&queue->parallel.lock);
        int cpu;
        struct padata_parallel_queue *pqueue;
        struct parallel_data *pd;
+       int reorder_via_wq = 0;
 
        pd = padata->pd;
 
        cpu = get_cpu();
+
+       /* We need to run on the CPU that padata_do_parallel() selected for
+        * this padata object (padata->cpu) -- or, at least, enqueue the
+        * padata object into that CPU's per-cpu queue.
+        */
+       if (cpu != padata->cpu) {
+               reorder_via_wq = 1;
+               cpu = padata->cpu;
+       }
+
        pqueue = per_cpu_ptr(pd->pqueue, cpu);
 
        spin_lock(&pqueue->reorder.lock);
 
        put_cpu();
 
-       padata_reorder(pd);
+       /* If we're running on the wrong CPU, call padata_reorder() via a
+        * kernel worker.
+        */
+       if (reorder_via_wq)
+               queue_work_on(cpu, pd->pinst->wq, &pqueue->reorder_work);
+       else
+               padata_reorder(pd);
 }
 EXPORT_SYMBOL(padata_do_serial);