static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
                                        int ctx_id, bool internal_cb)
 {
-       struct hl_cb *cb;
+       struct hl_cb *cb = NULL;
        u32 cb_offset;
        void *p;
 
-        * the kernel's copy. Hence, we must never sleep in this code section
-        * and must use GFP_ATOMIC for all memory allocations.
+        * the kernel's copy. Hence, we prefer not to sleep in this code
+        * section and first try GFP_ATOMIC for all memory allocations. If an
+        * atomic allocation fails, we fall back to a sleeping GFP_KERNEL
+        * allocation rather than failing the request.
         */
-       if (ctx_id == HL_KERNEL_ASID_ID)
+       if (ctx_id == HL_KERNEL_ASID_ID && !hdev->disabled)
                cb = kzalloc(sizeof(*cb), GFP_ATOMIC);
-       else
+
+       if (!cb)
                cb = kzalloc(sizeof(*cb), GFP_KERNEL);
 
        if (!cb)
        } else if (ctx_id == HL_KERNEL_ASID_ID) {
                p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,
                                                &cb->bus_address, GFP_ATOMIC);
+               if (!p)
+                       p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
+                                       cb_size, &cb->bus_address, GFP_KERNEL);
        } else {
                p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,
                                                &cb->bus_address,
 
        spin_lock(&mgr->cb_lock);
        rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_ATOMIC);
        spin_unlock(&mgr->cb_lock);
+
+       if (rc < 0) {
+               /* the GFP_KERNEL retry can sleep, so keep it off the spinlock */
+               idr_preload(GFP_KERNEL);
+               spin_lock(&mgr->cb_lock);
+               rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_NOWAIT);
+               spin_unlock(&mgr->cb_lock);
+               idr_preload_end();
+       }
 
        if (rc < 0) {
 
        cntr = &hdev->aggregated_cs_counters;
 
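+       /* try a non-sleeping allocation first; retry with GFP_KERNEL if it fails */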
        cs = kzalloc(sizeof(*cs), GFP_ATOMIC);
+       if (!cs)
+               cs = kzalloc(sizeof(*cs), GFP_KERNEL);
+
        if (!cs) {
                atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
                atomic64_inc(&cntr->out_of_mem_drop_cnt);
        spin_lock_init(&cs->job_lock);
 
        cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_ATOMIC);
+       if (!cs_cmpl)
+               cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_KERNEL);
+
        if (!cs_cmpl) {
                atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
                atomic64_inc(&cntr->out_of_mem_drop_cnt);
 
        cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
                        sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC);
+       if (!cs->jobs_in_queue_cnt)
+               cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
+                               sizeof(*cs->jobs_in_queue_cnt), GFP_KERNEL);
+
        if (!cs->jobs_in_queue_cnt) {
                atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
                atomic64_inc(&cntr->out_of_mem_drop_cnt);
        struct hl_cs_job *job;
 
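+       /* prefer GFP_ATOMIC here; the GFP_KERNEL fallback below may sleep */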
        job = kzalloc(sizeof(*job), GFP_ATOMIC);
+       if (!job)
+               job = kzalloc(sizeof(*job), GFP_KERNEL);
+
        if (!job)
                return NULL;
 
 
        *cs_chunk_array = kmalloc_array(num_chunks, sizeof(**cs_chunk_array),
                                        GFP_ATOMIC);
+       if (!*cs_chunk_array)
+               *cs_chunk_array = kmalloc_array(num_chunks,
+                                       sizeof(**cs_chunk_array), GFP_KERNEL);
        if (!*cs_chunk_array) {
                atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
                atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
        signal_seq_arr = kmalloc_array(signal_seq_arr_len,
                                        sizeof(*signal_seq_arr),
                                        GFP_ATOMIC);
+       if (!signal_seq_arr)
+               signal_seq_arr = kmalloc_array(signal_seq_arr_len,
+                                       sizeof(*signal_seq_arr),
+                                       GFP_KERNEL);
        if (!signal_seq_arr) {
                atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
                atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
 
        hl_ctx_get(hdev, ctx);
 
-       pend = kmalloc(sizeof(*pend), GFP_ATOMIC);
+       pend = kmalloc(sizeof(*pend), GFP_KERNEL);
        if (!pend) {
                hl_ctx_put(ctx);
                return -ENOMEM;
 
        if (hdev->asic_prop.completion_queues_count) {
                hdev->cq_wq = kcalloc(hdev->asic_prop.completion_queues_count,
                                sizeof(*hdev->cq_wq),
-                               GFP_ATOMIC);
+                               GFP_KERNEL);
                if (!hdev->cq_wq) {
                        rc = -ENOMEM;
                        goto asid_fini;
 
 
+       /* GFP_KERNEL can sleep, so pre-allocate the idr node outside the lock */
+       idr_preload(GFP_KERNEL);
        spin_lock(&vm->idr_lock);
        handle = idr_alloc(&vm->phys_pg_pack_handles, phys_pg_pack, 1, 0,
-                               GFP_ATOMIC);
+                               GFP_NOWAIT);
        spin_unlock(&vm->idr_lock);
+       idr_preload_end();
 
        if (handle < 0) {
 
        rc = sg_alloc_table_from_pages(userptr->sgt,
                                       userptr->pages,
-                                      npages, offset, size, GFP_ATOMIC);
+                                      npages, offset, size, GFP_KERNEL);
        if (rc < 0) {
                dev_err(hdev->dev, "failed to create SG table from pages\n");
                goto put_pages;
                return -EINVAL;
        }
 
-       /*
-        * This function can be called also from data path, hence use atomic
-        * always as it is not a big allocation.
-        */
-       userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_ATOMIC);
+       userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_KERNEL);
        if (!userptr->sgt)
                return -ENOMEM;
 
 
                        parser->job_userptr_list, &userptr))
                goto already_pinned;
 
-       userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC);
+       userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
        if (!userptr)
                return -ENOMEM;
 
 
                        parser->job_userptr_list, &userptr))
                goto already_pinned;
 
-       userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC);
+       userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
        if (!userptr)
                return -ENOMEM;