#define HL_CS_FLAGS_TYPE_MASK  (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \
                        HL_CS_FLAGS_COLLECTIVE_WAIT | HL_CS_FLAGS_RESERVE_SIGNALS_ONLY | \
-                       HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY)
+                       HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY | HL_CS_FLAGS_ENGINE_CORE_COMMAND)
 
 
 #define MAX_TS_ITER_NUM 10
                return CS_RESERVE_SIGNALS;
        else if (cs_type_flags & HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY)
                return CS_UNRESERVE_SIGNALS;
+       else if (cs_type_flags & HL_CS_FLAGS_ENGINE_CORE_COMMAND)
+               return CS_TYPE_ENGINE_CORE;
        else
                return CS_TYPE_DEFAULT;
 }
        return rc;
 }
 
+static int cs_ioctl_engine_cores(struct hl_fpriv *hpriv, u64 engine_cores,
+                                               u32 num_engine_cores, u32 core_command)
+{
+       int rc;
+       struct hl_device *hdev = hpriv->hdev;
+       void __user *engine_cores_arr;
+       u32 *cores;
+
+       if (!num_engine_cores || num_engine_cores > hdev->asic_prop.num_engine_cores) {
+               dev_err(hdev->dev, "Number of engine cores %u is invalid\n", num_engine_cores);
+               return -EINVAL;
+       }
+
+       if (core_command != HL_ENGINE_CORE_RUN && core_command != HL_ENGINE_CORE_HALT) {
+               dev_err(hdev->dev, "Engine core command is invalid\n");
+               return -EINVAL;
+       }
+
+       engine_cores_arr = (void __user *) (uintptr_t) engine_cores;
+       cores = kmalloc_array(num_engine_cores, sizeof(u32), GFP_KERNEL);
+       if (!cores)
+               return -ENOMEM;
+
+       if (copy_from_user(cores, engine_cores_arr, num_engine_cores * sizeof(u32))) {
+               dev_err(hdev->dev, "Failed to copy core-ids array from user\n");
+               kfree(cores);
+               return -EFAULT;
+       }
+
+       rc = hdev->asic_funcs->set_engine_cores(hdev, cores, num_engine_cores, core_command);
+       kfree(cores);
+
+       return rc;
+}
+
 int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
 {
        union hl_cs_args *args = data;
                rc = cs_ioctl_unreserve_signals(hpriv,
                                        args->in.encaps_sig_handle_id);
                break;
+       case CS_TYPE_ENGINE_CORE:
+               rc = cs_ioctl_engine_cores(hpriv, args->in.engine_cores,
+                               args->in.num_engine_cores, args->in.core_command);
+               break;
        default:
                rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq,
                                                args->in.cs_flags,
 
        CS_TYPE_WAIT,
        CS_TYPE_COLLECTIVE_WAIT,
        CS_RESERVE_SIGNALS,
-       CS_UNRESERVE_SIGNALS
+       CS_UNRESERVE_SIGNALS,
+       CS_TYPE_ENGINE_CORE
 };
 
 /*
  *                                      which the property supports_user_set_page_size is true
  *                                      (i.e. the DRAM supports multiple page sizes), otherwise
  *                                      it shall be equal to dram_page_size.
+ * @num_engine_cores: number of engine CPU cores
  * @collective_first_sob: first sync object available for collective use
  * @collective_first_mon: first monitor available for collective use
  * @sync_stream_first_sob: first sync object available for sync stream use
        u32                             faulty_dram_cluster_map;
        u32                             xbar_edge_enabled_mask;
        u32                             device_mem_alloc_default_page_size;
+       u32                             num_engine_cores;
        u16                             collective_first_sob;
        u16                             collective_first_mon;
        u16                             sync_stream_first_sob;
  * @check_if_razwi_happened: check if there was a razwi due to RR violation.
  * @access_dev_mem: access device memory
  * @set_dram_bar_base: set the base of the DRAM BAR
+ * @set_engine_cores: send a run/halt command to engine cores
  */
 struct hl_asic_funcs {
        int (*early_init)(struct hl_device *hdev);
        int (*access_dev_mem)(struct hl_device *hdev, enum pci_region region_type,
                                u64 addr, u64 *val, enum debugfs_access_type acc_type);
        u64 (*set_dram_bar_base)(struct hl_device *hdev, u64 addr);
+       int (*set_engine_cores)(struct hl_device *hdev, u32 *core_ids,
+                                       u32 num_cores, u32 core_command);
 };
 
 
 
                prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END;
        }
 
+       prop->num_engine_cores = CPU_ID_MAX;
        prop->cfg_size = CFG_SIZE;
        prop->max_asid = MAX_ASID;
        prop->num_of_events = GAUDI2_EVENT_SIZE;
        gaudi2_stop_pcie_dec(hdev);
 }
 
-static void gaudi2_halt_arc(struct hl_device *hdev, u32 cpu_id)
+static void gaudi2_set_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode)
 {
        u32 reg_base, reg_val;
 
        reg_base = gaudi2_arc_blocks_bases[cpu_id];
+       if (run_mode == HL_ENGINE_CORE_RUN)
+               reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 1);
+       else
+               reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_HALT_REQ_MASK, 1);
 
-       /* Halt ARC */
-       reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_HALT_REQ_MASK, 1);
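+       /* Request the new mode; the ack is polled separately in gaudi2_verify_arc_running_mode() */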
        WREG32(reg_base + ARC_HALT_REQ_OFFSET, reg_val);
 }
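/*
 * Aside (not part of the patch): FIELD_PREP() from <linux/bitfield.h> shifts
 * a value into the field described by a constant mask, e.g.
 * FIELD_PREP(GENMASK(4, 4), 1) evaluates to BIT(4). With the single-bit REQ
 * masks above it simply sets the matching request bit.
 */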
 
 
        for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++) {
                if (gaudi2_is_arc_enabled(hdev, arc_id))
-                       gaudi2_halt_arc(hdev, arc_id);
+                       gaudi2_set_arc_running_mode(hdev, arc_id, HL_ENGINE_CORE_HALT);
        }
 }
 
+static int gaudi2_verify_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode)
+{
+       int rc;
+       u32 reg_base, val, ack_mask, timeout_usec = 100000;
+
+       if (hdev->pldm)
+               timeout_usec *= 100;
+
+       reg_base = gaudi2_arc_blocks_bases[cpu_id];
+       if (run_mode == HL_ENGINE_CORE_RUN)
+               ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_RUN_ACK_MASK;
+       else
+               ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_HALT_ACK_MASK;
+
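+       /* Poll for the ack; hl_poll_timeout() returns -ETIMEDOUT if it is not seen in time */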
+       rc = hl_poll_timeout(hdev, reg_base + ARC_HALT_ACK_OFFSET,
+                               val, ((val & ack_mask) == ack_mask),
+                               1000, timeout_usec);
+
+       if (!rc) {
+               /* Clear the request (writing 0 deasserts both run and halt requests) */
+               val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 0);
+               WREG32(reg_base + ARC_HALT_REQ_OFFSET, val);
+       }
+
+       return rc;
+}
+
 static void gaudi2_reset_arcs(struct hl_device *hdev)
 {
        struct gaudi2_device *gaudi2 = hdev->asic_specific;
 
        queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
 
-       for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN)
+       for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
+               if (!(hdev->nic_ports_mask & BIT(i)))
+                       continue;
+
                gaudi2_qman_manual_flush_common(hdev, queue_id);
+       }
+}
+
+static int gaudi2_set_engine_cores(struct hl_device *hdev, u32 *core_ids,
+                                       u32 num_cores, u32 core_command)
+{
+       int i, rc;
+
+       for (i = 0 ; i < num_cores ; i++) {
+               if (gaudi2_is_arc_enabled(hdev, core_ids[i]))
+                       gaudi2_set_arc_running_mode(hdev, core_ids[i], core_command);
+       }
+
+       for (i = 0 ; i < num_cores ; i++) {
+               if (gaudi2_is_arc_enabled(hdev, core_ids[i])) {
+                       rc = gaudi2_verify_arc_running_mode(hdev, core_ids[i], core_command);
+
+                       if (rc) {
+                               dev_err(hdev->dev, "failed to %s ARC %u\n",
+                                       (core_command == HL_ENGINE_CORE_HALT) ?
+                                       "HALT" : "RUN", core_ids[i]);
+                               return rc;
+                       }
+               }
+       }
+
+       return 0;
 }
 
 static void gaudi2_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
        .mmu_get_real_page_size = gaudi2_mmu_get_real_page_size,
        .access_dev_mem = hl_access_dev_mem,
        .set_dram_bar_base = gaudi2_set_hbm_bar_base,
+       .set_engine_cores = gaudi2_set_engine_cores,
 };
 
 void gaudi2_set_asic_funcs(struct hl_device *hdev)
 
 #define SFT_IF_RTR_OFFSET      (mmSFT0_HBW_RTR_IF1_RTR_H3_BASE - mmSFT0_HBW_RTR_IF0_RTR_H3_BASE)
 
 #define ARC_HALT_REQ_OFFSET    (mmARC_FARM_ARC0_AUX_RUN_HALT_REQ - mmARC_FARM_ARC0_AUX_BASE)
+#define ARC_HALT_ACK_OFFSET    (mmARC_FARM_ARC0_AUX_RUN_HALT_ACK - mmARC_FARM_ARC0_AUX_BASE)
 
 #define ARC_REGION_CFG_OFFSET(region) \
        (mmARC_FARM_ARC0_AUX_ARC_REGION_CFG_0 + (region * 4) - mmARC_FARM_ARC0_AUX_BASE)
 
 #define HL_CS_FLAGS_RESERVE_SIGNALS_ONLY       0x1000
 #define HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY     0x2000
 
+/*
+ * The engine cores command is integrated into the existing CS ioctl rather
+ * than a dedicated ioctl. Set this flag to send a run/halt command to the
+ * engine cores.
+ */
+#define HL_CS_FLAGS_ENGINE_CORE_COMMAND                0x4000
+
 #define HL_CS_STATUS_SUCCESS           0
 
 #define HL_MAX_JOBS_PER_CS             512
 
+/* HL_ENGINE_CORE_ values
+ *
+ * HL_ENGINE_CORE_HALT: engine core halt
+ * HL_ENGINE_CORE_RUN:  engine core run
+ */
+#define HL_ENGINE_CORE_HALT    (1 << 0)
+#define HL_ENGINE_CORE_RUN     (1 << 1)
+
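/*
 * A minimal userspace sketch (illustration only, not part of the patch):
 * halting two engine cores through the existing CS ioctl. It assumes an fd
 * obtained by opening the device node and elides error handling; the fields
 * used here belong to struct hl_cs_in, defined right below.
 */
	__u32 cores[2] = {0, 1};	/* device-specific engine-core ids */
	union hl_cs_args args = {0};

	args.in.engine_cores = (__u64) (uintptr_t) cores;
	args.in.num_engine_cores = 2;
	args.in.core_command = HL_ENGINE_CORE_HALT;
	args.in.cs_flags = HL_CS_FLAGS_ENGINE_CORE_COMMAND;

	rc = ioctl(fd, HL_IOCTL_CS, &args);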
 struct hl_cs_in {
 
-       /* this holds address of array of hl_cs_chunk for restore phase */
-       __u64 chunks_restore;
+       union {
+               struct {
+                       /* this holds address of array of hl_cs_chunk for restore phase */
+                       __u64 chunks_restore;
 
-       /* holds address of array of hl_cs_chunk for execution phase */
-       __u64 chunks_execute;
+                       /* holds address of array of hl_cs_chunk for execution phase */
+                       __u64 chunks_execute;
+               };
+
+               /* Valid only when HL_CS_FLAGS_ENGINE_CORE_COMMAND is set */
+               struct {
+                       /* this holds address of array of uint32 for engine_cores */
+                       __u64 engine_cores;
+
+                       /* number of engine cores in engine_cores array */
+                       __u32 num_engine_cores;
+
+                       /* the core command to be sent towards engine cores */
+                       __u32 core_command;
+               };
+       };
 
        union {
                /*