if (wr->opcode == IB_WR_LSO) {
                struct ib_ud_wr *ud_wr = container_of(wr, struct ib_ud_wr, wr);
-               int size_of_inl_hdr_start = sizeof(eseg->inline_hdr_start);
+               int size_of_inl_hdr_start = sizeof(eseg->inline_hdr.start);
                u64 left, leftlen, copysz;
                void *pdata = ud_wr->header;
 
                left = ud_wr->hlen;
                eseg->mss = cpu_to_be16(ud_wr->mss);
-               eseg->inline_hdr_sz = cpu_to_be16(left);
+               eseg->inline_hdr.sz = cpu_to_be16(left);
 
                /*
                 * check if there is space till the end of queue, if yes,
                 * copy all in one shot, otherwise copy till the end of queue,
                 * rollback and than the copy the left
                 */
-               leftlen = qend - (void *)eseg->inline_hdr_start;
+               leftlen = qend - (void *)eseg->inline_hdr.start;
                copysz = min_t(u64, leftlen, left);
 
                memcpy(seg - size_of_inl_hdr_start, pdata, copysz);
 
        memset(wqe, 0, sizeof(*wqe));
 
        /* copy the inline part */
-       memcpy(eseg->inline_hdr_start, xdp->data, MLX5E_XDP_MIN_INLINE);
-       eseg->inline_hdr_sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE);
+       memcpy(eseg->inline_hdr.start, xdp->data, MLX5E_XDP_MIN_INLINE);
+       eseg->inline_hdr.sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE);
 
        dseg = (struct mlx5_wqe_data_seg *)cseg + (MLX5E_XDP_TX_DS_COUNT - 1);
 
 
        wi->num_bytes = num_bytes;
 
        if (skb_vlan_tag_present(skb)) {
-               mlx5e_insert_vlan(eseg->inline_hdr_start, skb, ihs, &skb_data,
+               mlx5e_insert_vlan(eseg->inline_hdr.start, skb, ihs, &skb_data,
                                  &skb_len);
                ihs += VLAN_HLEN;
        } else {
-               memcpy(eseg->inline_hdr_start, skb_data, ihs);
+               memcpy(eseg->inline_hdr.start, skb_data, ihs);
                mlx5e_tx_skb_pull_inline(&skb_data, &skb_len, ihs);
        }
 
-       eseg->inline_hdr_sz = cpu_to_be16(ihs);
+       eseg->inline_hdr.sz = cpu_to_be16(ihs);
 
        ds_cnt  = sizeof(*wqe) / MLX5_SEND_WQE_DS;
-       ds_cnt += DIV_ROUND_UP(ihs - sizeof(eseg->inline_hdr_start),
+       ds_cnt += DIV_ROUND_UP(ihs - sizeof(eseg->inline_hdr.start),
                               MLX5_SEND_WQE_DS);
        dseg    = (struct mlx5_wqe_data_seg *)cseg + ds_cnt;
 
 
        u8         lro_cap[0x1];
        u8         lro_psh_flag[0x1];
        u8         lro_time_stamp[0x1];
-       u8         reserved_at_5[0x3];
+       u8         reserved_at_5[0x2];
+       u8         wqe_vlan_insert[0x1];
        u8         self_lb_en_modifiable[0x1];
        u8         reserved_at_9[0x2];
        u8         max_lso_cap[0x5];
 
        MLX5_ETH_WQE_L4_CSUM            = 1 << 7,
 };
 
+enum {
+       MLX5_ETH_WQE_INSERT_VLAN        = 1 << 15,
+};
+
 struct mlx5_wqe_eth_seg {
        u8              rsvd0[4];
        u8              cs_flags;
        u8              rsvd1;
        __be16          mss;
        __be32          rsvd2;
-       __be16          inline_hdr_sz;
-       u8              inline_hdr_start[2];
+       union {
+               struct {
+                       __be16 sz;
+                       u8     start[2];
+               } inline_hdr;
+               struct {
+                       __be16 type;
+                       __be16 vlan_tci;
+               } insert;
+       };
 };
 
 struct mlx5_wqe_xrc_seg {