struct ib_pd *pd)
 {
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
+       bool ro_pci_enabled = pcie_relaxed_ordering_enabled(dev->mdev->pdev);
 
        MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
        MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
 
        if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write))
                MLX5_SET(mkc, mkc, relaxed_ordering_write,
-                        !!(acc & IB_ACCESS_RELAXED_ORDERING));
+                        (acc & IB_ACCESS_RELAXED_ORDERING) && ro_pci_enabled);
        if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read))
                MLX5_SET(mkc, mkc, relaxed_ordering_read,
-                        !!(acc & IB_ACCESS_RELAXED_ORDERING));
+                        (acc & IB_ACCESS_RELAXED_ORDERING) && ro_pci_enabled);
 
        MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
        MLX5_SET(mkc, mkc, qpn, 0xffffff);
 
        MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
        MLX5_SET(mkc, mkc, length64, 1);
-       set_mkc_access_pd_addr_fields(mkc, acc, 0, pd);
+       set_mkc_access_pd_addr_fields(mkc, acc | IB_ACCESS_RELAXED_ORDERING, 0,
+                                     pd);
 
        err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
        if (err)
        mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
 
        /* This is only used from the kernel, so setting the PD is OK. */
-       set_mkc_access_pd_addr_fields(mkc, 0, 0, pd);
+       set_mkc_access_pd_addr_fields(mkc, IB_ACCESS_RELAXED_ORDERING, 0, pd);
        MLX5_SET(mkc, mkc, free, 1);
        MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
        MLX5_SET(mkc, mkc, access_mode_1_0, access_mode & 0x3);
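The hunks above encode a single policy: the mkey's relaxed_ordering_write and
relaxed_ordering_read bits are set only when the HCA reports the capability,
the caller passed IB_ACCESS_RELAXED_ORDERING, and relaxed ordering is enabled
on the PCIe device. Below is a minimal sketch of that predicate; the helper
name and the cap_ro parameter (standing in for the MLX5_CAP_GEN() bits) are
illustrative only and not part of the patch:

#include <linux/pci.h>
#include <rdma/ib_verbs.h>

/*
 * Illustrative only: the effective condition under which the hunks above
 * leave a relaxed-ordering bit set in the mkey context.
 */
static inline bool mkey_relaxed_ordering(bool cap_ro, int access_flags,
					 struct pci_dev *pdev)
{
	return cap_ro &&					/* HCA capability */
	       (access_flags & IB_ACCESS_RELAXED_ORDERING) &&	/* caller opted in */
	       pcie_relaxed_ordering_enabled(pdev);		/* PCIe RO enabled */
}

For the kernel-only mkeys above (the DMA MR and the "only used from the
kernel" descriptor mkey), acc is OR'ed with IB_ACCESS_RELAXED_ORDERING before
set_mkc_access_pd_addr_fields() is called, so for them the outcome depends
only on the device capability and the PCIe configuration.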
 
                         enum ib_uverbs_advise_mr_advice advice, u32 flags,
                         struct ib_sge *sg_list, u32 num_sge,
                         struct uverbs_attr_bundle *attrs);
+
+       /*
+        * Kernel users should universally support relaxed ordering (RO), as
+        * they are designed to read data only after observing the CQE and use
+        * the DMA API correctly.
+        *
+        * Some drivers implicitly enable RO if the platform supports it.
+        */
        int (*map_mr_sg)(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
                         unsigned int *sg_offset);
        int (*check_mr_status)(struct ib_mr *mr, u32 check_mask,