PCI: Expose PCIe Resizable BAR support via sysfs
author Alex Williamson <alex.williamson@redhat.com>
Fri, 16 Sep 2022 20:44:48 +0000 (14:44 -0600)
committer Bjorn Helgaas <bhelgaas@google.com>
Wed, 5 Oct 2022 17:21:02 +0000 (12:21 -0500)
Add a simple sysfs interface to Resizable BAR support, largely for the
purposes of assigning such devices to a VM through VFIO.  Resizable BARs
present a difficult feature to expose to a VM through emulation, as
resizing a BAR is done on the host.  It can fail, and often does, but we
have no means via emulation of a PCIe REBAR capability to handle the error
cases.

A vfio-pci specific ioctl interface is also cumbersome as there are often
multiple devices within the same bridge aperture and handling them is a
challenge.  In the interface proposed here, expanding a BAR potentially
requires such devices to be soft-removed during the resize operation and
rescanned after, in order for all the necessary resources to be released.
A pci-sysfs interface is also more universal than a vfio specific
interface.

Please see the ABI documentation update for usage.

Link: https://lore.kernel.org/r/166336088796.3597940.14973499936692558556.stgit@omen
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Cc: Krzysztof Wilczyński <kw@linux.com>
Documentation/ABI/testing/sysfs-bus-pci
drivers/pci/pci-sysfs.c

index 6fc2c2efe8ab2655c9ce697865a45a0aaf1ec798..840727fc75dcf90347455225ca1df58dc80400ea 100644 (file)
@@ -457,3 +457,36 @@ Description:
 
                The file is writable if the PF is bound to a driver that
                implements ->sriov_set_msix_vec_count().
+
+What:          /sys/bus/pci/devices/.../resourceN_resize
+Date:          September 2022
+Contact:       Alex Williamson <alex.williamson@redhat.com>
+Description:
+               These files provide an interface to PCIe Resizable BAR support.
+               A file is created for each BAR resource (N) supported by the
+               PCIe Resizable BAR extended capability of the device.  Reading
+               each file exposes the bitmap of available resource sizes:
+
+               # cat resource1_resize
+               00000000000001c0
+
+               The bitmap represents supported resource sizes for the BAR,
+               where bit0 = 1MB, bit1 = 2MB, bit2 = 4MB, etc.  In the above
+               example the device supports 64MB, 128MB, and 256MB BAR sizes.
+
+               When writing the file, the user provides the bit position of
+               the desired resource size, for example:
+
+               # echo 7 > resource1_resize
+
+               This requests the size corresponding to bit 7, i.e. 128MB.
+               The resulting size in bytes is 2 ^ (bit# + 20).  This encoding
+               matches the PCIe specification of this capability.
+
+               In order to make use of resource resizing, all PCI drivers must
+               be unbound from the device, and peer devices under the same
+               parent bridge may need to be soft removed.  In the case of
+               VGA devices, writing a resize value will remove low level
+               console drivers from the device.  Raw users of pci-sysfs
+               resourceN attributes must be terminated prior to resizing.
+               Success of the resizing operation is not guaranteed.
index fc804e08e3cb59672b7bc851cac016a864dfff0a..0a2eeb82cebde8149c9c9b46bf2d72dfbae132ca 100644 (file)
@@ -28,6 +28,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/msi.h>
 #include <linux/of.h>
+#include <linux/aperture.h>
 #include "pci.h"
 
 static int sysfs_initialized;  /* = 0 */
@@ -1373,6 +1374,112 @@ static const struct attribute_group pci_dev_reset_attr_group = {
        .is_visible = pci_dev_reset_attr_is_visible,
 };
 
+#define pci_dev_resource_resize_attr(n) /* resource<n>_resize show/store + attr */ \
+static ssize_t resource##n##_resize_show(struct device *dev,           \
+                                        struct device_attribute *attr, \
+                                        char * buf)                    \
+{                                                                      \
+       struct pci_dev *pdev = to_pci_dev(dev);                         \
+       ssize_t ret;                                                    \
+       /* The size query is bracketed by the config PM get/put pair. */ \
+       pci_config_pm_runtime_get(pdev);                                \
+       /* Print the possible-sizes bitmap as 16 zero-padded hex digits. */ \
+       ret = sysfs_emit(buf, "%016llx\n",                              \
+                        (u64)pci_rebar_get_possible_sizes(pdev, n));   \
+                                                                       \
+       pci_config_pm_runtime_put(pdev);                                \
+                                                                       \
+       return ret;                                                     \
+}                                                                      \
+/* Store: parse the written value and pass it to pci_resize_resource() for BAR n. */ \
+static ssize_t resource##n##_resize_store(struct device *dev,          \
+                                         struct device_attribute *attr,\
+                                         const char *buf, size_t count)\
+{                                                                      \
+       struct pci_dev *pdev = to_pci_dev(dev);                         \
+       unsigned long size, flags;                                      \
+       int ret, i;                                                     \
+       u16 cmd;                                                        \
+       /* Parse with base 0; any parse failure is reported as -EINVAL. */ \
+       if (kstrtoul(buf, 0, &size) < 0)                                \
+               return -EINVAL;                                         \
+       /* Under device_lock(): refuse with -EBUSY while a driver is bound. */ \
+       device_lock(dev);                                               \
+       if (dev->driver) {                                              \
+               ret = -EBUSY;                                           \
+               goto unlock;                                            \
+       }                                                               \
+                                                                       \
+       pci_config_pm_runtime_get(pdev);                                \
+       /* VGA class only: remove conflicting aperture users, bail out on error. */ \
+       if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) {              \
+               ret = aperture_remove_conflicting_pci_devices(pdev,     \
+                                               "resourceN_resize");    \
+               if (ret)                                                \
+                       goto pm_put;                                    \
+       }                                                               \
+       /* Clear PCI_COMMAND_MEMORY for now; cmd keeps the value read here. */ \
+       pci_read_config_word(pdev, PCI_COMMAND, &cmd);                  \
+       pci_write_config_word(pdev, PCI_COMMAND,                        \
+                             cmd & ~PCI_COMMAND_MEMORY);               \
+       /* Snapshot BAR n's flags for the release loop further down. */ \
+       flags = pci_resource_flags(pdev, n);                            \
+                                                                       \
+       pci_remove_resource_files(pdev);                                \
+       /* Release every nonzero-length BAR whose flags equal BAR n's flags. */ \
+       for (i = 0; i < PCI_STD_NUM_BARS; i++) {                        \
+               if (pci_resource_len(pdev, i) &&                        \
+                   pci_resource_flags(pdev, i) == flags)               \
+                       pci_release_resource(pdev, i);                  \
+       }                                                               \
+       /* ret from the resize is the status the write finally reports. */ \
+       ret = pci_resize_resource(pdev, n, size);                       \
+       /* Everything below runs whether or not the resize succeeded. */ \
+       pci_assign_unassigned_bus_resources(pdev->bus);                 \
+                                                                       \
+       if (pci_create_resource_files(pdev))                            \
+               pci_warn(pdev, "Failed to recreate resource files after BAR resizing\n");\
+       /* Write back the PCI_COMMAND value that was read before the resize. */ \
+       pci_write_config_word(pdev, PCI_COMMAND, cmd);                  \
+pm_put:                                                                        \
+       pci_config_pm_runtime_put(pdev);                                \
+unlock:                                                                        \
+       device_unlock(dev);                                             \
+       /* A nonzero ret is returned as-is; otherwise return count. */  \
+       return ret ? ret : count;                                       \
+}                                                                      \
+static DEVICE_ATTR_RW(resource##n##_resize)
+/* Instantiate the show/store pair and device attribute for n = 0..5. */
+pci_dev_resource_resize_attr(0);
+pci_dev_resource_resize_attr(1);
+pci_dev_resource_resize_attr(2);
+pci_dev_resource_resize_attr(3);
+pci_dev_resource_resize_attr(4);
+pci_dev_resource_resize_attr(5);
+
+static struct attribute *resource_resize_attrs[] = { /* slot i holds resource<i>_resize */
+       [0] = &dev_attr_resource0_resize.attr,
+       [1] = &dev_attr_resource1_resize.attr,
+       [2] = &dev_attr_resource2_resize.attr,
+       [3] = &dev_attr_resource3_resize.attr,
+       [4] = &dev_attr_resource4_resize.attr,
+       [5] = &dev_attr_resource5_resize.attr,
+       NULL,
+};
+
+static umode_t resource_resize_is_visible(struct kobject *kobj,
+                                         struct attribute *a, int n)
+{
+       struct device *dev = kobj_to_dev(kobj);
+       /* Mode 0 when pci_rebar_get_current_size() is negative for index n. */
+       return pci_rebar_get_current_size(to_pci_dev(dev), n) >= 0 ? a->mode : 0;
+}
+
+static const struct attribute_group pci_dev_resource_resize_group = {
+       .is_visible = resource_resize_is_visible, /* per-attribute mode filter */
+       .attrs = resource_resize_attrs,
+};
+
 int __must_check pci_create_sysfs_dev_files(struct pci_dev *pdev)
 {
        if (!sysfs_initialized)
@@ -1494,6 +1601,7 @@ const struct attribute_group *pci_dev_groups[] = {
 #ifdef CONFIG_ACPI
        &pci_dev_acpi_attr_group,
 #endif
+       &pci_dev_resource_resize_group,
        NULL,
 };