x86/PCI: Add kernel cmdline options to use/ignore E820 reserved regions
author Hans de Goede <hdegoede@redhat.com>
Thu, 19 May 2022 15:21:48 +0000 (17:21 +0200)
committer Bjorn Helgaas <bhelgaas@google.com>
Thu, 19 May 2022 19:26:55 +0000 (14:26 -0500)
Some firmware supplies PCI host bridge _CRS that includes address space
unusable by PCI devices, e.g., space occupied by host bridge registers or
used by hidden PCI devices.

To avoid this unusable space, Linux currently excludes E820 reserved
regions from _CRS windows; see 4dc2287c1805 ("x86: avoid E820 regions when
allocating address space").

However, this use of E820 reserved regions to clip things out of _CRS is
not supported by ACPI, UEFI, or PCI Firmware specs, and some systems have
E820 reserved regions that cover the entire memory window from _CRS.
4dc2287c1805 clips the entire window, leaving no space for hot-added or
uninitialized PCI devices.

For example, from a Lenovo IdeaPad 3 15IIL 81WE:

  BIOS-e820: [mem 0x4bc50000-0xcfffffff] reserved
  pci_bus 0000:00: root bus resource [mem 0x65400000-0xbfffffff window]
  pci 0000:00:15.0: BAR 0: [mem 0x00000000-0x00000fff 64bit]
  pci 0000:00:15.0: BAR 0: no space for [mem size 0x00001000 64bit]

Future patches will add quirks to enable/disable E820 clipping
automatically.

Add a "pci=no_e820" kernel command line option to disable clipping with
E820 reserved regions.  Also add a matching "pci=use_e820" option to enable
clipping with E820 reserved regions if that has been disabled by default by
further patches in this patch-set.

Both options taint the kernel because they are intended for debugging and
workaround purposes until a quirk can set them automatically.

[bhelgaas: commit log, add printk]
Link: https://bugzilla.redhat.com/show_bug.cgi?id=1868899
Link: https://lore.kernel.org/r/20220519152150.6135-2-hdegoede@redhat.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Benoit Grégoire <benoitg@coeus.ca>
Cc: Hui Wang <hui.wang@canonical.com>
Documentation/admin-guide/kernel-parameters.txt
arch/x86/include/asm/pci_x86.h
arch/x86/pci/acpi.c
arch/x86/pci/common.c

index 3f1cc5e317ed4a5ad001082c9c589b6008f68db9..2477b639d5c4f06a8d081418c446cdf04f0e92c5 100644 (file)
                                please report a bug.
                nocrs           [X86] Ignore PCI host bridge windows from ACPI.
                                If you need to use this, please report a bug.
+               use_e820        [X86] Use E820 reservations to exclude parts of
+                               PCI host bridge windows. This is a workaround
+                               for BIOS defects in host bridge _CRS methods.
+                               If you need to use this, please report a bug to
+                               <linux-pci@vger.kernel.org>.
+               no_e820         [X86] Ignore E820 reservations for PCI host
+                               bridge windows. This is the default on modern
+                               hardware. If you need to use this, please report
+                               a bug to <linux-pci@vger.kernel.org>.
                routeirq        Do IRQ routing for all PCI devices.
                                This is normally done in pci_enable_device(),
                                so this option is a temporary workaround
index a0627dfae5412a0b01fb57804fa2e6db454a7134..ce3fd3311772b5a504690c167fc2f4fb50b8713b 100644 (file)
@@ -42,6 +42,8 @@ do {                                          \
 #define PCI_ROOT_NO_CRS                0x100000
 #define PCI_NOASSIGN_BARS      0x200000
 #define PCI_BIG_ROOT_WINDOW    0x400000
+#define PCI_USE_E820           0x800000
+#define PCI_NO_E820            0x1000000
 
 extern unsigned int pci_probe;
 extern unsigned long pirq_table_addr;
index 562c81a51ea02022718831e52bc581fe4b384d05..c61c815efedb2df7f1ad127be999726e8f3f73ec 100644 (file)
@@ -20,6 +20,7 @@ struct pci_root_info {
 #endif
 };
 
+static bool pci_use_e820 = true;
 static bool pci_use_crs = true;
 static bool pci_ignore_seg;
 
@@ -161,6 +162,17 @@ void __init pci_acpi_crs_quirks(void)
               "if necessary, use \"pci=%s\" and report a bug\n",
               pci_use_crs ? "Using" : "Ignoring",
               pci_use_crs ? "nocrs" : "use_crs");
+
+       /* "pci=use_e820"/"pci=no_e820" on the kernel cmdline takes precedence */
+       if (pci_probe & PCI_NO_E820)
+               pci_use_e820 = false;
+       else if (pci_probe & PCI_USE_E820)
+               pci_use_e820 = true;
+
+       printk(KERN_INFO "PCI: %s E820 reservations for host bridge windows\n",
+              pci_use_e820 ? "Using" : "Ignoring");
+       if (pci_probe & (PCI_NO_E820 | PCI_USE_E820))
+               printk(KERN_INFO "PCI: Please notify linux-pci@vger.kernel.org so future kernels can this automatically\n");
 }
 
 #ifdef CONFIG_PCI_MMCONFIG
@@ -301,8 +313,10 @@ static int pci_acpi_root_prepare_resources(struct acpi_pci_root_info *ci)
 
        status = acpi_pci_probe_root_resources(ci);
 
-       resource_list_for_each_entry(entry, &ci->resources)
-               remove_e820_regions(&device->dev, entry->res);
+       if (pci_use_e820) {
+               resource_list_for_each_entry(entry, &ci->resources)
+                       remove_e820_regions(&device->dev, entry->res);
+       }
 
        if (pci_use_crs) {
                resource_list_for_each_entry_safe(entry, tmp, &ci->resources)
index 9e1e6b8d8876313e2972b3634d337e6d266af504..ddb798603201ef79dd6c554d10b746a936ec2a84 100644 (file)
@@ -595,6 +595,14 @@ char *__init pcibios_setup(char *str)
        } else if (!strcmp(str, "nocrs")) {
                pci_probe |= PCI_ROOT_NO_CRS;
                return NULL;
+       } else if (!strcmp(str, "use_e820")) {
+               pci_probe |= PCI_USE_E820;
+               add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
+               return NULL;
+       } else if (!strcmp(str, "no_e820")) {
+               pci_probe |= PCI_NO_E820;
+               add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
+               return NULL;
 #ifdef CONFIG_PHYS_ADDR_T_64BIT
        } else if (!strcmp(str, "big_root_window")) {
                pci_probe |= PCI_BIG_ROOT_WINDOW;