summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMark Brown <broonie@kernel.org>2022-08-04 15:58:19 +0100
committerMark Brown <broonie@kernel.org>2022-08-04 15:58:19 +0100
commit0d9ff2133e3969392b7a9fd594cf9f63abf54011 (patch)
tree97b5fe98b9200d55603c2450991c4dad5236cf0a
parent8e05604f86b8fe049cd138f59cd7e08465718b73 (diff)
parent273aaa24369cb8d0f246bb16f7122b91a1ef5188 (diff)
Merge branch 'driver-core-next' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core.git
-rw-r--r--Documentation/ABI/stable/sysfs-module2
-rw-r--r--Documentation/ABI/testing/sysfs-class-pwm2
-rw-r--r--Documentation/ABI/testing/sysfs-class-rtrs-client2
-rw-r--r--Documentation/ABI/testing/sysfs-class-rtrs-server2
-rw-r--r--Documentation/ABI/testing/sysfs-devices-platform-ACPI-TAD2
-rw-r--r--Documentation/ABI/testing/sysfs-devices-power2
-rw-r--r--Documentation/ABI/testing/sysfs-devices-soc14
-rw-r--r--Documentation/ABI/testing/sysfs-devices-system-cpu7
-rw-r--r--Documentation/driver-api/firmware/core.rst1
-rw-r--r--Documentation/driver-api/firmware/firmware-usage-guidelines.rst44
-rw-r--r--Documentation/process/embargoed-hardware-issues.rst5
-rw-r--r--Documentation/translations/zh_CN/process/embargoed-hardware-issues.rst2
-rw-r--r--Documentation/translations/zh_TW/process/embargoed-hardware-issues.rst2
-rw-r--r--MAINTAINERS4
-rw-r--r--arch/arm64/kernel/topology.c14
-rw-r--r--drivers/acpi/pptt.c142
-rw-r--r--drivers/base/arch_topology.c100
-rw-r--r--drivers/base/base.h1
-rw-r--r--drivers/base/cacheinfo.c145
-rw-r--r--drivers/base/core.c123
-rw-r--r--drivers/base/dd.c59
-rw-r--r--drivers/base/devtmpfs.c1
-rw-r--r--drivers/base/firmware_loader/main.c4
-rw-r--r--drivers/base/firmware_loader/sysfs.c10
-rw-r--r--drivers/base/node.c4
-rw-r--r--drivers/base/power/domain.c2
-rw-r--r--drivers/base/topology.c32
-rw-r--r--drivers/iommu/of_iommu.c2
-rw-r--r--drivers/net/mdio/fwnode_mdio.c4
-rw-r--r--drivers/of/base.c2
-rw-r--r--drivers/pinctrl/devicetree.c2
-rw-r--r--drivers/spi/spi.c9
-rw-r--r--fs/kernfs/dir.c7
-rw-r--r--fs/kernfs/file.c205
-rw-r--r--fs/kernfs/kernfs-internal.h4
-rw-r--r--fs/kernfs/mount.c19
-rw-r--r--include/linux/acpi.h5
-rw-r--r--include/linux/arch_topology.h1
-rw-r--r--include/linux/cacheinfo.h3
-rw-r--r--include/linux/cpumask.h18
-rw-r--r--include/linux/device.h2
-rw-r--r--include/linux/device/driver.h2
-rw-r--r--include/linux/firmware/trusted_foundations.h8
-rw-r--r--include/linux/fwnode.h4
-rw-r--r--include/linux/kernfs.h59
-rw-r--r--lib/Kconfig.debug2
-rw-r--r--net/ipv4/ipconfig.c6
47 files changed, 718 insertions, 374 deletions
diff --git a/Documentation/ABI/stable/sysfs-module b/Documentation/ABI/stable/sysfs-module
index 560b4a3278df..41b1f16e8795 100644
--- a/Documentation/ABI/stable/sysfs-module
+++ b/Documentation/ABI/stable/sysfs-module
@@ -38,7 +38,7 @@ What: /sys/module/<MODULENAME>/srcversion
Date: Jun 2005
Description:
If the module source has MODULE_VERSION, this file will contain
- the checksum of the the source code.
+ the checksum of the source code.
What: /sys/module/<MODULENAME>/version
Date: Jun 2005
diff --git a/Documentation/ABI/testing/sysfs-class-pwm b/Documentation/ABI/testing/sysfs-class-pwm
index 3d65285bcd5f..0638c94d01ef 100644
--- a/Documentation/ABI/testing/sysfs-class-pwm
+++ b/Documentation/ABI/testing/sysfs-class-pwm
@@ -81,7 +81,7 @@ Description:
What: /sys/class/pwm/pwmchip<N>/pwmX/capture
Date: June 2016
KernelVersion: 4.8
-Contact: Lee Jones <lee.jones@linaro.org>
+Contact: Lee Jones <lee@kernel.org>
Description:
Capture information about a PWM signal. The output format is a
pair unsigned integers (period and duty cycle), separated by a
diff --git a/Documentation/ABI/testing/sysfs-class-rtrs-client b/Documentation/ABI/testing/sysfs-class-rtrs-client
index 49a4157c7bf1..fecc59d1b96f 100644
--- a/Documentation/ABI/testing/sysfs-class-rtrs-client
+++ b/Documentation/ABI/testing/sysfs-class-rtrs-client
@@ -78,7 +78,7 @@ What: /sys/class/rtrs-client/<session-name>/paths/<src@dst>/hca_name
Date: Feb 2020
KernelVersion: 5.7
Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
-Description: RO, Contains the the name of HCA the connection established on.
+Description: RO, Contains the name of HCA the connection established on.
What: /sys/class/rtrs-client/<session-name>/paths/<src@dst>/hca_port
Date: Feb 2020
diff --git a/Documentation/ABI/testing/sysfs-class-rtrs-server b/Documentation/ABI/testing/sysfs-class-rtrs-server
index 3b6d5b067df0..b08601d80409 100644
--- a/Documentation/ABI/testing/sysfs-class-rtrs-server
+++ b/Documentation/ABI/testing/sysfs-class-rtrs-server
@@ -24,7 +24,7 @@ What: /sys/class/rtrs-server/<session-name>/paths/<src@dst>/hca_name
Date: Feb 2020
KernelVersion: 5.7
Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
-Description: RO, Contains the the name of HCA the connection established on.
+Description: RO, Contains the name of HCA the connection established on.
What: /sys/class/rtrs-server/<session-name>/paths/<src@dst>/hca_port
Date: Feb 2020
diff --git a/Documentation/ABI/testing/sysfs-devices-platform-ACPI-TAD b/Documentation/ABI/testing/sysfs-devices-platform-ACPI-TAD
index f7b360a61b21..bc44bc903bc8 100644
--- a/Documentation/ABI/testing/sysfs-devices-platform-ACPI-TAD
+++ b/Documentation/ABI/testing/sysfs-devices-platform-ACPI-TAD
@@ -74,7 +74,7 @@ Description:
Reads also cause the AC alarm timer status to be reset.
- Another way to reset the the status of the AC alarm timer is to
+ Another way to reset the status of the AC alarm timer is to
write (the number) 0 to this file.
If the status return value indicates that the timer has expired,
diff --git a/Documentation/ABI/testing/sysfs-devices-power b/Documentation/ABI/testing/sysfs-devices-power
index 1b2a2d41ff80..54195530e97a 100644
--- a/Documentation/ABI/testing/sysfs-devices-power
+++ b/Documentation/ABI/testing/sysfs-devices-power
@@ -303,5 +303,5 @@ Date: Apr 2010
Contact: Dominik Brodowski <linux@dominikbrodowski.net>
Description:
Reports the runtime PM children usage count of a device, or
- 0 if the the children will be ignored.
+ 0 if the children will be ignored.
diff --git a/Documentation/ABI/testing/sysfs-devices-soc b/Documentation/ABI/testing/sysfs-devices-soc
index ea999e292f11..5269808ec35f 100644
--- a/Documentation/ABI/testing/sysfs-devices-soc
+++ b/Documentation/ABI/testing/sysfs-devices-soc
@@ -1,6 +1,6 @@
What: /sys/devices/socX
Date: January 2012
-contact: Lee Jones <lee.jones@linaro.org>
+contact: Lee Jones <lee@kernel.org>
Description:
The /sys/devices/ directory contains a sub-directory for each
System-on-Chip (SoC) device on a running platform. Information
@@ -14,14 +14,14 @@ Description:
What: /sys/devices/socX/machine
Date: January 2012
-contact: Lee Jones <lee.jones@linaro.org>
+contact: Lee Jones <lee@kernel.org>
Description:
Read-only attribute common to all SoCs. Contains the SoC machine
name (e.g. Ux500).
What: /sys/devices/socX/family
Date: January 2012
-contact: Lee Jones <lee.jones@linaro.org>
+contact: Lee Jones <lee@kernel.org>
Description:
Read-only attribute common to all SoCs. Contains SoC family name
(e.g. DB8500).
@@ -59,7 +59,7 @@ Description:
What: /sys/devices/socX/soc_id
Date: January 2012
-contact: Lee Jones <lee.jones@linaro.org>
+contact: Lee Jones <lee@kernel.org>
Description:
Read-only attribute supported by most SoCs. In the case of
ST-Ericsson's chips this contains the SoC serial number.
@@ -72,21 +72,21 @@ Description:
What: /sys/devices/socX/revision
Date: January 2012
-contact: Lee Jones <lee.jones@linaro.org>
+contact: Lee Jones <lee@kernel.org>
Description:
Read-only attribute supported by most SoCs. Contains the SoC's
manufacturing revision number.
What: /sys/devices/socX/process
Date: January 2012
-contact: Lee Jones <lee.jones@linaro.org>
+contact: Lee Jones <lee@kernel.org>
Description:
Read-only attribute supported ST-Ericsson's silicon. Contains the
the process by which the silicon chip was manufactured.
What: /sys/bus/soc
Date: January 2012
-contact: Lee Jones <lee.jones@linaro.org>
+contact: Lee Jones <lee@kernel.org>
Description:
The /sys/bus/soc/ directory contains the usual sub-folders
expected under most buses. /sys/bus/soc/devices is of particular
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index df79e129d097..5bf61881f012 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -67,8 +67,7 @@ Description: Discover NUMA node a CPU belongs to
/sys/devices/system/cpu/cpu42/node2 -> ../../node/node2
-What: /sys/devices/system/cpu/cpuX/topology/core_id
- /sys/devices/system/cpu/cpuX/topology/core_siblings
+What: /sys/devices/system/cpu/cpuX/topology/core_siblings
/sys/devices/system/cpu/cpuX/topology/core_siblings_list
/sys/devices/system/cpu/cpuX/topology/physical_package_id
/sys/devices/system/cpu/cpuX/topology/thread_siblings
@@ -84,10 +83,6 @@ Description: CPU topology files that describe a logical CPU's relationship
Briefly, the files above are:
- core_id: the CPU core ID of cpuX. Typically it is the
- hardware platform's identifier (rather than the kernel's).
- The actual value is architecture and platform dependent.
-
core_siblings: internal kernel map of cpuX's hardware threads
within the same physical_package_id.
diff --git a/Documentation/driver-api/firmware/core.rst b/Documentation/driver-api/firmware/core.rst
index 1d1688cbc078..803cd574bbd7 100644
--- a/Documentation/driver-api/firmware/core.rst
+++ b/Documentation/driver-api/firmware/core.rst
@@ -13,4 +13,5 @@ documents these features.
direct-fs-lookup
fallback-mechanisms
lookup-order
+ firmware-usage-guidelines
diff --git a/Documentation/driver-api/firmware/firmware-usage-guidelines.rst b/Documentation/driver-api/firmware/firmware-usage-guidelines.rst
new file mode 100644
index 000000000000..fdcfce42c6d2
--- /dev/null
+++ b/Documentation/driver-api/firmware/firmware-usage-guidelines.rst
@@ -0,0 +1,44 @@
+===================
+Firmware Guidelines
+===================
+
+Users switching to a newer kernel should *not* have to install newer
+firmware files to keep their hardware working. At the same time updated
+firmware files must not cause any regressions for users of older kernel
+releases.
+
+Drivers that use firmware from linux-firmware should follow the rules in
+this guide. (Where there is limited control of the firmware,
+i.e. company doesn't support Linux, firmwares sourced from misc places,
+then of course these rules will not apply strictly.)
+
+* Firmware files shall be designed in a way that allows checking for
+ firmware ABI version changes. It is recommended that firmware files be
+ versioned with at least a major/minor version. It is suggested that
+ the firmware files in linux-firmware be named with some device
+ specific name, and just the major version. The firmware version should
+ be stored in the firmware header, or as an exception, as part of the
+ firmware file name, in order to let the driver detect any non-ABI
+ fixes/changes. The firmware files in linux-firmware should be
+ overwritten with the newest compatible major version. Newer major
+ version firmware shall remain compatible with all kernels that load
+ that major number.
+
+* If the kernel support for the hardware is normally inactive, or the
+ hardware isn't available for public consumption, this can
+ be ignored, until the first kernel release that enables that hardware.
+ This means no major version bumps without the kernel retaining
+ backwards compatibility for the older major versions. Minor version
+ bumps should not introduce new features that newer kernels depend on
+ non-optionally.
+
+* If a security fix needs lockstep firmware and kernel fixes in order to
+ be successful, then all supported major versions in the linux-firmware
+ repo that are required by currently supported stable/LTS kernels,
+ should be updated with the security fix. The kernel patches should
+ detect if the firmware is new enough to declare if the security issue
+ is fixed. All communications around security fixes should point at
+ both the firmware and kernel fixes. If a security fix requires
+ deprecating old major versions, then this should only be done as a
+ last option, and be stated clearly in all communications.
+
diff --git a/Documentation/process/embargoed-hardware-issues.rst b/Documentation/process/embargoed-hardware-issues.rst
index 95999302d279..b6b4481e2474 100644
--- a/Documentation/process/embargoed-hardware-issues.rst
+++ b/Documentation/process/embargoed-hardware-issues.rst
@@ -244,7 +244,7 @@ disclosure of a particular issue, unless requested by a response team or by
an involved disclosed party. The current ambassadors list:
============= ========================================================
- AMD Tom Lendacky <tom.lendacky@amd.com>
+ AMD Tom Lendacky <thomas.lendacky@amd.com>
Ampere Darren Hart <darren@os.amperecomputing.com>
ARM Catalin Marinas <catalin.marinas@arm.com>
IBM Power Anton Blanchard <anton@linux.ibm.com>
@@ -264,6 +264,9 @@ an involved disclosed party. The current ambassadors list:
Amazon
Google Kees Cook <keescook@chromium.org>
+
+ GCC
+ LLVM Nick Desaulniers <ndesaulniers@google.com>
============= ========================================================
If you want your organization to be added to the ambassadors list, please
diff --git a/Documentation/translations/zh_CN/process/embargoed-hardware-issues.rst b/Documentation/translations/zh_CN/process/embargoed-hardware-issues.rst
index 88273ebe7823..cf5f1fca3d92 100644
--- a/Documentation/translations/zh_CN/process/embargoed-hardware-issues.rst
+++ b/Documentation/translations/zh_CN/process/embargoed-hardware-issues.rst
@@ -174,7 +174,7 @@ CVE分配
============= ========================================================
ARM
- AMD Tom Lendacky <tom.lendacky@amd.com>
+ AMD Tom Lendacky <thomas.lendacky@amd.com>
IBM
Intel Tony Luck <tony.luck@intel.com>
Qualcomm Trilok Soni <tsoni@codeaurora.org>
diff --git a/Documentation/translations/zh_TW/process/embargoed-hardware-issues.rst b/Documentation/translations/zh_TW/process/embargoed-hardware-issues.rst
index 6c76fc96131a..fbde3e26eda5 100644
--- a/Documentation/translations/zh_TW/process/embargoed-hardware-issues.rst
+++ b/Documentation/translations/zh_TW/process/embargoed-hardware-issues.rst
@@ -177,7 +177,7 @@ CVE分配
============= ========================================================
ARM
- AMD Tom Lendacky <tom.lendacky@amd.com>
+ AMD Tom Lendacky <thomas.lendacky@amd.com>
IBM
Intel Tony Luck <tony.luck@intel.com>
Qualcomm Trilok Soni <tsoni@codeaurora.org>
diff --git a/MAINTAINERS b/MAINTAINERS
index 8c8c98f1e45c..a66ce9e64f91 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7519,7 +7519,7 @@ F: Documentation/admin-guide/media/em28xx*
F: drivers/media/usb/em28xx/
EMBEDDED LINUX
-M: Matt Mackall <mpm@selenic.com>
+M: Olivia Mackall <olivia@selenic.com>
M: David Woodhouse <dwmw2@infradead.org>
L: linux-embedded@vger.kernel.org
S: Maintained
@@ -8911,7 +8911,7 @@ F: include/trace/events/hwmon*.h
K: (devm_)?hwmon_device_(un)?register(|_with_groups|_with_info)
HARDWARE RANDOM NUMBER GENERATOR CORE
-M: Matt Mackall <mpm@selenic.com>
+M: Olivia Mackall <olivia@selenic.com>
M: Herbert Xu <herbert@gondor.apana.org.au>
L: linux-crypto@vger.kernel.org
S: Odd fixes
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index 9ab78ad826e2..869ffc4d4484 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -89,8 +89,6 @@ int __init parse_acpi_topology(void)
return 0;
for_each_possible_cpu(cpu) {
- int i, cache_id;
-
topology_id = find_acpi_cpu_topology(cpu, 0);
if (topology_id < 0)
return topology_id;
@@ -107,18 +105,6 @@ int __init parse_acpi_topology(void)
cpu_topology[cpu].cluster_id = topology_id;
topology_id = find_acpi_cpu_topology_package(cpu);
cpu_topology[cpu].package_id = topology_id;
-
- i = acpi_find_last_cache_level(cpu);
-
- if (i > 0) {
- /*
- * this is the only part of cpu_topology that has
- * a direct relationship with the cache topology
- */
- cache_id = find_acpi_cpu_cache_topology(cpu, i);
- if (cache_id > 0)
- cpu_topology[cpu].llc_id = cache_id;
- }
}
return 0;
diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c
index 701f61c01359..c91342dcbcd6 100644
--- a/drivers/acpi/pptt.c
+++ b/drivers/acpi/pptt.c
@@ -437,7 +437,8 @@ static void cache_setup_acpi_cpu(struct acpi_table_header *table,
pr_debug("found = %p %p\n", found_cache, cpu_node);
if (found_cache)
update_cache_properties(this_leaf, found_cache,
- cpu_node, table->revision);
+ ACPI_TO_POINTER(ACPI_PTR_DIFF(cpu_node, table)),
+ table->revision);
index++;
}
@@ -532,21 +533,37 @@ static int topology_get_acpi_cpu_tag(struct acpi_table_header *table,
return -ENOENT;
}
+
+static struct acpi_table_header *acpi_get_pptt(void)
+{
+ static struct acpi_table_header *pptt;
+ acpi_status status;
+
+ /*
+ * PPTT will be used at runtime on every CPU hotplug in path, so we
+ * don't need to call acpi_put_table() to release the table mapping.
+ */
+ if (!pptt) {
+ status = acpi_get_table(ACPI_SIG_PPTT, 0, &pptt);
+ if (ACPI_FAILURE(status))
+ acpi_pptt_warn_missing();
+ }
+
+ return pptt;
+}
+
static int find_acpi_cpu_topology_tag(unsigned int cpu, int level, int flag)
{
struct acpi_table_header *table;
- acpi_status status;
int retval;
- status = acpi_get_table(ACPI_SIG_PPTT, 0, &table);
- if (ACPI_FAILURE(status)) {
- acpi_pptt_warn_missing();
+ table = acpi_get_pptt();
+ if (!table)
return -ENOENT;
- }
+
retval = topology_get_acpi_cpu_tag(table, cpu, level, flag);
pr_debug("Topology Setup ACPI CPU %d, level %d ret = %d\n",
cpu, level, retval);
- acpi_put_table(table);
return retval;
}
@@ -567,16 +584,13 @@ static int find_acpi_cpu_topology_tag(unsigned int cpu, int level, int flag)
static int check_acpi_cpu_flag(unsigned int cpu, int rev, u32 flag)
{
struct acpi_table_header *table;
- acpi_status status;
u32 acpi_cpu_id = get_acpi_id_for_cpu(cpu);
struct acpi_pptt_processor *cpu_node = NULL;
int ret = -ENOENT;
- status = acpi_get_table(ACPI_SIG_PPTT, 0, &table);
- if (ACPI_FAILURE(status)) {
- acpi_pptt_warn_missing();
- return ret;
- }
+ table = acpi_get_pptt();
+ if (!table)
+ return -ENOENT;
if (table->revision >= rev)
cpu_node = acpi_find_processor_node(table, acpi_cpu_id);
@@ -584,8 +598,6 @@ static int check_acpi_cpu_flag(unsigned int cpu, int rev, u32 flag)
if (cpu_node)
ret = (cpu_node->flags & flag) != 0;
- acpi_put_table(table);
-
return ret;
}
@@ -604,18 +616,15 @@ int acpi_find_last_cache_level(unsigned int cpu)
u32 acpi_cpu_id;
struct acpi_table_header *table;
int number_of_levels = 0;
- acpi_status status;
+
+ table = acpi_get_pptt();
+ if (!table)
+ return -ENOENT;
pr_debug("Cache Setup find last level CPU=%d\n", cpu);
acpi_cpu_id = get_acpi_id_for_cpu(cpu);
- status = acpi_get_table(ACPI_SIG_PPTT, 0, &table);
- if (ACPI_FAILURE(status)) {
- acpi_pptt_warn_missing();
- } else {
- number_of_levels = acpi_find_cache_levels(table, acpi_cpu_id);
- acpi_put_table(table);
- }
+ number_of_levels = acpi_find_cache_levels(table, acpi_cpu_id);
pr_debug("Cache Setup find last level level=%d\n", number_of_levels);
return number_of_levels;
@@ -637,20 +646,16 @@ int acpi_find_last_cache_level(unsigned int cpu)
int cache_setup_acpi(unsigned int cpu)
{
struct acpi_table_header *table;
- acpi_status status;
- pr_debug("Cache Setup ACPI CPU %d\n", cpu);
-
- status = acpi_get_table(ACPI_SIG_PPTT, 0, &table);
- if (ACPI_FAILURE(status)) {
- acpi_pptt_warn_missing();
+ table = acpi_get_pptt();
+ if (!table)
return -ENOENT;
- }
+
+ pr_debug("Cache Setup ACPI CPU %d\n", cpu);
cache_setup_acpi_cpu(table, cpu);
- acpi_put_table(table);
- return status;
+ return 0;
}
/**
@@ -691,43 +696,6 @@ int find_acpi_cpu_topology(unsigned int cpu, int level)
}
/**
- * find_acpi_cpu_cache_topology() - Determine a unique cache topology value
- * @cpu: Kernel logical CPU number
- * @level: The cache level for which we would like a unique ID
- *
- * Determine a unique ID for each unified cache in the system
- *
- * Return: -ENOENT if the PPTT doesn't exist, or the CPU cannot be found.
- * Otherwise returns a value which represents a unique topological feature.
- */
-int find_acpi_cpu_cache_topology(unsigned int cpu, int level)
-{
- struct acpi_table_header *table;
- struct acpi_pptt_cache *found_cache;
- acpi_status status;
- u32 acpi_cpu_id = get_acpi_id_for_cpu(cpu);
- struct acpi_pptt_processor *cpu_node = NULL;
- int ret = -1;
-
- status = acpi_get_table(ACPI_SIG_PPTT, 0, &table);
- if (ACPI_FAILURE(status)) {
- acpi_pptt_warn_missing();
- return -ENOENT;
- }
-
- found_cache = acpi_find_cache_node(table, acpi_cpu_id,
- CACHE_TYPE_UNIFIED,
- level,
- &cpu_node);
- if (found_cache)
- ret = ACPI_PTR_DIFF(cpu_node, table);
-
- acpi_put_table(table);
-
- return ret;
-}
-
-/**
* find_acpi_cpu_topology_package() - Determine a unique CPU package value
* @cpu: Kernel logical CPU number
*
@@ -766,50 +734,38 @@ int find_acpi_cpu_topology_package(unsigned int cpu)
int find_acpi_cpu_topology_cluster(unsigned int cpu)
{
struct acpi_table_header *table;
- acpi_status status;
struct acpi_pptt_processor *cpu_node, *cluster_node;
u32 acpi_cpu_id;
int retval;
int is_thread;
- status = acpi_get_table(ACPI_SIG_PPTT, 0, &table);
- if (ACPI_FAILURE(status)) {
- acpi_pptt_warn_missing();
+ table = acpi_get_pptt();
+ if (!table)
return -ENOENT;
- }
acpi_cpu_id = get_acpi_id_for_cpu(cpu);
cpu_node = acpi_find_processor_node(table, acpi_cpu_id);
- if (cpu_node == NULL || !cpu_node->parent) {
- retval = -ENOENT;
- goto put_table;
- }
+ if (!cpu_node || !cpu_node->parent)
+ return -ENOENT;
is_thread = cpu_node->flags & ACPI_PPTT_ACPI_PROCESSOR_IS_THREAD;
cluster_node = fetch_pptt_node(table, cpu_node->parent);
- if (cluster_node == NULL) {
- retval = -ENOENT;
- goto put_table;
- }
+ if (!cluster_node)
+ return -ENOENT;
+
if (is_thread) {
- if (!cluster_node->parent) {
- retval = -ENOENT;
- goto put_table;
- }
+ if (!cluster_node->parent)
+ return -ENOENT;
+
cluster_node = fetch_pptt_node(table, cluster_node->parent);
- if (cluster_node == NULL) {
- retval = -ENOENT;
- goto put_table;
- }
+ if (!cluster_node)
+ return -ENOENT;
}
if (cluster_node->flags & ACPI_PPTT_ACPI_PROCESSOR_ID_VALID)
retval = cluster_node->acpi_processor_id;
else
retval = ACPI_PTR_DIFF(cluster_node, table);
-put_table:
- acpi_put_table(table);
-
return retval;
}
diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index 579c851a2bd7..0424b59b695e 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -7,6 +7,7 @@
*/
#include <linux/acpi.h>
+#include <linux/cacheinfo.h>
#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/device.h>
@@ -496,7 +497,7 @@ static int __init get_cpu_for_node(struct device_node *node)
}
static int __init parse_core(struct device_node *core, int package_id,
- int core_id)
+ int cluster_id, int core_id)
{
char name[20];
bool leaf = true;
@@ -512,6 +513,7 @@ static int __init parse_core(struct device_node *core, int package_id,
cpu = get_cpu_for_node(t);
if (cpu >= 0) {
cpu_topology[cpu].package_id = package_id;
+ cpu_topology[cpu].cluster_id = cluster_id;
cpu_topology[cpu].core_id = core_id;
cpu_topology[cpu].thread_id = i;
} else if (cpu != -ENODEV) {
@@ -533,6 +535,7 @@ static int __init parse_core(struct device_node *core, int package_id,
}
cpu_topology[cpu].package_id = package_id;
+ cpu_topology[cpu].cluster_id = cluster_id;
cpu_topology[cpu].core_id = core_id;
} else if (leaf && cpu != -ENODEV) {
pr_err("%pOF: Can't get CPU for leaf core\n", core);
@@ -542,13 +545,13 @@ static int __init parse_core(struct device_node *core, int package_id,
return 0;
}
-static int __init parse_cluster(struct device_node *cluster, int depth)
+static int __init parse_cluster(struct device_node *cluster, int package_id,
+ int cluster_id, int depth)
{
char name[20];
bool leaf = true;
bool has_cores = false;
struct device_node *c;
- static int package_id __initdata;
int core_id = 0;
int i, ret;
@@ -563,7 +566,9 @@ static int __init parse_cluster(struct device_node *cluster, int depth)
c = of_get_child_by_name(cluster, name);
if (c) {
leaf = false;
- ret = parse_cluster(c, depth + 1);
+ ret = parse_cluster(c, package_id, i, depth + 1);
+ if (depth > 0)
+ pr_warn("Topology for clusters of clusters not yet supported\n");
of_node_put(c);
if (ret != 0)
return ret;
@@ -587,7 +592,8 @@ static int __init parse_cluster(struct device_node *cluster, int depth)
}
if (leaf) {
- ret = parse_core(c, package_id, core_id++);
+ ret = parse_core(c, package_id, cluster_id,
+ core_id++);
} else {
pr_err("%pOF: Non-leaf cluster with core %s\n",
cluster, name);
@@ -604,10 +610,33 @@ static int __init parse_cluster(struct device_node *cluster, int depth)
if (leaf && !has_cores)
pr_warn("%pOF: empty cluster\n", cluster);
- if (leaf)
+ return 0;
+}
+
+static int __init parse_socket(struct device_node *socket)
+{
+ char name[20];
+ struct device_node *c;
+ bool has_socket = false;
+ int package_id = 0, ret;
+
+ do {
+ snprintf(name, sizeof(name), "socket%d", package_id);
+ c = of_get_child_by_name(socket, name);
+ if (c) {
+ has_socket = true;
+ ret = parse_cluster(c, package_id, -1, 0);
+ of_node_put(c);
+ if (ret != 0)
+ return ret;
+ }
package_id++;
+ } while (c);
- return 0;
+ if (!has_socket)
+ ret = parse_cluster(socket, 0, -1, 0);
+
+ return ret;
}
static int __init parse_dt_topology(void)
@@ -630,7 +659,7 @@ static int __init parse_dt_topology(void)
if (!map)
goto out;
- ret = parse_cluster(map, 0);
+ ret = parse_socket(map);
if (ret != 0)
goto out_map;
@@ -641,8 +670,10 @@ static int __init parse_dt_topology(void)
* only mark cores described in the DT as possible.
*/
for_each_possible_cpu(cpu)
- if (cpu_topology[cpu].package_id == -1)
+ if (cpu_topology[cpu].package_id < 0) {
ret = -EINVAL;
+ break;
+ }
out_map:
of_node_put(map);
@@ -667,7 +698,8 @@ const struct cpumask *cpu_coregroup_mask(int cpu)
/* not numa in package, lets use the package siblings */
core_mask = &cpu_topology[cpu].core_sibling;
}
- if (cpu_topology[cpu].llc_id != -1) {
+
+ if (last_level_cache_is_valid(cpu)) {
if (cpumask_subset(&cpu_topology[cpu].llc_sibling, core_mask))
core_mask = &cpu_topology[cpu].llc_sibling;
}
@@ -686,19 +718,31 @@ const struct cpumask *cpu_coregroup_mask(int cpu)
const struct cpumask *cpu_clustergroup_mask(int cpu)
{
+ /*
+ * Forbid cpu_clustergroup_mask() to span more or the same CPUs as
+ * cpu_coregroup_mask().
+ */
+ if (cpumask_subset(cpu_coregroup_mask(cpu),
+ &cpu_topology[cpu].cluster_sibling))
+ return get_cpu_mask(cpu);
+
return &cpu_topology[cpu].cluster_sibling;
}
void update_siblings_masks(unsigned int cpuid)
{
struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
- int cpu;
+ int cpu, ret;
+
+ ret = detect_cache_attributes(cpuid);
+ if (ret)
+ pr_info("Early cacheinfo failed, ret = %d\n", ret);
/* update core and thread sibling masks */
for_each_online_cpu(cpu) {
cpu_topo = &cpu_topology[cpu];
- if (cpu_topo->llc_id != -1 && cpuid_topo->llc_id == cpu_topo->llc_id) {
+ if (last_level_cache_is_shared(cpu, cpuid)) {
cpumask_set_cpu(cpu, &cpuid_topo->llc_sibling);
cpumask_set_cpu(cpuid, &cpu_topo->llc_sibling);
}
@@ -706,15 +750,17 @@ void update_siblings_masks(unsigned int cpuid)
if (cpuid_topo->package_id != cpu_topo->package_id)
continue;
- if (cpuid_topo->cluster_id == cpu_topo->cluster_id &&
- cpuid_topo->cluster_id != -1) {
+ cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
+ cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
+
+ if (cpuid_topo->cluster_id != cpu_topo->cluster_id)
+ continue;
+
+ if (cpuid_topo->cluster_id >= 0) {
cpumask_set_cpu(cpu, &cpuid_topo->cluster_sibling);
cpumask_set_cpu(cpuid, &cpu_topo->cluster_sibling);
}
- cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
- cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
-
if (cpuid_topo->core_id != cpu_topo->core_id)
continue;
@@ -750,7 +796,6 @@ void __init reset_cpu_topology(void)
cpu_topo->core_id = -1;
cpu_topo->cluster_id = -1;
cpu_topo->package_id = -1;
- cpu_topo->llc_id = -1;
clear_cpu_topology(cpu);
}
@@ -780,15 +825,20 @@ __weak int __init parse_acpi_topology(void)
#if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
void __init init_cpu_topology(void)
{
+ int ret;
+
reset_cpu_topology();
+ ret = parse_acpi_topology();
+ if (!ret)
+ ret = of_have_populated_dt() && parse_dt_topology();
- /*
- * Discard anything that was parsed if we hit an error so we
- * don't use partial information.
- */
- if (parse_acpi_topology())
- reset_cpu_topology();
- else if (of_have_populated_dt() && parse_dt_topology())
+ if (ret) {
+ /*
+ * Discard anything that was parsed if we hit an error so we
+ * don't use partial information.
+ */
reset_cpu_topology();
+ return;
+ }
}
#endif
diff --git a/drivers/base/base.h b/drivers/base/base.h
index ab71403d102f..b3a43a164dcd 100644
--- a/drivers/base/base.h
+++ b/drivers/base/base.h
@@ -160,6 +160,7 @@ extern int devres_release_all(struct device *dev);
extern void device_block_probing(void);
extern void device_unblock_probing(void);
extern void deferred_probe_extend_timeout(void);
+extern void driver_deferred_probe_trigger(void);
/* /sys/devices directory */
extern struct kset *devices_kset;
diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c
index dad296229161..4b5cd08c5a65 100644
--- a/drivers/base/cacheinfo.c
+++ b/drivers/base/cacheinfo.c
@@ -14,7 +14,7 @@
#include <linux/cpu.h>
#include <linux/device.h>
#include <linux/init.h>
-#include <linux/of.h>
+#include <linux/of_device.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/smp.h>
@@ -25,19 +25,60 @@ static DEFINE_PER_CPU(struct cpu_cacheinfo, ci_cpu_cacheinfo);
#define ci_cacheinfo(cpu) (&per_cpu(ci_cpu_cacheinfo, cpu))
#define cache_leaves(cpu) (ci_cacheinfo(cpu)->num_leaves)
#define per_cpu_cacheinfo(cpu) (ci_cacheinfo(cpu)->info_list)
+#define per_cpu_cacheinfo_idx(cpu, idx) \
+ (per_cpu_cacheinfo(cpu) + (idx))
struct cpu_cacheinfo *get_cpu_cacheinfo(unsigned int cpu)
{
return ci_cacheinfo(cpu);
}
-#ifdef CONFIG_OF
static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf,
struct cacheinfo *sib_leaf)
{
+ /*
+ * For non DT/ACPI systems, assume unique level 1 caches,
+ * system-wide shared caches for all other levels. This will be used
+ * only if arch specific code has not populated shared_cpu_map
+ */
+ if (!(IS_ENABLED(CONFIG_OF) || IS_ENABLED(CONFIG_ACPI)))
+ return !(this_leaf->level == 1);
+
+ if ((sib_leaf->attributes & CACHE_ID) &&
+ (this_leaf->attributes & CACHE_ID))
+ return sib_leaf->id == this_leaf->id;
+
return sib_leaf->fw_token == this_leaf->fw_token;
}
+bool last_level_cache_is_valid(unsigned int cpu)
+{
+ struct cacheinfo *llc;
+
+ if (!cache_leaves(cpu))
+ return false;
+
+ llc = per_cpu_cacheinfo_idx(cpu, cache_leaves(cpu) - 1);
+
+ return (llc->attributes & CACHE_ID) || !!llc->fw_token;
+
+}
+
+bool last_level_cache_is_shared(unsigned int cpu_x, unsigned int cpu_y)
+{
+ struct cacheinfo *llc_x, *llc_y;
+
+ if (!last_level_cache_is_valid(cpu_x) ||
+ !last_level_cache_is_valid(cpu_y))
+ return false;
+
+ llc_x = per_cpu_cacheinfo_idx(cpu_x, cache_leaves(cpu_x) - 1);
+ llc_y = per_cpu_cacheinfo_idx(cpu_y, cache_leaves(cpu_y) - 1);
+
+ return cache_leaves_are_shared(llc_x, llc_y);
+}
+
+#ifdef CONFIG_OF
/* OF properties to query for a given cache type */
struct cache_type_info {
const char *size_prop;
@@ -157,27 +198,16 @@ static int cache_setup_of_node(unsigned int cpu)
{
struct device_node *np;
struct cacheinfo *this_leaf;
- struct device *cpu_dev = get_cpu_device(cpu);
- struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
unsigned int index = 0;
- /* skip if fw_token is already populated */
- if (this_cpu_ci->info_list->fw_token) {
- return 0;
- }
-
- if (!cpu_dev) {
- pr_err("No cpu device for CPU %d\n", cpu);
- return -ENODEV;
- }
- np = cpu_dev->of_node;
+ np = of_cpu_device_node_get(cpu);
if (!np) {
pr_err("Failed to find cpu%d device node\n", cpu);
return -ENOENT;
}
while (index < cache_leaves(cpu)) {
- this_leaf = this_cpu_ci->info_list + index;
+ this_leaf = per_cpu_cacheinfo_idx(cpu, index);
if (this_leaf->level != 1)
np = of_find_next_cache_node(np);
else
@@ -196,16 +226,6 @@ static int cache_setup_of_node(unsigned int cpu)
}
#else
static inline int cache_setup_of_node(unsigned int cpu) { return 0; }
-static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf,
- struct cacheinfo *sib_leaf)
-{
- /*
- * For non-DT/ACPI systems, assume unique level 1 caches, system-wide
- * shared caches for all other levels. This will be used only if
- * arch specific code has not populated shared_cpu_map
- */
- return !(this_leaf->level == 1);
-}
#endif
int __weak cache_setup_acpi(unsigned int cpu)
@@ -215,6 +235,18 @@ int __weak cache_setup_acpi(unsigned int cpu)
unsigned int coherency_max_size;
+static int cache_setup_properties(unsigned int cpu)
+{
+ int ret = 0;
+
+ if (of_have_populated_dt())
+ ret = cache_setup_of_node(cpu);
+ else if (!acpi_disabled)
+ ret = cache_setup_acpi(cpu);
+
+ return ret;
+}
+
static int cache_shared_cpu_map_setup(unsigned int cpu)
{
struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
@@ -225,21 +257,21 @@ static int cache_shared_cpu_map_setup(unsigned int cpu)
if (this_cpu_ci->cpu_map_populated)
return 0;
- if (of_have_populated_dt())
- ret = cache_setup_of_node(cpu);
- else if (!acpi_disabled)
- ret = cache_setup_acpi(cpu);
-
- if (ret)
- return ret;
+ /*
+ * skip setting up cache properties if LLC is valid, just need
+ * to update the shared cpu_map if the cache attributes were
+ * populated early before all the cpus are brought online
+ */
+ if (!last_level_cache_is_valid(cpu)) {
+ ret = cache_setup_properties(cpu);
+ if (ret)
+ return ret;
+ }
for (index = 0; index < cache_leaves(cpu); index++) {
unsigned int i;
- this_leaf = this_cpu_ci->info_list + index;
- /* skip if shared_cpu_map is already populated */
- if (!cpumask_empty(&this_leaf->shared_cpu_map))
- continue;
+ this_leaf = per_cpu_cacheinfo_idx(cpu, index);
cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
for_each_online_cpu(i) {
@@ -247,7 +279,8 @@ static int cache_shared_cpu_map_setup(unsigned int cpu)
if (i == cpu || !sib_cpu_ci->info_list)
continue;/* skip if itself or no cacheinfo */
- sib_leaf = sib_cpu_ci->info_list + index;
+
+ sib_leaf = per_cpu_cacheinfo_idx(i, index);
if (cache_leaves_are_shared(this_leaf, sib_leaf)) {
cpumask_set_cpu(cpu, &sib_leaf->shared_cpu_map);
cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
@@ -263,23 +296,19 @@ static int cache_shared_cpu_map_setup(unsigned int cpu)
static void cache_shared_cpu_map_remove(unsigned int cpu)
{
- struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
struct cacheinfo *this_leaf, *sib_leaf;
unsigned int sibling, index;
for (index = 0; index < cache_leaves(cpu); index++) {
- this_leaf = this_cpu_ci->info_list + index;
+ this_leaf = per_cpu_cacheinfo_idx(cpu, index);
for_each_cpu(sibling, &this_leaf->shared_cpu_map) {
- struct cpu_cacheinfo *sib_cpu_ci;
-
- if (sibling == cpu) /* skip itself */
- continue;
+ struct cpu_cacheinfo *sib_cpu_ci =
+ get_cpu_cacheinfo(sibling);
- sib_cpu_ci = get_cpu_cacheinfo(sibling);
- if (!sib_cpu_ci->info_list)
- continue;
+ if (sibling == cpu || !sib_cpu_ci->info_list)
+ continue;/* skip if itself or no cacheinfo */
- sib_leaf = sib_cpu_ci->info_list + index;
+ sib_leaf = per_cpu_cacheinfo_idx(sibling, index);
cpumask_clear_cpu(cpu, &sib_leaf->shared_cpu_map);
cpumask_clear_cpu(sibling, &this_leaf->shared_cpu_map);
}
@@ -310,17 +339,28 @@ int __weak populate_cache_leaves(unsigned int cpu)
return -ENOENT;
}
-static int detect_cache_attributes(unsigned int cpu)
+int detect_cache_attributes(unsigned int cpu)
{
int ret;
+ /* Since early detection of the cacheinfo is allowed via this
+ * function and this also gets called as CPU hotplug callbacks via
+ * cacheinfo_cpu_online, the initialisation can be skipped and only
+ * CPU maps can be updated as the CPU online status would be updated
+ * if called via cacheinfo_cpu_online path.
+ */
+ if (per_cpu_cacheinfo(cpu))
+ goto update_cpu_map;
+
if (init_cache_level(cpu) || !cache_leaves(cpu))
return -ENOENT;
per_cpu_cacheinfo(cpu) = kcalloc(cache_leaves(cpu),
- sizeof(struct cacheinfo), GFP_KERNEL);
- if (per_cpu_cacheinfo(cpu) == NULL)
+ sizeof(struct cacheinfo), GFP_ATOMIC);
+ if (per_cpu_cacheinfo(cpu) == NULL) {
+ cache_leaves(cpu) = 0;
return -ENOMEM;
+ }
/*
* populate_cache_leaves() may completely setup the cache leaves and
@@ -329,6 +369,8 @@ static int detect_cache_attributes(unsigned int cpu)
ret = populate_cache_leaves(cpu);
if (ret)
goto free_ci;
+
+update_cpu_map:
/*
* For systems using DT for cache hierarchy, fw_token
* and shared_cpu_map will be set up here only if they are
@@ -614,7 +656,6 @@ static int cache_add_dev(unsigned int cpu)
int rc;
struct device *ci_dev, *parent;
struct cacheinfo *this_leaf;
- struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
const struct attribute_group **cache_groups;
rc = cpu_cache_sysfs_init(cpu);
@@ -623,7 +664,7 @@ static int cache_add_dev(unsigned int cpu)
parent = per_cpu_cache_dev(cpu);
for (i = 0; i < cache_leaves(cpu); i++) {
- this_leaf = this_cpu_ci->info_list + i;
+ this_leaf = per_cpu_cacheinfo_idx(cpu, i);
if (this_leaf->disable_sysfs)
continue;
if (this_leaf->type == CACHE_TYPE_NOCACHE)
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 460d6f163e41..753e7cca0f40 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -54,6 +54,7 @@ static unsigned int defer_sync_state_count = 1;
static DEFINE_MUTEX(fwnode_link_lock);
static bool fw_devlink_is_permissive(void);
static bool fw_devlink_drv_reg_done;
+static bool fw_devlink_best_effort;
/**
* fwnode_link_add - Create a link between two fwnode_handles.
@@ -976,6 +977,12 @@ static void device_links_missing_supplier(struct device *dev)
}
}
+static bool dev_is_best_effort(struct device *dev)
+{
+ return (fw_devlink_best_effort && dev->can_match) ||
+ (dev->fwnode && (dev->fwnode->flags & FWNODE_FLAG_BEST_EFFORT));
+}
+
/**
* device_links_check_suppliers - Check presence of supplier drivers.
* @dev: Consumer device.
@@ -995,7 +1002,7 @@ static void device_links_missing_supplier(struct device *dev)
int device_links_check_suppliers(struct device *dev)
{
struct device_link *link;
- int ret = 0;
+ int ret = 0, fwnode_ret = 0;
struct fwnode_handle *sup_fw;
/*
@@ -1008,12 +1015,17 @@ int device_links_check_suppliers(struct device *dev)
sup_fw = list_first_entry(&dev->fwnode->suppliers,
struct fwnode_link,
c_hook)->supplier;
- dev_err_probe(dev, -EPROBE_DEFER, "wait for supplier %pfwP\n",
- sup_fw);
- mutex_unlock(&fwnode_link_lock);
- return -EPROBE_DEFER;
+ if (!dev_is_best_effort(dev)) {
+ fwnode_ret = -EPROBE_DEFER;
+ dev_err_probe(dev, -EPROBE_DEFER,
+ "wait for supplier %pfwP\n", sup_fw);
+ } else {
+ fwnode_ret = -EAGAIN;
+ }
}
mutex_unlock(&fwnode_link_lock);
+ if (fwnode_ret == -EPROBE_DEFER)
+ return fwnode_ret;
device_links_write_lock();
@@ -1023,6 +1035,14 @@ int device_links_check_suppliers(struct device *dev)
if (link->status != DL_STATE_AVAILABLE &&
!(link->flags & DL_FLAG_SYNC_STATE_ONLY)) {
+
+ if (dev_is_best_effort(dev) &&
+ link->flags & DL_FLAG_INFERRED &&
+ !link->supplier->can_match) {
+ ret = -EAGAIN;
+ continue;
+ }
+
device_links_missing_supplier(dev);
dev_err_probe(dev, -EPROBE_DEFER,
"supplier %s not ready\n",
@@ -1035,7 +1055,8 @@ int device_links_check_suppliers(struct device *dev)
dev->links.status = DL_DEV_PROBING;
device_links_write_unlock();
- return ret;
+
+ return ret ? ret : fwnode_ret;
}
/**
@@ -1300,6 +1321,18 @@ void device_links_driver_bound(struct device *dev)
* save to drop the managed link completely.
*/
device_link_drop_managed(link);
+ } else if (dev_is_best_effort(dev) &&
+ link->flags & DL_FLAG_INFERRED &&
+ link->status != DL_STATE_CONSUMER_PROBE &&
+ !link->supplier->can_match) {
+ /*
+ * When dev_is_best_effort() is true, we ignore device
+ * links to suppliers that don't have a driver. If the
+ * consumer device still managed to probe, there's no
+ * point in maintaining a device link in a weird state
+ * (consumer probed before supplier). So delete it.
+ */
+ device_link_drop_managed(link);
} else {
WARN_ON(link->status != DL_STATE_CONSUMER_PROBE);
WRITE_ONCE(link->status, DL_STATE_ACTIVE);
@@ -1592,7 +1625,7 @@ static int __init fw_devlink_setup(char *arg)
}
early_param("fw_devlink", fw_devlink_setup);
-static bool fw_devlink_strict;
+static bool fw_devlink_strict = true;
static int __init fw_devlink_strict_setup(char *arg)
{
return strtobool(arg, &fw_devlink_strict);
@@ -1666,6 +1699,62 @@ void fw_devlink_drivers_done(void)
device_links_write_unlock();
}
+/**
+ * wait_for_init_devices_probe - Try to probe any device needed for init
+ *
+ * Some devices might need to be probed and bound successfully before the kernel
+ * boot sequence can finish and move on to init/userspace. For example, a
+ * network interface might need to be bound to be able to mount a NFS rootfs.
+ *
+ * With fw_devlink=on by default, some of these devices might be blocked from
+ * probing because they are waiting on an optional supplier that doesn't have a
+ * driver. While fw_devlink will eventually identify such devices and unblock
+ * the probing automatically, it might be too late by the time it unblocks the
+ * probing of devices. For example, the IP4 autoconfig might timeout before
+ * fw_devlink unblocks probing of the network interface.
+ *
+ * This function is available to temporarily try and probe all devices that have
+ * a driver even if some of their suppliers haven't been added or don't have
+ * drivers.
+ *
+ * The drivers can then decide which of the suppliers are optional vs mandatory
+ * and probe the device if possible. By the time this function returns, all such
+ * "best effort" probes are guaranteed to be completed. If a device successfully
+ * probes in this mode, we delete all fw_devlink discovered dependencies of that
+ * device where the supplier hasn't yet probed successfully because they have to
+ * be optional dependencies.
+ *
+ * Any devices that didn't successfully probe go back to being treated as if
+ * this function was never called.
+ *
+ * This also means that some devices that aren't needed for init and could have
+ * waited for their optional supplier to probe (when the supplier's module is
+ * loaded later on) would end up probing prematurely with limited functionality.
+ * So call this function only when boot would fail without it.
+ */
+void __init wait_for_init_devices_probe(void)
+{
+ if (!fw_devlink_flags || fw_devlink_is_permissive())
+ return;
+
+ /*
+ * Wait for all ongoing probes to finish so that the "best effort" is
+ * only applied to devices that can't probe otherwise.
+ */
+ wait_for_device_probe();
+
+ pr_info("Trying to probe devices needed for running init ...\n");
+ fw_devlink_best_effort = true;
+ driver_deferred_probe_trigger();
+
+ /*
+ * Wait for all "best effort" probes to finish before going back to
+ * normal enforcement.
+ */
+ wait_for_device_probe();
+ fw_devlink_best_effort = false;
+}
+
static void fw_devlink_unblock_consumers(struct device *dev)
{
struct device_link *link;
@@ -3843,6 +3932,26 @@ struct device *device_find_child_by_name(struct device *parent,
}
EXPORT_SYMBOL_GPL(device_find_child_by_name);
+static int match_any(struct device *dev, void *unused)
+{
+ return 1;
+}
+
+/**
+ * device_find_any_child - device iterator for locating a child device, if any.
+ * @parent: parent struct device
+ *
+ * This is similar to the device_find_child() function above, but it
+ * returns a reference to a child device, if any.
+ *
+ * NOTE: you will need to drop the reference with put_device() after use.
+ */
+struct device *device_find_any_child(struct device *parent)
+{
+ return device_find_child(parent, NULL, match_any);
+}
+EXPORT_SYMBOL_GPL(device_find_any_child);
+
int __init devices_init(void)
{
devices_kset = kset_create_and_add("devices", &device_uevent_ops, NULL);
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index 11b0fb6414d3..70f79fc71539 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -172,7 +172,7 @@ static bool driver_deferred_probe_enable;
* changes in the midst of a probe, then deferred processing should be triggered
* again.
*/
-static void driver_deferred_probe_trigger(void)
+void driver_deferred_probe_trigger(void)
{
if (!driver_deferred_probe_enable)
return;
@@ -256,7 +256,12 @@ static int deferred_devs_show(struct seq_file *s, void *data)
}
DEFINE_SHOW_ATTRIBUTE(deferred_devs);
+#ifdef CONFIG_MODULES
+int driver_deferred_probe_timeout = 10;
+#else
int driver_deferred_probe_timeout;
+#endif
+
EXPORT_SYMBOL_GPL(driver_deferred_probe_timeout);
static int __init deferred_probe_timeout_setup(char *str)
@@ -269,42 +274,12 @@ static int __init deferred_probe_timeout_setup(char *str)
}
__setup("deferred_probe_timeout=", deferred_probe_timeout_setup);
-/**
- * driver_deferred_probe_check_state() - Check deferred probe state
- * @dev: device to check
- *
- * Return:
- * * -ENODEV if initcalls have completed and modules are disabled.
- * * -ETIMEDOUT if the deferred probe timeout was set and has expired
- * and modules are enabled.
- * * -EPROBE_DEFER in other cases.
- *
- * Drivers or subsystems can opt-in to calling this function instead of directly
- * returning -EPROBE_DEFER.
- */
-int driver_deferred_probe_check_state(struct device *dev)
-{
- if (!IS_ENABLED(CONFIG_MODULES) && initcalls_done) {
- dev_warn(dev, "ignoring dependency for device, assuming no driver\n");
- return -ENODEV;
- }
-
- if (!driver_deferred_probe_timeout && initcalls_done) {
- dev_warn(dev, "deferred probe timeout, ignoring dependency\n");
- return -ETIMEDOUT;
- }
-
- return -EPROBE_DEFER;
-}
-EXPORT_SYMBOL_GPL(driver_deferred_probe_check_state);
-
static void deferred_probe_timeout_work_func(struct work_struct *work)
{
struct device_private *p;
fw_devlink_drivers_done();
- driver_deferred_probe_timeout = 0;
driver_deferred_probe_trigger();
flush_work(&deferred_probe_work);
@@ -580,7 +555,7 @@ static int really_probe(struct device *dev, struct device_driver *drv)
{
bool test_remove = IS_ENABLED(CONFIG_DEBUG_TEST_DRIVER_REMOVE) &&
!drv->suppress_bind_attrs;
- int ret;
+ int ret, link_ret;
if (defer_all_probes) {
/*
@@ -592,9 +567,9 @@ static int really_probe(struct device *dev, struct device_driver *drv)
return -EPROBE_DEFER;
}
- ret = device_links_check_suppliers(dev);
- if (ret)
- return ret;
+ link_ret = device_links_check_suppliers(dev);
+ if (link_ret == -EPROBE_DEFER)
+ return link_ret;
pr_debug("bus: '%s': %s: probing driver %s with device %s\n",
drv->bus->name, __func__, drv->name, dev_name(dev));
@@ -634,6 +609,15 @@ re_probe:
ret = call_driver_probe(dev, drv);
if (ret) {
/*
+ * If fw_devlink_best_effort is active (denoted by -EAGAIN), the
+ * device might actually probe properly once some of its missing
+ * suppliers have probed. So, treat this as if the driver
+ * returned -EPROBE_DEFER.
+ */
+ if (link_ret == -EAGAIN)
+ ret = -EPROBE_DEFER;
+
+ /*
* Return probe errors as positive values so that the callers
* can distinguish them from other errors.
*/
@@ -1115,6 +1099,7 @@ static void __driver_attach_async_helper(void *_dev, async_cookie_t cookie)
static int __driver_attach(struct device *dev, void *data)
{
struct device_driver *drv = data;
+ bool async = false;
int ret;
/*
@@ -1153,9 +1138,11 @@ static int __driver_attach(struct device *dev, void *data)
if (!dev->driver && !dev->p->async_driver) {
get_device(dev);
dev->p->async_driver = drv;
- async_schedule_dev(__driver_attach_async_helper, dev);
+ async = true;
}
device_unlock(dev);
+ if (async)
+ async_schedule_dev(__driver_attach_async_helper, dev);
return 0;
}
diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c
index 8a3ddbae3b70..e4bffeabf344 100644
--- a/drivers/base/devtmpfs.c
+++ b/drivers/base/devtmpfs.c
@@ -482,6 +482,7 @@ int __init devtmpfs_init(void)
if (err) {
printk(KERN_ERR "devtmpfs: unable to create devtmpfs %i\n", err);
unregister_filesystem(&dev_fs_type);
+ thread = NULL;
return err;
}
diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c
index ac3f34e80194..7c3590fd97c2 100644
--- a/drivers/base/firmware_loader/main.c
+++ b/drivers/base/firmware_loader/main.c
@@ -435,11 +435,11 @@ static int fw_decompress_xz_pages(struct device *dev, struct fw_priv *fw_priv,
/* decompress onto the new allocated page */
page = fw_priv->pages[fw_priv->nr_pages - 1];
- xz_buf.out = kmap(page);
+ xz_buf.out = kmap_local_page(page);
xz_buf.out_pos = 0;
xz_buf.out_size = PAGE_SIZE;
xz_ret = xz_dec_run(xz_dec, &xz_buf);
- kunmap(page);
+ kunmap_local(xz_buf.out);
fw_priv->size += xz_buf.out_pos;
/* partial decompression means either end or error */
if (xz_buf.out_pos != PAGE_SIZE)
diff --git a/drivers/base/firmware_loader/sysfs.c b/drivers/base/firmware_loader/sysfs.c
index 5b0b85b70b6f..77bad32c481a 100644
--- a/drivers/base/firmware_loader/sysfs.c
+++ b/drivers/base/firmware_loader/sysfs.c
@@ -242,19 +242,17 @@ static void firmware_rw(struct fw_priv *fw_priv, char *buffer,
loff_t offset, size_t count, bool read)
{
while (count) {
- void *page_data;
int page_nr = offset >> PAGE_SHIFT;
int page_ofs = offset & (PAGE_SIZE - 1);
int page_cnt = min_t(size_t, PAGE_SIZE - page_ofs, count);
- page_data = kmap(fw_priv->pages[page_nr]);
-
if (read)
- memcpy(buffer, page_data + page_ofs, page_cnt);
+ memcpy_from_page(buffer, fw_priv->pages[page_nr],
+ page_ofs, page_cnt);
else
- memcpy(page_data + page_ofs, buffer, page_cnt);
+ memcpy_to_page(fw_priv->pages[page_nr], page_ofs,
+ buffer, page_cnt);
- kunmap(fw_priv->pages[page_nr]);
buffer += page_cnt;
offset += page_cnt;
count -= page_cnt;
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 0ac6376ef7a1..eb0f43784c2b 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -45,7 +45,7 @@ static inline ssize_t cpumap_read(struct file *file, struct kobject *kobj,
return n;
}
-static BIN_ATTR_RO(cpumap, 0);
+static BIN_ATTR_RO(cpumap, CPUMAP_FILE_MAX_BYTES);
static inline ssize_t cpulist_read(struct file *file, struct kobject *kobj,
struct bin_attribute *attr, char *buf,
@@ -66,7 +66,7 @@ static inline ssize_t cpulist_read(struct file *file, struct kobject *kobj,
return n;
}
-static BIN_ATTR_RO(cpulist, 0);
+static BIN_ATTR_RO(cpulist, CPULIST_FILE_MAX_BYTES);
/**
* struct node_access_nodes - Access class device to hold user visible
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 55a10e6d4e2a..5a2e0232862e 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -2733,7 +2733,7 @@ static int __genpd_dev_pm_attach(struct device *dev, struct device *base_dev,
mutex_unlock(&gpd_list_lock);
dev_dbg(dev, "%s() failed to find PM domain: %ld\n",
__func__, PTR_ERR(pd));
- return driver_deferred_probe_check_state(base_dev);
+ return -ENODEV;
}
dev_dbg(dev, "adding to PM domain %s\n", pd->name);
diff --git a/drivers/base/topology.c b/drivers/base/topology.c
index ac6ad9ab67f9..89f98be5c5b9 100644
--- a/drivers/base/topology.c
+++ b/drivers/base/topology.c
@@ -62,47 +62,47 @@ define_id_show_func(ppin, "0x%llx");
static DEVICE_ATTR_ADMIN_RO(ppin);
define_siblings_read_func(thread_siblings, sibling_cpumask);
-static BIN_ATTR_RO(thread_siblings, 0);
-static BIN_ATTR_RO(thread_siblings_list, 0);
+static BIN_ATTR_RO(thread_siblings, CPUMAP_FILE_MAX_BYTES);
+static BIN_ATTR_RO(thread_siblings_list, CPULIST_FILE_MAX_BYTES);
define_siblings_read_func(core_cpus, sibling_cpumask);
-static BIN_ATTR_RO(core_cpus, 0);
-static BIN_ATTR_RO(core_cpus_list, 0);
+static BIN_ATTR_RO(core_cpus, CPUMAP_FILE_MAX_BYTES);
+static BIN_ATTR_RO(core_cpus_list, CPULIST_FILE_MAX_BYTES);
define_siblings_read_func(core_siblings, core_cpumask);
-static BIN_ATTR_RO(core_siblings, 0);
-static BIN_ATTR_RO(core_siblings_list, 0);
+static BIN_ATTR_RO(core_siblings, CPUMAP_FILE_MAX_BYTES);
+static BIN_ATTR_RO(core_siblings_list, CPULIST_FILE_MAX_BYTES);
#ifdef TOPOLOGY_CLUSTER_SYSFS
define_siblings_read_func(cluster_cpus, cluster_cpumask);
-static BIN_ATTR_RO(cluster_cpus, 0);
-static BIN_ATTR_RO(cluster_cpus_list, 0);
+static BIN_ATTR_RO(cluster_cpus, CPUMAP_FILE_MAX_BYTES);
+static BIN_ATTR_RO(cluster_cpus_list, CPULIST_FILE_MAX_BYTES);
#endif
#ifdef TOPOLOGY_DIE_SYSFS
define_siblings_read_func(die_cpus, die_cpumask);
-static BIN_ATTR_RO(die_cpus, 0);
-static BIN_ATTR_RO(die_cpus_list, 0);
+static BIN_ATTR_RO(die_cpus, CPUMAP_FILE_MAX_BYTES);
+static BIN_ATTR_RO(die_cpus_list, CPULIST_FILE_MAX_BYTES);
#endif
define_siblings_read_func(package_cpus, core_cpumask);
-static BIN_ATTR_RO(package_cpus, 0);
-static BIN_ATTR_RO(package_cpus_list, 0);
+static BIN_ATTR_RO(package_cpus, CPUMAP_FILE_MAX_BYTES);
+static BIN_ATTR_RO(package_cpus_list, CPULIST_FILE_MAX_BYTES);
#ifdef TOPOLOGY_BOOK_SYSFS
define_id_show_func(book_id, "%d");
static DEVICE_ATTR_RO(book_id);
define_siblings_read_func(book_siblings, book_cpumask);
-static BIN_ATTR_RO(book_siblings, 0);
-static BIN_ATTR_RO(book_siblings_list, 0);
+static BIN_ATTR_RO(book_siblings, CPUMAP_FILE_MAX_BYTES);
+static BIN_ATTR_RO(book_siblings_list, CPULIST_FILE_MAX_BYTES);
#endif
#ifdef TOPOLOGY_DRAWER_SYSFS
define_id_show_func(drawer_id, "%d");
static DEVICE_ATTR_RO(drawer_id);
define_siblings_read_func(drawer_siblings, drawer_cpumask);
-static BIN_ATTR_RO(drawer_siblings, 0);
-static BIN_ATTR_RO(drawer_siblings_list, 0);
+static BIN_ATTR_RO(drawer_siblings, CPUMAP_FILE_MAX_BYTES);
+static BIN_ATTR_RO(drawer_siblings_list, CPULIST_FILE_MAX_BYTES);
#endif
static struct bin_attribute *bin_attrs[] = {
diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
index 5696314ae69e..41f4eb005219 100644
--- a/drivers/iommu/of_iommu.c
+++ b/drivers/iommu/of_iommu.c
@@ -40,7 +40,7 @@ static int of_iommu_xlate(struct device *dev,
* a proper probe-ordering dependency mechanism in future.
*/
if (!ops)
- return driver_deferred_probe_check_state(dev);
+ return -ENODEV;
if (!try_module_get(ops->owner))
return -ENODEV;
diff --git a/drivers/net/mdio/fwnode_mdio.c b/drivers/net/mdio/fwnode_mdio.c
index 1c1584fca632..3e79c2c51929 100644
--- a/drivers/net/mdio/fwnode_mdio.c
+++ b/drivers/net/mdio/fwnode_mdio.c
@@ -47,9 +47,7 @@ int fwnode_mdiobus_phy_device_register(struct mii_bus *mdio,
* just fall back to poll mode
*/
if (rc == -EPROBE_DEFER)
- rc = driver_deferred_probe_check_state(&phy->mdio.dev);
- if (rc == -EPROBE_DEFER)
- return rc;
+ rc = -ENODEV;
if (rc > 0) {
phy->irq = rc;
diff --git a/drivers/of/base.c b/drivers/of/base.c
index 88f86ee54b9a..7fa960bd3df1 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -1919,6 +1919,8 @@ void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align))
of_property_read_string(of_aliases, "stdout", &name);
if (name)
of_stdout = of_find_node_opts_by_path(name, &of_stdout_options);
+ if (of_stdout)
+ of_stdout->fwnode.flags |= FWNODE_FLAG_BEST_EFFORT;
}
if (!of_aliases)
diff --git a/drivers/pinctrl/devicetree.c b/drivers/pinctrl/devicetree.c
index 3fb238714718..ef898ee8ca6b 100644
--- a/drivers/pinctrl/devicetree.c
+++ b/drivers/pinctrl/devicetree.c
@@ -129,7 +129,7 @@ static int dt_to_map_one_config(struct pinctrl *p,
np_pctldev = of_get_next_parent(np_pctldev);
if (!np_pctldev || of_node_is_root(np_pctldev)) {
of_node_put(np_pctldev);
- ret = driver_deferred_probe_check_state(p->dev);
+ ret = -ENODEV;
/* keep deferring if modules are enabled */
if (IS_ENABLED(CONFIG_MODULES) && !allow_default && ret < 0)
ret = -EPROBE_DEFER;
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 1c14d682ffed..8f97a3eacdea 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -2687,11 +2687,6 @@ int spi_slave_abort(struct spi_device *spi)
}
EXPORT_SYMBOL_GPL(spi_slave_abort);
-static int match_true(struct device *dev, void *data)
-{
- return 1;
-}
-
static ssize_t slave_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
@@ -2699,7 +2694,7 @@ static ssize_t slave_show(struct device *dev, struct device_attribute *attr,
dev);
struct device *child;
- child = device_find_child(&ctlr->dev, NULL, match_true);
+ child = device_find_any_child(&ctlr->dev);
return sprintf(buf, "%s\n",
child ? to_spi_device(child)->modalias : NULL);
}
@@ -2718,7 +2713,7 @@ static ssize_t slave_store(struct device *dev, struct device_attribute *attr,
if (rc != 1 || !name[0])
return -EINVAL;
- child = device_find_child(&ctlr->dev, NULL, match_true);
+ child = device_find_any_child(&ctlr->dev);
if (child) {
/* Remove registered slave */
device_unregister(child);
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 6eca72cfa1f2..1cc88ba6de90 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -1343,14 +1343,17 @@ static void __kernfs_remove(struct kernfs_node *kn)
{
struct kernfs_node *pos;
+ /* Short-circuit if non-root @kn has already finished removal. */
+ if (!kn)
+ return;
+
lockdep_assert_held_write(&kernfs_root(kn)->kernfs_rwsem);
/*
- * Short-circuit if non-root @kn has already finished removal.
* This is for kernfs_remove_self() which plays with active ref
* after removal.
*/
- if (!kn || (kn->parent && RB_EMPTY_NODE(&kn->rb)))
+ if (kn->parent && RB_EMPTY_NODE(&kn->rb))
return;
pr_debug("kernfs %s: removing\n", kn->name);
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index e3abfa843879..b3ec34386b43 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -18,21 +18,8 @@
#include "kernfs-internal.h"
-/*
- * There's one kernfs_open_file for each open file and one kernfs_open_node
- * for each kernfs_node with one or more open files.
- *
- * kernfs_node->attr.open points to kernfs_open_node. attr.open is
- * protected by kernfs_open_node_lock.
- *
- * filp->private_data points to seq_file whose ->private points to
- * kernfs_open_file. kernfs_open_files are chained at
- * kernfs_open_node->files, which is protected by kernfs_open_file_mutex.
- */
-static DEFINE_SPINLOCK(kernfs_open_node_lock);
-static DEFINE_MUTEX(kernfs_open_file_mutex);
-
struct kernfs_open_node {
+ struct rcu_head rcu_head;
atomic_t event;
wait_queue_head_t poll;
struct list_head files; /* goes through kernfs_open_file.list */
@@ -51,6 +38,70 @@ struct kernfs_open_node {
static DEFINE_SPINLOCK(kernfs_notify_lock);
static struct kernfs_node *kernfs_notify_list = KERNFS_NOTIFY_EOL;
+static inline struct mutex *kernfs_open_file_mutex_ptr(struct kernfs_node *kn)
+{
+ int idx = hash_ptr(kn, NR_KERNFS_LOCK_BITS);
+
+ return &kernfs_locks->open_file_mutex[idx];
+}
+
+static inline struct mutex *kernfs_open_file_mutex_lock(struct kernfs_node *kn)
+{
+ struct mutex *lock;
+
+ lock = kernfs_open_file_mutex_ptr(kn);
+
+ mutex_lock(lock);
+
+ return lock;
+}
+
+/**
+ * kernfs_deref_open_node - Get kernfs_open_node corresponding to @kn.
+ *
+ * @of: associated kernfs_open_file instance.
+ * @kn: target kernfs_node.
+ *
+ * Fetch and return ->attr.open of @kn if @of->list is non empty.
+ * If @of->list is not empty we can safely assume that @of is on
+ * @kn->attr.open->files list and this guarantees that @kn->attr.open
+ * will not vanish i.e. dereferencing outside RCU read-side critical
+ * section is safe here.
+ *
+ * The caller needs to make sure that @of->list is not empty.
+ */
+static struct kernfs_open_node *
+kernfs_deref_open_node(struct kernfs_open_file *of, struct kernfs_node *kn)
+{
+ struct kernfs_open_node *on;
+
+ on = rcu_dereference_check(kn->attr.open, !list_empty(&of->list));
+
+ return on;
+}
+
+/**
+ * kernfs_deref_open_node_protected - Get kernfs_open_node corresponding to @kn
+ *
+ * @kn: target kernfs_node.
+ *
+ * Fetch and return ->attr.open of @kn when caller holds the
+ * kernfs_open_file_mutex_ptr(kn).
+ *
+ * Update of ->attr.open happens under kernfs_open_file_mutex_ptr(kn). So when
+ * the caller guarantees that this mutex is being held, other updaters can't
+ * change ->attr.open and this means that we can safely deref ->attr.open
+ * outside RCU read-side critical section.
+ *
+ * The caller needs to make sure that kernfs_open_file_mutex_ptr(kn) is held.
+ */
+static struct kernfs_open_node *
+kernfs_deref_open_node_protected(struct kernfs_node *kn)
+{
+ return rcu_dereference_protected(kn->attr.open,
+ lockdep_is_held(kernfs_open_file_mutex_ptr(kn)));
+}
+
static struct kernfs_open_file *kernfs_of(struct file *file)
{
return ((struct seq_file *)file->private_data)->private;
@@ -156,8 +207,12 @@ static void kernfs_seq_stop(struct seq_file *sf, void *v)
static int kernfs_seq_show(struct seq_file *sf, void *v)
{
struct kernfs_open_file *of = sf->private;
+ struct kernfs_open_node *on = kernfs_deref_open_node(of, of->kn);
- of->event = atomic_read(&of->kn->attr.open->event);
+ if (!on)
+ return -EINVAL;
+
+ of->event = atomic_read(&on->event);
return of->kn->attr.ops->seq_show(sf, v);
}
@@ -180,6 +235,7 @@ static ssize_t kernfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
struct kernfs_open_file *of = kernfs_of(iocb->ki_filp);
ssize_t len = min_t(size_t, iov_iter_count(iter), PAGE_SIZE);
const struct kernfs_ops *ops;
+ struct kernfs_open_node *on;
char *buf;
buf = of->prealloc_buf;
@@ -201,7 +257,15 @@ static ssize_t kernfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
goto out_free;
}
- of->event = atomic_read(&of->kn->attr.open->event);
+ on = kernfs_deref_open_node(of, of->kn);
+ if (!on) {
+ len = -EINVAL;
+ mutex_unlock(&of->mutex);
+ goto out_free;
+ }
+
+ of->event = atomic_read(&on->event);
+
ops = kernfs_ops(of->kn);
if (ops->read)
len = ops->read(of, buf, len, iocb->ki_pos);
@@ -243,7 +307,7 @@ static ssize_t kernfs_fop_read_iter(struct kiocb *iocb, struct iov_iter *iter)
* There is no easy way for us to know if userspace is only doing a partial
* write, so we don't support them. We expect the entire buffer to come on
* the first write. Hint: if you're writing a value, first read the file,
- * modify only the the value you're changing, then write entire buffer
+ * modify only the value you're changing, then write entire buffer
* back.
*/
static ssize_t kernfs_fop_write_iter(struct kiocb *iocb, struct iov_iter *iter)
@@ -484,7 +548,6 @@ static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma)
* It is not possible to successfully wrap close.
* So error if someone is trying to use close.
*/
- rc = -EINVAL;
if (vma->vm_ops && vma->vm_ops->close)
goto out_put;
@@ -518,37 +581,31 @@ static int kernfs_get_open_node(struct kernfs_node *kn,
struct kernfs_open_file *of)
{
struct kernfs_open_node *on, *new_on = NULL;
+ struct mutex *mutex = NULL;
- retry:
- mutex_lock(&kernfs_open_file_mutex);
- spin_lock_irq(&kernfs_open_node_lock);
-
- if (!kn->attr.open && new_on) {
- kn->attr.open = new_on;
- new_on = NULL;
- }
-
- on = kn->attr.open;
- if (on)
- list_add_tail(&of->list, &on->files);
-
- spin_unlock_irq(&kernfs_open_node_lock);
- mutex_unlock(&kernfs_open_file_mutex);
+ mutex = kernfs_open_file_mutex_lock(kn);
+ on = kernfs_deref_open_node_protected(kn);
if (on) {
- kfree(new_on);
+ list_add_tail(&of->list, &on->files);
+ mutex_unlock(mutex);
return 0;
+ } else {
+ /* not there, initialize a new one */
+ new_on = kmalloc(sizeof(*new_on), GFP_KERNEL);
+ if (!new_on) {
+ mutex_unlock(mutex);
+ return -ENOMEM;
+ }
+ atomic_set(&new_on->event, 1);
+ init_waitqueue_head(&new_on->poll);
+ INIT_LIST_HEAD(&new_on->files);
+ list_add_tail(&of->list, &new_on->files);
+ rcu_assign_pointer(kn->attr.open, new_on);
}
+ mutex_unlock(mutex);
- /* not there, initialize a new one and retry */
- new_on = kmalloc(sizeof(*new_on), GFP_KERNEL);
- if (!new_on)
- return -ENOMEM;
-
- atomic_set(&new_on->event, 1);
- init_waitqueue_head(&new_on->poll);
- INIT_LIST_HEAD(&new_on->files);
- goto retry;
+ return 0;
}
/**
@@ -567,24 +624,26 @@ static int kernfs_get_open_node(struct kernfs_node *kn,
static void kernfs_unlink_open_file(struct kernfs_node *kn,
struct kernfs_open_file *of)
{
- struct kernfs_open_node *on = kn->attr.open;
- unsigned long flags;
+ struct kernfs_open_node *on;
+ struct mutex *mutex = NULL;
- mutex_lock(&kernfs_open_file_mutex);
- spin_lock_irqsave(&kernfs_open_node_lock, flags);
+ mutex = kernfs_open_file_mutex_lock(kn);
+
+ on = kernfs_deref_open_node_protected(kn);
+ if (!on) {
+ mutex_unlock(mutex);
+ return;
+ }
if (of)
list_del(&of->list);
- if (list_empty(&on->files))
- kn->attr.open = NULL;
- else
- on = NULL;
-
- spin_unlock_irqrestore(&kernfs_open_node_lock, flags);
- mutex_unlock(&kernfs_open_file_mutex);
+ if (list_empty(&on->files)) {
+ rcu_assign_pointer(kn->attr.open, NULL);
+ kfree_rcu(on, rcu_head);
+ }
- kfree(on);
+ mutex_unlock(mutex);
}
static int kernfs_fop_open(struct inode *inode, struct file *file)
@@ -722,11 +781,11 @@ static void kernfs_release_file(struct kernfs_node *kn,
/*
* @of is guaranteed to have no other file operations in flight and
* we just want to synchronize release and drain paths.
- * @kernfs_open_file_mutex is enough. @of->mutex can't be used
+ * @kernfs_open_file_mutex_ptr(kn) is enough. @of->mutex can't be used
* here because drain path may be called from places which can
* cause circular dependency.
*/
- lockdep_assert_held(&kernfs_open_file_mutex);
+ lockdep_assert_held(kernfs_open_file_mutex_ptr(kn));
if (!of->released) {
/*
@@ -743,11 +802,12 @@ static int kernfs_fop_release(struct inode *inode, struct file *filp)
{
struct kernfs_node *kn = inode->i_private;
struct kernfs_open_file *of = kernfs_of(filp);
+ struct mutex *mutex = NULL;
if (kn->flags & KERNFS_HAS_RELEASE) {
- mutex_lock(&kernfs_open_file_mutex);
+ mutex = kernfs_open_file_mutex_lock(kn);
kernfs_release_file(kn, of);
- mutex_unlock(&kernfs_open_file_mutex);
+ mutex_unlock(mutex);
}
kernfs_unlink_open_file(kn, of);
@@ -762,6 +822,7 @@ void kernfs_drain_open_files(struct kernfs_node *kn)
{
struct kernfs_open_node *on;
struct kernfs_open_file *of;
+ struct mutex *mutex = NULL;
if (!(kn->flags & (KERNFS_HAS_MMAP | KERNFS_HAS_RELEASE)))
return;
@@ -771,20 +832,19 @@ void kernfs_drain_open_files(struct kernfs_node *kn)
* ->attr.open at this point of time. This check allows early bail out
* if ->attr.open is already NULL. kernfs_unlink_open_file makes
* ->attr.open NULL only while holding kernfs_open_file_mutex so below
- * check under kernfs_open_file_mutex will ensure bailing out if
+ * check under kernfs_open_file_mutex_ptr(kn) will ensure bailing out if
* ->attr.open became NULL while waiting for the mutex.
*/
- if (!kn->attr.open)
+ if (!rcu_access_pointer(kn->attr.open))
return;
- mutex_lock(&kernfs_open_file_mutex);
- if (!kn->attr.open) {
- mutex_unlock(&kernfs_open_file_mutex);
+ mutex = kernfs_open_file_mutex_lock(kn);
+ on = kernfs_deref_open_node_protected(kn);
+ if (!on) {
+ mutex_unlock(mutex);
return;
}
- on = kn->attr.open;
-
list_for_each_entry(of, &on->files, list) {
struct inode *inode = file_inode(of->file);
@@ -795,7 +855,7 @@ void kernfs_drain_open_files(struct kernfs_node *kn)
kernfs_release_file(kn, of);
}
- mutex_unlock(&kernfs_open_file_mutex);
+ mutex_unlock(mutex);
}
/*
@@ -815,7 +875,10 @@ void kernfs_drain_open_files(struct kernfs_node *kn)
__poll_t kernfs_generic_poll(struct kernfs_open_file *of, poll_table *wait)
{
struct kernfs_node *kn = kernfs_dentry_node(of->file->f_path.dentry);
- struct kernfs_open_node *on = kn->attr.open;
+ struct kernfs_open_node *on = kernfs_deref_open_node(of, kn);
+
+ if (!on)
+ return EPOLLERR;
poll_wait(of->file, &on->poll, wait);
@@ -922,13 +985,13 @@ void kernfs_notify(struct kernfs_node *kn)
return;
/* kick poll immediately */
- spin_lock_irqsave(&kernfs_open_node_lock, flags);
- on = kn->attr.open;
+ rcu_read_lock();
+ on = rcu_dereference(kn->attr.open);
if (on) {
atomic_inc(&on->event);
wake_up_interruptible(&on->poll);
}
- spin_unlock_irqrestore(&kernfs_open_node_lock, flags);
+ rcu_read_unlock();
/* schedule work to kick fsnotify */
spin_lock_irqsave(&kernfs_notify_lock, flags);
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
index eeaa779b929c..3ae214d02d44 100644
--- a/fs/kernfs/kernfs-internal.h
+++ b/fs/kernfs/kernfs-internal.h
@@ -164,4 +164,8 @@ void kernfs_drain_open_files(struct kernfs_node *kn);
*/
extern const struct inode_operations kernfs_symlink_iops;
+/*
+ * kernfs locks
+ */
+extern struct kernfs_global_locks *kernfs_locks;
#endif /* __KERNFS_INTERNAL_H */
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index cfa79715fc1a..d0859f72d2d6 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -20,6 +20,7 @@
#include "kernfs-internal.h"
struct kmem_cache *kernfs_node_cache, *kernfs_iattrs_cache;
+struct kernfs_global_locks *kernfs_locks;
static int kernfs_sop_show_options(struct seq_file *sf, struct dentry *dentry)
{
@@ -387,6 +388,22 @@ void kernfs_kill_sb(struct super_block *sb)
kfree(info);
}
+/*
+ * Initialize each of the NR_KERNFS_LOCKS mutexes in
+ * kernfs_locks->open_file_mutex[]. kernfs_locks is dereferenced
+ * unconditionally, so it must point to a valid table on entry.
+ */
+static void __init kernfs_mutex_init(void)
+{
+ int count;
+
+ for (count = 0; count < NR_KERNFS_LOCKS; count++)
+ mutex_init(&kernfs_locks->open_file_mutex[count]);
+}
+
+/*
+ * Allocate the global kernfs lock table and initialize the locks in it.
+ *
+ * kernfs_mutex_init() dereferences kernfs_locks, so a failed allocation
+ * must not fall through to it. Stop the boot instead, in line with the
+ * SLAB_PANIC policy used for the kernfs caches in kernfs_init().
+ */
+static void __init kernfs_lock_init(void)
+{
+ kernfs_locks = kmalloc(sizeof(*kernfs_locks), GFP_KERNEL);
+ if (!kernfs_locks)
+ panic("kernfs: failed to allocate global locks");
+
+ kernfs_mutex_init();
+}
+
void __init kernfs_init(void)
{
kernfs_node_cache = kmem_cache_create("kernfs_node_cache",
@@ -397,4 +414,6 @@ void __init kernfs_init(void)
kernfs_iattrs_cache = kmem_cache_create("kernfs_iattrs_cache",
sizeof(struct kernfs_iattrs),
0, SLAB_PANIC, NULL);
+
+ kernfs_lock_init();
}
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 813a33858c73..6f64b2f3dc54 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -1437,7 +1437,6 @@ int find_acpi_cpu_topology(unsigned int cpu, int level);
int find_acpi_cpu_topology_cluster(unsigned int cpu);
int find_acpi_cpu_topology_package(unsigned int cpu);
int find_acpi_cpu_topology_hetero_id(unsigned int cpu);
-int find_acpi_cpu_cache_topology(unsigned int cpu, int level);
#else
static inline int acpi_pptt_cpu_is_thread(unsigned int cpu)
{
@@ -1459,10 +1458,6 @@ static inline int find_acpi_cpu_topology_hetero_id(unsigned int cpu)
{
return -EINVAL;
}
-static inline int find_acpi_cpu_cache_topology(unsigned int cpu, int level)
-{
- return -EINVAL;
-}
#endif
#ifdef CONFIG_ACPI_PCC
diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h
index 58cbe18d825c..a07b510e7dc5 100644
--- a/include/linux/arch_topology.h
+++ b/include/linux/arch_topology.h
@@ -68,7 +68,6 @@ struct cpu_topology {
int core_id;
int cluster_id;
int package_id;
- int llc_id;
cpumask_t thread_sibling;
cpumask_t core_sibling;
cpumask_t cluster_sibling;
diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h
index 4ff37cb763ae..00b7a6ae8617 100644
--- a/include/linux/cacheinfo.h
+++ b/include/linux/cacheinfo.h
@@ -82,6 +82,9 @@ struct cpu_cacheinfo *get_cpu_cacheinfo(unsigned int cpu);
int init_cache_level(unsigned int cpu);
int populate_cache_leaves(unsigned int cpu);
int cache_setup_acpi(unsigned int cpu);
+bool last_level_cache_is_valid(unsigned int cpu);
+bool last_level_cache_is_shared(unsigned int cpu_x, unsigned int cpu_y);
+int detect_cache_attributes(unsigned int cpu);
#ifndef CONFIG_ACPI_PPTT
/*
* acpi_find_last_cache_level is only called on ACPI enabled
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index fe29ac7cc469..4592d0845941 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -1071,4 +1071,22 @@ cpumap_print_list_to_buf(char *buf, const struct cpumask *mask,
[0] = 1UL \
} }
+/*
+ * Provide a valid theoretical max size for cpumap and cpulist sysfs files
+ * to avoid breaking userspace which may allocate a buffer based on the size
+ * reported by e.g. fstat.
+ *
+ * For cpumap NR_CPUS * 9/32 - 1 should be an exact length.
+ *
+ * For cpulist 7 is (ceil(log10(NR_CPUS)) + 1) allowing for NR_CPUS to be up
+ * to 2 orders of magnitude larger than 8192. And then we divide by 2 to
+ * cover a worst-case of every other cpu being on one of two nodes for a
+ * very large NR_CPUS.
+ *
+ * Use PAGE_SIZE as a minimum for smaller configurations.
+ *
+ * The cpumap size is compared with PAGE_SIZE before 1 is subtracted: for
+ * NR_CPUS < 4, (NR_CPUS * 9)/32 - 1 evaluates to -1, and comparing -1 with
+ * an unsigned PAGE_SIZE converts it to a huge value, so the macro would
+ * pick that instead of PAGE_SIZE.
+ */
+#define CPUMAP_FILE_MAX_BYTES (((NR_CPUS * 9)/32 > PAGE_SIZE) \
+ ? (NR_CPUS * 9)/32 - 1 : PAGE_SIZE)
+#define CPULIST_FILE_MAX_BYTES (((NR_CPUS * 7)/2 > PAGE_SIZE) ? (NR_CPUS * 7)/2 : PAGE_SIZE)
+
#endif /* __LINUX_CPUMASK_H */
diff --git a/include/linux/device.h b/include/linux/device.h
index dc941997795c..424b55df0272 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -905,6 +905,8 @@ struct device *device_find_child(struct device *dev, void *data,
int (*match)(struct device *dev, void *data));
struct device *device_find_child_by_name(struct device *parent,
const char *name);
+struct device *device_find_any_child(struct device *parent);
+
int device_rename(struct device *dev, const char *new_name);
int device_move(struct device *dev, struct device *new_parent,
enum dpm_order dpm_order);
diff --git a/include/linux/device/driver.h b/include/linux/device/driver.h
index 700453017e1c..7acaabde5396 100644
--- a/include/linux/device/driver.h
+++ b/include/linux/device/driver.h
@@ -129,6 +129,7 @@ extern struct device_driver *driver_find(const char *name,
struct bus_type *bus);
extern int driver_probe_done(void);
extern void wait_for_device_probe(void);
+void __init wait_for_init_devices_probe(void);
/* sysfs interface for exporting driver attributes */
@@ -241,7 +242,6 @@ driver_find_device_by_acpi_dev(struct device_driver *drv, const void *adev)
extern int driver_deferred_probe_timeout;
void driver_deferred_probe_add(struct device *dev);
-int driver_deferred_probe_check_state(struct device *dev);
void driver_init(void);
/**
diff --git a/include/linux/firmware/trusted_foundations.h b/include/linux/firmware/trusted_foundations.h
index be5984bda592..931b6c5c72df 100644
--- a/include/linux/firmware/trusted_foundations.h
+++ b/include/linux/firmware/trusted_foundations.h
@@ -71,12 +71,16 @@ static inline void register_trusted_foundations(
static inline void of_register_trusted_foundations(void)
{
+ struct device_node *np = of_find_compatible_node(NULL, NULL, "tlm,trusted-foundations");
+
+ if (!np)
+ return;
+ of_node_put(np);
/*
* If we find the target should enable TF but does not support it,
* fail as the system won't be able to do much anyway
*/
- if (of_find_compatible_node(NULL, NULL, "tlm,trusted-foundations"))
- register_trusted_foundations(NULL);
+ register_trusted_foundations(NULL);
}
static inline bool trusted_foundations_registered(void)
diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h
index 9a81c4410b9f..89b9bdfca925 100644
--- a/include/linux/fwnode.h
+++ b/include/linux/fwnode.h
@@ -27,11 +27,15 @@ struct device;
* driver needs its child devices to be bound with
* their respective drivers as soon as they are
* added.
+ * BEST_EFFORT: The fwnode/device needs to probe early and might be missing some
+ * suppliers. Only enforce ordering with suppliers that have
+ * drivers.
*/
#define FWNODE_FLAG_LINKS_ADDED BIT(0)
#define FWNODE_FLAG_NOT_DEVICE BIT(1)
#define FWNODE_FLAG_INITIALIZED BIT(2)
#define FWNODE_FLAG_NEEDS_CHILD_BOUND_ON_ADD BIT(3)
+#define FWNODE_FLAG_BEST_EFFORT BIT(4)
struct fwnode_handle {
struct fwnode_handle *secondary;
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index e2ae15a6225e..367044d7708c 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -18,6 +18,7 @@
#include <linux/uidgid.h>
#include <linux/wait.h>
#include <linux/rwsem.h>
+#include <linux/cache.h>
struct file;
struct dentry;
@@ -34,6 +35,62 @@ struct kernfs_fs_context;
struct kernfs_open_node;
struct kernfs_iattrs;
+/*
+ * NR_KERNFS_LOCK_BITS determines size (NR_KERNFS_LOCKS) of hash
+ * table of locks.
+ * Having a small hash table would impact scalability, since
+ * more and more kernfs_node objects will end up using same lock
+ * and having a very large hash table would waste memory.
+ *
+ * At the moment size of hash table of locks is being set based on
+ * NR_CPUS as follows (the first row is the !CONFIG_SMP case, where
+ * NR_KERNFS_LOCK_BITS is fixed at 1):
+ *
+ * NR_CPUS      NR_KERNFS_LOCK_BITS  NR_KERNFS_LOCKS
+ * 1            1                    2
+ * 2-3          2                    4
+ * 4-7          4                    16
+ * 8-15         6                    64
+ * 16-31        8                    256
+ * 32 and more  10                   1024
+ *
+ * The above relation between NR_CPUS and number of locks is based
+ * on some internal experimentation which involved booting qemu
+ * with different values of smp, performing some sysfs operations
+ * on all CPUs and observing how increase in number of locks impacts
+ * completion time of these sysfs operations on each CPU.
+ */
+#ifdef CONFIG_SMP
+#define NR_KERNFS_LOCK_BITS (2 * (ilog2(NR_CPUS < 32 ? NR_CPUS : 32)))
+#else
+#define NR_KERNFS_LOCK_BITS 1
+#endif
+
+#define NR_KERNFS_LOCKS (1 << NR_KERNFS_LOCK_BITS)
+
+/*
+ * There's one kernfs_open_file for each open file and one kernfs_open_node
+ * for each kernfs_node with one or more open files.
+ *
+ * filp->private_data points to seq_file whose ->private points to
+ * kernfs_open_file.
+ *
+ * kernfs_open_files are chained at kernfs_open_node->files, which is
+ * protected by kernfs_global_locks.open_file_mutex[i].
+ *
+ * To reduce possible contention in sysfs access, arising due to single
+ * locks, use an array of locks (e.g. open_file_mutex) and use kernfs_node
+ * object address as hash keys to get the index of these locks.
+ *
+ * Hashed mutexes are safe to use here because operations using these don't
+ * rely on global exclusion.
+ *
+ * In future we intend to replace other global locks with hashed ones as well.
+ * kernfs_global_locks acts as a holder for all such hash tables.
+ */
+struct kernfs_global_locks {
+ struct mutex open_file_mutex[NR_KERNFS_LOCKS]; /* indexed by a hash of the kernfs_node address */
+};
+
enum kernfs_node_type {
KERNFS_DIR = 0x0001,
KERNFS_FILE = 0x0002,
@@ -114,7 +171,7 @@ struct kernfs_elem_symlink {
struct kernfs_elem_attr {
const struct kernfs_ops *ops;
- struct kernfs_open_node *open;
+ struct kernfs_open_node __rcu *open;
loff_t size;
struct kernfs_node *notify_next; /* for kernfs_notify() */
};
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 79a71eb96111..35cd8287642a 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1560,7 +1560,7 @@ config DEBUG_KOBJECT_RELEASE
help
kobjects are reference counted objects. This means that their
last reference count put is not predictable, and the kobject can
- live on past the point at which a driver decides to drop it's
+ live on past the point at which a driver decides to drop its
initial reference to the kobject gained on allocation. An
example of this would be a struct device which has just been
unregistered.
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index f53a0f2453af..e90bc0aa85c7 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -1434,6 +1434,7 @@ __be32 __init root_nfs_parse_addr(char *name)
static int __init wait_for_devices(void)
{
int i;
+ bool try_init_devs = true;
for (i = 0; i < DEVICE_WAIT_MAX; i++) {
struct net_device *dev;
@@ -1452,6 +1453,11 @@ static int __init wait_for_devices(void)
rtnl_unlock();
if (found)
return 0;
+ if (try_init_devs &&
+ (ROOT_DEV == Root_NFS || ROOT_DEV == Root_CIFS)) {
+ try_init_devs = false;
+ wait_for_init_devices_probe();
+ }
ssleep(1);
}
return -ENODEV;