From 9c0f59e47a90c54d0153f8ddc0f80d7a36207d0e Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Mon, 18 Mar 2024 11:59:02 +0100 Subject: [PATCH 001/606] HID: i2c-hid: remove I2C_HID_READ_PENDING flag to prevent lock-up The flag I2C_HID_READ_PENDING is used to serialize I2C operations. However, this is not necessary, because I2C core already has its own locking for that. More importantly, this flag can cause a lock-up: if the flag is set in i2c_hid_xfer() and an interrupt happens, the interrupt handler (i2c_hid_irq) will check this flag and return immediately without doing anything, then the interrupt handler will be invoked again in an infinite loop. Since interrupt handler is an RT task, it takes over the CPU and the flag-clearing task never gets scheduled, thus we have a lock-up. Delete this unnecessary flag. Reported-and-tested-by: Eva Kurchatova Closes: https://lore.kernel.org/r/CA+eeCSPUDpUg76ZO8dszSbAGn+UHjcyv8F1J-CUPVARAzEtW9w@mail.gmail.com Fixes: 4a200c3b9a40 ("HID: i2c-hid: introduce HID over i2c specification implementation") Cc: Signed-off-by: Nam Cao Signed-off-by: Jiri Kosina --- drivers/hid/i2c-hid/i2c-hid-core.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/hid/i2c-hid/i2c-hid-core.c b/drivers/hid/i2c-hid/i2c-hid-core.c index 2df1ab3c31cc5..1c86c97688e93 100644 --- a/drivers/hid/i2c-hid/i2c-hid-core.c +++ b/drivers/hid/i2c-hid/i2c-hid-core.c @@ -64,7 +64,6 @@ /* flags */ #define I2C_HID_STARTED 0 #define I2C_HID_RESET_PENDING 1 -#define I2C_HID_READ_PENDING 2 #define I2C_HID_PWR_ON 0x00 #define I2C_HID_PWR_SLEEP 0x01 @@ -190,15 +189,10 @@ static int i2c_hid_xfer(struct i2c_hid *ihid, msgs[n].len = recv_len; msgs[n].buf = recv_buf; n++; - - set_bit(I2C_HID_READ_PENDING, &ihid->flags); } ret = i2c_transfer(client->adapter, msgs, n); - if (recv_len) - clear_bit(I2C_HID_READ_PENDING, &ihid->flags); - if (ret != n) return ret < 0 ? ret : -EIO; @@ -556,9 +550,6 @@ static irqreturn_t i2c_hid_irq(int irq, void *dev_id) { struct i2c_hid *ihid = dev_id; - if (test_bit(I2C_HID_READ_PENDING, &ihid->flags)) - return IRQ_HANDLED; - i2c_hid_get_input(ihid); return IRQ_HANDLED; From 92826905ae340b7f2b25759a06c8c60bfc476b9f Mon Sep 17 00:00:00 2001 From: Zhang Lixu Date: Wed, 6 Mar 2024 00:44:04 +0000 Subject: [PATCH 002/606] HID: intel-ish-hid: ipc: Fix dev_err usage with uninitialized dev->devc The variable dev->devc in ish_dev_init was utilized by dev_err before it was properly assigned. To rectify this, the assignment of dev->devc has been moved to immediately follow memory allocation. Without this change "(NULL device *)" is printed for device information. Fixes: 8ae2f2b0a284 ("HID: intel-ish-hid: ipc: Fix potential use-after-free in work function") Fixes: ae02e5d40d5f ("HID: intel-ish-hid: ipc layer") Signed-off-by: Zhang Lixu Acked-by: Srinivas Pandruvada Signed-off-by: Jiri Kosina --- drivers/hid/intel-ish-hid/ipc/ipc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/intel-ish-hid/ipc/ipc.c b/drivers/hid/intel-ish-hid/ipc/ipc.c index a49c6affd7c4c..dd5fc60874ba1 100644 --- a/drivers/hid/intel-ish-hid/ipc/ipc.c +++ b/drivers/hid/intel-ish-hid/ipc/ipc.c @@ -948,6 +948,7 @@ struct ishtp_device *ish_dev_init(struct pci_dev *pdev) if (!dev) return NULL; + dev->devc = &pdev->dev; ishtp_device_init(dev); init_waitqueue_head(&dev->wait_hw_ready); @@ -983,7 +984,6 @@ struct ishtp_device *ish_dev_init(struct pci_dev *pdev) } dev->ops = &ish_hw_ops; - dev->devc = &pdev->dev; dev->mtu = IPC_PAYLOAD_SIZE - sizeof(struct ishtp_msg_hdr); return dev; } From 139b4c37e9cb0943e51adbb9c20c45bf60e44422 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Thu, 21 Mar 2024 16:19:27 +0100 Subject: [PATCH 003/606] MAINTAINERS: update Benjamin's email address Update my email address to the kernel.org one, as it's getting more convenient this way. Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- .mailmap | 2 ++ MAINTAINERS | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.mailmap b/.mailmap index e90797de3256a..25019ddf467c9 100644 --- a/.mailmap +++ b/.mailmap @@ -96,6 +96,8 @@ Ben Widawsky Ben Widawsky Ben Widawsky Benjamin Poirier +Benjamin Tissoires +Benjamin Tissoires Bjorn Andersson Bjorn Andersson Bjorn Andersson diff --git a/MAINTAINERS b/MAINTAINERS index c50e72258ba9a..52bb6f2c3e7be 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9546,7 +9546,7 @@ F: kernel/power/ HID CORE LAYER M: Jiri Kosina -M: Benjamin Tissoires +M: Benjamin Tissoires L: linux-input@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/hid/hid.git @@ -22685,7 +22685,7 @@ F: drivers/usb/host/ehci* USB HID/HIDBP DRIVERS (USB KEYBOARDS, MICE, REMOTE CONTROLS, ...) M: Jiri Kosina -M: Benjamin Tissoires +M: Benjamin Tissoires L: linux-usb@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/hid/hid.git From 546a4f4b5f4d930ea57f5510e109acf08eca5e87 Mon Sep 17 00:00:00 2001 From: Vasileios Amoiridis Date: Sat, 16 Mar 2024 12:07:42 +0100 Subject: [PATCH 004/606] iio: pressure: Fixes BME280 SPI driver data Use bme280_chip_info structure instead of bmp280_chip_info in SPI support for the BME280 sensor. Fixes: 0b0b772637cd ("iio: pressure: bmp280: Use chip_info pointers for each chip as driver data") Signed-off-by: Vasileios Amoiridis Link: https://lore.kernel.org/r/20240316110743.1998400-2-vassilisamir@gmail.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/pressure/bmp280-spi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/pressure/bmp280-spi.c b/drivers/iio/pressure/bmp280-spi.c index a444d4b2978b5..038d36aad3eb9 100644 --- a/drivers/iio/pressure/bmp280-spi.c +++ b/drivers/iio/pressure/bmp280-spi.c @@ -127,7 +127,7 @@ static const struct of_device_id bmp280_of_spi_match[] = { { .compatible = "bosch,bmp180", .data = &bmp180_chip_info }, { .compatible = "bosch,bmp181", .data = &bmp180_chip_info }, { .compatible = "bosch,bmp280", .data = &bmp280_chip_info }, - { .compatible = "bosch,bme280", .data = &bmp280_chip_info }, + { .compatible = "bosch,bme280", .data = &bme280_chip_info }, { .compatible = "bosch,bmp380", .data = &bmp380_chip_info }, { .compatible = "bosch,bmp580", .data = &bmp580_chip_info }, { }, @@ -139,7 +139,7 @@ static const struct spi_device_id bmp280_spi_id[] = { { "bmp180", (kernel_ulong_t)&bmp180_chip_info }, { "bmp181", (kernel_ulong_t)&bmp180_chip_info }, { "bmp280", (kernel_ulong_t)&bmp280_chip_info }, - { "bme280", (kernel_ulong_t)&bmp280_chip_info }, + { "bme280", (kernel_ulong_t)&bme280_chip_info }, { "bmp380", (kernel_ulong_t)&bmp380_chip_info }, { "bmp580", (kernel_ulong_t)&bmp580_chip_info }, { } From 5ca29ea4e4073b3caba750efe155b1bd4c597ca9 Mon Sep 17 00:00:00 2001 From: Vasileios Amoiridis Date: Sat, 16 Mar 2024 12:07:43 +0100 Subject: [PATCH 005/606] iio: pressure: Fixes SPI support for BMP3xx devices Bosch does not use unique BMPxxx_CHIP_ID for the different versions of the device which leads to misidentification of devices if their ID is used. Use a new value in the chip_info structure instead of the BMPxxx_CHIP_ID, in order to choose the correct regmap_bus to be used. Fixes: a9dd9ba32311 ("iio: pressure: Fixes BMP38x and BMP390 SPI support") Signed-off-by: Vasileios Amoiridis Link: https://lore.kernel.org/r/20240316110743.1998400-3-vassilisamir@gmail.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/pressure/bmp280-core.c | 1 + drivers/iio/pressure/bmp280-spi.c | 9 ++------- drivers/iio/pressure/bmp280.h | 1 + 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/iio/pressure/bmp280-core.c b/drivers/iio/pressure/bmp280-core.c index fe8734468ed35..62e9e93d915dc 100644 --- a/drivers/iio/pressure/bmp280-core.c +++ b/drivers/iio/pressure/bmp280-core.c @@ -1233,6 +1233,7 @@ const struct bmp280_chip_info bmp380_chip_info = { .chip_id = bmp380_chip_ids, .num_chip_id = ARRAY_SIZE(bmp380_chip_ids), .regmap_config = &bmp380_regmap_config, + .spi_read_extra_byte = true, .start_up_time = 2000, .channels = bmp380_channels, .num_channels = 2, diff --git a/drivers/iio/pressure/bmp280-spi.c b/drivers/iio/pressure/bmp280-spi.c index 038d36aad3eb9..4e19ea0b4d398 100644 --- a/drivers/iio/pressure/bmp280-spi.c +++ b/drivers/iio/pressure/bmp280-spi.c @@ -96,15 +96,10 @@ static int bmp280_spi_probe(struct spi_device *spi) chip_info = spi_get_device_match_data(spi); - switch (chip_info->chip_id[0]) { - case BMP380_CHIP_ID: - case BMP390_CHIP_ID: + if (chip_info->spi_read_extra_byte) bmp_regmap_bus = &bmp380_regmap_bus; - break; - default: + else bmp_regmap_bus = &bmp280_regmap_bus; - break; - } regmap = devm_regmap_init(&spi->dev, bmp_regmap_bus, diff --git a/drivers/iio/pressure/bmp280.h b/drivers/iio/pressure/bmp280.h index 4012387d79565..5812a344ed8e8 100644 --- a/drivers/iio/pressure/bmp280.h +++ b/drivers/iio/pressure/bmp280.h @@ -423,6 +423,7 @@ struct bmp280_chip_info { int num_chip_id; const struct regmap_config *regmap_config; + bool spi_read_extra_byte; const struct iio_chan_spec *channels; int num_channels; From 89384a2b656b9dace4c965432a209d5c9c3a2a6f Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Sat, 16 Mar 2024 23:56:57 +0100 Subject: [PATCH 006/606] dt-bindings: iio: health: maxim,max30102: fix compatible check The "maxim,green-led-current-microamp" property is only available for the max30105 part (it provides an extra green LED), and must be set to false for the max30102 part. Instead, the max30100 part has been used for that, which is not supported by this binding (it has its own binding). This error was introduced during the txt to yaml conversion. Fixes: 5a6a65b11e3a ("dt-bindings:iio:health:maxim,max30102: txt to yaml conversion") Signed-off-by: Javier Carrasco Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20240316-max30102_binding_fix-v1-1-e8e58f69ef8a@gmail.com Cc: Signed-off-by: Jonathan Cameron --- .../devicetree/bindings/iio/health/maxim,max30102.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/iio/health/maxim,max30102.yaml b/Documentation/devicetree/bindings/iio/health/maxim,max30102.yaml index c13c10c8d65da..eed0df9d3a232 100644 --- a/Documentation/devicetree/bindings/iio/health/maxim,max30102.yaml +++ b/Documentation/devicetree/bindings/iio/health/maxim,max30102.yaml @@ -42,7 +42,7 @@ allOf: properties: compatible: contains: - const: maxim,max30100 + const: maxim,max30102 then: properties: maxim,green-led-current-microamp: false From a2ac2a1b02590a22a236c43c455f421cdede45f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ar=C4=B1n=C3=A7=20=C3=9CNAL?= Date: Thu, 14 Mar 2024 15:24:35 +0300 Subject: [PATCH 007/606] arm64: dts: rockchip: set PHY address of MT7531 switch to 0x1f MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The MT7531 switch listens on PHY address 0x1f on an MDIO bus. I've got two findings that support this. There's no bootstrapping option to change the PHY address of the switch. The Linux driver hardcodes 0x1f as the PHY address of the switch. So the reg property on the device tree is currently ignored by the Linux driver. Therefore, describe the correct PHY address on Banana Pi BPI-R2 Pro that has this switch. Signed-off-by: Arınç ÜNAL Fixes: c1804463e5c6 ("arm64: dts: rockchip: Add mt7531 dsa node to BPI-R2-Pro board") Link: https://lore.kernel.org/r/20240314-for-rockchip-mt7531-phy-address-v1-1-743b5873358f@arinc9.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts b/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts index 7b5f3904ef610..03d6d920446ab 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts @@ -525,9 +525,9 @@ #address-cells = <1>; #size-cells = <0>; - switch@0 { + switch@1f { compatible = "mediatek,mt7531"; - reg = <0>; + reg = <0x1f>; ports { #address-cells = <1>; From 0ac417b8f124427c90ec8c2ef4f632b821d924cc Mon Sep 17 00:00:00 2001 From: Iskander Amara Date: Fri, 8 Mar 2024 09:52:42 +0100 Subject: [PATCH 008/606] arm64: dts: rockchip: enable internal pull-up for Q7_THRM# on RK3399 Puma Q7_THRM# pin is connected to a diode on the module which is used as a level shifter, and the pin have a pull-down enabled by default. We need to configure it to internal pull-up, other- wise whenever the pin is configured as INPUT and we try to control it externally the value will always remain zero. Signed-off-by: Iskander Amara Fixes: 2c66fc34e945 ("arm64: dts: rockchip: add RK3399-Q7 (Puma) SoM") Reviewed-by: Quentin Schulz Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240308085243.69903-1-iskander.amara@theobroma-systems.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi index c08e69391c015..06f3e97af7cd7 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi @@ -426,6 +426,16 @@ }; &pinctrl { + pinctrl-names = "default"; + pinctrl-0 = <&q7_thermal_pin>; + + gpios { + q7_thermal_pin: q7-thermal-pin { + rockchip,pins = + <0 RK_PA3 RK_FUNC_GPIO &pcfg_pull_up>; + }; + }; + i2c8 { i2c8_xfer_a: i2c8-xfer { rockchip,pins = From f0abb4b2c7acf3c3e4130dc3f54cd90cf2ae62bc Mon Sep 17 00:00:00 2001 From: Iskander Amara Date: Fri, 8 Mar 2024 09:52:43 +0100 Subject: [PATCH 009/606] arm64: dts: rockchip: fix alphabetical ordering RK3399 puma Nodes overridden by their reference should be ordered alphabetically to make it easier to read the DTS. pinctrl node is defined in the wrong location so let's reorder it. Signed-off-by: Iskander Amara Reviewed-by: Quentin Schulz Link: https://lore.kernel.org/r/20240308085243.69903-2-iskander.amara@theobroma-systems.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi index 06f3e97af7cd7..214ea62b24a5b 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi @@ -416,15 +416,6 @@ gpio1830-supply = <&vcc_1v8>; }; -&pmu_io_domains { - status = "okay"; - pmu1830-supply = <&vcc_1v8>; -}; - -&pwm2 { - status = "okay"; -}; - &pinctrl { pinctrl-names = "default"; pinctrl-0 = <&q7_thermal_pin>; @@ -473,6 +464,15 @@ }; }; +&pmu_io_domains { + status = "okay"; + pmu1830-supply = <&vcc_1v8>; +}; + +&pwm2 { + status = "okay"; +}; + &sdhci { /* * Signal integrity isn't great at 200MHz but 100MHz has proven stable From e6b1168f37e3f86d9966276c5a3fff9eb0df3e5f Mon Sep 17 00:00:00 2001 From: Quentin Schulz Date: Fri, 8 Mar 2024 16:46:07 +0100 Subject: [PATCH 010/606] arm64: dts: rockchip: enable internal pull-up on Q7_USB_ID for RK3399 Puma The Q7_USB_ID has a diode used as a level-shifter, and is used as an input pin. The SoC default for this pin is a pull-up, which is correct but the pinconf in the introducing commit missed that, so let's fix this oversight. Fixes: ed2c66a95c0c ("arm64: dts: rockchip: fix rk3399-puma-haikou USB OTG mode") Signed-off-by: Quentin Schulz Link: https://lore.kernel.org/r/20240308-puma-diode-pu-v2-1-309f83da110a@theobroma-systems.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi index 214ea62b24a5b..a51ebb8f8b80f 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi @@ -459,7 +459,7 @@ usb3 { usb3_id: usb3-id { rockchip,pins = - <1 RK_PC2 RK_FUNC_GPIO &pcfg_pull_none>; + <1 RK_PC2 RK_FUNC_GPIO &pcfg_pull_up>; }; }; }; From 945a7c8570916650a415757d15d83e0fa856a686 Mon Sep 17 00:00:00 2001 From: Quentin Schulz Date: Fri, 8 Mar 2024 16:46:08 +0100 Subject: [PATCH 011/606] arm64: dts: rockchip: enable internal pull-up on PCIE_WAKE# for RK3399 Puma The PCIE_WAKE# has a diode used as a level-shifter, and is used as an input pin. While the SoC default is to enable the pull-up, the core rk3399 pinconf for this pin opted for pull-none. So as to not disturb the behaviour of other boards which may rely on pull-none instead of pull-up, set the needed pull-up only for RK3399 Puma. Fixes: 60fd9f72ce8a ("arm64: dts: rockchip: add Haikou baseboard with RK3399-Q7 SoM") Signed-off-by: Quentin Schulz Link: https://lore.kernel.org/r/20240308-puma-diode-pu-v2-2-309f83da110a@theobroma-systems.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi index a51ebb8f8b80f..2484ad2bd86fc 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi @@ -416,6 +416,11 @@ gpio1830-supply = <&vcc_1v8>; }; +&pcie_clkreqn_cpm { + rockchip,pins = + <2 RK_PD2 RK_FUNC_GPIO &pcfg_pull_up>; +}; + &pinctrl { pinctrl-names = "default"; pinctrl-0 = <&q7_thermal_pin>; From d7ed698abc28b2886c9fc71d17ca6b023fcf47f3 Mon Sep 17 00:00:00 2001 From: Quentin Schulz Date: Fri, 8 Mar 2024 16:46:09 +0100 Subject: [PATCH 012/606] arm64: dts: rockchip: add regulators for PCIe on RK3399 Puma Haikou The PCIe PHY requires two regulators and are present on the SoM directly, while the PCIe connector also exposes 3V3 and 12V power rails which are available on the baseboard. Considering that 3/4 regulators are always-on on HW level and that the last one depends on a regulator from the PMIC that is specified as always on, this commit should be purely cosmetic and no change in behavior is expected. Let's add all regulators for PCIe on RK3399 Puma Haikou. Reviewed-by: Dragan Simic Signed-off-by: Quentin Schulz Link: https://lore.kernel.org/r/20240308-puma-diode-pu-v2-3-309f83da110a@theobroma-systems.com Signed-off-by: Heiko Stuebner --- .../boot/dts/rockchip/rk3399-puma-haikou.dts | 2 ++ arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi | 26 +++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts index 2c3984a880af6..f6f15946579eb 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts @@ -194,6 +194,8 @@ num-lanes = <4>; pinctrl-names = "default"; pinctrl-0 = <&pcie_clkreqn_cpm>; + vpcie3v3-supply = <&vcc3v3_baseboard>; + vpcie12v-supply = <&dc_12v>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi index 2484ad2bd86fc..ccbe3a7a1d2c2 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi @@ -79,6 +79,26 @@ regulator-max-microvolt = <5000000>; }; + vcca_0v9: vcca-0v9-regulator { + compatible = "regulator-fixed"; + regulator-name = "vcca_0v9"; + regulator-always-on; + regulator-boot-on; + regulator-min-microvolt = <900000>; + regulator-max-microvolt = <900000>; + vin-supply = <&vcc_1v8>; + }; + + vcca_1v8: vcca-1v8-regulator { + compatible = "regulator-fixed"; + regulator-name = "vcca_1v8"; + regulator-always-on; + regulator-boot-on; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + vin-supply = <&vcc3v3_sys>; + }; + vdd_log: vdd-log { compatible = "pwm-regulator"; pwms = <&pwm2 0 25000 1>; @@ -416,6 +436,12 @@ gpio1830-supply = <&vcc_1v8>; }; +&pcie0 { + /* PCIe PHY supplies */ + vpcie0v9-supply = <&vcca_0v9>; + vpcie1v8-supply = <&vcca_1v8>; +}; + &pcie_clkreqn_cpm { rockchip,pins = <2 RK_PD2 RK_FUNC_GPIO &pcfg_pull_up>; From 64da060dd4eb625646970d7c96a16de617412ec5 Mon Sep 17 00:00:00 2001 From: Andy Yan Date: Sun, 24 Mar 2024 19:28:33 +0800 Subject: [PATCH 013/606] arm64: dts: rockchip: Fix the i2c address of es8316 on Cool Pi CM5 According to the hardware design, the i2c address of audio codec es8316 on Cool Pi CM5 is 0x10. This fix the read/write error like bellow: es8316 7-0011: ASoC: error at soc_component_write_no_lock on es8316.7-0011 for register: [0x0000000c] -6 es8316 7-0011: ASoC: error at soc_component_write_no_lock on es8316.7-0011 for register: [0x00000003] -6 es8316 7-0011: ASoC: error at soc_component_read_no_lock on es8316.7-0011 for register: [0x00000016] -6 es8316 7-0011: ASoC: error at soc_component_read_no_lock on es8316.7-0011 for register: [0x00000016] -6 Fixes: 791c154c3982 ("arm64: dts: rockchip: Add support for rk3588 based board Cool Pi CM5 EVB") Signed-off-by: Andy Yan Link: https://lore.kernel.org/r/20240324112833.2181961-1-andyshrk@163.com [also adapted the node name to audio-codec@10] Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi index cce1c8e835877..94ecb9b4f98f8 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi @@ -216,9 +216,9 @@ pinctrl-0 = <&i2c7m0_xfer>; status = "okay"; - es8316: audio-codec@11 { + es8316: audio-codec@10 { compatible = "everest,es8316"; - reg = <0x11>; + reg = <0x10>; assigned-clocks = <&cru I2S0_8CH_MCLKOUT>; assigned-clock-rates = <12288000>; clocks = <&cru I2S0_8CH_MCLKOUT>; From 4053caf60bb349ab9ea9e36ee30c64681b696198 Mon Sep 17 00:00:00 2001 From: William Zhang Date: Wed, 20 Mar 2024 15:26:22 -0700 Subject: [PATCH 014/606] mtd: rawnand: brcmnand: Fix data access violation for STB chip Florian reported the following kernel NULL pointer dereference issue on a BCM7250 board: [ 2.829744] Unable to handle kernel NULL pointer dereference at virtual address 0000000c when read [ 2.838740] [0000000c] *pgd=80000000004003, *pmd=00000000 [ 2.844178] Internal error: Oops: 206 [#1] SMP ARM [ 2.848990] Modules linked in: [ 2.852061] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.8.0-next-20240305-gd95fcdf4961d #66 [ 2.860436] Hardware name: Broadcom STB (Flattened Device Tree) [ 2.866371] PC is at brcmnand_read_by_pio+0x180/0x278 [ 2.871449] LR is at __wait_for_common+0x9c/0x1b0 [ 2.876178] pc : [] lr : [] psr: 60000053 [ 2.882460] sp : f0811a80 ip : 00000012 fp : 00000000 [ 2.887699] r10: 00000000 r9 : 00000000 r8 : c3790000 [ 2.892936] r7 : 00000000 r6 : 00000000 r5 : c35db440 r4 : ffe00000 [ 2.899479] r3 : f15cb814 r2 : 00000000 r1 : 00000000 r0 : 00000000 The issue only happens when dma mode is disabled or not supported on STB chip. The pio mode transfer calls brcmnand_read_data_bus function which dereferences ctrl->soc->read_data_bus. But the soc member in STB chip is NULL hence triggers the access violation. The function needs to check the soc pointer first. Fixes: 546e42599120 ("mtd: rawnand: brcmnand: Add BCMBCA read data bus interface") Reported-by: Florian Fainelli Tested-by: Florian Fainelli Signed-off-by: William Zhang Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20240320222623.35604-1-william.zhang@broadcom.com --- drivers/mtd/nand/raw/brcmnand/brcmnand.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c index a8d12c71f987b..1b2ec0fec60c7 100644 --- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c +++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c @@ -857,7 +857,7 @@ static inline void brcmnand_read_data_bus(struct brcmnand_controller *ctrl, struct brcmnand_soc *soc = ctrl->soc; int i; - if (soc->read_data_bus) { + if (soc && soc->read_data_bus) { soc->read_data_bus(soc, flash_cache, buffer, fc_words); } else { for (i = 0; i < fc_words; i++) From a76932e45e9b56ac24ee947294fbb52c3713f839 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Wed, 13 Mar 2024 08:38:28 +0000 Subject: [PATCH 015/606] dt-bindings: pinctrl: renesas,rzg2l-pinctrl: Allow 'input' and 'output-enable' properties On the RZ/G3S SMARC platform, the 'input' property is utilized in gpio-hog nodes, and the 'output-enable' property is used for ETH0/1 TXC pins. Update the binding documentation to include these properties, addressing the following dtbs_check warnings: arch/arm64/boot/dts/renesas/r9a08g045s33-smarc.dtb: pinctrl@11030000: key-1-gpio-hog: 'anyOf' conditional failed, one must be fixed: 'input' does not match any of the regexes: 'pinctrl-[0-9]+' arch/arm64/boot/dts/renesas/r9a08g045s33-smarc.dtb: pinctrl@11030000: eth0: 'anyOf' conditional failed, one must be fixed: 'output-enable' does not match any of the regexes: 'pinctrl-[0-9]+' Signed-off-by: Lad Prabhakar Acked-by: Krzysztof Kozlowski Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20240313083828.5048-1-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Geert Uytterhoeven --- .../devicetree/bindings/pinctrl/renesas,rzg2l-pinctrl.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/pinctrl/renesas,rzg2l-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/renesas,rzg2l-pinctrl.yaml index d476de82e5c3f..4d5a957fa232e 100644 --- a/Documentation/devicetree/bindings/pinctrl/renesas,rzg2l-pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/renesas,rzg2l-pinctrl.yaml @@ -120,7 +120,9 @@ additionalProperties: slew-rate: true gpio-hog: true gpios: true + input: true input-enable: true + output-enable: true output-high: true output-low: true line-name: true From aa43c15a790cf083a6e6a7c531cffd27a5e1fd4f Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 20 Mar 2024 12:42:29 +0200 Subject: [PATCH 016/606] pinctrl: renesas: rzg2l: Execute atomically the interrupt configuration Lockdep detects a possible deadlock as listed below. This is because it detects the IA55 interrupt controller .irq_eoi() API is called from interrupt context while configuration-specific API (e.g., .irq_enable()) could be called from process context on resume path (by calling rzg2l_gpio_irq_restore()). To avoid this, protect the call of rzg2l_gpio_irq_enable() with spin_lock_irqsave()/spin_unlock_irqrestore(). With this the same approach that is available in __setup_irq() is mimicked to pinctrl IRQ resume function. Below is the lockdep report: WARNING: inconsistent lock state 6.8.0-rc5-next-20240219-arm64-renesas-00030-gb17a289abf1f #90 Not tainted -------------------------------- inconsistent {IN-HARDIRQ-W} -> {HARDIRQ-ON-W} usage. str_rwdt_t_001./159 [HC0[0]:SC0[0]:HE1:SE1] takes: ffff00000b001d70 (&rzg2l_irqc_data->lock){?...}-{2:2}, at: rzg2l_irqc_irq_enable+0x60/0xa4 {IN-HARDIRQ-W} state was registered at: lock_acquire+0x1e0/0x310 _raw_spin_lock+0x44/0x58 rzg2l_irqc_eoi+0x2c/0x130 irq_chip_eoi_parent+0x18/0x20 rzg2l_gpio_irqc_eoi+0xc/0x14 handle_fasteoi_irq+0x134/0x230 generic_handle_domain_irq+0x28/0x3c gic_handle_irq+0x4c/0xbc call_on_irq_stack+0x24/0x34 do_interrupt_handler+0x78/0x7c el1_interrupt+0x30/0x5c el1h_64_irq_handler+0x14/0x1c el1h_64_irq+0x64/0x68 _raw_spin_unlock_irqrestore+0x34/0x70 __setup_irq+0x4d4/0x6b8 request_threaded_irq+0xe8/0x1a0 request_any_context_irq+0x60/0xb8 devm_request_any_context_irq+0x74/0x104 gpio_keys_probe+0x374/0xb08 platform_probe+0x64/0xcc really_probe+0x140/0x2ac __driver_probe_device+0x74/0x124 driver_probe_device+0x3c/0x15c __driver_attach+0xec/0x1c4 bus_for_each_dev+0x70/0xcc driver_attach+0x20/0x28 bus_add_driver+0xdc/0x1d0 driver_register+0x5c/0x118 __platform_driver_register+0x24/0x2c gpio_keys_init+0x18/0x20 do_one_initcall+0x70/0x290 kernel_init_freeable+0x294/0x504 kernel_init+0x20/0x1cc ret_from_fork+0x10/0x20 irq event stamp: 69071 hardirqs last enabled at (69071): [] _raw_spin_unlock_irqrestore+0x6c/0x70 hardirqs last disabled at (69070): [] _raw_spin_lock_irqsave+0x7c/0x80 softirqs last enabled at (67654): [] __do_softirq+0x494/0x4dc softirqs last disabled at (67645): [] ____do_softirq+0xc/0x14 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&rzg2l_irqc_data->lock); lock(&rzg2l_irqc_data->lock); *** DEADLOCK *** 4 locks held by str_rwdt_t_001./159: #0: ffff00000b10f3f0 (sb_writers#4){.+.+}-{0:0}, at: vfs_write+0x1a4/0x35c #1: ffff00000e43ba88 (&of->mutex){+.+.}-{3:3}, at: kernfs_fop_write_iter+0xe8/0x1a8 #2: ffff00000aa21dc8 (kn->active#40){.+.+}-{0:0}, at: kernfs_fop_write_iter+0xf0/0x1a8 #3: ffff80008179d970 (system_transition_mutex){+.+.}-{3:3}, at: pm_suspend+0x9c/0x278 stack backtrace: CPU: 0 PID: 159 Comm: str_rwdt_t_001. Not tainted 6.8.0-rc5-next-20240219-arm64-renesas-00030-gb17a289abf1f #90 Hardware name: Renesas SMARC EVK version 2 based on r9a08g045s33 (DT) Call trace: dump_backtrace+0x94/0xe8 show_stack+0x14/0x1c dump_stack_lvl+0x88/0xc4 dump_stack+0x14/0x1c print_usage_bug.part.0+0x294/0x348 mark_lock+0x6b0/0x948 __lock_acquire+0x750/0x20b0 lock_acquire+0x1e0/0x310 _raw_spin_lock+0x44/0x58 rzg2l_irqc_irq_enable+0x60/0xa4 irq_chip_enable_parent+0x1c/0x34 rzg2l_gpio_irq_enable+0xc4/0xd8 rzg2l_pinctrl_resume_noirq+0x4cc/0x520 pm_generic_resume_noirq+0x28/0x3c genpd_finish_resume+0xc0/0xdc genpd_resume_noirq+0x14/0x1c dpm_run_callback+0x34/0x90 device_resume_noirq+0xa8/0x268 dpm_noirq_resume_devices+0x13c/0x160 dpm_resume_noirq+0xc/0x1c suspend_devices_and_enter+0x2c8/0x570 pm_suspend+0x1ac/0x278 state_store+0x88/0x124 kobj_attr_store+0x14/0x24 sysfs_kf_write+0x48/0x6c kernfs_fop_write_iter+0x118/0x1a8 vfs_write+0x270/0x35c ksys_write+0x64/0xec __arm64_sys_write+0x18/0x20 invoke_syscall+0x44/0x108 el0_svc_common.constprop.0+0xb4/0xd4 do_el0_svc+0x18/0x20 el0_svc+0x3c/0xb8 el0t_64_sync_handler+0xb8/0xbc el0t_64_sync+0x14c/0x150 Fixes: 254203f9a94c ("pinctrl: renesas: rzg2l: Add suspend/resume support") Signed-off-by: Claudiu Beznea Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20240320104230.446400-2-claudiu.beznea.uj@bp.renesas.com Signed-off-by: Geert Uytterhoeven --- drivers/pinctrl/renesas/pinctrl-rzg2l.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/renesas/pinctrl-rzg2l.c b/drivers/pinctrl/renesas/pinctrl-rzg2l.c index eb5a8c6542606..93916553bcc72 100644 --- a/drivers/pinctrl/renesas/pinctrl-rzg2l.c +++ b/drivers/pinctrl/renesas/pinctrl-rzg2l.c @@ -2063,8 +2063,17 @@ static void rzg2l_gpio_irq_restore(struct rzg2l_pinctrl *pctrl) continue; } - if (!irqd_irq_disabled(data)) + if (!irqd_irq_disabled(data)) { + unsigned long flags; + + /* + * This has to be atomically executed to protect against a concurrent + * interrupt. + */ + raw_spin_lock_irqsave(&pctrl->lock.rlock, flags); rzg2l_gpio_irq_enable(data); + raw_spin_unlock_irqrestore(&pctrl->lock.rlock, flags); + } } } From afc89870ea677bd5a44516eb981f7a259b74280c Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Thu, 28 Mar 2024 12:21:51 +0530 Subject: [PATCH 017/606] dmaengine: Revert "dmaengine: pl330: issue_pending waits until WFP state" This reverts commit 22a9d9585812 ("dmaengine: pl330: issue_pending waits until WFP state") as it seems to cause regression in pl330 driver. Note the issue now exists in mainline so a fix to be done. Cc: stable@vger.kernel.org Reported-by: karthikeyan Signed-off-by: Vinod Koul --- drivers/dma/pl330.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index 5f6d7f1e095f9..ad8e3da1b2cd2 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -1053,9 +1053,6 @@ static bool _trigger(struct pl330_thread *thrd) thrd->req_running = idx; - if (desc->rqtype == DMA_MEM_TO_DEV || desc->rqtype == DMA_DEV_TO_MEM) - UNTIL(thrd, PL330_STATE_WFP); - return true; } From 43c633ef93a5d293c96ebcedb40130df13128428 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 22 Mar 2024 14:21:07 +0100 Subject: [PATCH 018/606] dmaengine: owl: fix register access functions When building with 'make W=1', clang notices that the computed register values are never actually written back but instead the wrong variable is set: drivers/dma/owl-dma.c:244:6: error: variable 'regval' set but not used [-Werror,-Wunused-but-set-variable] 244 | u32 regval; | ^ drivers/dma/owl-dma.c:268:6: error: variable 'regval' set but not used [-Werror,-Wunused-but-set-variable] 268 | u32 regval; | ^ Change these to what was most likely intended. Fixes: 47e20577c24d ("dmaengine: Add Actions Semi Owl family S900 DMA driver") Signed-off-by: Arnd Bergmann Reviewed-by: Peter Korsgaard Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20240322132116.906475-1-arnd@kernel.org Signed-off-by: Vinod Koul --- drivers/dma/owl-dma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/owl-dma.c b/drivers/dma/owl-dma.c index 4e76c4ec2d396..e001f4f7aa640 100644 --- a/drivers/dma/owl-dma.c +++ b/drivers/dma/owl-dma.c @@ -250,7 +250,7 @@ static void pchan_update(struct owl_dma_pchan *pchan, u32 reg, else regval &= ~val; - writel(val, pchan->base + reg); + writel(regval, pchan->base + reg); } static void pchan_writel(struct owl_dma_pchan *pchan, u32 reg, u32 data) @@ -274,7 +274,7 @@ static void dma_update(struct owl_dma *od, u32 reg, u32 val, bool state) else regval &= ~val; - writel(val, od->base + reg); + writel(regval, od->base + reg); } static void dma_writel(struct owl_dma *od, u32 reg, u32 data) From 30f0ced9971b2d8c8c24ae75786f9079489a012d Mon Sep 17 00:00:00 2001 From: Akhil R Date: Fri, 15 Mar 2024 18:14:11 +0530 Subject: [PATCH 019/606] dmaengine: tegra186: Fix residual calculation The existing residual calculation returns an incorrect value when bytes_xfer == bytes_req. This scenario occurs particularly with drivers like UART where DMA is scheduled for maximum number of bytes and is terminated when the bytes inflow stops. At higher baud rates, it could request the tx_status while there is no bytes left to transfer. This will lead to incorrect residual being set. Hence return residual as '0' when bytes transferred equals to the bytes requested. Fixes: ee17028009d4 ("dmaengine: tegra: Add tegra gpcdma driver") Signed-off-by: Akhil R Reviewed-by: Jon Hunter Acked-by: Thierry Reding Link: https://lore.kernel.org/r/20240315124411.17582-1-akhilrajeev@nvidia.com Signed-off-by: Vinod Koul --- drivers/dma/tegra186-gpc-dma.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/dma/tegra186-gpc-dma.c b/drivers/dma/tegra186-gpc-dma.c index 88547a23825b1..3642508e88bb2 100644 --- a/drivers/dma/tegra186-gpc-dma.c +++ b/drivers/dma/tegra186-gpc-dma.c @@ -746,6 +746,9 @@ static int tegra_dma_get_residual(struct tegra_dma_channel *tdc) bytes_xfer = dma_desc->bytes_xfer + sg_req[dma_desc->sg_idx].len - (wcount * 4); + if (dma_desc->bytes_req == bytes_xfer) + return 0; + residual = dma_desc->bytes_req - (bytes_xfer % dma_desc->bytes_req); return residual; From 9140ce47872bfd89fca888c2f992faa51d20c2bc Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 21 Mar 2024 14:04:21 +0200 Subject: [PATCH 020/606] idma64: Don't try to serve interrupts when device is powered off When iDMA 64-bit device is powered off, the IRQ status register is all 1:s. This is never happen in real case and signalling that the device is simply powered off. Don't try to serve interrupts that are not ours. Fixes: 667dfed98615 ("dmaengine: add a driver for Intel integrated DMA 64-bit") Reported-by: Heiner Kallweit Closes: https://lore.kernel.org/r/700bbb84-90e1-4505-8ff0-3f17ea8bc631@gmail.com Tested-by: Heiner Kallweit Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240321120453.1360138-1-andriy.shevchenko@linux.intel.com Signed-off-by: Vinod Koul --- drivers/dma/idma64.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/dma/idma64.c b/drivers/dma/idma64.c index 78a938969d7d7..1398814d8fbb6 100644 --- a/drivers/dma/idma64.c +++ b/drivers/dma/idma64.c @@ -171,6 +171,10 @@ static irqreturn_t idma64_irq(int irq, void *dev) u32 status_err; unsigned short i; + /* Since IRQ may be shared, check if DMA controller is powered on */ + if (status == GENMASK(31, 0)) + return IRQ_NONE; + dev_vdbg(idma64->dma.dev, "%s: status=%#x\n", __func__, status); /* Check if we have any interrupt from the DMA controller */ From c10cd03d69403fa0f00be8631bd4cb4690440ebd Mon Sep 17 00:00:00 2001 From: Billy Tsai Date: Wed, 13 Mar 2024 17:28:09 +0800 Subject: [PATCH 021/606] pinctrl: pinctrl-aspeed-g6: Fix register offset for pinconf of GPIOR-T The register offset to disable the internal pull-down of GPIOR~T is 0x630 instead of 0x620, as specified in the Ast2600 datasheet v15 The datasheet can download from the official Aspeed website. Fixes: 15711ba6ff19 ("pinctrl: aspeed-g6: Add AST2600 pinconf support") Reported-by: Delphine CC Chiu Signed-off-by: Billy Tsai Reviewed-by: Paul Menzel Reviewed-by: Andrew Jeffery Message-ID: <20240313092809.2596644-1-billy_tsai@aspeedtech.com> Signed-off-by: Linus Walleij --- drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c | 34 +++++++++++----------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c b/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c index d376fa7114d1a..029efe16f8cc2 100644 --- a/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c +++ b/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c @@ -43,7 +43,7 @@ #define SCU614 0x614 /* Disable GPIO Internal Pull-Down #1 */ #define SCU618 0x618 /* Disable GPIO Internal Pull-Down #2 */ #define SCU61C 0x61c /* Disable GPIO Internal Pull-Down #3 */ -#define SCU620 0x620 /* Disable GPIO Internal Pull-Down #4 */ +#define SCU630 0x630 /* Disable GPIO Internal Pull-Down #4 */ #define SCU634 0x634 /* Disable GPIO Internal Pull-Down #5 */ #define SCU638 0x638 /* Disable GPIO Internal Pull-Down #6 */ #define SCU690 0x690 /* Multi-function Pin Control #24 */ @@ -2495,38 +2495,38 @@ static struct aspeed_pin_config aspeed_g6_configs[] = { ASPEED_PULL_DOWN_PINCONF(D14, SCU61C, 0), /* GPIOS7 */ - ASPEED_PULL_DOWN_PINCONF(T24, SCU620, 23), + ASPEED_PULL_DOWN_PINCONF(T24, SCU630, 23), /* GPIOS6 */ - ASPEED_PULL_DOWN_PINCONF(P23, SCU620, 22), + ASPEED_PULL_DOWN_PINCONF(P23, SCU630, 22), /* GPIOS5 */ - ASPEED_PULL_DOWN_PINCONF(P24, SCU620, 21), + ASPEED_PULL_DOWN_PINCONF(P24, SCU630, 21), /* GPIOS4 */ - ASPEED_PULL_DOWN_PINCONF(R26, SCU620, 20), + ASPEED_PULL_DOWN_PINCONF(R26, SCU630, 20), /* GPIOS3*/ - ASPEED_PULL_DOWN_PINCONF(R24, SCU620, 19), + ASPEED_PULL_DOWN_PINCONF(R24, SCU630, 19), /* GPIOS2 */ - ASPEED_PULL_DOWN_PINCONF(T26, SCU620, 18), + ASPEED_PULL_DOWN_PINCONF(T26, SCU630, 18), /* GPIOS1 */ - ASPEED_PULL_DOWN_PINCONF(T25, SCU620, 17), + ASPEED_PULL_DOWN_PINCONF(T25, SCU630, 17), /* GPIOS0 */ - ASPEED_PULL_DOWN_PINCONF(R23, SCU620, 16), + ASPEED_PULL_DOWN_PINCONF(R23, SCU630, 16), /* GPIOR7 */ - ASPEED_PULL_DOWN_PINCONF(U26, SCU620, 15), + ASPEED_PULL_DOWN_PINCONF(U26, SCU630, 15), /* GPIOR6 */ - ASPEED_PULL_DOWN_PINCONF(W26, SCU620, 14), + ASPEED_PULL_DOWN_PINCONF(W26, SCU630, 14), /* GPIOR5 */ - ASPEED_PULL_DOWN_PINCONF(T23, SCU620, 13), + ASPEED_PULL_DOWN_PINCONF(T23, SCU630, 13), /* GPIOR4 */ - ASPEED_PULL_DOWN_PINCONF(U25, SCU620, 12), + ASPEED_PULL_DOWN_PINCONF(U25, SCU630, 12), /* GPIOR3*/ - ASPEED_PULL_DOWN_PINCONF(V26, SCU620, 11), + ASPEED_PULL_DOWN_PINCONF(V26, SCU630, 11), /* GPIOR2 */ - ASPEED_PULL_DOWN_PINCONF(V24, SCU620, 10), + ASPEED_PULL_DOWN_PINCONF(V24, SCU630, 10), /* GPIOR1 */ - ASPEED_PULL_DOWN_PINCONF(U24, SCU620, 9), + ASPEED_PULL_DOWN_PINCONF(U24, SCU630, 9), /* GPIOR0 */ - ASPEED_PULL_DOWN_PINCONF(V25, SCU620, 8), + ASPEED_PULL_DOWN_PINCONF(V25, SCU630, 8), /* GPIOX7 */ ASPEED_PULL_DOWN_PINCONF(AB10, SCU634, 31), From 368a90e651faeeb7049a876599cf2b0d74954796 Mon Sep 17 00:00:00 2001 From: Jan Dakinevich Date: Mon, 25 Mar 2024 14:30:58 +0300 Subject: [PATCH 022/606] pinctrl/meson: fix typo in PDM's pin name Other pins have _a or _x suffix, but this one doesn't have any. Most likely this is a typo. Fixes: dabad1ff8561 ("pinctrl: meson: add pinctrl driver support for Meson-A1 SoC") Signed-off-by: Jan Dakinevich Reviewed-by: Neil Armstrong Message-ID: <20240325113058.248022-1-jan.dakinevich@salutedevices.com> Signed-off-by: Linus Walleij --- drivers/pinctrl/meson/pinctrl-meson-a1.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/meson/pinctrl-meson-a1.c b/drivers/pinctrl/meson/pinctrl-meson-a1.c index 79f5d753d7e1a..50a87d9618a8e 100644 --- a/drivers/pinctrl/meson/pinctrl-meson-a1.c +++ b/drivers/pinctrl/meson/pinctrl-meson-a1.c @@ -250,7 +250,7 @@ static const unsigned int pdm_dclk_x_pins[] = { GPIOX_10 }; static const unsigned int pdm_din2_a_pins[] = { GPIOA_6 }; static const unsigned int pdm_din1_a_pins[] = { GPIOA_7 }; static const unsigned int pdm_din0_a_pins[] = { GPIOA_8 }; -static const unsigned int pdm_dclk_pins[] = { GPIOA_9 }; +static const unsigned int pdm_dclk_a_pins[] = { GPIOA_9 }; /* gen_clk */ static const unsigned int gen_clk_x_pins[] = { GPIOX_7 }; @@ -591,7 +591,7 @@ static struct meson_pmx_group meson_a1_periphs_groups[] = { GROUP(pdm_din2_a, 3), GROUP(pdm_din1_a, 3), GROUP(pdm_din0_a, 3), - GROUP(pdm_dclk, 3), + GROUP(pdm_dclk_a, 3), GROUP(pwm_c_a, 3), GROUP(pwm_b_a, 3), @@ -755,7 +755,7 @@ static const char * const spi_a_groups[] = { static const char * const pdm_groups[] = { "pdm_din0_x", "pdm_din1_x", "pdm_din2_x", "pdm_dclk_x", "pdm_din2_a", - "pdm_din1_a", "pdm_din0_a", "pdm_dclk", + "pdm_din1_a", "pdm_din0_a", "pdm_dclk_a", }; static const char * const gen_clk_groups[] = { From 57a1592784d622ecee0b71940c65429173996b33 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 26 Mar 2024 12:36:59 +0100 Subject: [PATCH 023/606] iio: accel: mxc4005: Interrupt handling fixes There are 2 issues with interrupt handling in the mxc4005 driver: 1. mxc4005_set_trigger_state() writes MXC4005_REG_INT_MASK1_BIT_DRDYE (0x01) to INT_MASK1 to enable the interrupt, but to disable the interrupt it writes ~MXC4005_REG_INT_MASK1_BIT_DRDYE which is 0xfe, so it enables all other interrupt sources in the INT_SRC1 register. On the MXC4005 this is not an issue because only bit 0 of the register is used. On the MXC6655 OTOH this is a problem since bit7 is used as TC (Temperature Compensation) disable bit and writing 1 to this disables Temperature Compensation which should only be done when running self-tests on the chip. Write 0 instead of ~MXC4005_REG_INT_MASK1_BIT_DRDYE to disable the interrupts to fix this. 2. The datasheets for the MXC4005 / MXC6655 do not state what the reset value for the INT_MASK0 and INT_MASK1 registers is and since these are write only we also cannot learn this from the hw. Presumably the reset value for both is all 0, which means all interrupts disabled. Explicitly set both registers to 0 from mxc4005_chip_init() to ensure both masks are actually set to 0. Fixes: 79846e33aac1 ("iio: accel: mxc4005: add support for mxc6655") Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20240326113700.56725-2-hdegoede@redhat.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/accel/mxc4005.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/drivers/iio/accel/mxc4005.c b/drivers/iio/accel/mxc4005.c index 61839be501c21..111f4bcf24ad6 100644 --- a/drivers/iio/accel/mxc4005.c +++ b/drivers/iio/accel/mxc4005.c @@ -27,9 +27,13 @@ #define MXC4005_REG_ZOUT_UPPER 0x07 #define MXC4005_REG_ZOUT_LOWER 0x08 +#define MXC4005_REG_INT_MASK0 0x0A + #define MXC4005_REG_INT_MASK1 0x0B #define MXC4005_REG_INT_MASK1_BIT_DRDYE 0x01 +#define MXC4005_REG_INT_CLR0 0x00 + #define MXC4005_REG_INT_CLR1 0x01 #define MXC4005_REG_INT_CLR1_BIT_DRDYC 0x01 @@ -113,7 +117,9 @@ static bool mxc4005_is_readable_reg(struct device *dev, unsigned int reg) static bool mxc4005_is_writeable_reg(struct device *dev, unsigned int reg) { switch (reg) { + case MXC4005_REG_INT_CLR0: case MXC4005_REG_INT_CLR1: + case MXC4005_REG_INT_MASK0: case MXC4005_REG_INT_MASK1: case MXC4005_REG_CONTROL: return true; @@ -330,17 +336,13 @@ static int mxc4005_set_trigger_state(struct iio_trigger *trig, { struct iio_dev *indio_dev = iio_trigger_get_drvdata(trig); struct mxc4005_data *data = iio_priv(indio_dev); + unsigned int val; int ret; mutex_lock(&data->mutex); - if (state) { - ret = regmap_write(data->regmap, MXC4005_REG_INT_MASK1, - MXC4005_REG_INT_MASK1_BIT_DRDYE); - } else { - ret = regmap_write(data->regmap, MXC4005_REG_INT_MASK1, - ~MXC4005_REG_INT_MASK1_BIT_DRDYE); - } + val = state ? MXC4005_REG_INT_MASK1_BIT_DRDYE : 0; + ret = regmap_write(data->regmap, MXC4005_REG_INT_MASK1, val); if (ret < 0) { mutex_unlock(&data->mutex); dev_err(data->dev, "failed to update reg_int_mask1"); @@ -382,6 +384,14 @@ static int mxc4005_chip_init(struct mxc4005_data *data) dev_dbg(data->dev, "MXC4005 chip id %02x\n", reg); + ret = regmap_write(data->regmap, MXC4005_REG_INT_MASK0, 0); + if (ret < 0) + return dev_err_probe(data->dev, ret, "writing INT_MASK0\n"); + + ret = regmap_write(data->regmap, MXC4005_REG_INT_MASK1, 0); + if (ret < 0) + return dev_err_probe(data->dev, ret, "writing INT_MASK1\n"); + return 0; } From 6b8cffdc4a31e4a72f75ecd1bc13fbf0dafee390 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 26 Mar 2024 12:37:00 +0100 Subject: [PATCH 024/606] iio: accel: mxc4005: Reset chip on probe() and resume() On some designs the chip is not properly reset when powered up at boot or after a suspend/resume cycle. Use the sw-reset feature to ensure that the chip is in a clean state after probe() / resume() and in the case of resume() restore the settings (scale, trigger-enabled). Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218578 Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20240326113700.56725-3-hdegoede@redhat.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/accel/mxc4005.c | 68 +++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/drivers/iio/accel/mxc4005.c b/drivers/iio/accel/mxc4005.c index 111f4bcf24ad6..63c3566a533bd 100644 --- a/drivers/iio/accel/mxc4005.c +++ b/drivers/iio/accel/mxc4005.c @@ -5,6 +5,7 @@ * Copyright (c) 2014, Intel Corporation. */ +#include #include #include #include @@ -36,6 +37,7 @@ #define MXC4005_REG_INT_CLR1 0x01 #define MXC4005_REG_INT_CLR1_BIT_DRDYC 0x01 +#define MXC4005_REG_INT_CLR1_SW_RST 0x10 #define MXC4005_REG_CONTROL 0x0D #define MXC4005_REG_CONTROL_MASK_FSR GENMASK(6, 5) @@ -43,6 +45,9 @@ #define MXC4005_REG_DEVICE_ID 0x0E +/* Datasheet does not specify a reset time, this is a conservative guess */ +#define MXC4005_RESET_TIME_US 2000 + enum mxc4005_axis { AXIS_X, AXIS_Y, @@ -66,6 +71,8 @@ struct mxc4005_data { s64 timestamp __aligned(8); } scan; bool trigger_enabled; + unsigned int control; + unsigned int int_mask1; }; /* @@ -349,6 +356,7 @@ static int mxc4005_set_trigger_state(struct iio_trigger *trig, return ret; } + data->int_mask1 = val; data->trigger_enabled = state; mutex_unlock(&data->mutex); @@ -384,6 +392,13 @@ static int mxc4005_chip_init(struct mxc4005_data *data) dev_dbg(data->dev, "MXC4005 chip id %02x\n", reg); + ret = regmap_write(data->regmap, MXC4005_REG_INT_CLR1, + MXC4005_REG_INT_CLR1_SW_RST); + if (ret < 0) + return dev_err_probe(data->dev, ret, "resetting chip\n"); + + fsleep(MXC4005_RESET_TIME_US); + ret = regmap_write(data->regmap, MXC4005_REG_INT_MASK0, 0); if (ret < 0) return dev_err_probe(data->dev, ret, "writing INT_MASK0\n"); @@ -479,6 +494,58 @@ static int mxc4005_probe(struct i2c_client *client) return devm_iio_device_register(&client->dev, indio_dev); } +static int mxc4005_suspend(struct device *dev) +{ + struct iio_dev *indio_dev = dev_get_drvdata(dev); + struct mxc4005_data *data = iio_priv(indio_dev); + int ret; + + /* Save control to restore it on resume */ + ret = regmap_read(data->regmap, MXC4005_REG_CONTROL, &data->control); + if (ret < 0) + dev_err(data->dev, "failed to read reg_control\n"); + + return ret; +} + +static int mxc4005_resume(struct device *dev) +{ + struct iio_dev *indio_dev = dev_get_drvdata(dev); + struct mxc4005_data *data = iio_priv(indio_dev); + int ret; + + ret = regmap_write(data->regmap, MXC4005_REG_INT_CLR1, + MXC4005_REG_INT_CLR1_SW_RST); + if (ret) { + dev_err(data->dev, "failed to reset chip: %d\n", ret); + return ret; + } + + fsleep(MXC4005_RESET_TIME_US); + + ret = regmap_write(data->regmap, MXC4005_REG_CONTROL, data->control); + if (ret) { + dev_err(data->dev, "failed to restore control register\n"); + return ret; + } + + ret = regmap_write(data->regmap, MXC4005_REG_INT_MASK0, 0); + if (ret) { + dev_err(data->dev, "failed to restore interrupt 0 mask\n"); + return ret; + } + + ret = regmap_write(data->regmap, MXC4005_REG_INT_MASK1, data->int_mask1); + if (ret) { + dev_err(data->dev, "failed to restore interrupt 1 mask\n"); + return ret; + } + + return 0; +} + +static DEFINE_SIMPLE_DEV_PM_OPS(mxc4005_pm_ops, mxc4005_suspend, mxc4005_resume); + static const struct acpi_device_id mxc4005_acpi_match[] = { {"MXC4005", 0}, {"MXC6655", 0}, @@ -506,6 +573,7 @@ static struct i2c_driver mxc4005_driver = { .name = MXC4005_DRV_NAME, .acpi_match_table = mxc4005_acpi_match, .of_match_table = mxc4005_of_match, + .pm = pm_sleep_ptr(&mxc4005_pm_ops), }, .probe = mxc4005_probe, .id_table = mxc4005_id, From 63dc588e7af1392576071a1841298198c9cddee3 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Wed, 27 Mar 2024 12:01:43 +0530 Subject: [PATCH 025/606] soundwire: amd: fix for wake interrupt handling for clockstop mode When SoundWire Wake interrupt is enabled along with SoundWire Wake enable register, SoundWire wake interrupt will be reported when SoundWire manager is in D3 state and ACP is in D3 state. When SoundWire Wake interrupt is reported, it will invoke runtime resume of the SoundWire manager device. In case of system level suspend, for ClockStop Mode SoundWire Wake interrupt should be disabled. It should be enabled only for runtime suspend scenario. Change wake interrupt enable/disable sequence for ClockStop Mode in system level suspend and runtime suspend sceanrio. Fixes: 9cf1efc5ed2d ("soundwire: amd: add pm_prepare callback and pm ops support") Signed-off-by: Vijendar Mukunda Link: https://lore.kernel.org/r/20240327063143.2266464-2-Vijendar.Mukunda@amd.com Signed-off-by: Vinod Koul --- drivers/soundwire/amd_manager.c | 15 +++++++++++++++ drivers/soundwire/amd_manager.h | 3 ++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/soundwire/amd_manager.c b/drivers/soundwire/amd_manager.c index 7cd24bd8e2248..6bcf8e75273c4 100644 --- a/drivers/soundwire/amd_manager.c +++ b/drivers/soundwire/amd_manager.c @@ -130,6 +130,19 @@ static void amd_sdw_set_frameshape(struct amd_sdw_manager *amd_manager) writel(frame_size, amd_manager->mmio + ACP_SW_FRAMESIZE); } +static void amd_sdw_wake_enable(struct amd_sdw_manager *amd_manager, bool enable) +{ + u32 wake_ctrl; + + wake_ctrl = readl(amd_manager->mmio + ACP_SW_STATE_CHANGE_STATUS_MASK_8TO11); + if (enable) + wake_ctrl |= AMD_SDW_WAKE_INTR_MASK; + else + wake_ctrl &= ~AMD_SDW_WAKE_INTR_MASK; + + writel(wake_ctrl, amd_manager->mmio + ACP_SW_STATE_CHANGE_STATUS_MASK_8TO11); +} + static void amd_sdw_ctl_word_prep(u32 *lower_word, u32 *upper_word, struct sdw_msg *msg, int cmd_offset) { @@ -1095,6 +1108,7 @@ static int __maybe_unused amd_suspend(struct device *dev) } if (amd_manager->power_mode_mask & AMD_SDW_CLK_STOP_MODE) { + amd_sdw_wake_enable(amd_manager, false); return amd_sdw_clock_stop(amd_manager); } else if (amd_manager->power_mode_mask & AMD_SDW_POWER_OFF_MODE) { /* @@ -1121,6 +1135,7 @@ static int __maybe_unused amd_suspend_runtime(struct device *dev) return 0; } if (amd_manager->power_mode_mask & AMD_SDW_CLK_STOP_MODE) { + amd_sdw_wake_enable(amd_manager, true); return amd_sdw_clock_stop(amd_manager); } else if (amd_manager->power_mode_mask & AMD_SDW_POWER_OFF_MODE) { ret = amd_sdw_clock_stop(amd_manager); diff --git a/drivers/soundwire/amd_manager.h b/drivers/soundwire/amd_manager.h index 418b679e0b1a6..707065468e05a 100644 --- a/drivers/soundwire/amd_manager.h +++ b/drivers/soundwire/amd_manager.h @@ -152,7 +152,7 @@ #define AMD_SDW0_EXT_INTR_MASK 0x200000 #define AMD_SDW1_EXT_INTR_MASK 4 #define AMD_SDW_IRQ_MASK_0TO7 0x77777777 -#define AMD_SDW_IRQ_MASK_8TO11 0x000d7777 +#define AMD_SDW_IRQ_MASK_8TO11 0x000c7777 #define AMD_SDW_IRQ_ERROR_MASK 0xff #define AMD_SDW_MAX_FREQ_NUM 1 #define AMD_SDW0_MAX_TX_PORTS 3 @@ -190,6 +190,7 @@ #define AMD_SDW_CLK_RESUME_REQ 2 #define AMD_SDW_CLK_RESUME_DONE 3 #define AMD_SDW_WAKE_STAT_MASK BIT(16) +#define AMD_SDW_WAKE_INTR_MASK BIT(16) static u32 amd_sdw_freq_tbl[AMD_SDW_MAX_FREQ_NUM] = { AMD_SDW_DEFAULT_CLK_FREQ, From 5038a66dad0199de60e5671603ea6623eb9e5c79 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 21 Mar 2024 09:38:39 +0300 Subject: [PATCH 026/606] pinctrl: core: delete incorrect free in pinctrl_enable() The "pctldev" struct is allocated in devm_pinctrl_register_and_init(). It's a devm_ managed pointer that is freed by devm_pinctrl_dev_release(), so freeing it in pinctrl_enable() will lead to a double free. The devm_pinctrl_dev_release() function frees the pindescs and destroys the mutex as well. Fixes: 6118714275f0 ("pinctrl: core: Fix pinctrl_register_and_init() with pinctrl_enable()") Signed-off-by: Dan Carpenter Message-ID: <578fbe56-44e9-487c-ae95-29b695650f7c@moroto.mountain> Signed-off-by: Linus Walleij --- drivers/pinctrl/core.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index 6649357637ff3..cffeb869130dd 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -2124,13 +2124,7 @@ int pinctrl_enable(struct pinctrl_dev *pctldev) error = pinctrl_claim_hogs(pctldev); if (error) { - dev_err(pctldev->dev, "could not claim hogs: %i\n", - error); - pinctrl_free_pindescs(pctldev, pctldev->desc->pins, - pctldev->desc->npins); - mutex_destroy(&pctldev->mutex); - kfree(pctldev); - + dev_err(pctldev->dev, "could not claim hogs: %i\n", error); return error; } From 7d49f53af4b988b188d3932deac2c9c80fd7d9ce Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Fri, 8 Mar 2024 09:36:31 +0000 Subject: [PATCH 027/606] rust: don't select CONSTRUCTORS This was originally part of commit 4b9a68f2e59a0 ("rust: add support for static synchronisation primitives") from the old Rust branch, which used module constructors to initialize globals containing various synchronisation primitives with pin-init. That commit has never been upstreamed, but the `select CONSTRUCTORS` statement ended up being included in the patch that initially added Rust support to the Linux Kernel. We are not using module constructors, so let's remove the select. Signed-off-by: Alice Ryhl Reviewed-by: Benno Lossin Cc: stable@vger.kernel.org Fixes: 2f7ab1267dc9 ("Kbuild: add Rust support") Link: https://lore.kernel.org/r/20240308-constructors-v1-1-4c811342391c@google.com Signed-off-by: Miguel Ojeda --- init/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/init/Kconfig b/init/Kconfig index aa02aec6aa7d2..b9a336a3d7d8f 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1903,7 +1903,6 @@ config RUST depends on !GCC_PLUGINS depends on !RANDSTRUCT depends on !DEBUG_INFO_BTF || PAHOLE_HAS_LANG_EXCLUDE - select CONSTRUCTORS help Enables Rust support in the kernel. From 01848eee20c6396e5a96cfbc9061dc37481e06fd Mon Sep 17 00:00:00 2001 From: Bo-Wei Chen Date: Sun, 24 Mar 2024 09:09:15 +0800 Subject: [PATCH 028/606] docs: rust: fix improper rendering in Arch Support page Fix improper rendering of table cell (empty bullet list) by rendering as a dash using the backslash escaping mechanism [1]. Link: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#escaping-mechanism [1] Reported-by: Miguel Ojeda Closes: https://github.com/Rust-for-Linux/linux/issues/1069 Signed-off-by: Bo-Wei Chen Reviewed-by: Benno Lossin Fixes: 90868ff9cade ("LoongArch: Enable initial Rust support") Link: https://lore.kernel.org/r/20240324010915.3089934-1-tim.chenbw@gmail.com [ Reworded slightly title and message; use "Link:" tag. ] Signed-off-by: Miguel Ojeda --- Documentation/rust/arch-support.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/rust/arch-support.rst b/Documentation/rust/arch-support.rst index 5c4fa9f5d1cdb..c9137710633a3 100644 --- a/Documentation/rust/arch-support.rst +++ b/Documentation/rust/arch-support.rst @@ -16,7 +16,7 @@ support corresponds to ``S`` values in the ``MAINTAINERS`` file. Architecture Level of support Constraints ============= ================ ============================================== ``arm64`` Maintained Little Endian only. -``loongarch`` Maintained - +``loongarch`` Maintained \- ``um`` Maintained ``x86_64`` only. ``x86`` Maintained ``x86_64`` only. ============= ================ ============================================== From 43853e843aa6c3d47ff2b0cce898318839483d05 Mon Sep 17 00:00:00 2001 From: Dragan Simic Date: Mon, 1 Apr 2024 00:20:56 +0200 Subject: [PATCH 029/606] arm64: dts: rockchip: Remove unsupported node from the Pinebook Pro dts Remove a redundant node from the Pine64 Pinebook Pro dts, which is intended to provide a value for the delay in PCI Express enumeration, but that isn't supported without additional out-of-tree kernel patches. There were already efforts to upstream those kernel patches, because they reportedly make some PCI Express cards (such as LSI SAS HBAs) usable in Pine64 RockPro64 (which is also based on the RK3399); otherwise, those PCI Express cards fail to enumerate. However, providing the required background and explanations proved to be a tough nut to crack, which is the reason why those patches remain outside of the kernel mainline for now. If those out-of-tree patches eventually become upstreamed, the resulting device-tree changes will almost surely belong to the RK3399 SoC dtsi. Also, the above-mentioned unusable-without-out-of-tree-patches PCI Express devices are in all fairness not usable in a Pinebook Pro without some extensive hardware modifications, which is another reason to delete this redundant node. When it comes to the Pinebook Pro, only M.2 NVMe SSDs can be installed out of the box (using an additional passive adapter PCB sold separately by Pine64), which reportedly works fine with no additional patches. Fixes: 5a65505a6988 ("arm64: dts: rockchip: Add initial support for Pinebook Pro") Signed-off-by: Dragan Simic Link: https://lore.kernel.org/r/0f82c3f97cb798d012270d13b34d8d15305ef293.1711923520.git.dsimic@manjaro.org Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts b/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts index 054c6a4d1a45f..294eb2de263de 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts @@ -779,7 +779,6 @@ }; &pcie0 { - bus-scan-delay-ms = <1000>; ep-gpios = <&gpio2 RK_PD4 GPIO_ACTIVE_HIGH>; num-lanes = <4>; pinctrl-names = "default"; From 4ddc13461740308d3133c2defda97d9e3a30ede8 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 1 Apr 2024 16:09:39 +0200 Subject: [PATCH 030/606] arm64: dts: rockchip: drop panel port unit address in GRU Scarlet Panel port does not have "reg", thus it should not have unit address, as reported by dtc W=1 warning: rk3399-gru-scarlet.dtsi:666.32-668.7: Warning (unit_address_vs_reg): /dsi@ff960000/panel@0/ports/port@1/endpoint@1: node has a unit name, but no reg or ranges property Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240401140939.97808-1-krzk@kernel.org Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi index 5846a11f0e848..6d1e3ca863925 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi @@ -663,7 +663,7 @@ camera: &i2c7 { port@1 { reg = <1>; - mipi1_in_panel: endpoint@1 { + mipi1_in_panel: endpoint { remote-endpoint = <&mipi1_out_panel>; }; }; From db70d9f9dcf8d5cda86303eeb381b1213a2ab191 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 27 Mar 2024 22:02:58 -0700 Subject: [PATCH 031/606] ARC: Fix -Wmissing-prototypes warnings | ../arch/arc/kernel/kprobes.c:193:15: warning: no previous prototype for 'arc_kprobe_handler' [-Wmissing-prototypes] | 193 | int __kprobes arc_kprobe_handler(unsigned long addr, struct pt_regs *regs) | |../arch/arc/kernel/ptrace.c:342:16: warning: no previous prototype for 'syscall_trace_enter' [-Wmissing-prototypes] | 342 | asmlinkage int syscall_trace_enter(struct pt_regs *regs) Link: https://qa-reports.linaro.org/lkft/linux-next-master/build/next-20240325/testrun/23149630/suite/build/test/gcc-9-defconfig/log Reported-by: Linux Kernel Functional Testing Signed-off-by: Vineet Gupta --- arch/arc/include/asm/ptrace.h | 2 +- arch/arc/kernel/kprobes.c | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h index 00b9318e551e7..cf79df0b25705 100644 --- a/arch/arc/include/asm/ptrace.h +++ b/arch/arc/include/asm/ptrace.h @@ -169,7 +169,7 @@ static inline unsigned long regs_get_register(struct pt_regs *regs, return *(unsigned long *)((unsigned long)regs + offset); } -extern int syscall_trace_entry(struct pt_regs *); +extern int syscall_trace_enter(struct pt_regs *); extern void syscall_trace_exit(struct pt_regs *); #endif /* !__ASSEMBLY__ */ diff --git a/arch/arc/kernel/kprobes.c b/arch/arc/kernel/kprobes.c index e71d64119d71a..f8e2960832d94 100644 --- a/arch/arc/kernel/kprobes.c +++ b/arch/arc/kernel/kprobes.c @@ -190,7 +190,8 @@ static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs) } } -int __kprobes arc_kprobe_handler(unsigned long addr, struct pt_regs *regs) +static int +__kprobes arc_kprobe_handler(unsigned long addr, struct pt_regs *regs) { struct kprobe *p; struct kprobe_ctlblk *kcb; @@ -241,8 +242,8 @@ int __kprobes arc_kprobe_handler(unsigned long addr, struct pt_regs *regs) return 0; } -static int __kprobes arc_post_kprobe_handler(unsigned long addr, - struct pt_regs *regs) +static int +__kprobes arc_post_kprobe_handler(unsigned long addr, struct pt_regs *regs) { struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); From d5272aaa8257920c7b398f953ada65e25c248f9a Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 27 Mar 2024 22:19:25 -0700 Subject: [PATCH 032/606] ARC: mm: fix new code about cache aliasing Manual/partial revert of 8690bbcf3b70 ("Introduce cpu_dcache_is_aliasing() across all architectures") Current generation of ARCv2/ARCv3 based HSxx cores are only PIPT (to software at least). Legacy ARC700 cpus could be VIPT aliasing (based on cache geometry and PAGE_SIZE) [1] however recently that support was ripped out so VIPT aliasing cache is not relevant to ARC anymore. [1] http://lists.infradead.org/pipermail/linux-snps-arc/2023-February/006899.html Acked-by: Mathieu Desnoyers Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 1 - arch/arc/include/asm/cachetype.h | 9 --------- 2 files changed, 10 deletions(-) delete mode 100644 arch/arc/include/asm/cachetype.h diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 99d2845f3feb9..4092bec198bec 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -6,7 +6,6 @@ config ARC def_bool y select ARC_TIMERS - select ARCH_HAS_CPU_CACHE_ALIASING select ARCH_HAS_CACHE_LINE_SIZE select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DMA_PREP_COHERENT diff --git a/arch/arc/include/asm/cachetype.h b/arch/arc/include/asm/cachetype.h deleted file mode 100644 index 05fc7ed597125..0000000000000 --- a/arch/arc/include/asm/cachetype.h +++ /dev/null @@ -1,9 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_ARC_CACHETYPE_H -#define __ASM_ARC_CACHETYPE_H - -#include - -#define cpu_dcache_is_aliasing() true - -#endif From ebfc2fd8873b4feb86f01835ad97282aede1e956 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 29 Mar 2024 17:14:32 -0500 Subject: [PATCH 033/606] ARC: Fix typos Fix typos, most reported by "codespell arch/arc". Only touches comments, no code changes. Signed-off-by: Bjorn Helgaas Signed-off-by: Vineet Gupta --- arch/arc/boot/Makefile | 4 ++-- arch/arc/boot/dts/axc003.dtsi | 4 ++-- arch/arc/boot/dts/vdk_axs10x_mb.dtsi | 2 +- arch/arc/include/asm/dsp.h | 2 +- arch/arc/include/asm/entry-compact.h | 10 +++++----- arch/arc/include/asm/entry.h | 4 ++-- arch/arc/include/asm/irq.h | 2 +- arch/arc/include/asm/irqflags-compact.h | 2 +- arch/arc/include/asm/mmu_context.h | 2 +- arch/arc/include/asm/pgtable-bits-arcv2.h | 2 +- arch/arc/include/asm/shmparam.h | 2 +- arch/arc/include/asm/smp.h | 4 ++-- arch/arc/include/asm/thread_info.h | 2 +- arch/arc/include/uapi/asm/swab.h | 2 +- arch/arc/kernel/entry-arcv2.S | 8 ++++---- arch/arc/kernel/entry.S | 4 ++-- arch/arc/kernel/head.S | 2 +- arch/arc/kernel/intc-arcv2.c | 2 +- arch/arc/kernel/perf_event.c | 2 +- arch/arc/kernel/setup.c | 2 +- arch/arc/kernel/signal.c | 7 ++++--- arch/arc/kernel/traps.c | 2 +- arch/arc/kernel/vmlinux.lds.S | 4 ++-- arch/arc/mm/tlb.c | 4 ++-- arch/arc/mm/tlbex.S | 8 ++++---- 25 files changed, 45 insertions(+), 44 deletions(-) diff --git a/arch/arc/boot/Makefile b/arch/arc/boot/Makefile index 5648748c285f5..5a8550124b73e 100644 --- a/arch/arc/boot/Makefile +++ b/arch/arc/boot/Makefile @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 -# uImage build relies on mkimage being availble on your host for ARC target +# uImage build relies on mkimage being available on your host for ARC target # You will need to build u-boot for ARC, rename mkimage to arc-elf32-mkimage -# and make sure it's reacable from your PATH +# and make sure it's reachable from your PATH OBJCOPYFLAGS= -O binary -R .note -R .note.gnu.build-id -R .comment -S diff --git a/arch/arc/boot/dts/axc003.dtsi b/arch/arc/boot/dts/axc003.dtsi index 3434c8131ecd5..c0a812674ce9e 100644 --- a/arch/arc/boot/dts/axc003.dtsi +++ b/arch/arc/boot/dts/axc003.dtsi @@ -119,9 +119,9 @@ /* * The DW APB ICTL intc on MB is connected to CPU intc via a * DT "invisible" DW APB GPIO block, configured to simply pass thru - * interrupts - setup accordinly in platform init (plat-axs10x/ax10x.c) + * interrupts - setup accordingly in platform init (plat-axs10x/ax10x.c) * - * So here we mimic a direct connection betwen them, ignoring the + * So here we mimic a direct connection between them, ignoring the * ABPG GPIO. Thus set "interrupts = <24>" (DW APB GPIO to core) * instead of "interrupts = <12>" (DW APB ICTL to DW APB GPIO) * diff --git a/arch/arc/boot/dts/vdk_axs10x_mb.dtsi b/arch/arc/boot/dts/vdk_axs10x_mb.dtsi index 90a412026e643..0e0e2d337bf87 100644 --- a/arch/arc/boot/dts/vdk_axs10x_mb.dtsi +++ b/arch/arc/boot/dts/vdk_axs10x_mb.dtsi @@ -113,7 +113,7 @@ /* * Embedded Vision subsystem UIO mappings; only relevant for EV VDK * - * This node is intentionally put outside of MB above becase + * This node is intentionally put outside of MB above because * it maps areas outside of MB's 0xez-0xfz. */ uio_ev: uio@d0000000 { diff --git a/arch/arc/include/asm/dsp.h b/arch/arc/include/asm/dsp.h index 202c78e567045..f496dbc4640b2 100644 --- a/arch/arc/include/asm/dsp.h +++ b/arch/arc/include/asm/dsp.h @@ -12,7 +12,7 @@ /* * DSP-related saved registers - need to be saved only when you are * scheduled out. - * structure fields name must correspond to aux register defenitions for + * structure fields name must correspond to aux register definitions for * automatic offset calculation in DSP_AUX_SAVE_RESTORE macros */ struct dsp_callee_regs { diff --git a/arch/arc/include/asm/entry-compact.h b/arch/arc/include/asm/entry-compact.h index 92c3e9f132521..00946fe04c9b2 100644 --- a/arch/arc/include/asm/entry-compact.h +++ b/arch/arc/include/asm/entry-compact.h @@ -7,7 +7,7 @@ * Stack switching code can no longer reliably rely on the fact that * if we are NOT in user mode, stack is switched to kernel mode. * e.g. L2 IRQ interrupted a L1 ISR which had not yet completed - * it's prologue including stack switching from user mode + * its prologue including stack switching from user mode * * Vineetg: Aug 28th 2008: Bug #94984 * -Zero Overhead Loop Context shd be cleared when entering IRQ/EXcp/Trap @@ -143,7 +143,7 @@ * 2. L1 IRQ taken, ISR starts (CPU auto-switched to KERNEL mode) * 3. But before it could switch SP from USER to KERNEL stack * a L2 IRQ "Interrupts" L1 - * Thay way although L2 IRQ happened in Kernel mode, stack is still + * That way although L2 IRQ happened in Kernel mode, stack is still * not switched. * To handle this, we may need to switch stack even if in kernel mode * provided SP has values in range of USER mode stack ( < 0x7000_0000 ) @@ -173,7 +173,7 @@ GET_CURR_TASK_ON_CPU r9 - /* With current tsk in r9, get it's kernel mode stack base */ + /* With current tsk in r9, get its kernel mode stack base */ GET_TSK_STACK_BASE r9, r9 /* save U mode SP @ pt_regs->sp */ @@ -282,7 +282,7 @@ * NOTE: * * It is recommended that lp_count/ilink1/ilink2 not be used as a dest reg - * for memory load operations. If used in that way interrupts are deffered + * for memory load operations. If used in that way interrupts are deferred * by hardware and that is not good. *-------------------------------------------------------------*/ .macro EXCEPTION_EPILOGUE @@ -350,7 +350,7 @@ * NOTE: * * It is recommended that lp_count/ilink1/ilink2 not be used as a dest reg - * for memory load operations. If used in that way interrupts are deffered + * for memory load operations. If used in that way interrupts are deferred * by hardware and that is not good. *-------------------------------------------------------------*/ .macro INTERRUPT_EPILOGUE LVL diff --git a/arch/arc/include/asm/entry.h b/arch/arc/include/asm/entry.h index cf1ba376e992c..38c35722cebf0 100644 --- a/arch/arc/include/asm/entry.h +++ b/arch/arc/include/asm/entry.h @@ -7,7 +7,7 @@ #ifndef __ASM_ARC_ENTRY_H #define __ASM_ARC_ENTRY_H -#include /* For NR_syscalls defination */ +#include /* For NR_syscalls definition */ #include #include #include /* For VMALLOC_START */ @@ -56,7 +56,7 @@ .endm /*------------------------------------------------------------- - * given a tsk struct, get to the base of it's kernel mode stack + * given a tsk struct, get to the base of its kernel mode stack * tsk->thread_info is really a PAGE, whose bottom hoists stack * which grows upwards towards thread_info *------------------------------------------------------------*/ diff --git a/arch/arc/include/asm/irq.h b/arch/arc/include/asm/irq.h index c574712ad8658..9cd79263acba8 100644 --- a/arch/arc/include/asm/irq.h +++ b/arch/arc/include/asm/irq.h @@ -10,7 +10,7 @@ * ARCv2 can support 240 interrupts in the core interrupts controllers and * 128 interrupts in IDU. Thus 512 virtual IRQs must be enough for most * configurations of boards. - * This doesnt affect ARCompact, but we change it to same value + * This doesn't affect ARCompact, but we change it to same value */ #define NR_IRQS 512 diff --git a/arch/arc/include/asm/irqflags-compact.h b/arch/arc/include/asm/irqflags-compact.h index 0d63e568d64cb..936a2f21f315e 100644 --- a/arch/arc/include/asm/irqflags-compact.h +++ b/arch/arc/include/asm/irqflags-compact.h @@ -46,7 +46,7 @@ * IRQ Control Macros * * All of them have "memory" clobber (compiler barrier) which is needed to - * ensure that LD/ST requiring irq safetly (R-M-W when LLSC is not available) + * ensure that LD/ST requiring irq safety (R-M-W when LLSC is not available) * are redone after IRQs are re-enabled (and gcc doesn't reuse stale register) * * Noted at the time of Abilis Timer List corruption diff --git a/arch/arc/include/asm/mmu_context.h b/arch/arc/include/asm/mmu_context.h index dda471f5f05bb..9963bb1a5733f 100644 --- a/arch/arc/include/asm/mmu_context.h +++ b/arch/arc/include/asm/mmu_context.h @@ -165,7 +165,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, * for retiring-mm. However destroy_context( ) still needs to do that because * between mm_release( ) = >deactive_mm( ) and * mmput => .. => __mmdrop( ) => destroy_context( ) - * there is a good chance that task gets sched-out/in, making it's ASID valid + * there is a good chance that task gets sched-out/in, making its ASID valid * again (this teased me for a whole day). */ diff --git a/arch/arc/include/asm/pgtable-bits-arcv2.h b/arch/arc/include/asm/pgtable-bits-arcv2.h index f3eea3f30b2e2..8ebec1b21d246 100644 --- a/arch/arc/include/asm/pgtable-bits-arcv2.h +++ b/arch/arc/include/asm/pgtable-bits-arcv2.h @@ -66,7 +66,7 @@ * Other rules which cause the divergence from 1:1 mapping * * 1. Although ARC700 can do exclusive execute/write protection (meaning R - * can be tracked independet of X/W unlike some other CPUs), still to + * can be tracked independently of X/W unlike some other CPUs), still to * keep things consistent with other archs: * -Write implies Read: W => R * -Execute implies Read: X => R diff --git a/arch/arc/include/asm/shmparam.h b/arch/arc/include/asm/shmparam.h index 8b0251464ffd4..719112af0f41a 100644 --- a/arch/arc/include/asm/shmparam.h +++ b/arch/arc/include/asm/shmparam.h @@ -6,7 +6,7 @@ #ifndef __ARC_ASM_SHMPARAM_H #define __ARC_ASM_SHMPARAM_H -/* Handle upto 2 cache bins */ +/* Handle up to 2 cache bins */ #define SHMLBA (2 * PAGE_SIZE) /* Enforce SHMLBA in shmat */ diff --git a/arch/arc/include/asm/smp.h b/arch/arc/include/asm/smp.h index e0913f52c2cdd..990f834909f08 100644 --- a/arch/arc/include/asm/smp.h +++ b/arch/arc/include/asm/smp.h @@ -77,7 +77,7 @@ static inline const char *arc_platform_smp_cpuinfo(void) /* * ARC700 doesn't support atomic Read-Modify-Write ops. - * Originally Interrupts had to be disabled around code to gaurantee atomicity. + * Originally Interrupts had to be disabled around code to guarantee atomicity. * The LLOCK/SCOND insns allow writing interrupt-hassle-free based atomic ops * based on retry-if-irq-in-atomic (with hardware assist). * However despite these, we provide the IRQ disabling variant @@ -86,7 +86,7 @@ static inline const char *arc_platform_smp_cpuinfo(void) * support needed. * * (2) In a SMP setup, the LLOCK/SCOND atomicity across CPUs needs to be - * gaurantted by the platform (not something which core handles). + * guaranteed by the platform (not something which core handles). * Assuming a platform won't, SMP Linux needs to use spinlocks + local IRQ * disabling for atomicity. * diff --git a/arch/arc/include/asm/thread_info.h b/arch/arc/include/asm/thread_info.h index 4c530cf131f33..12daaf3a61eaf 100644 --- a/arch/arc/include/asm/thread_info.h +++ b/arch/arc/include/asm/thread_info.h @@ -38,7 +38,7 @@ struct thread_info { unsigned long flags; /* low level flags */ unsigned long ksp; /* kernel mode stack top in __switch_to */ - int preempt_count; /* 0 => preemptable, <0 => BUG */ + int preempt_count; /* 0 => preemptible, <0 => BUG */ int cpu; /* current CPU */ unsigned long thr_ptr; /* TLS ptr */ struct task_struct *task; /* main task structure */ diff --git a/arch/arc/include/uapi/asm/swab.h b/arch/arc/include/uapi/asm/swab.h index 02109cd48ee12..8d1f1ef44ba75 100644 --- a/arch/arc/include/uapi/asm/swab.h +++ b/arch/arc/include/uapi/asm/swab.h @@ -62,7 +62,7 @@ * 8051fdc4: st r2,[r1,20] ; Mem op : save result back to mem * * Joern suggested a better "C" algorithm which is great since - * (1) It is portable to any architecure + * (1) It is portable to any architecture * (2) At the same time it takes advantage of ARC ISA (rotate intrns) */ diff --git a/arch/arc/kernel/entry-arcv2.S b/arch/arc/kernel/entry-arcv2.S index 2e49c81c8086b..e238b5fd3c8cf 100644 --- a/arch/arc/kernel/entry-arcv2.S +++ b/arch/arc/kernel/entry-arcv2.S @@ -5,7 +5,7 @@ * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com) */ -#include /* ARC_{EXTRY,EXIT} */ +#include /* ARC_{ENTRY,EXIT} */ #include /* SAVE_ALL_{INT1,INT2,TRAP...} */ #include #include @@ -31,7 +31,7 @@ VECTOR res_service ; Reset Vector VECTOR mem_service ; Mem exception VECTOR instr_service ; Instrn Error VECTOR EV_MachineCheck ; Fatal Machine check -VECTOR EV_TLBMissI ; Intruction TLB miss +VECTOR EV_TLBMissI ; Instruction TLB miss VECTOR EV_TLBMissD ; Data TLB miss VECTOR EV_TLBProtV ; Protection Violation VECTOR EV_PrivilegeV ; Privilege Violation @@ -76,11 +76,11 @@ ENTRY(handle_interrupt) # query in hard ISR path would return false (since .IE is set) which would # trips genirq interrupt handling asserts. # - # So do a "soft" disable of interrutps here. + # So do a "soft" disable of interrupts here. # # Note this disable is only for consistent book-keeping as further interrupts # will be disabled anyways even w/o this. Hardware tracks active interrupts - # seperately in AUX_IRQ_ACT.active and will not take new interrupts + # separately in AUX_IRQ_ACT.active and will not take new interrupts # unless this one returns (or higher prio becomes pending in 2-prio scheme) IRQ_DISABLE diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S index 089f6680518f8..3c7e74aba6794 100644 --- a/arch/arc/kernel/entry.S +++ b/arch/arc/kernel/entry.S @@ -95,7 +95,7 @@ ENTRY(EV_MachineCheck) lr r0, [efa] mov r1, sp - ; MC excpetions disable MMU + ; MC exceptions disable MMU ARC_MMU_REENABLE r3 lsr r3, r10, 8 @@ -209,7 +209,7 @@ trap_with_param: ; --------------------------------------------- ; syscall TRAP -; ABI: (r0-r7) upto 8 args, (r8) syscall number +; ABI: (r0-r7) up to 8 args, (r8) syscall number ; --------------------------------------------- ENTRY(EV_Trap) diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S index 9152782444b55..8d541f53fae3e 100644 --- a/arch/arc/kernel/head.S +++ b/arch/arc/kernel/head.S @@ -165,7 +165,7 @@ ENTRY(first_lines_of_secondary) ; setup stack (fp, sp) mov fp, 0 - ; set it's stack base to tsk->thread_info bottom + ; set its stack base to tsk->thread_info bottom GET_TSK_STACK_BASE r0, sp j start_kernel_secondary diff --git a/arch/arc/kernel/intc-arcv2.c b/arch/arc/kernel/intc-arcv2.c index 678898757e473..f324f0e3341a3 100644 --- a/arch/arc/kernel/intc-arcv2.c +++ b/arch/arc/kernel/intc-arcv2.c @@ -56,7 +56,7 @@ void arc_init_IRQ(void) WRITE_AUX(AUX_IRQ_CTRL, ictrl); /* - * ARCv2 core intc provides multiple interrupt priorities (upto 16). + * ARCv2 core intc provides multiple interrupt priorities (up to 16). * Typical builds though have only two levels (0-high, 1-low) * Linux by default uses lower prio 1 for most irqs, reserving 0 for * NMI style interrupts in future (say perf) diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c index adff957962da8..6e5a651cd75cf 100644 --- a/arch/arc/kernel/perf_event.c +++ b/arch/arc/kernel/perf_event.c @@ -38,7 +38,7 @@ * (based on a specific RTL build) * Below is the static map between perf generic/arc specific event_id and * h/w condition names. - * At the time of probe, we loop thru each index and find it's name to + * At the time of probe, we loop thru each index and find its name to * complete the mapping of perf event_id to h/w index as latter is needed * to program the counter really */ diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index d08a5092c2b4d..7b6a9beba9db6 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -390,7 +390,7 @@ static void arc_chk_core_config(struct cpuinfo_arc *info) #ifdef CONFIG_ARC_HAS_DCCM /* * DCCM can be arbit placed in hardware. - * Make sure it's placement/sz matches what Linux is built with + * Make sure its placement/sz matches what Linux is built with */ if ((unsigned int)__arc_dccm_base != info->dccm.base) panic("Linux built with incorrect DCCM Base address\n"); diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c index 8f6f4a5429646..fefa705a86385 100644 --- a/arch/arc/kernel/signal.c +++ b/arch/arc/kernel/signal.c @@ -8,15 +8,16 @@ * * vineetg: Nov 2009 (Everything needed for TIF_RESTORE_SIGMASK) * -do_signal() supports TIF_RESTORE_SIGMASK - * -do_signal() no loner needs oldset, required by OLD sys_sigsuspend - * -sys_rt_sigsuspend() now comes from generic code, so discard arch implemen + * -do_signal() no longer needs oldset, required by OLD sys_sigsuspend + * -sys_rt_sigsuspend() now comes from generic code, so discard arch + * implementation * -sys_sigsuspend() no longer needs to fudge ptregs, hence that arg removed * -sys_sigsuspend() no longer loops for do_signal(), sets TIF_xxx and leaves * the job to do_signal() * * vineetg: July 2009 * -Modified Code to support the uClibc provided userland sigreturn stub - * to avoid kernel synthesing it on user stack at runtime, costing TLB + * to avoid kernel synthesizing it on user stack at runtime, costing TLB * probes and Cache line flushes. * * vineetg: July 2009 diff --git a/arch/arc/kernel/traps.c b/arch/arc/kernel/traps.c index 9b9570b79362e..a19751e824fb4 100644 --- a/arch/arc/kernel/traps.c +++ b/arch/arc/kernel/traps.c @@ -89,7 +89,7 @@ int do_misaligned_access(unsigned long address, struct pt_regs *regs, /* * Entry point for miscll errors such as Nested Exceptions - * -Duplicate TLB entry is handled seperately though + * -Duplicate TLB entry is handled separately though */ void do_machine_check_fault(unsigned long address, struct pt_regs *regs) { diff --git a/arch/arc/kernel/vmlinux.lds.S b/arch/arc/kernel/vmlinux.lds.S index 549c3f4079186..61a1b2b96e1d8 100644 --- a/arch/arc/kernel/vmlinux.lds.S +++ b/arch/arc/kernel/vmlinux.lds.S @@ -41,8 +41,8 @@ SECTIONS #endif /* - * The reason for having a seperate subsection .init.ramfs is to - * prevent objump from including it in kernel dumps + * The reason for having a separate subsection .init.ramfs is to + * prevent objdump from including it in kernel dumps * * Reason for having .init.ramfs above .init is to make sure that the * binary blob is tucked away to one side, reducing the displacement diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index ad702b49aeb3b..cae4a7aae0ed4 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -212,7 +212,7 @@ void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long flags; /* If range @start to @end is more than 32 TLB entries deep, - * its better to move to a new ASID rather than searching for + * it's better to move to a new ASID rather than searching for * individual entries and then shooting them down * * The calc above is rough, doesn't account for unaligned parts, @@ -408,7 +408,7 @@ static void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *p * -More importantly it makes this handler inconsistent with fast-path * TLB Refill handler which always deals with "current" * - * Lets see the use cases when current->mm != vma->mm and we land here + * Let's see the use cases when current->mm != vma->mm and we land here * 1. execve->copy_strings()->__get_user_pages->handle_mm_fault * Here VM wants to pre-install a TLB entry for user stack while * current->mm still points to pre-execve mm (hence the condition). diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S index e054780a8fe0c..dc65e87a531fd 100644 --- a/arch/arc/mm/tlbex.S +++ b/arch/arc/mm/tlbex.S @@ -5,19 +5,19 @@ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) * * Vineetg: April 2011 : - * -MMU v1: moved out legacy code into a seperate file + * -MMU v1: moved out legacy code into a separate file * -MMU v3: PD{0,1} bits layout changed: They don't overlap anymore, * helps avoid a shift when preparing PD0 from PTE * * Vineetg: July 2009 - * -For MMU V2, we need not do heuristics at the time of commiting a D-TLB - * entry, so that it doesn't knock out it's I-TLB entry + * -For MMU V2, we need not do heuristics at the time of committing a D-TLB + * entry, so that it doesn't knock out its I-TLB entry * -Some more fine tuning: * bmsk instead of add, asl.cc instead of branch, delay slot utilise etc * * Vineetg: July 2009 * -Practically rewrote the I/D TLB Miss handlers - * Now 40 and 135 instructions a peice as compared to 131 and 449 resp. + * Now 40 and 135 instructions apiece as compared to 131 and 449 resp. * Hence Leaner by 1.5 K * Used Conditional arithmetic to replace excessive branching * Also used short instructions wherever possible From f8def10f73a516b771051a2f70f2f0446902cb4f Mon Sep 17 00:00:00 2001 From: Mantas Pucka Date: Thu, 21 Mar 2024 14:30:01 +0000 Subject: [PATCH 034/606] mmc: sdhci-msm: pervent access to suspended controller Generic sdhci code registers LED device and uses host->runtime_suspended flag to protect access to it. The sdhci-msm driver doesn't set this flag, which causes a crash when LED is accessed while controller is runtime suspended. Fix this by setting the flag correctly. Cc: stable@vger.kernel.org Fixes: 67e6db113c90 ("mmc: sdhci-msm: Add pm_runtime and system PM support") Signed-off-by: Mantas Pucka Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/20240321-sdhci-mmc-suspend-v1-1-fbc555a64400@8devices.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-msm.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c index 668e0aceeebac..e113b99a3eab5 100644 --- a/drivers/mmc/host/sdhci-msm.c +++ b/drivers/mmc/host/sdhci-msm.c @@ -2694,6 +2694,11 @@ static __maybe_unused int sdhci_msm_runtime_suspend(struct device *dev) struct sdhci_host *host = dev_get_drvdata(dev); struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct sdhci_msm_host *msm_host = sdhci_pltfm_priv(pltfm_host); + unsigned long flags; + + spin_lock_irqsave(&host->lock, flags); + host->runtime_suspended = true; + spin_unlock_irqrestore(&host->lock, flags); /* Drop the performance vote */ dev_pm_opp_set_rate(dev, 0); @@ -2708,6 +2713,7 @@ static __maybe_unused int sdhci_msm_runtime_resume(struct device *dev) struct sdhci_host *host = dev_get_drvdata(dev); struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct sdhci_msm_host *msm_host = sdhci_pltfm_priv(pltfm_host); + unsigned long flags; int ret; ret = clk_bulk_prepare_enable(ARRAY_SIZE(msm_host->bulk_clks), @@ -2726,7 +2732,15 @@ static __maybe_unused int sdhci_msm_runtime_resume(struct device *dev) dev_pm_opp_set_rate(dev, msm_host->clk_rate); - return sdhci_msm_ice_resume(msm_host); + ret = sdhci_msm_ice_resume(msm_host); + if (ret) + return ret; + + spin_lock_irqsave(&host->lock, flags); + host->runtime_suspended = false; + spin_unlock_irqrestore(&host->lock, flags); + + return ret; } static const struct dev_pm_ops sdhci_msm_pm_ops = { From 0e60f0b75884677fb9f4f2ad40d52b43451564d5 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Sat, 17 Feb 2024 05:15:42 -0800 Subject: [PATCH 035/606] xtensa: fix MAKE_PC_FROM_RA second argument Xtensa has two-argument MAKE_PC_FROM_RA macro to convert a0 to an actual return address because when windowed ABI is used call{,x}{4,8,12} opcodes stuff encoded window size into the top 2 bits of the register that becomes a return address in the called function. Second argument of that macro is supposed to be an address having these 2 topmost bits set correctly, but the comment suggested that that could be the stack address. However the stack doesn't have to be in the same 1GByte region as the code, especially in noMMU XIP configurations. Fix the comment and use either _text or regs->pc as the second argument for the MAKE_PC_FROM_RA macro. Cc: stable@vger.kernel.org Signed-off-by: Max Filippov --- arch/xtensa/include/asm/processor.h | 8 ++++---- arch/xtensa/include/asm/ptrace.h | 2 +- arch/xtensa/kernel/process.c | 5 +++-- arch/xtensa/kernel/stacktrace.c | 3 ++- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h index d008a153a2b9f..7ed1a2085bd72 100644 --- a/arch/xtensa/include/asm/processor.h +++ b/arch/xtensa/include/asm/processor.h @@ -115,9 +115,9 @@ #define MAKE_RA_FOR_CALL(ra,ws) (((ra) & 0x3fffffff) | (ws) << 30) /* Convert return address to a valid pc - * Note: We assume that the stack pointer is in the same 1GB ranges as the ra + * Note: 'text' is the address within the same 1GB range as the ra */ -#define MAKE_PC_FROM_RA(ra,sp) (((ra) & 0x3fffffff) | ((sp) & 0xc0000000)) +#define MAKE_PC_FROM_RA(ra, text) (((ra) & 0x3fffffff) | ((unsigned long)(text) & 0xc0000000)) #elif defined(__XTENSA_CALL0_ABI__) @@ -127,9 +127,9 @@ #define MAKE_RA_FOR_CALL(ra, ws) (ra) /* Convert return address to a valid pc - * Note: We assume that the stack pointer is in the same 1GB ranges as the ra + * Note: 'text' is not used as 'ra' is always the full address */ -#define MAKE_PC_FROM_RA(ra, sp) (ra) +#define MAKE_PC_FROM_RA(ra, text) (ra) #else #error Unsupported Xtensa ABI diff --git a/arch/xtensa/include/asm/ptrace.h b/arch/xtensa/include/asm/ptrace.h index a270467556dc8..86c70117371bb 100644 --- a/arch/xtensa/include/asm/ptrace.h +++ b/arch/xtensa/include/asm/ptrace.h @@ -87,7 +87,7 @@ struct pt_regs { # define user_mode(regs) (((regs)->ps & 0x00000020)!=0) # define instruction_pointer(regs) ((regs)->pc) # define return_pointer(regs) (MAKE_PC_FROM_RA((regs)->areg[0], \ - (regs)->areg[1])) + (regs)->pc)) # ifndef CONFIG_SMP # define profile_pc(regs) instruction_pointer(regs) diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c index a815577d25fd0..7bd66677f7b6d 100644 --- a/arch/xtensa/kernel/process.c +++ b/arch/xtensa/kernel/process.c @@ -47,6 +47,7 @@ #include #include #include +#include #include extern void ret_from_fork(void); @@ -380,7 +381,7 @@ unsigned long __get_wchan(struct task_struct *p) int count = 0; sp = p->thread.sp; - pc = MAKE_PC_FROM_RA(p->thread.ra, p->thread.sp); + pc = MAKE_PC_FROM_RA(p->thread.ra, _text); do { if (sp < stack_page + sizeof(struct task_struct) || @@ -392,7 +393,7 @@ unsigned long __get_wchan(struct task_struct *p) /* Stack layout: sp-4: ra, sp-3: sp' */ - pc = MAKE_PC_FROM_RA(SPILL_SLOT(sp, 0), sp); + pc = MAKE_PC_FROM_RA(SPILL_SLOT(sp, 0), _text); sp = SPILL_SLOT(sp, 1); } while (count++ < 16); return 0; diff --git a/arch/xtensa/kernel/stacktrace.c b/arch/xtensa/kernel/stacktrace.c index 831ffb648bda7..ed324fdf2a2f9 100644 --- a/arch/xtensa/kernel/stacktrace.c +++ b/arch/xtensa/kernel/stacktrace.c @@ -13,6 +13,7 @@ #include #include +#include #include #include #include @@ -189,7 +190,7 @@ void walk_stackframe(unsigned long *sp, if (a1 <= (unsigned long)sp) break; - frame.pc = MAKE_PC_FROM_RA(a0, a1); + frame.pc = MAKE_PC_FROM_RA(a0, _text); frame.sp = a1; if (fn(&frame, data)) From 6677196fb1932e60b88ad0794a7ae532df178654 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 25 Mar 2024 09:58:35 +0100 Subject: [PATCH 036/606] clk: qcom: gdsc: treat optional supplies as optional Since commit deebc79b28d6 ("clk: qcom: gpucc-sc8280xp: Add external supply for GX gdsc") the GDSC supply must be treated as optional to avoid warnings like: gpu_cc-sc8280xp 3d90000.clock-controller: supply vdd-gfx not found, using dummy regulator on SC8280XP. Fortunately, the driver is already prepared to handle this by checking that the regulator pointer is non-NULL before use. This also avoids triggering a potential deadlock on SC8280XP even if the underlying issue still remains for the derivative platforms like SA8295P that actually use the supply. Fixes: deebc79b28d6 ("clk: qcom: gpucc-sc8280xp: Add external supply for GX gdsc") Link: https://lore.kernel.org/lkml/Zf25Sv2x9WaCFuIH@hovoldconsulting.com/ Signed-off-by: Johan Hovold Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20240325085835.26158-1-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson --- drivers/clk/qcom/gdsc.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/clk/qcom/gdsc.c b/drivers/clk/qcom/gdsc.c index e7a4068b9f390..df9618ab7eea1 100644 --- a/drivers/clk/qcom/gdsc.c +++ b/drivers/clk/qcom/gdsc.c @@ -487,9 +487,14 @@ int gdsc_register(struct gdsc_desc *desc, if (!scs[i] || !scs[i]->supply) continue; - scs[i]->rsupply = devm_regulator_get(dev, scs[i]->supply); - if (IS_ERR(scs[i]->rsupply)) - return PTR_ERR(scs[i]->rsupply); + scs[i]->rsupply = devm_regulator_get_optional(dev, scs[i]->supply); + if (IS_ERR(scs[i]->rsupply)) { + ret = PTR_ERR(scs[i]->rsupply); + if (ret != -ENODEV) + return ret; + + scs[i]->rsupply = NULL; + } } data->num_domains = num; From 0d4ce2458cd7d1d66a5ee2f3c036592fb663d5bc Mon Sep 17 00:00:00 2001 From: Adam Skladowski Date: Mon, 1 Apr 2024 19:16:39 +0200 Subject: [PATCH 037/606] clk: qcom: smd-rpm: Restore msm8976 num_clk During rework somehow msm8976 num_clk got removed, restore it. Fixes: d6edc31f3a68 ("clk: qcom: smd-rpm: Separate out interconnect bus clocks") Signed-off-by: Adam Skladowski Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20240401171641.8979-1-a39.skl@gmail.com Signed-off-by: Bjorn Andersson --- drivers/clk/qcom/clk-smd-rpm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/qcom/clk-smd-rpm.c b/drivers/clk/qcom/clk-smd-rpm.c index 8602c02047d04..45c5255bcd11b 100644 --- a/drivers/clk/qcom/clk-smd-rpm.c +++ b/drivers/clk/qcom/clk-smd-rpm.c @@ -768,6 +768,7 @@ static struct clk_smd_rpm *msm8976_clks[] = { static const struct rpm_smd_clk_desc rpm_clk_msm8976 = { .clks = msm8976_clks, + .num_clks = ARRAY_SIZE(msm8976_clks), .icc_clks = bimc_pcnoc_snoc_smmnoc_icc_clks, .num_icc_clks = ARRAY_SIZE(bimc_pcnoc_snoc_smmnoc_icc_clks), }; From 1781f2c461804c0123f59afc7350e520a88edffb Mon Sep 17 00:00:00 2001 From: Ikjoon Jang Date: Fri, 23 Feb 2024 17:11:21 +0800 Subject: [PATCH 038/606] arm64: dts: mediatek: mt8183: Add power-domains properity to mfgcfg mfgcfg clock is under MFG_ASYNC power domain. Fixes: e526c9bc11f8 ("arm64: dts: Add Mediatek SoC MT8183 and evaluation board dts and Makefile") Fixes: 37fb78b9aeb7 ("arm64: dts: mediatek: Add mt8183 power domains controller") Signed-off-by: Weiyi Lu Signed-off-by: Ikjoon Jang Reviewed-by: Enric Balletbo i Serra Signed-off-by: Chen-Yu Tsai Link: https://lore.kernel.org/r/20240223091122.2430037-1-wenst@chromium.org Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt8183.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/mediatek/mt8183.dtsi b/arch/arm64/boot/dts/mediatek/mt8183.dtsi index 93dfbf1302315..774ae5d9143f1 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183.dtsi @@ -1637,6 +1637,7 @@ compatible = "mediatek,mt8183-mfgcfg", "syscon"; reg = <0 0x13000000 0 0x1000>; #clock-cells = <1>; + power-domains = <&spm MT8183_POWER_DOMAIN_MFG_ASYNC>; }; gpu: gpu@13040000 { From 00bcc8810d9dd69d3899a4189e2f3964f263a600 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?= Date: Thu, 29 Feb 2024 14:44:28 -0500 Subject: [PATCH 039/606] arm64: dts: mediatek: mt8192: Add missing gce-client-reg to mutex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the missing mediatek,gce-client-reg property to the mutex node to allow it to use the GCE. This prevents the "can't parse gce-client-reg property" error from being printed and should result in better performance. Fixes: b4b75bac952b ("arm64: dts: mt8192: Add display nodes") Suggested-by: AngeloGioacchino Del Regno Signed-off-by: Nícolas F. R. A. Prado Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240229-gce-client-reg-add-missing-mt8192-95-v1-1-b12c233a8a33@collabora.com Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt8192.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/mediatek/mt8192.dtsi b/arch/arm64/boot/dts/mediatek/mt8192.dtsi index 05e401670bced..84cbdf6e9eb0c 100644 --- a/arch/arm64/boot/dts/mediatek/mt8192.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8192.dtsi @@ -1464,6 +1464,7 @@ reg = <0 0x14001000 0 0x1000>; interrupts = ; clocks = <&mmsys CLK_MM_DISP_MUTEX0>; + mediatek,gce-client-reg = <&gce SUBSYS_1400XXXX 0x1000 0x1000>; mediatek,gce-events = , ; power-domains = <&spm MT8192_POWER_DOMAIN_DISP>; From 96b0c1528ef41fe754f5d1378b1db6c098a2e33f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?= Date: Thu, 29 Feb 2024 14:44:29 -0500 Subject: [PATCH 040/606] arm64: dts: mediatek: mt8195: Add missing gce-client-reg to vpp/vdosys MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the missing mediatek,gce-client-reg property to the vppsys and vdosys nodes to allow them to use the GCE. This prevents the "can't parse gce-client-reg property" error from being printed and should result in better performance. Fixes: 6aa5b46d1755 ("arm64: dts: mt8195: Add vdosys and vppsys clock nodes") Suggested-by: AngeloGioacchino Del Regno Signed-off-by: Nícolas F. R. A. Prado Link: https://lore.kernel.org/r/20240229-gce-client-reg-add-missing-mt8192-95-v1-2-b12c233a8a33@collabora.com Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt8195.dtsi | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/boot/dts/mediatek/mt8195.dtsi b/arch/arm64/boot/dts/mediatek/mt8195.dtsi index ea6dc220e1cce..f2912e1a0e49e 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8195.dtsi @@ -2028,6 +2028,7 @@ compatible = "mediatek,mt8195-vppsys0", "syscon"; reg = <0 0x14000000 0 0x1000>; #clock-cells = <1>; + mediatek,gce-client-reg = <&gce1 SUBSYS_1400XXXX 0 0x1000>; }; dma-controller@14001000 { @@ -2251,6 +2252,7 @@ compatible = "mediatek,mt8195-vppsys1", "syscon"; reg = <0 0x14f00000 0 0x1000>; #clock-cells = <1>; + mediatek,gce-client-reg = <&gce1 SUBSYS_14f0XXXX 0 0x1000>; }; mutex@14f01000 { @@ -3080,6 +3082,7 @@ reg = <0 0x1c01a000 0 0x1000>; mboxes = <&gce0 0 CMDQ_THR_PRIO_4>; #clock-cells = <1>; + mediatek,gce-client-reg = <&gce0 SUBSYS_1c01XXXX 0xa000 0x1000>; }; From 3b129949184a1251e6a42db714f6d68b75fabedd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?= Date: Thu, 29 Feb 2024 14:44:30 -0500 Subject: [PATCH 041/606] arm64: dts: mediatek: mt8195: Add missing gce-client-reg to mutex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the missing mediatek,gce-client-reg property to the mutex node to allow it to use the GCE. This prevents the "can't parse gce-client-reg property" error from being printed and should result in better performance. Fixes: b852ee68fd72 ("arm64: dts: mt8195: Add display node for vdosys0") Suggested-by: AngeloGioacchino Del Regno Signed-off-by: Nícolas F. R. A. Prado Link: https://lore.kernel.org/r/20240229-gce-client-reg-add-missing-mt8192-95-v1-3-b12c233a8a33@collabora.com Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt8195.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/mediatek/mt8195.dtsi b/arch/arm64/boot/dts/mediatek/mt8195.dtsi index f2912e1a0e49e..fd074103979c6 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8195.dtsi @@ -3264,6 +3264,7 @@ interrupts = ; power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS0>; clocks = <&vdosys0 CLK_VDO0_DISP_MUTEX0>; + mediatek,gce-client-reg = <&gce0 SUBSYS_1c01XXXX 0x6000 0x1000>; mediatek,gce-events = ; }; From 58f126296c3c52d02bf3fad1f68c331d718c4a9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?= Date: Thu, 29 Feb 2024 14:44:31 -0500 Subject: [PATCH 042/606] arm64: dts: mediatek: mt8195: Add missing gce-client-reg to mutex1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the missing mediatek,gce-client-reg property to the mutex1 node to allow it to use the GCE. This prevents the "can't parse gce-client-reg property" error from being printed and should result in better performance. Fixes: 92d2c23dc269 ("arm64: dts: mt8195: add display node for vdosys1") Suggested-by: AngeloGioacchino Del Regno Signed-off-by: Nícolas F. R. A. Prado Link: https://lore.kernel.org/r/20240229-gce-client-reg-add-missing-mt8192-95-v1-4-b12c233a8a33@collabora.com Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt8195.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/mediatek/mt8195.dtsi b/arch/arm64/boot/dts/mediatek/mt8195.dtsi index fd074103979c6..5d8b68f86ce44 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8195.dtsi @@ -3335,6 +3335,7 @@ power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS1>; clocks = <&vdosys1 CLK_VDO1_DISP_MUTEX>; clock-names = "vdo1_mutex"; + mediatek,gce-client-reg = <&gce0 SUBSYS_1c10XXXX 0x1000 0x1000>; mediatek,gce-events = ; }; From 17b33dd9e4a38fbaca87c68e532b52f9d0492ba7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?= Date: Wed, 10 Jan 2024 11:23:01 -0300 Subject: [PATCH 043/606] arm64: dts: mediatek: cherry: Describe CPU supplies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Describe in each CPU node the regulator supplying it. Fixes: 260c04d425eb ("arm64: dts: mediatek: cherry: Enable MT6315 regulators on SPMI bus") Signed-off-by: Nícolas F. R. A. Prado Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240110142305.755367-2-nfraprado@collabora.com Signed-off-by: AngeloGioacchino Del Regno --- .../boot/dts/mediatek/mt8195-cherry.dtsi | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi b/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi index f94c07f8b9334..f12322eac3898 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi @@ -264,6 +264,38 @@ status = "okay"; }; +&cpu0 { + cpu-supply = <&mt6359_vcore_buck_reg>; +}; + +&cpu1 { + cpu-supply = <&mt6359_vcore_buck_reg>; +}; + +&cpu2 { + cpu-supply = <&mt6359_vcore_buck_reg>; +}; + +&cpu3 { + cpu-supply = <&mt6359_vcore_buck_reg>; +}; + +&cpu4 { + cpu-supply = <&mt6315_6_vbuck1>; +}; + +&cpu5 { + cpu-supply = <&mt6315_6_vbuck1>; +}; + +&cpu6 { + cpu-supply = <&mt6315_6_vbuck1>; +}; + +&cpu7 { + cpu-supply = <&mt6315_6_vbuck1>; +}; + &dp_intf0 { status = "okay"; From 374a7c6400e314458178255a63c37d6347845092 Mon Sep 17 00:00:00 2001 From: Pin-yen Lin Date: Fri, 15 Mar 2024 19:16:02 +0800 Subject: [PATCH 044/606] arm64: dts: mediatek: mt8192-asurada: Update min voltage constraint for MT6315 Update the minimum voltage from 300000 uV to 400000 uV so it matches the MT6315 datasheet. Also update the minimum voltage for Vgpu regulator from 606250 uV to 400000 uV because the requested voltage could be lower than the minimum voltage on the GPU OPP table when the MTK Smart Voltage Scaling (SVS) driver is enabled. Fixes: 3183cb62b033 ("arm64: dts: mediatek: asurada: Add SPMI regulators") Signed-off-by: Pin-yen Lin Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240315111621.2263159-2-treapking@chromium.org Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi b/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi index 9b738f6a5d213..7a704246678f0 100644 --- a/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi @@ -1421,7 +1421,7 @@ mt6315_6_vbuck1: vbuck1 { regulator-compatible = "vbuck1"; regulator-name = "Vbcpu"; - regulator-min-microvolt = <300000>; + regulator-min-microvolt = <400000>; regulator-max-microvolt = <1193750>; regulator-enable-ramp-delay = <256>; regulator-allowed-modes = <0 1 2>; @@ -1431,7 +1431,7 @@ mt6315_6_vbuck3: vbuck3 { regulator-compatible = "vbuck3"; regulator-name = "Vlcpu"; - regulator-min-microvolt = <300000>; + regulator-min-microvolt = <400000>; regulator-max-microvolt = <1193750>; regulator-enable-ramp-delay = <256>; regulator-allowed-modes = <0 1 2>; @@ -1448,7 +1448,7 @@ mt6315_7_vbuck1: vbuck1 { regulator-compatible = "vbuck1"; regulator-name = "Vgpu"; - regulator-min-microvolt = <606250>; + regulator-min-microvolt = <400000>; regulator-max-microvolt = <800000>; regulator-enable-ramp-delay = <256>; regulator-allowed-modes = <0 1 2>; From e9a6b8b5c61350535c7eb5ea9b2dde0d5745bd1b Mon Sep 17 00:00:00 2001 From: Pin-yen Lin Date: Fri, 15 Mar 2024 19:16:03 +0800 Subject: [PATCH 045/606] arm64: dts: mediatek: mt8195-cherry: Update min voltage constraint for MT6315 Update the minimum voltage from 300000 uV to 400000 uV so it matches the MT6315 datasheet. Also update the minimum voltage for Vgpu regulator from 625000 uV to 400000 uV because the requested voltage could be lower than the minimum voltage on the GPU OPP table when the MTK Smart Voltage Scaling (SVS) driver is enabled. Fixes: 260c04d425eb ("arm64: dts: mediatek: cherry: Enable MT6315 regulators on SPMI bus") Signed-off-by: Pin-yen Lin Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240315111621.2263159-3-treapking@chromium.org Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi b/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi index f12322eac3898..4a11918da3704 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi @@ -1246,7 +1246,7 @@ mt6315_6_vbuck1: vbuck1 { regulator-compatible = "vbuck1"; regulator-name = "Vbcpu"; - regulator-min-microvolt = <300000>; + regulator-min-microvolt = <400000>; regulator-max-microvolt = <1193750>; regulator-enable-ramp-delay = <256>; regulator-ramp-delay = <6250>; @@ -1264,7 +1264,7 @@ mt6315_7_vbuck1: vbuck1 { regulator-compatible = "vbuck1"; regulator-name = "Vgpu"; - regulator-min-microvolt = <625000>; + regulator-min-microvolt = <400000>; regulator-max-microvolt = <1193750>; regulator-enable-ramp-delay = <256>; regulator-ramp-delay = <6250>; From 296118a8dc297de47d9b3a364b9743f8446bd612 Mon Sep 17 00:00:00 2001 From: Pin-yen Lin Date: Fri, 15 Mar 2024 19:16:04 +0800 Subject: [PATCH 046/606] arm64: dts: mediatek: mt8183-kukui: Use default min voltage for MT6358 The requested voltage could be lower than the minimum voltage on the GPU OPP table when the MTK Smart Voltage Scaling (SVS) driver is enabled, so removing the definition in mt8183-kukui to use the default minimum voltage (500000 uV) defined in mt6358.dtsi. Fixes: 31c6732da9d5 ("arm64: dts: mediatek: mt8183-kukui: Override vgpu/vsram_gpu constraints") Signed-off-by: Pin-yen Lin Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240315111621.2263159-4-treapking@chromium.org Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi index 6bd7424ef66c5..100191c6453ba 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi @@ -433,7 +433,6 @@ }; &mt6358_vgpu_reg { - regulator-min-microvolt = <625000>; regulator-max-microvolt = <900000>; regulator-coupled-with = <&mt6358_vsram_gpu_reg>; From 366940c860bc27cc1cc92061e6626a4fa56bab3c Mon Sep 17 00:00:00 2001 From: Pin-yen Lin Date: Fri, 15 Mar 2024 19:16:05 +0800 Subject: [PATCH 047/606] arm64: dts: mediatek: mt8186-corsola: Update min voltage constraint for Vgpu The requested voltage could be lower than the minimum voltage on the GPU OPP table when the MTK Smart Voltage Scaling (SVS) driver is enabled, so update the minimum voltage constraint from 600000 uV to 500000 uV as listed on the mt6366 datasheet. Fixes: 8855d01fb81f ("arm64: dts: mediatek: Add MT8186 Krabby platform based Tentacruel / Tentacool") Signed-off-by: Pin-yen Lin Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240315111621.2263159-5-treapking@chromium.org Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt8186-corsola.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8186-corsola.dtsi b/arch/arm64/boot/dts/mediatek/mt8186-corsola.dtsi index 3dea28f1d8061..1807e9d6cb0e4 100644 --- a/arch/arm64/boot/dts/mediatek/mt8186-corsola.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8186-corsola.dtsi @@ -1296,7 +1296,7 @@ * regulator coupling requirements. */ regulator-name = "ppvar_dvdd_vgpu"; - regulator-min-microvolt = <600000>; + regulator-min-microvolt = <500000>; regulator-max-microvolt = <950000>; regulator-ramp-delay = <6250>; regulator-enable-ramp-delay = <200>; From 3ba5a61594347ab46e7c2cff6cd63ea0f1282efb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Sun, 17 Mar 2024 23:10:47 +0100 Subject: [PATCH 048/606] arm64: dts: mediatek: mt7622: fix clock controllers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. Drop unneeded "syscon"s (bindings were updated recently) 2. Use "clock-controller" in nodenames 3. Add missing "#clock-cells" Fixes: d7167881e03e ("arm64: dts: mt7622: add clock controller device nodes") Fixes: e9b65ecb7c30 ("arm64: dts: mediatek: mt7622: introduce nodes for Wireless Ethernet Dispatch") Signed-off-by: Rafał Miłecki Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240317221050.18595-2-zajec5@gmail.com Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt7622.dtsi | 27 +++++++++++------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt7622.dtsi b/arch/arm64/boot/dts/mediatek/mt7622.dtsi index 3ee9266fa8e98..283fdf7d2d8b9 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7622.dtsi @@ -283,16 +283,14 @@ }; }; - apmixedsys: apmixedsys@10209000 { - compatible = "mediatek,mt7622-apmixedsys", - "syscon"; + apmixedsys: clock-controller@10209000 { + compatible = "mediatek,mt7622-apmixedsys"; reg = <0 0x10209000 0 0x1000>; #clock-cells = <1>; }; - topckgen: topckgen@10210000 { - compatible = "mediatek,mt7622-topckgen", - "syscon"; + topckgen: clock-controller@10210000 { + compatible = "mediatek,mt7622-topckgen"; reg = <0 0x10210000 0 0x1000>; #clock-cells = <1>; }; @@ -734,9 +732,8 @@ power-domains = <&scpsys MT7622_POWER_DOMAIN_WB>; }; - ssusbsys: ssusbsys@1a000000 { - compatible = "mediatek,mt7622-ssusbsys", - "syscon"; + ssusbsys: clock-controller@1a000000 { + compatible = "mediatek,mt7622-ssusbsys"; reg = <0 0x1a000000 0 0x1000>; #clock-cells = <1>; #reset-cells = <1>; @@ -793,9 +790,8 @@ }; }; - pciesys: pciesys@1a100800 { - compatible = "mediatek,mt7622-pciesys", - "syscon"; + pciesys: clock-controller@1a100800 { + compatible = "mediatek,mt7622-pciesys"; reg = <0 0x1a100800 0 0x1000>; #clock-cells = <1>; #reset-cells = <1>; @@ -921,12 +917,13 @@ }; }; - hifsys: syscon@1af00000 { - compatible = "mediatek,mt7622-hifsys", "syscon"; + hifsys: clock-controller@1af00000 { + compatible = "mediatek,mt7622-hifsys"; reg = <0 0x1af00000 0 0x70>; + #clock-cells = <1>; }; - ethsys: syscon@1b000000 { + ethsys: clock-controller@1b000000 { compatible = "mediatek,mt7622-ethsys", "syscon"; reg = <0 0x1b000000 0 0x1000>; From 800dc93c3941e372c94278bf4059e6e82f60bd66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Sun, 17 Mar 2024 23:10:48 +0100 Subject: [PATCH 049/606] arm64: dts: mediatek: mt7622: fix IR nodename MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix following validation error: arch/arm64/boot/dts/mediatek/mt7622-rfb1.dtb: cir@10009000: $nodename:0: 'cir@10009000' does not match '^ir(-receiver)?(@[a-f0-9]+)?$' from schema $id: http://devicetree.org/schemas/media/mediatek,mt7622-cir.yaml# Fixes: ae457b7679c4 ("arm64: dts: mt7622: add SoC and peripheral related device nodes") Signed-off-by: Rafał Miłecki Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240317221050.18595-3-zajec5@gmail.com Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt7622.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/mediatek/mt7622.dtsi b/arch/arm64/boot/dts/mediatek/mt7622.dtsi index 283fdf7d2d8b9..4c8a71c8184b7 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7622.dtsi @@ -252,7 +252,7 @@ clock-names = "hif_sel"; }; - cir: cir@10009000 { + cir: ir-receiver@10009000 { compatible = "mediatek,mt7622-cir"; reg = <0 0x10009000 0 0x1000>; interrupts = ; From 208add29ce5b7291f6c466e4dfd9cbf61c72888e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Sun, 17 Mar 2024 23:10:49 +0100 Subject: [PATCH 050/606] arm64: dts: mediatek: mt7622: fix ethernet controller "compatible" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix following validation error: arch/arm64/boot/dts/mediatek/mt7622-rfb1.dtb: ethernet@1b100000: compatible: ['mediatek,mt7622-eth', 'mediatek,mt2701-eth', 'syscon'] is too long from schema $id: http://devicetree.org/schemas/net/mediatek,net.yaml# (and other complains about wrong clocks). Fixes: 5f599b3a0bb8 ("arm64: dts: mt7622: add ethernet device nodes") Signed-off-by: Rafał Miłecki Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240317221050.18595-4-zajec5@gmail.com Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt7622.dtsi | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt7622.dtsi b/arch/arm64/boot/dts/mediatek/mt7622.dtsi index 4c8a71c8184b7..8e46480b5364b 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7622.dtsi @@ -963,9 +963,7 @@ }; eth: ethernet@1b100000 { - compatible = "mediatek,mt7622-eth", - "mediatek,mt2701-eth", - "syscon"; + compatible = "mediatek,mt7622-eth"; reg = <0 0x1b100000 0 0x20000>; interrupts = , , From ecb5b0034f5bcc35003b4b965cf50c6e98316e79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Sun, 17 Mar 2024 23:10:50 +0100 Subject: [PATCH 051/606] arm64: dts: mediatek: mt7622: drop "reset-names" from thermal block MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Binding doesn't specify "reset-names" property and Linux driver also doesn't use it. Fix following validation error: arch/arm64/boot/dts/mediatek/mt7622-rfb1.dtb: thermal@1100b000: Unevaluated properties are not allowed ('reset-names' was unexpected) from schema $id: http://devicetree.org/schemas/thermal/mediatek,thermal.yaml# Fixes: ae457b7679c4 ("arm64: dts: mt7622: add SoC and peripheral related device nodes") Signed-off-by: Rafał Miłecki Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240317221050.18595-5-zajec5@gmail.com Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt7622.dtsi | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/boot/dts/mediatek/mt7622.dtsi b/arch/arm64/boot/dts/mediatek/mt7622.dtsi index 8e46480b5364b..917fa39a74f8d 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7622.dtsi @@ -513,7 +513,6 @@ <&pericfg CLK_PERI_AUXADC_PD>; clock-names = "therm", "auxadc"; resets = <&pericfg MT7622_PERI_THERM_SW_RST>; - reset-names = "therm"; mediatek,auxadc = <&auxadc>; mediatek,apmixedsys = <&apmixedsys>; nvmem-cells = <&thermal_calibration>; From 8db8c77059e75a0f418b10ede39dd82a9eb031fa Mon Sep 17 00:00:00 2001 From: Nuno Pereira Date: Mon, 26 Feb 2024 22:39:31 +0000 Subject: [PATCH 052/606] HID: nintendo: Fix N64 controller being identified as mouse This patch is regarding the recent addition of support for the NSO controllers to hid-nintendo. All controllers are working correctly with the exception of the N64 controller, which is being identified as a mouse by udev. This results in the joystick controlling the mouse cursor and the controller not being detected by games. The reason for this is because the N64's C buttons have been attributed to BTN_FORWARD, BTN_BACK, BTN_LEFT, BTN_RIGHT, which are buttons typically attributed to mice. This patch changes those buttons to controller buttons, making the controller be correctly identified as such. Signed-off-by: Nuno Pereira Signed-off-by: Jiri Kosina --- drivers/hid/hid-nintendo.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/hid/hid-nintendo.c b/drivers/hid/hid-nintendo.c index ab5953fc24367..80e0f23c1c33e 100644 --- a/drivers/hid/hid-nintendo.c +++ b/drivers/hid/hid-nintendo.c @@ -481,10 +481,10 @@ static const struct joycon_ctlr_button_mapping n64con_button_mappings[] = { { BTN_TR, JC_BTN_R, }, { BTN_TR2, JC_BTN_LSTICK, }, /* ZR */ { BTN_START, JC_BTN_PLUS, }, - { BTN_FORWARD, JC_BTN_Y, }, /* C UP */ - { BTN_BACK, JC_BTN_ZR, }, /* C DOWN */ - { BTN_LEFT, JC_BTN_X, }, /* C LEFT */ - { BTN_RIGHT, JC_BTN_MINUS, }, /* C RIGHT */ + { BTN_SELECT, JC_BTN_Y, }, /* C UP */ + { BTN_X, JC_BTN_ZR, }, /* C DOWN */ + { BTN_Y, JC_BTN_X, }, /* C LEFT */ + { BTN_C, JC_BTN_MINUS, }, /* C RIGHT */ { BTN_MODE, JC_BTN_HOME, }, { BTN_Z, JC_BTN_CAP, }, { /* sentinel */ }, From ea36bf1827462e4a52365bf8e3f7d1712c5d9600 Mon Sep 17 00:00:00 2001 From: Kenny Levinsen Date: Tue, 2 Apr 2024 13:10:04 +0200 Subject: [PATCH 053/606] HID: i2c-hid: Revert to await reset ACK before reading report descriptor In af93a167eda9, i2c_hid_parse was changed to continue with reading the report descriptor before waiting for reset to be acknowledged. This has lead to two regressions: 1. We fail to handle reset acknowledgment if it happens while reading the report descriptor. The transfer sets I2C_HID_READ_PENDING, which causes the IRQ handler to return without doing anything. This affects both a Wacom touchscreen and a Sensel touchpad. 2. On a Sensel touchpad, reading the report descriptor this quickly after reset results in all zeroes or partial zeroes. The issues were observed on the Lenovo Thinkpad Z16 Gen 2. The change in question was made based on a Microsoft article[0] stating that Windows 8 *may* read the report descriptor in parallel with awaiting reset acknowledgment, intended as a slight reset performance optimization. Perhaps they only do this if reset is not completing quickly enough for their tastes? As the code is not currently ready to read registers in parallel with a pending reset acknowledgment, and as reading quickly breaks the report descriptor on the Sensel touchpad, revert to waiting for reset acknowledgment before proceeding to read the report descriptor. [0]: https://learn.microsoft.com/en-us/windows-hardware/drivers/hid/plug-and-play-support-and-power-management Fixes: af93a167eda9 ("HID: i2c-hid: Move i2c_hid_finish_hwreset() to after reading the report-descriptor") Closes: https://bugzilla.redhat.com/show_bug.cgi?id=2271136 Cc: stable@vger.kernel.org Signed-off-by: Kenny Levinsen Link: https://lore.kernel.org/r/20240331182440.14477-1-kl@kl.wtf [hdegoede@redhat.com Drop no longer necessary abort_reset error exit path] Signed-off-by: Hans de Goede Tested-by: Mark Pearson Signed-off-by: Jiri Kosina --- drivers/hid/i2c-hid/i2c-hid-core.c | 29 ++++++++--------------------- 1 file changed, 8 insertions(+), 21 deletions(-) diff --git a/drivers/hid/i2c-hid/i2c-hid-core.c b/drivers/hid/i2c-hid/i2c-hid-core.c index 1c86c97688e93..d965382196c69 100644 --- a/drivers/hid/i2c-hid/i2c-hid-core.c +++ b/drivers/hid/i2c-hid/i2c-hid-core.c @@ -726,12 +726,15 @@ static int i2c_hid_parse(struct hid_device *hid) mutex_lock(&ihid->reset_lock); do { ret = i2c_hid_start_hwreset(ihid); - if (ret) + if (ret == 0) + ret = i2c_hid_finish_hwreset(ihid); + else msleep(1000); } while (tries-- > 0 && ret); + mutex_unlock(&ihid->reset_lock); if (ret) - goto abort_reset; + return ret; use_override = i2c_hid_get_dmi_hid_report_desc_override(client->name, &rsize); @@ -741,11 +744,8 @@ static int i2c_hid_parse(struct hid_device *hid) i2c_hid_dbg(ihid, "Using a HID report descriptor override\n"); } else { rdesc = kzalloc(rsize, GFP_KERNEL); - - if (!rdesc) { - ret = -ENOMEM; - goto abort_reset; - } + if (!rdesc) + return -ENOMEM; i2c_hid_dbg(ihid, "asking HID report descriptor\n"); @@ -754,23 +754,10 @@ static int i2c_hid_parse(struct hid_device *hid) rdesc, rsize); if (ret) { hid_err(hid, "reading report descriptor failed\n"); - goto abort_reset; + goto out; } } - /* - * Windows directly reads the report-descriptor after sending reset - * and then waits for resets completion afterwards. Some touchpads - * actually wait for the report-descriptor to be read before signalling - * reset completion. - */ - ret = i2c_hid_finish_hwreset(ihid); -abort_reset: - clear_bit(I2C_HID_RESET_PENDING, &ihid->flags); - mutex_unlock(&ihid->reset_lock); - if (ret) - goto out; - i2c_hid_dbg(ihid, "Report Descriptor: %*ph\n", rsize, rdesc); ret = hid_parse_report(hid, rdesc, rsize); From 21f28a7eb78dea6c59be6b0a5e0b47bf3d25fcbb Mon Sep 17 00:00:00 2001 From: Yaraslau Furman Date: Wed, 3 Apr 2024 19:54:24 +0300 Subject: [PATCH 054/606] HID: logitech-dj: allow mice to use all types of reports You can bind whatever action you want to the mouse's reprogrammable buttons using Windows application. Allow Linux to receive multimedia keycodes. Fixes: 3ed224e273ac ("HID: logitech-dj: Fix 064d:c52f receiver support") Signed-off-by: Yaraslau Furman Reviewed-by: Hans de Goede Signed-off-by: Jiri Kosina --- drivers/hid/hid-logitech-dj.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c index e6a8b6d8eab70..3c3c497b6b911 100644 --- a/drivers/hid/hid-logitech-dj.c +++ b/drivers/hid/hid-logitech-dj.c @@ -965,9 +965,7 @@ static void logi_hidpp_dev_conn_notif_equad(struct hid_device *hdev, } break; case REPORT_TYPE_MOUSE: - workitem->reports_supported |= STD_MOUSE | HIDPP; - if (djrcv_dev->type == recvr_type_mouse_only) - workitem->reports_supported |= MULTIMEDIA; + workitem->reports_supported |= STD_MOUSE | HIDPP | MULTIMEDIA; break; } } From 2c534f2f2464828600ad5fb45f45a3f1ed4fb978 Mon Sep 17 00:00:00 2001 From: Audra Mitchell Date: Wed, 3 Apr 2024 14:00:22 -0400 Subject: [PATCH 055/606] Documentation/core-api: Update events_freezable_power references. Due to commit 8318d6a6362f ("workqueue: Shorten events_freezable_power_efficient name") we now have some stale references in the workqeueue documentation, so updating those references accordingly. Signed-off-by: Audra Mitchell Signed-off-by: Tejun Heo --- Documentation/core-api/workqueue.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/core-api/workqueue.rst b/Documentation/core-api/workqueue.rst index ed73c612174d4..bcc370c876be9 100644 --- a/Documentation/core-api/workqueue.rst +++ b/Documentation/core-api/workqueue.rst @@ -671,7 +671,7 @@ configuration, worker pools and how workqueues map to the pools: :: events_unbound unbound 9 9 10 10 8 events_freezable percpu 0 2 4 6 events_power_efficient percpu 0 2 4 6 - events_freezable_power_ percpu 0 2 4 6 + events_freezable_pwr_ef percpu 0 2 4 6 rcu_gp percpu 0 2 4 6 rcu_par_gp percpu 0 2 4 6 slub_flushwq percpu 0 2 4 6 @@ -694,7 +694,7 @@ Use tools/workqueue/wq_monitor.py to monitor workqueue operations: :: events_unbound 38306 0 0.1 - 7 - - events_freezable 0 0 0.0 0 0 - - events_power_efficient 29598 0 0.2 0 0 - - - events_freezable_power_ 10 0 0.0 0 0 - - + events_freezable_pwr_ef 10 0 0.0 0 0 - - sock_diag_events 0 0 0.0 0 0 - - total infl CPUtime CPUhog CMW/RPR mayday rescued @@ -704,7 +704,7 @@ Use tools/workqueue/wq_monitor.py to monitor workqueue operations: :: events_unbound 38322 0 0.1 - 7 - - events_freezable 0 0 0.0 0 0 - - events_power_efficient 29603 0 0.2 0 0 - - - events_freezable_power_ 10 0 0.0 0 0 - - + events_freezable_pwr_ef 10 0 0.0 0 0 - - sock_diag_events 0 0 0.0 0 0 - - ... From a1d34930d1b3782307ef5d0636f4f6a9ac5028e5 Mon Sep 17 00:00:00 2001 From: Xingyou Chen Date: Wed, 3 Apr 2024 07:08:10 +0800 Subject: [PATCH 056/606] docs/zh_CN: core-api: Update translation of workqueue.rst to 6.9-rc1 Significant changes have been made to workqueue, and there are staging works transferring from tasklet, while the current translation doesn't include description around WQ_BH, an update seems to be helpful. Synchronize translation from upstream commit 3bc1e711c26b ("workqueue: Don't implicitly make UNBOUND workqueues w/ @max_active==1 ordered") Signed-off-by: Xingyou Chen Signed-off-by: Tejun Heo --- .../translations/zh_CN/core-api/workqueue.rst | 398 ++++++++++++++++-- 1 file changed, 371 insertions(+), 27 deletions(-) diff --git a/Documentation/translations/zh_CN/core-api/workqueue.rst b/Documentation/translations/zh_CN/core-api/workqueue.rst index 7fac6f75d0782..fe0ff5a127f3f 100644 --- a/Documentation/translations/zh_CN/core-api/workqueue.rst +++ b/Documentation/translations/zh_CN/core-api/workqueue.rst @@ -7,12 +7,13 @@ 司延腾 Yanteng Si 周彬彬 Binbin Zhou + 陈兴友 Xingyou Chen .. _cn_workqueue.rst: -========================= -并发管理的工作队列 (cmwq) -========================= +======== +工作队列 +======== :日期: September, 2010 :作者: Tejun Heo @@ -22,7 +23,7 @@ 简介 ==== -在很多情况下,需要一个异步进程的执行环境,工作队列(wq)API是这种情况下 +在很多情况下,需要一个异步的程序执行环境,工作队列(wq)API是这种情况下 最常用的机制。 当需要这样一个异步执行上下文时,一个描述将要执行的函数的工作项(work, @@ -34,8 +35,8 @@ 队列时,工作者又开始执行。 -为什么要cmwq? -============= +为什么要有并发管理工作队列? +=========================== 在最初的wq实现中,多线程(MT)wq在每个CPU上有一个工作者线程,而单线程 (ST)wq在全系统有一个工作者线程。一个MT wq需要保持与CPU数量相同的工 @@ -73,9 +74,11 @@ 向该函数的工作项,并在工作队列中排队等待该工作项。(就是挂到workqueue 队列里面去) -特定目的线程,称为工作线程(工作者),一个接一个地执行队列中的功能。 -如果没有工作项排队,工作者线程就会闲置。这些工作者线程被管理在所谓 -的工作者池中。 +工作项可以在线程或BH(软中断)上下文中执行。 + +对于由线程执行的工作队列,被称为(内核)工作者([k]worker)的特殊 +线程会依次执行其中的函数。如果没有工作项排队,工作者线程就会闲置。 +这些工作者线程被管理在所谓的工作者池中。 cmwq设计区分了面向用户的工作队列,子系统和驱动程序在上面排队工作, 以及管理工作者池和处理排队工作项的后端机制。 @@ -84,6 +87,10 @@ cmwq设计区分了面向用户的工作队列,子系统和驱动程序在上 优先级的工作项,还有一些额外的工作者池,用于服务未绑定工作队列的工 作项目——这些后备池的数量是动态的。 +BH工作队列使用相同的结构。然而,由于同一时间只可能有一个执行上下文, +不需要担心并发问题。每个CPU上的BH工作者池只包含一个用于表示BH执行 +上下文的虚拟工作者。BH工作队列可以被看作软中断的便捷接口。 + 当他们认为合适的时候,子系统和驱动程序可以通过特殊的 ``workqueue API`` 函数创建和排队工作项。他们可以通过在工作队列上 设置标志来影响工作项执行方式的某些方面,他们把工作项放在那里。这些 @@ -95,9 +102,9 @@ cmwq设计区分了面向用户的工作队列,子系统和驱动程序在上 否则一个绑定的工作队列的工作项将被排在与发起线程运行的CPU相关的普 通或高级工作工作者池的工作项列表中。 -对于任何工作者池的实施,管理并发水平(有多少执行上下文处于活动状 -态)是一个重要问题。最低水平是为了节省资源,而饱和水平是指系统被 -充分使用。 +对于任何线程池的实施,管理并发水平(有多少执行上下文处于活动状 +态)是一个重要问题。cmwq试图将并发保持在一个尽可能低且充足的 +水平。最低水平是为了节省资源,而充足是为了使系统能被充分使用。 每个与实际CPU绑定的worker-pool通过钩住调度器来实现并发管理。每当 一个活动的工作者被唤醒或睡眠时,工作者池就会得到通知,并跟踪当前可 @@ -140,6 +147,17 @@ workqueue将自动创建与属性相匹配的后备工作者池。调节并发 ``flags`` --------- +``WQ_BH`` + BH工作队列可以被看作软中断的便捷接口。它总是每个CPU一份, + 其中的各个工作项也会按在队列中的顺序,被所属CPU在软中断 + 上下文中执行。 + + BH工作队列的 ``max_active`` 值必须为0,且只能单独或和 + ``WQ_HIGHPRI`` 标志组合使用。 + + BH工作项不可以睡眠。像延迟排队、冲洗、取消等所有其他特性 + 都是支持的。 + ``WQ_UNBOUND`` 排队到非绑定wq的工作项由特殊的工作者池提供服务,这些工作者不 绑定在任何特定的CPU上。这使得wq表现得像一个简单的执行环境提 @@ -184,25 +202,21 @@ workqueue将自动创建与属性相匹配的后备工作者池。调节并发 -------------- ``@max_active`` 决定了每个CPU可以分配给wq的工作项的最大执行上 -下文数量。例如,如果 ``@max_active为16`` ,每个CPU最多可以同 -时执行16个wq的工作项。 +下文数量。例如,如果 ``@max_active`` 为16 ,每个CPU最多可以同 +时执行16个wq的工作项。它总是每CPU属性,即便对于未绑定 wq。 -目前,对于一个绑定的wq, ``@max_active`` 的最大限制是512,当指 -定为0时使用的默认值是256。对于非绑定的wq,其限制是512和 -4 * ``num_possible_cpus()`` 中的较高值。这些值被选得足够高,所 -以它们不是限制性因素,同时会在失控情况下提供保护。 +``@max_active`` 的最大限制是512,当指定为0时使用的默认值是256。 +这些值被选得足够高,所以它们不是限制性因素,同时会在失控情况下提供 +保护。 一个wq的活动工作项的数量通常由wq的用户来调节,更具体地说,是由用 户在同一时间可以排列多少个工作项来调节。除非有特定的需求来控制活动 工作项的数量,否则建议指定 为"0"。 -一些用户依赖于ST wq的严格执行顺序。 ``@max_active`` 为1和 ``WQ_UNBOUND`` -的组合用来实现这种行为。这种wq上的工作项目总是被排到未绑定的工作池 -中,并且在任何时候都只有一个工作项目处于活动状态,从而实现与ST wq相 -同的排序属性。 - -在目前的实现中,上述配置只保证了特定NUMA节点内的ST行为。相反, -``alloc_ordered_workqueue()`` 应该被用来实现全系统的ST行为。 +一些用户依赖于任意时刻最多只有一个工作项被执行,且各工作项被按队列中 +顺序处理带来的严格执行顺序。``@max_active`` 为1和 ``WQ_UNBOUND`` +的组合曾被用来实现这种行为,现在不用了。请使用 +``alloc_ordered_workqueue()`` 。 执行场景示例 @@ -285,7 +299,7 @@ And with cmwq with ``@max_active`` >= 3, :: * 除非有特殊需要,建议使用0作为@max_active。在大多数使用情 况下,并发水平通常保持在默认限制之下。 -* 一个wq作为前进进度保证(WQ_MEM_RECLAIM,冲洗(flush)和工 +* 一个wq作为前进进度保证,``WQ_MEM_RECLAIM`` ,冲洗(flush)和工 作项属性的域。不涉及内存回收的工作项,不需要作为工作项组的一 部分被刷新,也不需要任何特殊属性,可以使用系统中的一个wq。使 用专用wq和系统wq在执行特性上没有区别。 @@ -294,6 +308,337 @@ And with cmwq with ``@max_active`` >= 3, :: 益的,因为wq操作和工作项执行中的定位水平提高了。 +亲和性作用域 +============ + +一个非绑定工作队列根据其亲和性作用域来对CPU进行分组以提高缓存 +局部性。比如如果一个工作队列使用默认的“cache”亲和性作用域, +它将根据最后一级缓存的边界来分组处理器。这个工作队列上的工作项 +将被分配给一个与发起CPU共用最后级缓存的处理器上的工作者。根据 +``affinity_strict`` 的设置,工作者在启动后可能被允许移出 +所在作用域,也可能不被允许。 + +工作队列目前支持以下亲和性作用域。 + +``default`` + 使用模块参数 ``workqueue.default_affinity_scope`` 指定 + 的作用域,该参数总是会被设为以下作用域中的一个。 + +``cpu`` + CPU不被分组。一个CPU上发起的工作项会被同一CPU上的工作者执行。 + 这使非绑定工作队列表现得像是不含并发管理的每CPU工作队列。 + +``smt`` + CPU被按SMT边界分组。这通常意味着每个物理CPU核上的各逻辑CPU会 + 被分进同一组。 + +``cache`` + CPU被按缓存边界分组。采用哪个缓存边界由架构代码决定。很多情况 + 下会使用L3。这是默认的亲和性作用域。 + +``numa`` + CPU被按NUMA边界分组。 + +``system`` + 所有CPU被放在同一组。工作队列不尝试在临近发起CPU的CPU上运行 + 工作项。 + +默认的亲和性作用域可以被模块参数 ``workqueue.default_affinity_scope`` +修改,特定工作队列的亲和性作用域可以通过 ``apply_workqueue_attrs()`` +被更改。 + +如果设置了 ``WQ_SYSFS`` ,工作队列会在它的 ``/sys/devices/virtual/workqueue/WQ_NAME/`` +目录中有以下亲和性作用域相关的接口文件。 + +``affinity_scope`` + 读操作以查看当前的亲和性作用域。写操作用于更改设置。 + + 当前作用域是默认值时,当前生效的作用域也可以被从这个文件中 + 读到(小括号内),例如 ``default (cache)`` 。 + +``affinity_strict`` + 默认值0表明亲和性作用域不是严格的。当一个工作项开始执行时, + 工作队列尽量尝试使工作者处于亲和性作用域内,称为遣返。启动后, + 调度器可以自由地将工作者调度到系统中任意它认为合适的地方去。 + 这使得在保留使用其他CPU(如果必需且有可用)能力的同时, + 还能从作用域局部性上获益。 + + 如果设置为1,作用域内的所有工作者将被保证总是处于作用域内。 + 这在跨亲和性作用域会导致如功耗、负载隔离等方面的潜在影响时 + 会有用。严格的NUMA作用域也可用于和旧版内核中工作队列的行为 + 保持一致。 + + +亲和性作用域与性能 +================== + +如果非绑定工作队列的行为对绝大多数使用场景来说都是最优的, +不需要更多调节,就完美了。很不幸,在当前内核中,重度使用 +工作队列时,需要在局部性和利用率间显式地作一个明显的权衡。 + +更高的局部性带来更高效率,也就是相同数量的CPU周期内可以做 +更多工作。然而,如果发起者没能将工作项充分地分散在亲和性 +作用域间,更高的局部性也可能带来更低的整体系统利用率。以下 +dm-crypt 的性能测试清楚地阐明了这一取舍。 + +测试运行在一个12核24线程、4个L3缓存的处理器(AMD Ryzen +9 3900x)上。为保持一致性,关闭CPU超频。 ``/dev/dm-0`` +是NVME SSD(三星 990 PRO)上创建,用 ``cryptsetup`` +以默认配置打开的一个 dm-crypt 设备。 + + +场景 1: 机器上遍布着有充足的发起者和工作量 +------------------------------------------ + +使用命令::: + + $ fio --filename=/dev/dm-0 --direct=1 --rw=randrw --bs=32k --ioengine=libaio \ + --iodepth=64 --runtime=60 --numjobs=24 --time_based --group_reporting \ + --name=iops-test-job --verify=sha512 + +这里有24个发起者,每个同时发起64个IO。 ``--verify=sha512`` +使得 ``fio`` 每次生成和读回内容受发起者和 ``kcryptd`` +间的执行局部性影响。下面是基于不同 ``kcryptd`` 的亲和性 +作用域设置,各经过五次测试得到的读取带宽和CPU利用率数据。 + +.. list-table:: + :widths: 16 20 20 + :header-rows: 1 + + * - 亲和性 + - 带宽 (MiBps) + - CPU利用率(%) + + * - system + - 1159.40 ±1.34 + - 99.31 ±0.02 + + * - cache + - 1166.40 ±0.89 + - 99.34 ±0.01 + + * - cache (strict) + - 1166.00 ±0.71 + - 99.35 ±0.01 + +在系统中分布着足够多发起者的情况下,不论严格与否,“cache” +没有表现得更差。三种配置均使整个机器达到饱和,但由于提高了 +局部性,缓存相关的两种有0.6%的(带宽)提升。 + + +场景 2: 更少发起者,足以达到饱和的工作量 +---------------------------------------- + +使用命令::: + + $ fio --filename=/dev/dm-0 --direct=1 --rw=randrw --bs=32k \ + --ioengine=libaio --iodepth=64 --runtime=60 --numjobs=8 \ + --time_based --group_reporting --name=iops-test-job --verify=sha512 + +与上一个场景唯一的区别是 ``--numjobs=8``。 发起者数量 +减少为三分之一,但仍然有足以使系统达到饱和的工作总量。 + +.. list-table:: + :widths: 16 20 20 + :header-rows: 1 + + * - 亲和性 + - 带宽 (MiBps) + - CPU利用率(%) + + * - system + - 1155.40 ±0.89 + - 97.41 ±0.05 + + * - cache + - 1154.40 ±1.14 + - 96.15 ±0.09 + + * - cache (strict) + - 1112.00 ±4.64 + - 93.26 ±0.35 + +这里有超过使系统达到饱和所需的工作量。“system”和“cache” +都接近但并未使机器完全饱和。“cache”消耗更少的CPU但更高的 +效率使其得到和“system”相同的带宽。 + +八个发起者盘桓在四个L3缓存作用域间仍然允许“cache (strict)” +几乎使机器饱和,但缺少对工作的保持(不移到空闲处理器上) +开始带来3.7%的带宽损失。 + + +场景 3: 更少发起者,不充足的工作量 +---------------------------------- + +使用命令::: + + $ fio --filename=/dev/dm-0 --direct=1 --rw=randrw --bs=32k \ + --ioengine=libaio --iodepth=64 --runtime=60 --numjobs=4 \ + --time_based --group_reporting --name=iops-test-job --verify=sha512 + +再次,唯一的区别是 ``--numjobs=4``。由于发起者减少到四个, +现在没有足以使系统饱和的工作量,带宽变得依赖于完成时延。 + +.. list-table:: + :widths: 16 20 20 + :header-rows: 1 + + * - 亲和性 + - 带宽 (MiBps) + - CPU利用率(%) + + * - system + - 993.60 ±1.82 + - 75.49 ±0.06 + + * - cache + - 973.40 ±1.52 + - 74.90 ±0.07 + + * - cache (strict) + - 828.20 ±4.49 + - 66.84 ±0.29 + +现在,局部性和利用率间的权衡更清晰了。“cache”展示出相比 +“system”2%的带宽损失,而“cache (strict)”跌到20%。 + + +结论和建议 +---------- + +在以上试验中,虽然一致并且也明显,但“cache”亲和性作用域 +相比“system”的性能优势并不大。然而,这影响是依赖于作用域 +间距离的,在更复杂的处理器拓扑下可能有更明显的影响。 + +虽然这些情形下缺少工作保持是有坏处的,但比“cache (strict)” +好多了,而且最大化工作队列利用率的需求也并不常见。因此, +“cache”是非绑定池的默认亲和性作用域。 + +* 由于不存在一个适用于大多数场景的选择,对于可能需要消耗 + 大量CPU的工作队列,建议通过 ``apply_workqueue_attrs()`` + 进行(专门)配置,并考虑是否启用 ``WQ_SYSFS``。 + +* 设置了严格“cpu”亲和性作用域的非绑定工作队列,它的行为与 + ``WQ_CPU_INTENSIVE`` 每CPU工作队列一样。后者没有真正 + 优势,而前者提供了大幅度的灵活性。 + +* 亲和性作用域是从Linux v6.5起引入的。为了模拟旧版行为, + 可以使用严格的“numa”亲和性作用域。 + +* 不严格的亲和性作用域中,缺少工作保持大概缘于调度器。内核 + 为什么没能维护好大多数场景下的工作保持,把事情作对,还没有 + 理论上的解释。因此,未来调度器的改进可能会使我们不再需要 + 这些调节项。 + + +检查配置 +======== + +使用 tools/workqueue/wq_dump.py(drgn脚本) 来检查未 +绑定CPU的亲和性配置,工作者池,以及工作队列如何映射到池上: :: + + $ tools/workqueue/wq_dump.py + Affinity Scopes + =============== + wq_unbound_cpumask=0000000f + + CPU + nr_pods 4 + pod_cpus [0]=00000001 [1]=00000002 [2]=00000004 [3]=00000008 + pod_node [0]=0 [1]=0 [2]=1 [3]=1 + cpu_pod [0]=0 [1]=1 [2]=2 [3]=3 + + SMT + nr_pods 4 + pod_cpus [0]=00000001 [1]=00000002 [2]=00000004 [3]=00000008 + pod_node [0]=0 [1]=0 [2]=1 [3]=1 + cpu_pod [0]=0 [1]=1 [2]=2 [3]=3 + + CACHE (default) + nr_pods 2 + pod_cpus [0]=00000003 [1]=0000000c + pod_node [0]=0 [1]=1 + cpu_pod [0]=0 [1]=0 [2]=1 [3]=1 + + NUMA + nr_pods 2 + pod_cpus [0]=00000003 [1]=0000000c + pod_node [0]=0 [1]=1 + cpu_pod [0]=0 [1]=0 [2]=1 [3]=1 + + SYSTEM + nr_pods 1 + pod_cpus [0]=0000000f + pod_node [0]=-1 + cpu_pod [0]=0 [1]=0 [2]=0 [3]=0 + + Worker Pools + ============ + pool[00] ref= 1 nice= 0 idle/workers= 4/ 4 cpu= 0 + pool[01] ref= 1 nice=-20 idle/workers= 2/ 2 cpu= 0 + pool[02] ref= 1 nice= 0 idle/workers= 4/ 4 cpu= 1 + pool[03] ref= 1 nice=-20 idle/workers= 2/ 2 cpu= 1 + pool[04] ref= 1 nice= 0 idle/workers= 4/ 4 cpu= 2 + pool[05] ref= 1 nice=-20 idle/workers= 2/ 2 cpu= 2 + pool[06] ref= 1 nice= 0 idle/workers= 3/ 3 cpu= 3 + pool[07] ref= 1 nice=-20 idle/workers= 2/ 2 cpu= 3 + pool[08] ref=42 nice= 0 idle/workers= 6/ 6 cpus=0000000f + pool[09] ref=28 nice= 0 idle/workers= 3/ 3 cpus=00000003 + pool[10] ref=28 nice= 0 idle/workers= 17/ 17 cpus=0000000c + pool[11] ref= 1 nice=-20 idle/workers= 1/ 1 cpus=0000000f + pool[12] ref= 2 nice=-20 idle/workers= 1/ 1 cpus=00000003 + pool[13] ref= 2 nice=-20 idle/workers= 1/ 1 cpus=0000000c + + Workqueue CPU -> pool + ===================== + [ workqueue \ CPU 0 1 2 3 dfl] + events percpu 0 2 4 6 + events_highpri percpu 1 3 5 7 + events_long percpu 0 2 4 6 + events_unbound unbound 9 9 10 10 8 + events_freezable percpu 0 2 4 6 + events_power_efficient percpu 0 2 4 6 + events_freezable_power_ percpu 0 2 4 6 + rcu_gp percpu 0 2 4 6 + rcu_par_gp percpu 0 2 4 6 + slub_flushwq percpu 0 2 4 6 + netns ordered 8 8 8 8 8 + ... + +参见命令的帮助消息以获取更多信息。 + + +监视 +==== + +使用 tools/workqueue/wq_monitor.py 来监视工作队列的运行: :: + + $ tools/workqueue/wq_monitor.py events + total infl CPUtime CPUhog CMW/RPR mayday rescued + events 18545 0 6.1 0 5 - - + events_highpri 8 0 0.0 0 0 - - + events_long 3 0 0.0 0 0 - - + events_unbound 38306 0 0.1 - 7 - - + events_freezable 0 0 0.0 0 0 - - + events_power_efficient 29598 0 0.2 0 0 - - + events_freezable_power_ 10 0 0.0 0 0 - - + sock_diag_events 0 0 0.0 0 0 - - + + total infl CPUtime CPUhog CMW/RPR mayday rescued + events 18548 0 6.1 0 5 - - + events_highpri 8 0 0.0 0 0 - - + events_long 3 0 0.0 0 0 - - + events_unbound 38322 0 0.1 - 7 - - + events_freezable 0 0 0.0 0 0 - - + events_power_efficient 29603 0 0.2 0 0 - - + events_freezable_power_ 10 0 0.0 0 0 - - + sock_diag_events 0 0 0.0 0 0 - - + + ... + +参见命令的帮助消息以获取更多信息。 + + 调试 ==== @@ -330,7 +675,6 @@ And with cmwq with ``@max_active`` >= 3, :: 工作队列保证,如果在工作项排队后满足以下条件,则工作项不能重入: - 1. 工作函数没有被改变。 2. 没有人将该工作项排到另一个工作队列中。 3. 该工作项尚未被重新启动。 From ace323f80b9bc6734289a4e8a77938a3ce964c7d Mon Sep 17 00:00:00 2001 From: Maksim Kiselev Date: Tue, 2 Apr 2024 12:35:39 +0300 Subject: [PATCH 057/606] mmc: sdhci-of-dwcmshc: th1520: Increase tuning loop count to 128 Fix SD card tuning error by increasing tuning loop count from 40(MAX_TUNING_LOOP) to 128. For some reason the tuning algorithm requires to move through all the taps of delay line even if the THRESHOLD_MODE (bit 2 in AT_CTRL_R) is used instead of the LARGEST_WIN_MODE. Tested-by: Drew Fustini Tested-by: Xi Ruoyao Signed-off-by: Maksim Kiselev Acked-by: Adrian Hunter Fixes: 43658a542ebf ("mmc: sdhci-of-dwcmshc: Add support for T-Head TH1520") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240402093539.184287-1-bigunclemax@gmail.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-of-dwcmshc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mmc/host/sdhci-of-dwcmshc.c b/drivers/mmc/host/sdhci-of-dwcmshc.c index 1d8f5a76096ae..f2e4a93ed1d61 100644 --- a/drivers/mmc/host/sdhci-of-dwcmshc.c +++ b/drivers/mmc/host/sdhci-of-dwcmshc.c @@ -626,6 +626,7 @@ static int th1520_execute_tuning(struct sdhci_host *host, u32 opcode) /* perform tuning */ sdhci_start_tuning(host); + host->tuning_loop_count = 128; host->tuning_err = __sdhci_execute_tuning(host, opcode); if (host->tuning_err) { /* disable auto-tuning upon tuning error */ From 11cca8ccf2c3643d002e7b421acfdc847a627e9f Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Thu, 4 Apr 2024 09:58:13 +0200 Subject: [PATCH 058/606] tty: xtensa/iss: Use min() to fix Coccinelle warning Inline strlen() and use min() to fix the following Coccinelle/coccicheck warning reported by minmax.cocci: WARNING opportunity for min() Signed-off-by: Thorsten Blum Message-Id: <20240404075811.6936-3-thorsten.blum@toblux.com> Reviewed-by: Jiri Slaby Signed-off-by: Max Filippov --- arch/xtensa/platforms/iss/console.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/xtensa/platforms/iss/console.c b/arch/xtensa/platforms/iss/console.c index 8896e691c051e..abec44b687dff 100644 --- a/arch/xtensa/platforms/iss/console.c +++ b/arch/xtensa/platforms/iss/console.c @@ -166,10 +166,8 @@ late_initcall(rs_init); static void iss_console_write(struct console *co, const char *s, unsigned count) { - if (s && *s != 0) { - int len = strlen(s); - simc_write(1, s, count < len ? count : len); - } + if (s && *s != 0) + simc_write(1, s, min(count, strlen(s))); } static struct tty_driver* iss_console_device(struct console *c, int *index) From 49ceae68a0df9a92617a61e9ce8a0efcf6419585 Mon Sep 17 00:00:00 2001 From: Laine Taffin Altman Date: Wed, 3 Apr 2024 14:06:59 -0700 Subject: [PATCH 059/606] rust: init: remove impl Zeroable for Infallible In Rust, producing an invalid value of any type is immediate undefined behavior (UB); this includes via zeroing memory. Therefore, since an uninhabited type has no valid values, producing any values at all for it is UB. The Rust standard library type `core::convert::Infallible` is uninhabited, by virtue of having been declared as an enum with no cases, which always produces uninhabited types in Rust. The current kernel code allows this UB to be triggered, for example by code like `Box::::init(kernel::init::zeroed())`. Thus, remove the implementation of `Zeroable` for `Infallible`, thereby avoiding the unsoundness (potential for future UB). Cc: stable@vger.kernel.org Fixes: 38cde0bd7b67 ("rust: init: add `Zeroable` trait and `init::zeroed` function") Closes: https://github.com/Rust-for-Linux/pinned-init/pull/13 Signed-off-by: Laine Taffin Altman Reviewed-by: Alice Ryhl Reviewed-by: Boqun Feng Reviewed-by: Benno Lossin Link: https://lore.kernel.org/r/CA160A4E-561E-4918-837E-3DCEBA74F808@me.com [ Reformatted the comment slightly. ] Signed-off-by: Miguel Ojeda --- rust/kernel/init.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/rust/kernel/init.rs b/rust/kernel/init.rs index 424257284d167..09004b56fb65c 100644 --- a/rust/kernel/init.rs +++ b/rust/kernel/init.rs @@ -1292,8 +1292,15 @@ impl_zeroable! { i8, i16, i32, i64, i128, isize, f32, f64, - // SAFETY: These are ZSTs, there is nothing to zero. - {} PhantomData, core::marker::PhantomPinned, Infallible, (), + // Note: do not add uninhabited types (such as `!` or `core::convert::Infallible`) to this list; + // creating an instance of an uninhabited type is immediate undefined behavior. For more on + // uninhabited/empty types, consult The Rustonomicon: + // . The Rust Reference + // also has information on undefined behavior: + // . + // + // SAFETY: These are inhabited ZSTs; there is nothing to zero and a valid value exists. + {} PhantomData, core::marker::PhantomPinned, (), // SAFETY: Type is allowed to take any value, including all zeros. {} MaybeUninit, From 08f66a8edd08f6f7cfa769c81634b29a2b123908 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Wed, 27 Mar 2024 17:13:33 +0800 Subject: [PATCH 060/606] pinctrl: mediatek: paris: Fix PIN_CONFIG_INPUT_SCHMITT_ENABLE readback In the generic pin config library, readback of some options are handled differently compared to the setting of those options: the argument value is used to convey enable/disable of an option in the set path, but success or -EINVAL is used to convey if an option is enabled or disabled in the debugfs readback path. PIN_CONFIG_INPUT_SCHMITT_ENABLE is one such option. Fix the readback of the option in the mediatek-paris library, so that the debugfs dump is not showing "input schmitt enabled" for pins that don't have it enabled. Fixes: 1bea6afbc842 ("pinctrl: mediatek: Refine mtk_pinconf_get()") Signed-off-by: Chen-Yu Tsai Reviewed-by: AngeloGioacchino Del Regno Message-ID: <20240327091336.3434141-2-wenst@chromium.org> Signed-off-by: Linus Walleij --- drivers/pinctrl/mediatek/pinctrl-paris.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pinctrl/mediatek/pinctrl-paris.c b/drivers/pinctrl/mediatek/pinctrl-paris.c index b6bc31abd2b06..9353f78a52f05 100644 --- a/drivers/pinctrl/mediatek/pinctrl-paris.c +++ b/drivers/pinctrl/mediatek/pinctrl-paris.c @@ -193,6 +193,8 @@ static int mtk_pinconf_get(struct pinctrl_dev *pctldev, } err = mtk_hw_get_value(hw, desc, PINCTRL_PIN_REG_SMT, &ret); + if (!ret) + err = -EINVAL; break; case PIN_CONFIG_DRIVE_STRENGTH: if (!hw->soc->drive_get) From c5d3b64c568a344e998830e0e94a7c04e372f89b Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Wed, 27 Mar 2024 17:13:34 +0800 Subject: [PATCH 061/606] pinctrl: mediatek: paris: Rework support for PIN_CONFIG_{INPUT,OUTPUT}_ENABLE There is a misinterpretation of some of the PIN_CONFIG_* options in this driver library. PIN_CONFIG_OUTPUT_ENABLE should refer to a buffer or switch in the output direction of the electrical path. The MediaTek hardware does not have such a thing. The driver incorrectly maps this option to the GPIO function's direction. Likewise, PIN_CONFIG_INPUT_ENABLE should refer to a buffer or switch in the input direction. The hardware does have such a mechanism, and is mapped to the IES bit. The driver however sets the direction in addition to the IES bit, which is incorrect. On readback, the IES bit isn't even considered. Ironically, the driver does not support readback for PIN_CONFIG_OUTPUT, while its readback of PIN_CONFIG_{INPUT,OUTPUT}_ENABLE is what it should be doing for PIN_CONFIG_OUTPUT. Rework support for these three options, so that PIN_CONFIG_OUTPUT_ENABLE is completely removed, PIN_CONFIG_INPUT_ENABLE is only linked to the IES bit, and PIN_CONFIG_OUTPUT is linked to the GPIO function's direction and output level. Fixes: 805250982bb5 ("pinctrl: mediatek: add pinctrl-paris that implements the vendor dt-bindings") Signed-off-by: Chen-Yu Tsai Reviewed-by: AngeloGioacchino Del Regno Message-ID: <20240327091336.3434141-3-wenst@chromium.org> Signed-off-by: Linus Walleij --- drivers/pinctrl/mediatek/pinctrl-paris.c | 38 +++++++----------------- 1 file changed, 11 insertions(+), 27 deletions(-) diff --git a/drivers/pinctrl/mediatek/pinctrl-paris.c b/drivers/pinctrl/mediatek/pinctrl-paris.c index 9353f78a52f05..b19bc391705ee 100644 --- a/drivers/pinctrl/mediatek/pinctrl-paris.c +++ b/drivers/pinctrl/mediatek/pinctrl-paris.c @@ -165,20 +165,21 @@ static int mtk_pinconf_get(struct pinctrl_dev *pctldev, err = mtk_hw_get_value(hw, desc, PINCTRL_PIN_REG_SR, &ret); break; case PIN_CONFIG_INPUT_ENABLE: - case PIN_CONFIG_OUTPUT_ENABLE: + err = mtk_hw_get_value(hw, desc, PINCTRL_PIN_REG_IES, &ret); + if (!ret) + err = -EINVAL; + break; + case PIN_CONFIG_OUTPUT: err = mtk_hw_get_value(hw, desc, PINCTRL_PIN_REG_DIR, &ret); if (err) break; - /* CONFIG Current direction return value - * ------------- ----------------- ---------------------- - * OUTPUT_ENABLE output 1 (= HW value) - * input 0 (= HW value) - * INPUT_ENABLE output 0 (= reverse HW value) - * input 1 (= reverse HW value) - */ - if (param == PIN_CONFIG_INPUT_ENABLE) - ret = !ret; + if (!ret) { + err = -EINVAL; + break; + } + + err = mtk_hw_get_value(hw, desc, PINCTRL_PIN_REG_DO, &ret); break; case PIN_CONFIG_INPUT_SCHMITT_ENABLE: err = mtk_hw_get_value(hw, desc, PINCTRL_PIN_REG_DIR, &ret); @@ -283,26 +284,9 @@ static int mtk_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin, break; err = hw->soc->bias_set_combo(hw, desc, 0, arg); break; - case PIN_CONFIG_OUTPUT_ENABLE: - err = mtk_hw_set_value(hw, desc, PINCTRL_PIN_REG_SMT, - MTK_DISABLE); - /* Keep set direction to consider the case that a GPIO pin - * does not have SMT control - */ - if (err != -ENOTSUPP) - break; - - err = mtk_hw_set_value(hw, desc, PINCTRL_PIN_REG_DIR, - MTK_OUTPUT); - break; case PIN_CONFIG_INPUT_ENABLE: /* regard all non-zero value as enable */ err = mtk_hw_set_value(hw, desc, PINCTRL_PIN_REG_IES, !!arg); - if (err) - break; - - err = mtk_hw_set_value(hw, desc, PINCTRL_PIN_REG_DIR, - MTK_INPUT); break; case PIN_CONFIG_SLEW_RATE: /* regard all non-zero value as enable */ From 8e088a20dbe33919695a8082c0b32deb62d23b4a Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Tue, 2 Apr 2024 14:41:38 -0400 Subject: [PATCH 062/606] SUNRPC: add a missing rpc_stat for TCP TLS Commit 1548036ef120 ("nfs: make the rpc_stat per net namespace") added functionality to specify rpc_stats function but missed adding it to the TCP TLS functionality. As the result, mounting with xprtsec=tls lead to the following kernel oops. [ 128.984192] Unable to handle kernel NULL pointer dereference at virtual address 000000000000001c [ 128.985058] Mem abort info: [ 128.985372] ESR = 0x0000000096000004 [ 128.985709] EC = 0x25: DABT (current EL), IL = 32 bits [ 128.986176] SET = 0, FnV = 0 [ 128.986521] EA = 0, S1PTW = 0 [ 128.986804] FSC = 0x04: level 0 translation fault [ 128.987229] Data abort info: [ 128.987597] ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000 [ 128.988169] CM = 0, WnR = 0, TnD = 0, TagAccess = 0 [ 128.988811] GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 [ 128.989302] user pgtable: 4k pages, 48-bit VAs, pgdp=0000000106c84000 [ 128.990048] [000000000000001c] pgd=0000000000000000, p4d=0000000000000000 [ 128.990736] Internal error: Oops: 0000000096000004 [#1] SMP [ 128.991168] Modules linked in: nfs_layout_nfsv41_files rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace netfs uinput dm_mod nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 rfkill ip_set nf_tables nfnetlink qrtr vsock_loopback vmw_vsock_virtio_transport_common vmw_vsock_vmci_transport vsock sunrpc vfat fat uvcvideo videobuf2_vmalloc videobuf2_memops uvc videobuf2_v4l2 videodev videobuf2_common mc vmw_vmci xfs libcrc32c e1000e crct10dif_ce ghash_ce sha2_ce vmwgfx nvme sha256_arm64 nvme_core sr_mod cdrom sha1_ce drm_ttm_helper ttm drm_kms_helper drm sg fuse [ 128.996466] CPU: 0 PID: 179 Comm: kworker/u4:26 Kdump: loaded Not tainted 6.8.0-rc6+ #12 [ 128.997226] Hardware name: VMware, Inc. VMware20,1/VBSA, BIOS VMW201.00V.21805430.BA64.2305221830 05/22/2023 [ 128.998084] Workqueue: xprtiod xs_tcp_tls_setup_socket [sunrpc] [ 128.998701] pstate: 81400005 (Nzcv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--) [ 128.999384] pc : call_start+0x74/0x138 [sunrpc] [ 128.999809] lr : __rpc_execute+0xb8/0x3e0 [sunrpc] [ 129.000244] sp : ffff8000832b3a00 [ 129.000508] x29: ffff8000832b3a00 x28: ffff800081ac79c0 x27: ffff800081ac7000 [ 129.001111] x26: 0000000004248060 x25: 0000000000000000 x24: ffff800081596008 [ 129.001757] x23: ffff80007b087240 x22: ffff00009a509d30 x21: 0000000000000000 [ 129.002345] x20: ffff000090075600 x19: ffff00009a509d00 x18: ffffffffffffffff [ 129.002912] x17: 733d4d4554535953 x16: 42555300312d746e x15: ffff8000832b3a88 [ 129.003464] x14: ffffffffffffffff x13: ffff8000832b3a7d x12: 0000000000000008 [ 129.004021] x11: 0101010101010101 x10: ffff8000150cb560 x9 : ffff80007b087c00 [ 129.004577] x8 : ffff00009a509de0 x7 : 0000000000000000 x6 : 00000000be8c4ee3 [ 129.005026] x5 : 0000000000000000 x4 : 0000000000000000 x3 : ffff000094d56680 [ 129.005425] x2 : ffff80007b0637f8 x1 : ffff000090075600 x0 : ffff00009a509d00 [ 129.005824] Call trace: [ 129.005967] call_start+0x74/0x138 [sunrpc] [ 129.006233] __rpc_execute+0xb8/0x3e0 [sunrpc] [ 129.006506] rpc_execute+0x160/0x1d8 [sunrpc] [ 129.006778] rpc_run_task+0x148/0x1f8 [sunrpc] [ 129.007204] tls_probe+0x80/0xd0 [sunrpc] [ 129.007460] rpc_ping+0x28/0x80 [sunrpc] [ 129.007715] rpc_create_xprt+0x134/0x1a0 [sunrpc] [ 129.007999] rpc_create+0x128/0x2a0 [sunrpc] [ 129.008264] xs_tcp_tls_setup_socket+0xdc/0x508 [sunrpc] [ 129.008583] process_one_work+0x174/0x3c8 [ 129.008813] worker_thread+0x2c8/0x3e0 [ 129.009033] kthread+0x100/0x110 [ 129.009225] ret_from_fork+0x10/0x20 [ 129.009432] Code: f0ffffc2 911fe042 aa1403e1 aa1303e0 (b9401c83) Fixes: 1548036ef120 ("nfs: make the rpc_stat per net namespace") Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index bb81050c870e0..ea7bf1b4172db 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2664,6 +2664,7 @@ static void xs_tcp_tls_setup_socket(struct work_struct *work) .xprtsec = { .policy = RPC_XPRTSEC_NONE, }, + .stats = upper_clnt->cl_stats, }; unsigned int pflags = current->flags; struct rpc_clnt *lower_clnt; From 24457f1be29f1e7042e50a7749f5c2dde8c433c8 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 4 Apr 2024 15:12:00 -0700 Subject: [PATCH 063/606] nfs: Handle error of rpc_proc_register() in nfs_net_init(). syzkaller reported a warning [0] triggered while destroying immature netns. rpc_proc_register() was called in init_nfs_fs(), but its error has been ignored since at least the initial commit 1da177e4c3f4 ("Linux-2.6.12-rc2"). Recently, commit d47151b79e32 ("nfs: expose /proc/net/sunrpc/nfs in net namespaces") converted the procfs to per-netns and made the problem more visible. Even when rpc_proc_register() fails, nfs_net_init() could succeed, and thus nfs_net_exit() will be called while destroying the netns. Then, remove_proc_entry() will be called for non-existing proc directory and trigger the warning below. Let's handle the error of rpc_proc_register() properly in nfs_net_init(). [0]: name 'nfs' WARNING: CPU: 1 PID: 1710 at fs/proc/generic.c:711 remove_proc_entry+0x1bb/0x2d0 fs/proc/generic.c:711 Modules linked in: CPU: 1 PID: 1710 Comm: syz-executor.2 Not tainted 6.8.0-12822-gcd51db110a7e #12 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 RIP: 0010:remove_proc_entry+0x1bb/0x2d0 fs/proc/generic.c:711 Code: 41 5d 41 5e c3 e8 85 09 b5 ff 48 c7 c7 88 58 64 86 e8 09 0e 71 02 e8 74 09 b5 ff 4c 89 e6 48 c7 c7 de 1b 80 84 e8 c5 ad 97 ff <0f> 0b eb b1 e8 5c 09 b5 ff 48 c7 c7 88 58 64 86 e8 e0 0d 71 02 eb RSP: 0018:ffffc9000c6d7ce0 EFLAGS: 00010286 RAX: 0000000000000000 RBX: ffff8880422b8b00 RCX: ffffffff8110503c RDX: ffff888030652f00 RSI: ffffffff81105045 RDI: 0000000000000001 RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000001 R11: ffffffff81bb62cb R12: ffffffff84807ffc R13: ffff88804ad6fcc0 R14: ffffffff84807ffc R15: ffffffff85741ff8 FS: 00007f30cfba8640(0000) GS:ffff88807dd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ff51afe8000 CR3: 000000005a60a005 CR4: 0000000000770ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: rpc_proc_unregister+0x64/0x70 net/sunrpc/stats.c:310 nfs_net_exit+0x1c/0x30 fs/nfs/inode.c:2438 ops_exit_list+0x62/0xb0 net/core/net_namespace.c:170 setup_net+0x46c/0x660 net/core/net_namespace.c:372 copy_net_ns+0x244/0x590 net/core/net_namespace.c:505 create_new_namespaces+0x2ed/0x770 kernel/nsproxy.c:110 unshare_nsproxy_namespaces+0xae/0x160 kernel/nsproxy.c:228 ksys_unshare+0x342/0x760 kernel/fork.c:3322 __do_sys_unshare kernel/fork.c:3393 [inline] __se_sys_unshare kernel/fork.c:3391 [inline] __x64_sys_unshare+0x1f/0x30 kernel/fork.c:3391 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0x4f/0x110 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x46/0x4e RIP: 0033:0x7f30d0febe5d Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 73 9f 1b 00 f7 d8 64 89 01 48 RSP: 002b:00007f30cfba7cc8 EFLAGS: 00000246 ORIG_RAX: 0000000000000110 RAX: ffffffffffffffda RBX: 00000000004bbf80 RCX: 00007f30d0febe5d RDX: 0000000000000000 RSI: 0000000000000000 RDI: 000000006c020600 RBP: 00000000004bbf80 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000002 R13: 000000000000000b R14: 00007f30d104c530 R15: 0000000000000000 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzkaller Signed-off-by: Kuniyuki Iwashima Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 91b4d811958a4..6fe4b47c39287 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -2429,7 +2429,12 @@ static int nfs_net_init(struct net *net) struct nfs_net *nn = net_generic(net, nfs_net_id); nfs_clients_init(net); - rpc_proc_register(net, &nn->rpcstats); + + if (!rpc_proc_register(net, &nn->rpcstats)) { + nfs_clients_exit(net); + return -ENOMEM; + } + return nfs_fs_proc_net_init(net); } From 3b449bfd2ff6c5d3ceecfcb18528ff8e1b4ac2fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Tue, 13 Feb 2024 06:37:37 +0100 Subject: [PATCH 064/606] arm64: dts: mediatek: mt7986: drop invalid properties from ethsys MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mediatek ethsys controller / syscon binding doesn't allow any subnodes so "#address-cells" and "#size-cells" are redundant (actually: disallowed). This fixes: arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dtb: syscon@15000000: '#address-cells', '#size-cells' do not match any of the regexes: 'pinctrl-[0-9]+' from schema $id: http://devicetree.org/schemas/clock/mediatek,ethsys.yaml# Fixes: 1f9986b258c2 ("arm64: dts: mediatek: add clock support for mt7986a") Cc: Sam Shih Signed-off-by: Rafał Miłecki Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240213053739.14387-1-zajec5@gmail.com Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt7986a.dtsi | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi index b3f416b9a7a4d..228e02954e852 100644 --- a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi @@ -492,8 +492,6 @@ compatible = "mediatek,mt7986-ethsys", "syscon"; reg = <0 0x15000000 0 0x1000>; - #address-cells = <1>; - #size-cells = <1>; #clock-cells = <1>; #reset-cells = <1>; }; From 9bd88afc94c3570289a0f1c696578b3e1f4e3169 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Tue, 13 Feb 2024 06:37:38 +0100 Subject: [PATCH 065/606] arm64: dts: mediatek: mt7986: drop "#reset-cells" from Ethernet controller MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ethernet block doesn't include or act as a reset controller. Documentation also doesn't document "#reset-cells" for it. This fixes: arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dtb: ethernet@15100000: Unevaluated properties are not allowed ('#reset-cells' was unexpected) from schema $id: http://devicetree.org/schemas/net/mediatek,net.yaml# Fixes: 082ff36bd5c0 ("arm64: dts: mediatek: mt7986: introduce ethernet nodes") Cc: Lorenzo Bianconi Signed-off-by: Rafał Miłecki Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240213053739.14387-2-zajec5@gmail.com Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt7986a.dtsi | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi index 228e02954e852..f3a2a89fada41 100644 --- a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi @@ -554,7 +554,6 @@ <&topckgen CLK_TOP_SGM_325M_SEL>; assigned-clock-parents = <&apmixedsys CLK_APMIXED_NET2PLL>, <&apmixedsys CLK_APMIXED_SGMPLL>; - #reset-cells = <1>; #address-cells = <1>; #size-cells = <0>; mediatek,ethsys = <ðsys>; From 970f8b01bd7719a22e577ba6c78e27f9ccf22783 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Tue, 13 Feb 2024 06:37:39 +0100 Subject: [PATCH 066/606] arm64: dts: mediatek: mt7986: drop invalid thermal block clock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thermal block uses only two clocks. Its binding doesn't document or allow "adc_32k". Also Linux driver doesn't support it. It has been additionally verified by Angelo by his detailed research on MT7981 / MT7986 clocks (thanks!). This fixes: arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dtb: thermal@1100c800: clocks: [[4, 27], [4, 44], [4, 45]] is too long from schema $id: http://devicetree.org/schemas/thermal/mediatek,thermal.yaml# arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dtb: thermal@1100c800: clock-names: ['therm', 'auxadc', 'adc_32k'] is too long from schema $id: http://devicetree.org/schemas/thermal/mediatek,thermal.yaml# Fixes: 0a9615d58d04 ("arm64: dts: mt7986: add thermal and efuse") Cc: Daniel Golle Link: https://lore.kernel.org/linux-devicetree/17d143aa-576e-4d67-a0ea-b79f3518b81c@collabora.com/ Signed-off-by: Rafał Miłecki Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240213053739.14387-3-zajec5@gmail.com Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt7986a.dtsi | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi index f3a2a89fada41..559990dcd1d17 100644 --- a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi @@ -332,9 +332,8 @@ reg = <0 0x1100c800 0 0x800>; interrupts = ; clocks = <&infracfg CLK_INFRA_THERM_CK>, - <&infracfg CLK_INFRA_ADC_26M_CK>, - <&infracfg CLK_INFRA_ADC_FRC_CK>; - clock-names = "therm", "auxadc", "adc_32k"; + <&infracfg CLK_INFRA_ADC_26M_CK>; + clock-names = "therm", "auxadc"; nvmem-cells = <&thermal_calibration>; nvmem-cell-names = "calibration-data"; #thermal-sensor-cells = <1>; From f8c65a5e4560781f2ea175d8f26cd75ac98e8d78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Tue, 13 Feb 2024 07:14:59 +0100 Subject: [PATCH 067/606] arm64: dts: mediatek: mt7986: prefix BPI-R3 cooling maps with "map-" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes: arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dtb: thermal-zones: cpu-thermal:cooling-maps: 'cpu-active-high', 'cpu-active-low', 'cpu-active-med' do not match any of the regexes: '^map[-a-zA-Z0-9]*$', 'pinctrl-[0-9]+' from schema $id: http://devicetree.org/schemas/thermal/thermal-zones.yaml# Fixes: c26f779a2295 ("arm64: dts: mt7986: add pwm-fan and cooling-maps to BPI-R3 dts") Cc: Daniel Golle Signed-off-by: Rafał Miłecki Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240213061459.17917-1-zajec5@gmail.com Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts b/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts index e04b1c0c0ebbf..ed79ad1ae8716 100644 --- a/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts +++ b/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts @@ -146,19 +146,19 @@ &cpu_thermal { cooling-maps { - cpu-active-high { + map-cpu-active-high { /* active: set fan to cooling level 2 */ cooling-device = <&fan 2 2>; trip = <&cpu_trip_active_high>; }; - cpu-active-med { + map-cpu-active-med { /* active: set fan to cooling level 1 */ cooling-device = <&fan 1 1>; trip = <&cpu_trip_active_med>; }; - cpu-active-low { + map-cpu-active-low { /* active: set fan to cooling level 0 */ cooling-device = <&fan 0 0>; trip = <&cpu_trip_active_low>; From 3baac7291effb501c4d52df7019ebf52011e5772 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Fri, 1 Mar 2024 08:47:41 +0100 Subject: [PATCH 068/606] arm64: dts: mediatek: mt2712: fix validation errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. Fixup infracfg clock controller binding It also acts as reset controller so #reset-cells is required. 2. Use -pins suffix for pinctrl This fixes: arch/arm64/boot/dts/mediatek/mt2712-evb.dtb: syscon@10001000: '#reset-cells' is a required property from schema $id: http://devicetree.org/schemas/arm/mediatek/mediatek,infracfg.yaml# arch/arm64/boot/dts/mediatek/mt2712-evb.dtb: pinctrl@1000b000: 'eth_default', 'eth_sleep', 'usb0_iddig', 'usb1_iddig' do not match any of the regexes: 'pinctrl-[0-9]+', 'pins$' from schema $id: http://devicetree.org/schemas/pinctrl/mediatek,mt65xx-pinctrl.yaml# Signed-off-by: Rafał Miłecki Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240301074741.8362-1-zajec5@gmail.com [Angelo: Added Fixes tags] Fixes: 5d4839709c8e ("arm64: dts: mt2712: Add clock controller device nodes") Fixes: 1724f4cc5133 ("arm64: dts: Add USB3 related nodes for MT2712") Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt2712-evb.dts | 8 ++++---- arch/arm64/boot/dts/mediatek/mt2712e.dtsi | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt2712-evb.dts b/arch/arm64/boot/dts/mediatek/mt2712-evb.dts index 0c38f7b517637..234e3b23d7a8d 100644 --- a/arch/arm64/boot/dts/mediatek/mt2712-evb.dts +++ b/arch/arm64/boot/dts/mediatek/mt2712-evb.dts @@ -129,7 +129,7 @@ }; &pio { - eth_default: eth_default { + eth_default: eth-default-pins { tx_pins { pinmux = , , @@ -156,7 +156,7 @@ }; }; - eth_sleep: eth_sleep { + eth_sleep: eth-sleep-pins { tx_pins { pinmux = , , @@ -182,14 +182,14 @@ }; }; - usb0_id_pins_float: usb0_iddig { + usb0_id_pins_float: usb0-iddig-pins { pins_iddig { pinmux = ; bias-pull-up; }; }; - usb1_id_pins_float: usb1_iddig { + usb1_id_pins_float: usb1-iddig-pins { pins_iddig { pinmux = ; bias-pull-up; diff --git a/arch/arm64/boot/dts/mediatek/mt2712e.dtsi b/arch/arm64/boot/dts/mediatek/mt2712e.dtsi index 6d218caa198cf..082672efba0a3 100644 --- a/arch/arm64/boot/dts/mediatek/mt2712e.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt2712e.dtsi @@ -249,10 +249,11 @@ #clock-cells = <1>; }; - infracfg: syscon@10001000 { + infracfg: clock-controller@10001000 { compatible = "mediatek,mt2712-infracfg", "syscon"; reg = <0 0x10001000 0 0x1000>; #clock-cells = <1>; + #reset-cells = <1>; }; pericfg: syscon@10003000 { From 229087f6f1dc2d0c38feba805770f28529980ec0 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 4 Apr 2024 15:03:44 -0700 Subject: [PATCH 069/606] bpf, kconfig: Fix DEBUG_INFO_BTF_MODULES Kconfig definition Turns out that due to CONFIG_DEBUG_INFO_BTF_MODULES not having an explicitly specified "menu item name" in Kconfig, it's basically impossible to turn it off (see [0]). This patch fixes the issue by defining menu name for CONFIG_DEBUG_INFO_BTF_MODULES, which makes it actually adjustable and independent of CONFIG_DEBUG_INFO_BTF, in the sense that one can have DEBUG_INFO_BTF=y and DEBUG_INFO_BTF_MODULES=n. We still keep it as defaulting to Y, of course. Fixes: 5f9ae91f7c0d ("kbuild: Build kernel module BTFs if BTF is enabled and pahole supports it") Reported-by: Vincent Li Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/CAK3+h2xiFfzQ9UXf56nrRRP=p1+iUxGoEP5B+aq9MDT5jLXDSg@mail.gmail.com [0] Link: https://lore.kernel.org/bpf/20240404220344.3879270-1-andrii@kernel.org --- lib/Kconfig.debug | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index c63a5fbf1f1c2..291185f54ee4c 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -375,7 +375,7 @@ config DEBUG_INFO_SPLIT Incompatible with older versions of ccache. config DEBUG_INFO_BTF - bool "Generate BTF typeinfo" + bool "Generate BTF type information" depends on !DEBUG_INFO_SPLIT && !DEBUG_INFO_REDUCED depends on !GCC_PLUGIN_RANDSTRUCT || COMPILE_TEST depends on BPF_SYSCALL @@ -408,7 +408,8 @@ config PAHOLE_HAS_LANG_EXCLUDE using DEBUG_INFO_BTF_MODULES. config DEBUG_INFO_BTF_MODULES - def_bool y + bool "Generate BTF type information for kernel modules" + default y depends on DEBUG_INFO_BTF && MODULES && PAHOLE_HAS_SPLIT_BTF help Generate compact split BTF type information for kernel modules. From cfddb048040b598fa7df0e51ca361289fc7abf28 Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Fri, 5 Apr 2024 13:23:37 +0000 Subject: [PATCH 070/606] MAINTAINERS: Update email address for Puranjay Mohan I would like to use the kernel.org address for kernel development from now on. Signed-off-by: Puranjay Mohan Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20240405132337.71950-1-puranjay@kernel.org --- .mailmap | 1 + MAINTAINERS | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.mailmap b/.mailmap index 59c9a841bf714..b6930c2f68f47 100644 --- a/.mailmap +++ b/.mailmap @@ -496,6 +496,7 @@ Praveen BP Pradeep Kumar Chitrapu Prasad Sodagudi Punit Agrawal +Puranjay Mohan Qais Yousef Qais Yousef Quentin Monnet diff --git a/MAINTAINERS b/MAINTAINERS index 75381386fe4c3..0c08b44bde17b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -553,7 +553,7 @@ F: Documentation/devicetree/bindings/iio/accel/adi,adxl345.yaml F: drivers/input/misc/adxl34x.c ADXL355 THREE-AXIS DIGITAL ACCELEROMETER DRIVER -M: Puranjay Mohan +M: Puranjay Mohan L: linux-iio@vger.kernel.org S: Supported F: Documentation/devicetree/bindings/iio/accel/adi,adxl355.yaml @@ -3714,7 +3714,7 @@ F: drivers/iio/imu/bmi323/ BPF JIT for ARM M: Russell King -M: Puranjay Mohan +M: Puranjay Mohan L: bpf@vger.kernel.org S: Maintained F: arch/arm/net/ @@ -21934,7 +21934,7 @@ F: include/linux/soc/ti/ti_sci_inta_msi.h F: include/linux/soc/ti/ti_sci_protocol.h TEXAS INSTRUMENTS' TMP117 TEMPERATURE SENSOR DRIVER -M: Puranjay Mohan +M: Puranjay Mohan L: linux-iio@vger.kernel.org S: Supported F: Documentation/devicetree/bindings/iio/temperature/ti,tmp117.yaml From e4308bc22b9d46cf33165c9dfaeebcf29cd56f04 Mon Sep 17 00:00:00 2001 From: Mikhail Kobuk Date: Thu, 21 Mar 2024 19:47:30 +0300 Subject: [PATCH 071/606] phy: marvell: a3700-comphy: Fix out of bounds read There is an out of bounds read access of 'gbe_phy_init_fix[fix_idx].addr' every iteration after 'fix_idx' reaches 'ARRAY_SIZE(gbe_phy_init_fix)'. Make sure 'gbe_phy_init[addr]' is used when all elements of 'gbe_phy_init_fix' array are handled. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 934337080c6c ("phy: marvell: phy-mvebu-a3700-comphy: Add native kernel implementation") Signed-off-by: Mikhail Kobuk Reviewed-by: Miquel Raynal Link: https://lore.kernel.org/r/20240321164734.49273-1-m.kobuk@ispras.ru Signed-off-by: Vinod Koul --- drivers/phy/marvell/phy-mvebu-a3700-comphy.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/phy/marvell/phy-mvebu-a3700-comphy.c b/drivers/phy/marvell/phy-mvebu-a3700-comphy.c index 41162d7228c91..68710ad1ad70a 100644 --- a/drivers/phy/marvell/phy-mvebu-a3700-comphy.c +++ b/drivers/phy/marvell/phy-mvebu-a3700-comphy.c @@ -611,11 +611,12 @@ static void comphy_gbe_phy_init(struct mvebu_a3700_comphy_lane *lane, * comparison to 3.125 Gbps values. These register values are * stored in "gbe_phy_init_fix" array. */ - if (!is_1gbps && gbe_phy_init_fix[fix_idx].addr == addr) { + if (!is_1gbps && + fix_idx < ARRAY_SIZE(gbe_phy_init_fix) && + gbe_phy_init_fix[fix_idx].addr == addr) { /* Use new value */ val = gbe_phy_init_fix[fix_idx].value; - if (fix_idx < ARRAY_SIZE(gbe_phy_init_fix)) - fix_idx++; + fix_idx++; } else { val = gbe_phy_init[addr]; } From 627207703b73615653eea5ab7a841d5b478d961e Mon Sep 17 00:00:00 2001 From: Mikhail Kobuk Date: Thu, 21 Mar 2024 19:47:31 +0300 Subject: [PATCH 072/606] phy: marvell: a3700-comphy: Fix hardcoded array size Replace hardcoded 'gbe_phy_init' array size by explicit one. Fixes: 934337080c6c ("phy: marvell: phy-mvebu-a3700-comphy: Add native kernel implementation") Signed-off-by: Mikhail Kobuk Link: https://lore.kernel.org/r/20240321164734.49273-2-m.kobuk@ispras.ru Signed-off-by: Vinod Koul --- drivers/phy/marvell/phy-mvebu-a3700-comphy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/marvell/phy-mvebu-a3700-comphy.c b/drivers/phy/marvell/phy-mvebu-a3700-comphy.c index 68710ad1ad70a..1d1db1737422a 100644 --- a/drivers/phy/marvell/phy-mvebu-a3700-comphy.c +++ b/drivers/phy/marvell/phy-mvebu-a3700-comphy.c @@ -603,7 +603,7 @@ static void comphy_gbe_phy_init(struct mvebu_a3700_comphy_lane *lane, u16 val; fix_idx = 0; - for (addr = 0; addr < 512; addr++) { + for (addr = 0; addr < ARRAY_SIZE(gbe_phy_init); addr++) { /* * All PHY register values are defined in full for 3.125Gbps * SERDES speed. The values required for 1.25 Gbps are almost From 76cd338994778c552c51086fc056819b5cdda2e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Fri, 5 Apr 2024 14:33:51 +0200 Subject: [PATCH 073/606] MAINTAINERS: bpf: Add Lehui and Puranjay as riscv64 reviewers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lehui and Puranjay have been active RISC-V 64-bit BPF JIT contributors/reviewers for a long time! Let's make it more official by adding them as reviewers in MAINTAINERS. Thank you for your hard work! Signed-off-by: Björn Töpel Link: https://lore.kernel.org/r/20240405123352.2852393-1-bjorn@kernel.org Signed-off-by: Alexei Starovoitov --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 0c08b44bde17b..65e8351f978e2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3764,6 +3764,8 @@ X: arch/riscv/net/bpf_jit_comp64.c BPF JIT for RISC-V (64-bit) M: Björn Töpel +R: Pu Lehui +R: Puranjay Mohan L: bpf@vger.kernel.org S: Maintained F: arch/riscv/net/ From 3a161017f1de55cc48be81f6156004c151f32677 Mon Sep 17 00:00:00 2001 From: Marcel Ziswiler Date: Fri, 22 Mar 2024 14:06:32 +0100 Subject: [PATCH 074/606] phy: freescale: imx8m-pcie: fix pcie link-up instability Leaving AUX_PLL_REFCLK_SEL at its reset default of AUX_IN (PLL clock) proves to be more stable on the i.MX 8M Mini. Fixes: 1aa97b002258 ("phy: freescale: pcie: Initialize the imx8 pcie standalone phy driver") Signed-off-by: Marcel Ziswiler Reviewed-by: Richard Zhu Link: https://lore.kernel.org/r/20240322130646.1016630-2-marcel@ziswiler.com Signed-off-by: Vinod Koul --- drivers/phy/freescale/phy-fsl-imx8m-pcie.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/phy/freescale/phy-fsl-imx8m-pcie.c b/drivers/phy/freescale/phy-fsl-imx8m-pcie.c index b700f52b7b679..11fcb1867118c 100644 --- a/drivers/phy/freescale/phy-fsl-imx8m-pcie.c +++ b/drivers/phy/freescale/phy-fsl-imx8m-pcie.c @@ -110,8 +110,10 @@ static int imx8_pcie_phy_power_on(struct phy *phy) /* Source clock from SoC internal PLL */ writel(ANA_PLL_CLK_OUT_TO_EXT_IO_SEL, imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG062); - writel(AUX_PLL_REFCLK_SEL_SYS_PLL, - imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG063); + if (imx8_phy->drvdata->variant != IMX8MM) { + writel(AUX_PLL_REFCLK_SEL_SYS_PLL, + imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG063); + } val = ANA_AUX_RX_TX_SEL_TX | ANA_AUX_TX_TERM; writel(val | ANA_AUX_RX_TERM_GND_EN, imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG064); From f8020dfb311d2b6cf657668792aaa5fa8863a7dd Mon Sep 17 00:00:00 2001 From: Michal Tomek Date: Thu, 4 Apr 2024 19:11:26 +0200 Subject: [PATCH 075/606] phy: rockchip-snps-pcie3: fix bifurcation on rk3588 So far all RK3588 boards use fully aggregated PCIe. CM3588 is one of the few boards using this feature and apparently it is broken. The PHY offers the following mapping options: port 0 lane 0 - always mapped to controller 0 (4L) port 0 lane 1 - to controller 0 or 2 (1L0) port 1 lane 0 - to controller 0 or 1 (2L) port 1 lane 1 - to controller 0, 1 or 3 (1L1) The data-lanes DT property maps these as follows: 0 = no controller (unsupported by the HW) 1 = 4L 2 = 2L 3 = 1L0 4 = 1L1 That allows the following configurations with first column being the mainline data-lane mapping, second column being the downstream name, third column being PCIE3PHY_GRF_CMN_CON0 and PHP_GRF_PCIESEL register values and final column being the user visible lane setup: <1 1 1 1> = AGGREG = [4 0] = x4 (aggregation) <1 1 2 2> = NANBNB = [0 0] = x2 x2 (no bif.) <1 3 2 2> = NANBBI = [1 1] = x2 x1x1 (bif. of port 0) <1 1 2 4> = NABINB = [2 2] = x1x1 x2 (bif. of port 1) <1 3 2 4> = NABIBI = [3 3] = x1x1 x1x1 (bif. of both ports) The driver currently does not program PHP_GRF_PCIESEL correctly, which is fixed by this patch. As a side-effect the new logic is much simpler than the old logic. Fixes: 2e9bffc4f713 ("phy: rockchip: Support PCIe v3") Signed-off-by: Michal Tomek Signed-off-by: Sebastian Reichel Acked-by: Heiko Stuebner Link: https://lore.kernel.org/r/20240404-rk3588-pcie-bifurcation-fixes-v1-1-9907136eeafd@kernel.org Signed-off-by: Vinod Koul --- .../phy/rockchip/phy-rockchip-snps-pcie3.c | 24 +++++++------------ 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/drivers/phy/rockchip/phy-rockchip-snps-pcie3.c b/drivers/phy/rockchip/phy-rockchip-snps-pcie3.c index 121e5961ce114..d5bcc9c42b284 100644 --- a/drivers/phy/rockchip/phy-rockchip-snps-pcie3.c +++ b/drivers/phy/rockchip/phy-rockchip-snps-pcie3.c @@ -132,7 +132,7 @@ static const struct rockchip_p3phy_ops rk3568_ops = { static int rockchip_p3phy_rk3588_init(struct rockchip_p3phy_priv *priv) { u32 reg = 0; - u8 mode = 0; + u8 mode = RK3588_LANE_AGGREGATION; /* default */ int ret; /* Deassert PCIe PMA output clamp mode */ @@ -140,28 +140,20 @@ static int rockchip_p3phy_rk3588_init(struct rockchip_p3phy_priv *priv) /* Set bifurcation if needed */ for (int i = 0; i < priv->num_lanes; i++) { - if (!priv->lanes[i]) - mode |= (BIT(i) << 3); - if (priv->lanes[i] > 1) - mode |= (BIT(i) >> 1); - } - - if (!mode) - reg = RK3588_LANE_AGGREGATION; - else { - if (mode & (BIT(0) | BIT(1))) - reg |= RK3588_BIFURCATION_LANE_0_1; - - if (mode & (BIT(2) | BIT(3))) - reg |= RK3588_BIFURCATION_LANE_2_3; + mode &= ~RK3588_LANE_AGGREGATION; + if (priv->lanes[i] == 3) + mode |= RK3588_BIFURCATION_LANE_0_1; + if (priv->lanes[i] == 4) + mode |= RK3588_BIFURCATION_LANE_2_3; } + reg = mode; regmap_write(priv->phy_grf, RK3588_PCIE3PHY_GRF_CMN_CON0, (0x7<<16) | reg); /* Set pcie1ln_sel in PHP_GRF_PCIESEL_CON */ if (!IS_ERR(priv->pipe_grf)) { - reg = (mode & (BIT(6) | BIT(7))) >> 6; + reg = mode & 3; if (reg) regmap_write(priv->pipe_grf, PHP_GRF_PCIESEL_CON, (reg << 16) | reg); From 55491a5fa163bf15158f34f3650b3985f25622b9 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Thu, 4 Apr 2024 19:11:27 +0200 Subject: [PATCH 076/606] phy: rockchip-snps-pcie3: fix clearing PHP_GRF_PCIESEL_CON bits Currently the PCIe v3 PHY driver only sets the pcie1ln_sel bits, but does not clear them because of an incorrect write mask. This fixes up the issue by using a newly introduced constant for the write mask. While at it also introduces a proper GENMASK based constant for the PCIE30_PHY_MODE. Fixes: 2e9bffc4f713 ("phy: rockchip: Support PCIe v3") Signed-off-by: Sebastian Reichel Reviewed-by: Heiko Stuebner Link: https://lore.kernel.org/r/20240404-rk3588-pcie-bifurcation-fixes-v1-2-9907136eeafd@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/rockchip/phy-rockchip-snps-pcie3.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/phy/rockchip/phy-rockchip-snps-pcie3.c b/drivers/phy/rockchip/phy-rockchip-snps-pcie3.c index d5bcc9c42b284..9857ee45b89e0 100644 --- a/drivers/phy/rockchip/phy-rockchip-snps-pcie3.c +++ b/drivers/phy/rockchip/phy-rockchip-snps-pcie3.c @@ -40,6 +40,8 @@ #define RK3588_BIFURCATION_LANE_0_1 BIT(0) #define RK3588_BIFURCATION_LANE_2_3 BIT(1) #define RK3588_LANE_AGGREGATION BIT(2) +#define RK3588_PCIE1LN_SEL_EN (GENMASK(1, 0) << 16) +#define RK3588_PCIE30_PHY_MODE_EN (GENMASK(2, 0) << 16) struct rockchip_p3phy_ops; @@ -149,14 +151,15 @@ static int rockchip_p3phy_rk3588_init(struct rockchip_p3phy_priv *priv) } reg = mode; - regmap_write(priv->phy_grf, RK3588_PCIE3PHY_GRF_CMN_CON0, (0x7<<16) | reg); + regmap_write(priv->phy_grf, RK3588_PCIE3PHY_GRF_CMN_CON0, + RK3588_PCIE30_PHY_MODE_EN | reg); /* Set pcie1ln_sel in PHP_GRF_PCIESEL_CON */ if (!IS_ERR(priv->pipe_grf)) { - reg = mode & 3; + reg = mode & (RK3588_BIFURCATION_LANE_0_1 | RK3588_BIFURCATION_LANE_2_3); if (reg) regmap_write(priv->pipe_grf, PHP_GRF_PCIESEL_CON, - (reg << 16) | reg); + RK3588_PCIE1LN_SEL_EN | reg); } reset_control_deassert(priv->p30phy); From d16d4002fea69b6609b852dd8db1f5844c02fbe4 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Thu, 4 Apr 2024 19:11:28 +0200 Subject: [PATCH 077/606] phy: rockchip: naneng-combphy: Fix mux on rk3588 The pcie1l0_sel and pcie1l1_sel bits in PCIESEL_CON configure the mux for PCIe1L0 and PCIe1L1 to either the PIPE Combo PHYs or the PCIe3 PHY. Thus this configuration interfers with the data-lanes configuration done by the PCIe3 PHY. RK3588 has three Combo PHYs. The first one has a dedicated PCIe controller and is not affected by this. For the other two Combo PHYs, there is one mux for each of them. pcie1l0_sel selects if PCIe 1L0 is muxed to Combo PHY 1 when bit is set to 0 or to the PCIe3 PHY when bit is set to 1. pcie1l1_sel selects if PCIe 1L1 is muxed to Combo PHY 2 when bit is set to 0 or to the PCIe3 PHY when bit is set to 1. Currently the code always muxes 1L0 and 1L1 to the Combi PHYs once one of them is being used in PCIe mode. This is obviously wrong when at least one of the ports should be muxed to the PCIe3 PHY. Fix this by introducing Combo PHY identification and then only setting up the required bit. Fixes: a03c44277253 ("phy: rockchip: Add naneng combo phy support for RK3588") Reported-by: Michal Tomek Signed-off-by: Sebastian Reichel Reviewed-by: Heiko Stuebner Link: https://lore.kernel.org/r/20240404-rk3588-pcie-bifurcation-fixes-v1-3-9907136eeafd@kernel.org Signed-off-by: Vinod Koul --- .../rockchip/phy-rockchip-naneng-combphy.c | 36 +++++++++++++++++-- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/drivers/phy/rockchip/phy-rockchip-naneng-combphy.c b/drivers/phy/rockchip/phy-rockchip-naneng-combphy.c index 76b9cf417591d..bf74e429ff46e 100644 --- a/drivers/phy/rockchip/phy-rockchip-naneng-combphy.c +++ b/drivers/phy/rockchip/phy-rockchip-naneng-combphy.c @@ -125,12 +125,15 @@ struct rockchip_combphy_grfcfg { }; struct rockchip_combphy_cfg { + unsigned int num_phys; + unsigned int phy_ids[3]; const struct rockchip_combphy_grfcfg *grfcfg; int (*combphy_cfg)(struct rockchip_combphy_priv *priv); }; struct rockchip_combphy_priv { u8 type; + int id; void __iomem *mmio; int num_clks; struct clk_bulk_data *clks; @@ -320,7 +323,7 @@ static int rockchip_combphy_probe(struct platform_device *pdev) struct rockchip_combphy_priv *priv; const struct rockchip_combphy_cfg *phy_cfg; struct resource *res; - int ret; + int ret, id; phy_cfg = of_device_get_match_data(dev); if (!phy_cfg) { @@ -338,6 +341,15 @@ static int rockchip_combphy_probe(struct platform_device *pdev) return ret; } + /* find the phy-id from the io address */ + priv->id = -ENODEV; + for (id = 0; id < phy_cfg->num_phys; id++) { + if (res->start == phy_cfg->phy_ids[id]) { + priv->id = id; + break; + } + } + priv->dev = dev; priv->type = PHY_NONE; priv->cfg = phy_cfg; @@ -562,6 +574,12 @@ static const struct rockchip_combphy_grfcfg rk3568_combphy_grfcfgs = { }; static const struct rockchip_combphy_cfg rk3568_combphy_cfgs = { + .num_phys = 3, + .phy_ids = { + 0xfe820000, + 0xfe830000, + 0xfe840000, + }, .grfcfg = &rk3568_combphy_grfcfgs, .combphy_cfg = rk3568_combphy_cfg, }; @@ -578,8 +596,14 @@ static int rk3588_combphy_cfg(struct rockchip_combphy_priv *priv) rockchip_combphy_param_write(priv->phy_grf, &cfg->con1_for_pcie, true); rockchip_combphy_param_write(priv->phy_grf, &cfg->con2_for_pcie, true); rockchip_combphy_param_write(priv->phy_grf, &cfg->con3_for_pcie, true); - rockchip_combphy_param_write(priv->pipe_grf, &cfg->pipe_pcie1l0_sel, true); - rockchip_combphy_param_write(priv->pipe_grf, &cfg->pipe_pcie1l1_sel, true); + switch (priv->id) { + case 1: + rockchip_combphy_param_write(priv->pipe_grf, &cfg->pipe_pcie1l0_sel, true); + break; + case 2: + rockchip_combphy_param_write(priv->pipe_grf, &cfg->pipe_pcie1l1_sel, true); + break; + } break; case PHY_TYPE_USB3: /* Set SSC downward spread spectrum */ @@ -736,6 +760,12 @@ static const struct rockchip_combphy_grfcfg rk3588_combphy_grfcfgs = { }; static const struct rockchip_combphy_cfg rk3588_combphy_cfgs = { + .num_phys = 3, + .phy_ids = { + 0xfee00000, + 0xfee10000, + 0xfee20000, + }, .grfcfg = &rk3588_combphy_grfcfgs, .combphy_cfg = rk3588_combphy_cfg, }; From 5abed58a8bde6d349bde364a160510b5bb904d18 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 4 Apr 2024 16:43:44 -0700 Subject: [PATCH 078/606] phy: qcom: qmp-combo: Fix VCO div offset on v3 Commit ec17373aebd0 ("phy: qcom: qmp-combo: extract common function to setup clocks") changed the offset that is used to write to DP_PHY_VCO_DIV from QSERDES_V3_DP_PHY_VCO_DIV to QSERDES_V4_DP_PHY_VCO_DIV. Unfortunately, this offset is different between v3 and v4 phys: #define QSERDES_V3_DP_PHY_VCO_DIV 0x064 #define QSERDES_V4_DP_PHY_VCO_DIV 0x070 meaning that we write the wrong register on v3 phys now. Add another generic register to 'regs' and use it here instead of a version specific define to fix this. This was discovered after Abhinav looked over register dumps with me from sc7180 Trogdor devices that started failing to light up the external display with v6.6 based kernels. It turns out that some monitors are very specific about their link clk frequency and if the default power on reset value is still there the monitor will show a blank screen or a garbled display. Other monitors are perfectly happy to get a bad clock signal. Cc: Douglas Anderson Cc: Abhinav Kumar Cc: Dmitry Baryshkov Fixes: ec17373aebd0 ("phy: qcom: qmp-combo: extract common function to setup clocks") Signed-off-by: Stephen Boyd Reviewed-by: Abhinav Kumar Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20240404234345.1446300-1-swboyd@chromium.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 7d585a4bbbba9..3b19d8ebf4673 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -77,6 +77,7 @@ enum qphy_reg_layout { QPHY_COM_BIAS_EN_CLKBUFLR_EN, QPHY_DP_PHY_STATUS, + QPHY_DP_PHY_VCO_DIV, QPHY_TX_TX_POL_INV, QPHY_TX_TX_DRV_LVL, @@ -102,6 +103,7 @@ static const unsigned int qmp_v3_usb3phy_regs_layout[QPHY_LAYOUT_SIZE] = { [QPHY_COM_BIAS_EN_CLKBUFLR_EN] = QSERDES_V3_COM_BIAS_EN_CLKBUFLR_EN, [QPHY_DP_PHY_STATUS] = QSERDES_V3_DP_PHY_STATUS, + [QPHY_DP_PHY_VCO_DIV] = QSERDES_V3_DP_PHY_VCO_DIV, [QPHY_TX_TX_POL_INV] = QSERDES_V3_TX_TX_POL_INV, [QPHY_TX_TX_DRV_LVL] = QSERDES_V3_TX_TX_DRV_LVL, @@ -126,6 +128,7 @@ static const unsigned int qmp_v45_usb3phy_regs_layout[QPHY_LAYOUT_SIZE] = { [QPHY_COM_BIAS_EN_CLKBUFLR_EN] = QSERDES_V4_COM_BIAS_EN_CLKBUFLR_EN, [QPHY_DP_PHY_STATUS] = QSERDES_V4_DP_PHY_STATUS, + [QPHY_DP_PHY_VCO_DIV] = QSERDES_V4_DP_PHY_VCO_DIV, [QPHY_TX_TX_POL_INV] = QSERDES_V4_TX_TX_POL_INV, [QPHY_TX_TX_DRV_LVL] = QSERDES_V4_TX_TX_DRV_LVL, @@ -2162,6 +2165,7 @@ static int qmp_combo_configure_dp_clocks(struct qmp_combo *qmp) const struct phy_configure_opts_dp *dp_opts = &qmp->dp_opts; u32 phy_vco_div; unsigned long pixel_freq; + const struct qmp_phy_cfg *cfg = qmp->cfg; switch (dp_opts->link_rate) { case 1620: @@ -2184,7 +2188,7 @@ static int qmp_combo_configure_dp_clocks(struct qmp_combo *qmp) /* Other link rates aren't supported */ return -EINVAL; } - writel(phy_vco_div, qmp->dp_dp_phy + QSERDES_V4_DP_PHY_VCO_DIV); + writel(phy_vco_div, qmp->dp_dp_phy + cfg->regs[QPHY_DP_PHY_VCO_DIV]); clk_set_rate(qmp->dp_link_hw.clk, dp_opts->link_rate * 100000); clk_set_rate(qmp->dp_pixel_hw.clk, pixel_freq); From ee13e1f3c72b9464a4d73017c060ab503eed653a Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 4 Apr 2024 17:01:03 -0700 Subject: [PATCH 079/606] phy: qcom: qmp-combo: Fix register base for QSERDES_DP_PHY_MODE The register base that was used to write to the QSERDES_DP_PHY_MODE register was 'dp_dp_phy' before commit 815891eee668 ("phy: qcom-qmp-combo: Introduce orientation variable"). There isn't any explanation in the commit why this is changed, so I suspect it was an oversight or happened while being extracted from some other series. Oddly the value being 0x4c or 0x5c doesn't seem to matter for me, so I suspect this is dead code, but that can be fixed in another patch. It's not good to write to the wrong register space, and maybe some other version of this phy relies on this. Cc: Douglas Anderson Cc: Abhinav Kumar Cc: Dmitry Baryshkov Cc: Neil Armstrong Cc: Abel Vesa Cc: Steev Klimaszewski Cc: Johan Hovold Cc: Bjorn Andersson Cc: stable@vger.kernel.org # 6.5 Fixes: 815891eee668 ("phy: qcom-qmp-combo: Introduce orientation variable") Signed-off-by: Stephen Boyd Reviewed-by: Abhinav Kumar Reviewed-by: Bjorn Andersson Reviewed-by: Dmitry Baryshkov Reviewed-by: Johan Hovold Link: https://lore.kernel.org/r/20240405000111.1450598-1-swboyd@chromium.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 3b19d8ebf4673..2a6f70b3e25ff 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -2153,9 +2153,9 @@ static bool qmp_combo_configure_dp_mode(struct qmp_combo *qmp) writel(val, qmp->dp_dp_phy + QSERDES_DP_PHY_PD_CTL); if (reverse) - writel(0x4c, qmp->pcs + QSERDES_DP_PHY_MODE); + writel(0x4c, qmp->dp_dp_phy + QSERDES_DP_PHY_MODE); else - writel(0x5c, qmp->pcs + QSERDES_DP_PHY_MODE); + writel(0x5c, qmp->dp_dp_phy + QSERDES_DP_PHY_MODE); return reverse; } From 74a72baf204fd509bbe8b53eec35e39869d94341 Mon Sep 17 00:00:00 2001 From: Ramona Gradinariu Date: Fri, 5 Apr 2024 07:53:09 +0300 Subject: [PATCH 080/606] iio:imu: adis16475: Fix sync mode setting Fix sync mode setting by applying the necessary shift bits. Fixes: fff7352bf7a3 ("iio: imu: Add support for adis16475") Signed-off-by: Ramona Gradinariu Reviewed-by: Nuno Sa Link: https://lore.kernel.org/r/20240405045309.816328-2-ramona.bolboaca13@gmail.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/imu/adis16475.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iio/imu/adis16475.c b/drivers/iio/imu/adis16475.c index 01f55cc902faa..060a21c70460d 100644 --- a/drivers/iio/imu/adis16475.c +++ b/drivers/iio/imu/adis16475.c @@ -1289,6 +1289,7 @@ static int adis16475_config_sync_mode(struct adis16475 *st) struct device *dev = &st->adis.spi->dev; const struct adis16475_sync *sync; u32 sync_mode; + u16 val; /* default to internal clk */ st->clk_freq = st->info->int_clk * 1000; @@ -1350,8 +1351,9 @@ static int adis16475_config_sync_mode(struct adis16475 *st) * I'm keeping this for simplicity and avoiding extra variables * in chip_info. */ + val = ADIS16475_SYNC_MODE(sync->sync_mode); ret = __adis_update_bits(&st->adis, ADIS16475_REG_MSG_CTRL, - ADIS16475_SYNC_MODE_MASK, sync->sync_mode); + ADIS16475_SYNC_MODE_MASK, val); if (ret) return ret; From 47b3e2f3914ae5e8d9025d65ae5cffcbb54bc9c3 Mon Sep 17 00:00:00 2001 From: Gabor Juhos Date: Sat, 6 Apr 2024 15:37:09 +0200 Subject: [PATCH 081/606] phy: qcom: m31: match requested regulator name with dt schema According to the 'qcom,ipq5332-usb-hsphy.yaml' schema, the 5V supply regulator must be defined via the 'vdd-supply' property. The driver however requests for the 'vdda-phy' regulator which results in the following message when the driver is probed on a IPQ5018 based board with a device tree matching to the schema: qcom-m31usb-phy 5b000.phy: supply vdda-phy not found, using dummy regulator qcom-m31usb-phy 5b000.phy: Registered M31 USB phy This means that the regulator specified in the device tree never gets enabled. Change the driver to use the 'vdd' name for the regulator as per defined in the schema in order to ensure that the corresponding regulator gets enabled. Fixes: 08e49af50701 ("phy: qcom: Introduce M31 USB PHY driver") Reviewed-by: Varadarajan Narayanan Signed-off-by: Gabor Juhos Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20240406-phy-qcom-m31-regulator-fix-v2-1-c8e9795bc071@gmail.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-m31.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/qualcomm/phy-qcom-m31.c b/drivers/phy/qualcomm/phy-qcom-m31.c index 03fb0d4b75d74..20d4c020a83c1 100644 --- a/drivers/phy/qualcomm/phy-qcom-m31.c +++ b/drivers/phy/qualcomm/phy-qcom-m31.c @@ -297,7 +297,7 @@ static int m31usb_phy_probe(struct platform_device *pdev) return dev_err_probe(dev, PTR_ERR(qphy->phy), "failed to create phy\n"); - qphy->vreg = devm_regulator_get(dev, "vdda-phy"); + qphy->vreg = devm_regulator_get(dev, "vdd"); if (IS_ERR(qphy->vreg)) return dev_err_probe(dev, PTR_ERR(qphy->vreg), "failed to get vreg\n"); From d5638de827cff0fce77007e426ec0ffdedf68a44 Mon Sep 17 00:00:00 2001 From: Rex Zhang Date: Thu, 4 Apr 2024 15:39:49 -0700 Subject: [PATCH 082/606] dmaengine: idxd: Convert spinlock to mutex to lock evl workqueue drain_workqueue() cannot be called safely in a spinlocked context due to possible task rescheduling. In the multi-task scenario, calling queue_work() while drain_workqueue() will lead to a Call Trace as pushing a work on a draining workqueue is not permitted in spinlocked context. Call Trace: ? __warn+0x7d/0x140 ? __queue_work+0x2b2/0x440 ? report_bug+0x1f8/0x200 ? handle_bug+0x3c/0x70 ? exc_invalid_op+0x18/0x70 ? asm_exc_invalid_op+0x1a/0x20 ? __queue_work+0x2b2/0x440 queue_work_on+0x28/0x30 idxd_misc_thread+0x303/0x5a0 [idxd] ? __schedule+0x369/0xb40 ? __pfx_irq_thread_fn+0x10/0x10 ? irq_thread+0xbc/0x1b0 irq_thread_fn+0x21/0x70 irq_thread+0x102/0x1b0 ? preempt_count_add+0x74/0xa0 ? __pfx_irq_thread_dtor+0x10/0x10 ? __pfx_irq_thread+0x10/0x10 kthread+0x103/0x140 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x31/0x50 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1b/0x30 The current implementation uses a spinlock to protect event log workqueue and will lead to the Call Trace due to potential task rescheduling. To address the locking issue, convert the spinlock to mutex, allowing the drain_workqueue() to be called in a safe mutex-locked context. This change ensures proper synchronization when accessing the event log workqueue, preventing potential Call Trace and improving the overall robustness of the code. Fixes: c40bd7d9737b ("dmaengine: idxd: process user page faults for completion record") Signed-off-by: Rex Zhang Reviewed-by: Dave Jiang Reviewed-by: Fenghua Yu Reviewed-by: Lijun Pan Link: https://lore.kernel.org/r/20240404223949.2885604-1-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/cdev.c | 5 ++--- drivers/dma/idxd/debugfs.c | 4 ++-- drivers/dma/idxd/device.c | 8 ++++---- drivers/dma/idxd/idxd.h | 2 +- drivers/dma/idxd/init.c | 2 +- drivers/dma/idxd/irq.c | 4 ++-- 6 files changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index 8078ab9acfbc3..c095a2c8f6595 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -342,7 +342,7 @@ static void idxd_cdev_evl_drain_pasid(struct idxd_wq *wq, u32 pasid) if (!evl) return; - spin_lock(&evl->lock); + mutex_lock(&evl->lock); status.bits = ioread64(idxd->reg_base + IDXD_EVLSTATUS_OFFSET); t = status.tail; h = status.head; @@ -354,9 +354,8 @@ static void idxd_cdev_evl_drain_pasid(struct idxd_wq *wq, u32 pasid) set_bit(h, evl->bmap); h = (h + 1) % size; } - spin_unlock(&evl->lock); - drain_workqueue(wq->wq); + mutex_unlock(&evl->lock); } static int idxd_cdev_release(struct inode *node, struct file *filep) diff --git a/drivers/dma/idxd/debugfs.c b/drivers/dma/idxd/debugfs.c index f3f25ee676f30..ad4245cb301d5 100644 --- a/drivers/dma/idxd/debugfs.c +++ b/drivers/dma/idxd/debugfs.c @@ -66,7 +66,7 @@ static int debugfs_evl_show(struct seq_file *s, void *d) if (!evl || !evl->log) return 0; - spin_lock(&evl->lock); + mutex_lock(&evl->lock); evl_status.bits = ioread64(idxd->reg_base + IDXD_EVLSTATUS_OFFSET); t = evl_status.tail; @@ -87,7 +87,7 @@ static int debugfs_evl_show(struct seq_file *s, void *d) dump_event_entry(idxd, s, i, &count, processed); } - spin_unlock(&evl->lock); + mutex_unlock(&evl->lock); return 0; } diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index ecfdf4a8f1f83..c41ef195eeb9f 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -775,7 +775,7 @@ static int idxd_device_evl_setup(struct idxd_device *idxd) goto err_alloc; } - spin_lock(&evl->lock); + mutex_lock(&evl->lock); evl->log = addr; evl->dma = dma_addr; evl->log_size = size; @@ -796,7 +796,7 @@ static int idxd_device_evl_setup(struct idxd_device *idxd) gencfg.evl_en = 1; iowrite32(gencfg.bits, idxd->reg_base + IDXD_GENCFG_OFFSET); - spin_unlock(&evl->lock); + mutex_unlock(&evl->lock); return 0; err_alloc: @@ -819,7 +819,7 @@ static void idxd_device_evl_free(struct idxd_device *idxd) if (!gencfg.evl_en) return; - spin_lock(&evl->lock); + mutex_lock(&evl->lock); gencfg.evl_en = 0; iowrite32(gencfg.bits, idxd->reg_base + IDXD_GENCFG_OFFSET); @@ -836,7 +836,7 @@ static void idxd_device_evl_free(struct idxd_device *idxd) evl_dma = evl->dma; evl->log = NULL; evl->size = IDXD_EVL_SIZE_MIN; - spin_unlock(&evl->lock); + mutex_unlock(&evl->lock); dma_free_coherent(dev, evl_log_size, evl_log, evl_dma); } diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index a4099a1e2340f..7b98944135eb4 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -293,7 +293,7 @@ struct idxd_driver_data { struct idxd_evl { /* Lock to protect event log access. */ - spinlock_t lock; + struct mutex lock; void *log; dma_addr_t dma; /* Total size of event log = number of entries * entry size. */ diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 4954adc6bb609..264c4e47d7cca 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -354,7 +354,7 @@ static int idxd_init_evl(struct idxd_device *idxd) if (!evl) return -ENOMEM; - spin_lock_init(&evl->lock); + mutex_init(&evl->lock); evl->size = IDXD_EVL_SIZE_MIN; idxd_name = dev_name(idxd_confdev(idxd)); diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index 348aa21389a9f..8dc029c865515 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -363,7 +363,7 @@ static void process_evl_entries(struct idxd_device *idxd) evl_status.bits = 0; evl_status.int_pending = 1; - spin_lock(&evl->lock); + mutex_lock(&evl->lock); /* Clear interrupt pending bit */ iowrite32(evl_status.bits_upper32, idxd->reg_base + IDXD_EVLSTATUS_OFFSET + sizeof(u32)); @@ -380,7 +380,7 @@ static void process_evl_entries(struct idxd_device *idxd) evl_status.head = h; iowrite32(evl_status.bits_lower32, idxd->reg_base + IDXD_EVLSTATUS_OFFSET); - spin_unlock(&evl->lock); + mutex_unlock(&evl->lock); } irqreturn_t idxd_misc_thread(int vec, void *data) From 244296cc3a155199a8b080d19e645d7d49081a38 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Fri, 8 Mar 2024 16:00:32 -0500 Subject: [PATCH 083/606] dma: xilinx_dpdma: Fix locking There are several places where either chan->lock or chan->vchan.lock was not held. Add appropriate locking. This fixes lockdep warnings like [ 31.077578] ------------[ cut here ]------------ [ 31.077831] WARNING: CPU: 2 PID: 40 at drivers/dma/xilinx/xilinx_dpdma.c:834 xilinx_dpdma_chan_queue_transfer+0x274/0x5e0 [ 31.077953] Modules linked in: [ 31.078019] CPU: 2 PID: 40 Comm: kworker/u12:1 Not tainted 6.6.20+ #98 [ 31.078102] Hardware name: xlnx,zynqmp (DT) [ 31.078169] Workqueue: events_unbound deferred_probe_work_func [ 31.078272] pstate: 600000c5 (nZCv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 31.078377] pc : xilinx_dpdma_chan_queue_transfer+0x274/0x5e0 [ 31.078473] lr : xilinx_dpdma_chan_queue_transfer+0x270/0x5e0 [ 31.078550] sp : ffffffc083bb2e10 [ 31.078590] x29: ffffffc083bb2e10 x28: 0000000000000000 x27: ffffff880165a168 [ 31.078754] x26: ffffff880164e920 x25: ffffff880164eab8 x24: ffffff880164d480 [ 31.078920] x23: ffffff880165a148 x22: ffffff880164e988 x21: 0000000000000000 [ 31.079132] x20: ffffffc082aa3000 x19: ffffff880164e880 x18: 0000000000000000 [ 31.079295] x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 [ 31.079453] x14: 0000000000000000 x13: ffffff8802263dc0 x12: 0000000000000001 [ 31.079613] x11: 0001ffc083bb2e34 x10: 0001ff880164e98f x9 : 0001ffc082aa3def [ 31.079824] x8 : 0001ffc082aa3dec x7 : 0000000000000000 x6 : 0000000000000516 [ 31.079982] x5 : ffffffc7f8d43000 x4 : ffffff88003c9c40 x3 : ffffffffffffffff [ 31.080147] x2 : ffffffc7f8d43000 x1 : 00000000000000c0 x0 : 0000000000000000 [ 31.080307] Call trace: [ 31.080340] xilinx_dpdma_chan_queue_transfer+0x274/0x5e0 [ 31.080518] xilinx_dpdma_issue_pending+0x11c/0x120 [ 31.080595] zynqmp_disp_layer_update+0x180/0x3ac [ 31.080712] zynqmp_dpsub_plane_atomic_update+0x11c/0x21c [ 31.080825] drm_atomic_helper_commit_planes+0x20c/0x684 [ 31.080951] drm_atomic_helper_commit_tail+0x5c/0xb0 [ 31.081139] commit_tail+0x234/0x294 [ 31.081246] drm_atomic_helper_commit+0x1f8/0x210 [ 31.081363] drm_atomic_commit+0x100/0x140 [ 31.081477] drm_client_modeset_commit_atomic+0x318/0x384 [ 31.081634] drm_client_modeset_commit_locked+0x8c/0x24c [ 31.081725] drm_client_modeset_commit+0x34/0x5c [ 31.081812] __drm_fb_helper_restore_fbdev_mode_unlocked+0x104/0x168 [ 31.081899] drm_fb_helper_set_par+0x50/0x70 [ 31.081971] fbcon_init+0x538/0xc48 [ 31.082047] visual_init+0x16c/0x23c [ 31.082207] do_bind_con_driver.isra.0+0x2d0/0x634 [ 31.082320] do_take_over_console+0x24c/0x33c [ 31.082429] do_fbcon_takeover+0xbc/0x1b0 [ 31.082503] fbcon_fb_registered+0x2d0/0x34c [ 31.082663] register_framebuffer+0x27c/0x38c [ 31.082767] __drm_fb_helper_initial_config_and_unlock+0x5c0/0x91c [ 31.082939] drm_fb_helper_initial_config+0x50/0x74 [ 31.083012] drm_fbdev_dma_client_hotplug+0xb8/0x108 [ 31.083115] drm_client_register+0xa0/0xf4 [ 31.083195] drm_fbdev_dma_setup+0xb0/0x1cc [ 31.083293] zynqmp_dpsub_drm_init+0x45c/0x4e0 [ 31.083431] zynqmp_dpsub_probe+0x444/0x5e0 [ 31.083616] platform_probe+0x8c/0x13c [ 31.083713] really_probe+0x258/0x59c [ 31.083793] __driver_probe_device+0xc4/0x224 [ 31.083878] driver_probe_device+0x70/0x1c0 [ 31.083961] __device_attach_driver+0x108/0x1e0 [ 31.084052] bus_for_each_drv+0x9c/0x100 [ 31.084125] __device_attach+0x100/0x298 [ 31.084207] device_initial_probe+0x14/0x20 [ 31.084292] bus_probe_device+0xd8/0xdc [ 31.084368] deferred_probe_work_func+0x11c/0x180 [ 31.084451] process_one_work+0x3ac/0x988 [ 31.084643] worker_thread+0x398/0x694 [ 31.084752] kthread+0x1bc/0x1c0 [ 31.084848] ret_from_fork+0x10/0x20 [ 31.084932] irq event stamp: 64549 [ 31.084970] hardirqs last enabled at (64548): [] _raw_spin_unlock_irqrestore+0x80/0x90 [ 31.085157] hardirqs last disabled at (64549): [] _raw_spin_lock_irqsave+0xc0/0xdc [ 31.085277] softirqs last enabled at (64503): [] __do_softirq+0x47c/0x500 [ 31.085390] softirqs last disabled at (64498): [] ____do_softirq+0x10/0x1c [ 31.085501] ---[ end trace 0000000000000000 ]--- Fixes: 7cbb0c63de3f ("dmaengine: xilinx: dpdma: Add the Xilinx DisplayPort DMA engine driver") Signed-off-by: Sean Anderson Reviewed-by: Tomi Valkeinen Link: https://lore.kernel.org/r/20240308210034.3634938-2-sean.anderson@linux.dev Signed-off-by: Vinod Koul --- drivers/dma/xilinx/xilinx_dpdma.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/dma/xilinx/xilinx_dpdma.c b/drivers/dma/xilinx/xilinx_dpdma.c index b82815e64d24e..eb0637d90342a 100644 --- a/drivers/dma/xilinx/xilinx_dpdma.c +++ b/drivers/dma/xilinx/xilinx_dpdma.c @@ -214,7 +214,8 @@ struct xilinx_dpdma_tx_desc { * @running: true if the channel is running * @first_frame: flag for the first frame of stream * @video_group: flag if multi-channel operation is needed for video channels - * @lock: lock to access struct xilinx_dpdma_chan + * @lock: lock to access struct xilinx_dpdma_chan. Must be taken before + * @vchan.lock, if both are to be held. * @desc_pool: descriptor allocation pool * @err_task: error IRQ bottom half handler * @desc: References to descriptors being processed @@ -1097,12 +1098,14 @@ static void xilinx_dpdma_chan_vsync_irq(struct xilinx_dpdma_chan *chan) * Complete the active descriptor, if any, promote the pending * descriptor to active, and queue the next transfer, if any. */ + spin_lock(&chan->vchan.lock); if (chan->desc.active) vchan_cookie_complete(&chan->desc.active->vdesc); chan->desc.active = pending; chan->desc.pending = NULL; xilinx_dpdma_chan_queue_transfer(chan); + spin_unlock(&chan->vchan.lock); out: spin_unlock_irqrestore(&chan->lock, flags); @@ -1264,10 +1267,12 @@ static void xilinx_dpdma_issue_pending(struct dma_chan *dchan) struct xilinx_dpdma_chan *chan = to_xilinx_chan(dchan); unsigned long flags; - spin_lock_irqsave(&chan->vchan.lock, flags); + spin_lock_irqsave(&chan->lock, flags); + spin_lock(&chan->vchan.lock); if (vchan_issue_pending(&chan->vchan)) xilinx_dpdma_chan_queue_transfer(chan); - spin_unlock_irqrestore(&chan->vchan.lock, flags); + spin_unlock(&chan->vchan.lock); + spin_unlock_irqrestore(&chan->lock, flags); } static int xilinx_dpdma_config(struct dma_chan *dchan, @@ -1495,7 +1500,9 @@ static void xilinx_dpdma_chan_err_task(struct tasklet_struct *t) XILINX_DPDMA_EINTR_CHAN_ERR_MASK << chan->id); spin_lock_irqsave(&chan->lock, flags); + spin_lock(&chan->vchan.lock); xilinx_dpdma_chan_queue_transfer(chan); + spin_unlock(&chan->vchan.lock); spin_unlock_irqrestore(&chan->lock, flags); } From 5b9706bfc094314c600ab810a61208a7cbaa4cb3 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 27 Mar 2024 10:58:48 +0100 Subject: [PATCH 084/606] dmaengine: xilinx: xdma: Fix wrong offsets in the buffers addresses in dma descriptor The addition of interleaved transfers slightly changed the way addresses inside DMA descriptors are derived, breaking cyclic transfers. Fixes: 3e184e64c2e5 ("dmaengine: xilinx: xdma: Prepare the introduction of interleaved DMA transfers") Cc: stable@vger.kernel.org Signed-off-by: Miquel Raynal Signed-off-by: Louis Chauvet Link: https://lore.kernel.org/r/20240327-digigram-xdma-fixes-v1-1-45f4a52c0283@bootlin.com Signed-off-by: Vinod Koul --- drivers/dma/xilinx/xdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/xilinx/xdma.c b/drivers/dma/xilinx/xdma.c index 170017ff2aad6..b9788aa8f6b7b 100644 --- a/drivers/dma/xilinx/xdma.c +++ b/drivers/dma/xilinx/xdma.c @@ -704,7 +704,7 @@ xdma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t address, desc_num = 0; for (i = 0; i < periods; i++) { desc_num += xdma_fill_descs(sw_desc, *src, *dst, period_size, desc_num); - addr += i * period_size; + addr += period_size; } tx_desc = vchan_tx_prep(&xdma_chan->vchan, &sw_desc->vdesc, flags); From 6a40fb8245965b481b4dcce011cd63f20bf91ee0 Mon Sep 17 00:00:00 2001 From: Louis Chauvet Date: Wed, 27 Mar 2024 10:58:49 +0100 Subject: [PATCH 085/606] dmaengine: xilinx: xdma: Fix synchronization issue The current xdma_synchronize method does not properly wait for the last transfer to be done. Due to limitations of the XMDA engine, it is not possible to stop a transfer in the middle of a descriptor. Said otherwise, if a stop is requested at the end of descriptor "N" and the OS is fast enough, the DMA controller will effectively stop immediately. However, if the OS is slightly too slow to request the stop and the DMA engine starts descriptor "N+1", the N+1 transfer will be performed until its end. This means that after a terminate_all, the last descriptor must remain valid and the synchronization must wait for this last descriptor to be terminated. Fixes: 855c2e1d1842 ("dmaengine: xilinx: xdma: Rework xdma_terminate_all()") Fixes: f5c392d106e7 ("dmaengine: xilinx: xdma: Add terminate_all/synchronize callbacks") Cc: stable@vger.kernel.org Suggested-by: Miquel Raynal Signed-off-by: Louis Chauvet Link: https://lore.kernel.org/r/20240327-digigram-xdma-fixes-v1-2-45f4a52c0283@bootlin.com Signed-off-by: Vinod Koul --- drivers/dma/xilinx/xdma-regs.h | 3 +++ drivers/dma/xilinx/xdma.c | 26 ++++++++++++++++++-------- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/drivers/dma/xilinx/xdma-regs.h b/drivers/dma/xilinx/xdma-regs.h index 98f5f6fb9ff9c..6ad08878e9386 100644 --- a/drivers/dma/xilinx/xdma-regs.h +++ b/drivers/dma/xilinx/xdma-regs.h @@ -117,6 +117,9 @@ struct xdma_hw_desc { CHAN_CTRL_IE_WRITE_ERROR | \ CHAN_CTRL_IE_DESC_ERROR) +/* bits of the channel status register */ +#define XDMA_CHAN_STATUS_BUSY BIT(0) + #define XDMA_CHAN_STATUS_MASK CHAN_CTRL_START #define XDMA_CHAN_ERROR_MASK (CHAN_CTRL_IE_DESC_ALIGN_MISMATCH | \ diff --git a/drivers/dma/xilinx/xdma.c b/drivers/dma/xilinx/xdma.c index b9788aa8f6b7b..5a3a3293b21b5 100644 --- a/drivers/dma/xilinx/xdma.c +++ b/drivers/dma/xilinx/xdma.c @@ -71,6 +71,8 @@ struct xdma_chan { enum dma_transfer_direction dir; struct dma_slave_config cfg; u32 irq; + struct completion last_interrupt; + bool stop_requested; }; /** @@ -376,6 +378,8 @@ static int xdma_xfer_start(struct xdma_chan *xchan) return ret; xchan->busy = true; + xchan->stop_requested = false; + reinit_completion(&xchan->last_interrupt); return 0; } @@ -387,7 +391,6 @@ static int xdma_xfer_start(struct xdma_chan *xchan) static int xdma_xfer_stop(struct xdma_chan *xchan) { int ret; - u32 val; struct xdma_device *xdev = xchan->xdev_hdl; /* clear run stop bit to prevent any further auto-triggering */ @@ -395,13 +398,7 @@ static int xdma_xfer_stop(struct xdma_chan *xchan) CHAN_CTRL_RUN_STOP); if (ret) return ret; - - /* Clear the channel status register */ - ret = regmap_read(xdev->rmap, xchan->base + XDMA_CHAN_STATUS_RC, &val); - if (ret) - return ret; - - return 0; + return ret; } /** @@ -474,6 +471,8 @@ static int xdma_alloc_channels(struct xdma_device *xdev, xchan->xdev_hdl = xdev; xchan->base = base + i * XDMA_CHAN_STRIDE; xchan->dir = dir; + xchan->stop_requested = false; + init_completion(&xchan->last_interrupt); ret = xdma_channel_init(xchan); if (ret) @@ -521,6 +520,7 @@ static int xdma_terminate_all(struct dma_chan *chan) spin_lock_irqsave(&xdma_chan->vchan.lock, flags); xdma_chan->busy = false; + xdma_chan->stop_requested = true; vd = vchan_next_desc(&xdma_chan->vchan); if (vd) { list_del(&vd->node); @@ -542,6 +542,13 @@ static int xdma_terminate_all(struct dma_chan *chan) static void xdma_synchronize(struct dma_chan *chan) { struct xdma_chan *xdma_chan = to_xdma_chan(chan); + struct xdma_device *xdev = xdma_chan->xdev_hdl; + int st = 0; + + /* If the engine continues running, wait for the last interrupt */ + regmap_read(xdev->rmap, xdma_chan->base + XDMA_CHAN_STATUS, &st); + if (st & XDMA_CHAN_STATUS_BUSY) + wait_for_completion_timeout(&xdma_chan->last_interrupt, msecs_to_jiffies(1000)); vchan_synchronize(&xdma_chan->vchan); } @@ -876,6 +883,9 @@ static irqreturn_t xdma_channel_isr(int irq, void *dev_id) u32 st; bool repeat_tx; + if (xchan->stop_requested) + complete(&xchan->last_interrupt); + spin_lock(&xchan->vchan.lock); /* get submitted request */ From 7a71c6dc21d5ae83ab27c39a67845d6d23ac271f Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 27 Mar 2024 10:58:50 +0100 Subject: [PATCH 086/606] dmaengine: xilinx: xdma: Clarify kdoc in XDMA driver Clarify the kernel doc of xdma_fill_descs(), especially how big chunks will be handled. Signed-off-by: Miquel Raynal Signed-off-by: Louis Chauvet Link: https://lore.kernel.org/stable/20240327-digigram-xdma-fixes-v1-3-45f4a52c0283%40bootlin.com Link: https://lore.kernel.org/r/20240327-digigram-xdma-fixes-v1-3-45f4a52c0283@bootlin.com Signed-off-by: Vinod Koul --- drivers/dma/xilinx/xdma.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/dma/xilinx/xdma.c b/drivers/dma/xilinx/xdma.c index 5a3a3293b21b5..313b217388fe9 100644 --- a/drivers/dma/xilinx/xdma.c +++ b/drivers/dma/xilinx/xdma.c @@ -554,12 +554,14 @@ static void xdma_synchronize(struct dma_chan *chan) } /** - * xdma_fill_descs - Fill hardware descriptors with contiguous memory block addresses - * @sw_desc: tx descriptor state container - * @src_addr: Value for a ->src_addr field of a first descriptor - * @dst_addr: Value for a ->dst_addr field of a first descriptor - * @size: Total size of a contiguous memory block - * @filled_descs_num: Number of filled hardware descriptors for corresponding sw_desc + * xdma_fill_descs() - Fill hardware descriptors for one contiguous memory chunk. + * More than one descriptor will be used if the size is bigger + * than XDMA_DESC_BLEN_MAX. + * @sw_desc: Descriptor container + * @src_addr: First value for the ->src_addr field + * @dst_addr: First value for the ->dst_addr field + * @size: Size of the contiguous memory block + * @filled_descs_num: Index of the first descriptor to take care of in @sw_desc */ static inline u32 xdma_fill_descs(struct xdma_desc *sw_desc, u64 src_addr, u64 dst_addr, u32 size, u32 filled_descs_num) From f221033f5c24659dc6ad7e5cf18fb1b075f4a8be Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Wed, 13 Mar 2024 14:40:31 -0700 Subject: [PATCH 087/606] dmaengine: idxd: Fix oops during rmmod on single-CPU platforms During the removal of the idxd driver, registered offline callback is invoked as part of the clean up process. However, on systems with only one CPU online, no valid target is available to migrate the perf context, resulting in a kernel oops: BUG: unable to handle page fault for address: 000000000002a2b8 #PF: supervisor write access in kernel mode #PF: error_code(0x0002) - not-present page PGD 1470e1067 P4D 0 Oops: 0002 [#1] PREEMPT SMP NOPTI CPU: 0 PID: 20 Comm: cpuhp/0 Not tainted 6.8.0-rc6-dsa+ #57 Hardware name: Intel Corporation AvenueCity/AvenueCity, BIOS BHSDCRB1.86B.2492.D03.2307181620 07/18/2023 RIP: 0010:mutex_lock+0x2e/0x50 ... Call Trace: __die+0x24/0x70 page_fault_oops+0x82/0x160 do_user_addr_fault+0x65/0x6b0 __pfx___rdmsr_safe_on_cpu+0x10/0x10 exc_page_fault+0x7d/0x170 asm_exc_page_fault+0x26/0x30 mutex_lock+0x2e/0x50 mutex_lock+0x1e/0x50 perf_pmu_migrate_context+0x87/0x1f0 perf_event_cpu_offline+0x76/0x90 [idxd] cpuhp_invoke_callback+0xa2/0x4f0 __pfx_perf_event_cpu_offline+0x10/0x10 [idxd] cpuhp_thread_fun+0x98/0x150 smpboot_thread_fn+0x27/0x260 smpboot_thread_fn+0x1af/0x260 __pfx_smpboot_thread_fn+0x10/0x10 kthread+0x103/0x140 __pfx_kthread+0x10/0x10 ret_from_fork+0x31/0x50 __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1b/0x30 Fix the issue by preventing the migration of the perf context to an invalid target. Fixes: 81dd4d4d6178 ("dmaengine: idxd: Add IDXD performance monitor support") Reported-by: Terrence Xu Tested-by: Terrence Xu Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20240313214031.1658045-1-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/perfmon.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/dma/idxd/perfmon.c b/drivers/dma/idxd/perfmon.c index fdda6d6042629..5e94247e1ea70 100644 --- a/drivers/dma/idxd/perfmon.c +++ b/drivers/dma/idxd/perfmon.c @@ -528,14 +528,11 @@ static int perf_event_cpu_offline(unsigned int cpu, struct hlist_node *node) return 0; target = cpumask_any_but(cpu_online_mask, cpu); - /* migrate events if there is a valid target */ - if (target < nr_cpu_ids) + if (target < nr_cpu_ids) { cpumask_set_cpu(target, &perfmon_dsa_cpu_mask); - else - target = -1; - - perf_pmu_migrate_context(&idxd_pmu->pmu, cpu, target); + perf_pmu_migrate_context(&idxd_pmu->pmu, cpu, target); + } return 0; } From 6648e613226e18897231ab5e42ffc29e63fa3365 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Thu, 4 Apr 2024 10:10:01 +0800 Subject: [PATCH 088/606] bpf, skmsg: Fix NULL pointer dereference in sk_psock_skb_ingress_enqueue Fix NULL pointer data-races in sk_psock_skb_ingress_enqueue() which syzbot reported [1]. [1] BUG: KCSAN: data-race in sk_psock_drop / sk_psock_skb_ingress_enqueue write to 0xffff88814b3278b8 of 8 bytes by task 10724 on cpu 1: sk_psock_stop_verdict net/core/skmsg.c:1257 [inline] sk_psock_drop+0x13e/0x1f0 net/core/skmsg.c:843 sk_psock_put include/linux/skmsg.h:459 [inline] sock_map_close+0x1a7/0x260 net/core/sock_map.c:1648 unix_release+0x4b/0x80 net/unix/af_unix.c:1048 __sock_release net/socket.c:659 [inline] sock_close+0x68/0x150 net/socket.c:1421 __fput+0x2c1/0x660 fs/file_table.c:422 __fput_sync+0x44/0x60 fs/file_table.c:507 __do_sys_close fs/open.c:1556 [inline] __se_sys_close+0x101/0x1b0 fs/open.c:1541 __x64_sys_close+0x1f/0x30 fs/open.c:1541 do_syscall_64+0xd3/0x1d0 entry_SYSCALL_64_after_hwframe+0x6d/0x75 read to 0xffff88814b3278b8 of 8 bytes by task 10713 on cpu 0: sk_psock_data_ready include/linux/skmsg.h:464 [inline] sk_psock_skb_ingress_enqueue+0x32d/0x390 net/core/skmsg.c:555 sk_psock_skb_ingress_self+0x185/0x1e0 net/core/skmsg.c:606 sk_psock_verdict_apply net/core/skmsg.c:1008 [inline] sk_psock_verdict_recv+0x3e4/0x4a0 net/core/skmsg.c:1202 unix_read_skb net/unix/af_unix.c:2546 [inline] unix_stream_read_skb+0x9e/0xf0 net/unix/af_unix.c:2682 sk_psock_verdict_data_ready+0x77/0x220 net/core/skmsg.c:1223 unix_stream_sendmsg+0x527/0x860 net/unix/af_unix.c:2339 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x140/0x180 net/socket.c:745 ____sys_sendmsg+0x312/0x410 net/socket.c:2584 ___sys_sendmsg net/socket.c:2638 [inline] __sys_sendmsg+0x1e9/0x280 net/socket.c:2667 __do_sys_sendmsg net/socket.c:2676 [inline] __se_sys_sendmsg net/socket.c:2674 [inline] __x64_sys_sendmsg+0x46/0x50 net/socket.c:2674 do_syscall_64+0xd3/0x1d0 entry_SYSCALL_64_after_hwframe+0x6d/0x75 value changed: 0xffffffff83d7feb0 -> 0x0000000000000000 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 10713 Comm: syz-executor.4 Tainted: G W 6.8.0-syzkaller-08951-gfe46a7dd189e #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/29/2024 Prior to this, commit 4cd12c6065df ("bpf, sockmap: Fix NULL pointer dereference in sk_psock_verdict_data_ready()") fixed one NULL pointer similarly due to no protection of saved_data_ready. Here is another different caller causing the same issue because of the same reason. So we should protect it with sk_callback_lock read lock because the writer side in the sk_psock_drop() uses "write_lock_bh(&sk->sk_callback_lock);". To avoid errors that could happen in future, I move those two pairs of lock into the sk_psock_data_ready(), which is suggested by John Fastabend. Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface") Reported-by: syzbot+aa8c8ec2538929f18f2d@syzkaller.appspotmail.com Signed-off-by: Jason Xing Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Closes: https://syzkaller.appspot.com/bug?extid=aa8c8ec2538929f18f2d Link: https://lore.kernel.org/all/20240329134037.92124-1-kerneljasonxing@gmail.com Link: https://lore.kernel.org/bpf/20240404021001.94815-1-kerneljasonxing@gmail.com --- include/linux/skmsg.h | 2 ++ net/core/skmsg.c | 5 +---- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index e65ec3fd27998..a509caf823d61 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -461,10 +461,12 @@ static inline void sk_psock_put(struct sock *sk, struct sk_psock *psock) static inline void sk_psock_data_ready(struct sock *sk, struct sk_psock *psock) { + read_lock_bh(&sk->sk_callback_lock); if (psock->saved_data_ready) psock->saved_data_ready(sk); else sk->sk_data_ready(sk); + read_unlock_bh(&sk->sk_callback_lock); } static inline void psock_set_prog(struct bpf_prog **pprog, diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 4d75ef9d24bfa..fd20aae30be23 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -1226,11 +1226,8 @@ static void sk_psock_verdict_data_ready(struct sock *sk) rcu_read_lock(); psock = sk_psock(sk); - if (psock) { - read_lock_bh(&sk->sk_callback_lock); + if (psock) sk_psock_data_ready(sk, psock); - read_unlock_bh(&sk->sk_callback_lock); - } rcu_read_unlock(); } } From 7a1625c1711b526a77cb9c3acc15dbba71896a40 Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Mon, 8 Apr 2024 10:18:40 +0200 Subject: [PATCH 089/606] ASoC: Intel: avs: Fix debug window description Recent changes addressed PAGE_SIZE ambiguity in 2/3 locations for struct avs_icl_memwnd2. The unaddressed one causes build errors when PAGE_SIZE != SZ_4K. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202404070100.i3t3Jf7d-lkp@intel.com/ Fixes: 275b583d047a ("ASoC: Intel: avs: ICL-based platforms support") Signed-off-by: Cezary Rojewski Link: https://msgid.link/r/20240408081840.1319431-1-cezary.rojewski@intel.com Signed-off-by: Mark Brown --- sound/soc/intel/avs/icl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/intel/avs/icl.c b/sound/soc/intel/avs/icl.c index 9d9921e1cd4de..d2554c8577326 100644 --- a/sound/soc/intel/avs/icl.c +++ b/sound/soc/intel/avs/icl.c @@ -64,7 +64,7 @@ struct avs_icl_memwnd2_desc { struct avs_icl_memwnd2 { union { struct avs_icl_memwnd2_desc slot_desc[AVS_ICL_MEMWND2_SLOTS_COUNT]; - u8 rsvd[PAGE_SIZE]; + u8 rsvd[SZ_4K]; }; u8 slot_array[AVS_ICL_MEMWND2_SLOTS_COUNT][PAGE_SIZE]; } __packed; From 2e93a29b48a017c777d4fcbfcc51aba4e6a90d38 Mon Sep 17 00:00:00 2001 From: Sameer Pujar Date: Fri, 5 Apr 2024 10:43:06 +0000 Subject: [PATCH 090/606] ASoC: tegra: Fix DSPK 16-bit playback DSPK configuration is wrong for 16-bit playback and this happens because the client config is always fixed at 24-bit in hw_params(). Fix this by updating the client config to 16-bit for the respective playback. Fixes: 327ef6470266 ("ASoC: tegra: Add Tegra186 based DSPK driver") Cc: stable@vger.kernel.org Signed-off-by: Sameer Pujar Acked-by: Thierry Reding Link: https://msgid.link/r/20240405104306.551036-1-spujar@nvidia.com Signed-off-by: Mark Brown --- sound/soc/tegra/tegra186_dspk.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sound/soc/tegra/tegra186_dspk.c b/sound/soc/tegra/tegra186_dspk.c index aa37c4ab0adbd..21cd41fec7a9c 100644 --- a/sound/soc/tegra/tegra186_dspk.c +++ b/sound/soc/tegra/tegra186_dspk.c @@ -1,8 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only +// SPDX-FileCopyrightText: Copyright (c) 2020-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // tegra186_dspk.c - Tegra186 DSPK driver -// -// Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved. #include #include @@ -241,14 +240,14 @@ static int tegra186_dspk_hw_params(struct snd_pcm_substream *substream, return -EINVAL; } - cif_conf.client_bits = TEGRA_ACIF_BITS_24; - switch (params_format(params)) { case SNDRV_PCM_FORMAT_S16_LE: cif_conf.audio_bits = TEGRA_ACIF_BITS_16; + cif_conf.client_bits = TEGRA_ACIF_BITS_16; break; case SNDRV_PCM_FORMAT_S32_LE: cif_conf.audio_bits = TEGRA_ACIF_BITS_32; + cif_conf.client_bits = TEGRA_ACIF_BITS_24; break; default: dev_err(dev, "unsupported format!\n"); From e50729d742ec364895f1c389c32315984a987aa5 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 7 Apr 2024 21:15:59 +0200 Subject: [PATCH 091/606] ASoC: Intel: bytcr_rt5640: Apply Asus T100TA quirk to Asus T100TAM too The Asus T100TA quirk has been using an exact match on a product-name of "T100TA" but there are also T100TAM variants with a slightly higher clocked CPU and a metal backside which need the same quirk. Sort the existing T100TA (stereo speakers) below the more specific T100TAF (mono speaker) quirk and switch from exact matching to substring matching so that the T100TA quirk will also match on the T100TAM models. Signed-off-by: Hans de Goede Link: https://msgid.link/r/20240407191559.21596-1-hdegoede@redhat.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/bytcr_rt5640.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c index 05f38d1f7d824..b41a1147f1c34 100644 --- a/sound/soc/intel/boards/bytcr_rt5640.c +++ b/sound/soc/intel/boards/bytcr_rt5640.c @@ -636,28 +636,30 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = { BYT_RT5640_USE_AMCR0F28), }, { + /* Asus T100TAF, unlike other T100TA* models this one has a mono speaker */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T100TA"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T100TAF"), }, .driver_data = (void *)(BYT_RT5640_IN1_MAP | BYT_RT5640_JD_SRC_JD2_IN4N | BYT_RT5640_OVCD_TH_2000UA | BYT_RT5640_OVCD_SF_0P75 | + BYT_RT5640_MONO_SPEAKER | + BYT_RT5640_DIFF_MIC | + BYT_RT5640_SSP0_AIF2 | BYT_RT5640_MCLK_EN), }, { + /* Asus T100TA and T100TAM, must come after T100TAF (mono spk) match */ .matches = { - DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T100TAF"), + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "T100TA"), }, .driver_data = (void *)(BYT_RT5640_IN1_MAP | BYT_RT5640_JD_SRC_JD2_IN4N | BYT_RT5640_OVCD_TH_2000UA | BYT_RT5640_OVCD_SF_0P75 | - BYT_RT5640_MONO_SPEAKER | - BYT_RT5640_DIFF_MIC | - BYT_RT5640_SSP0_AIF2 | BYT_RT5640_MCLK_EN), }, { From 7ab681ddedd4b6dd2b047c74af95221c5f827e1d Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Sun, 7 Apr 2024 10:35:21 +0300 Subject: [PATCH 092/606] regulator: irq_helpers: duplicate IRQ name The regulator IRQ helper requires caller to provide pointer to IRQ name which is kept in memory by caller. All other data passed to the helper in the regulator_irq_desc structure is copied. This can cause some confusion and unnecessary complexity. Make the regulator_irq_helper() to copy also the provided IRQ name information so caller can discard the name after the call to regulator_irq_helper() completes. Signed-off-by: Matti Vaittinen Link: https://msgid.link/r/ZhJMuUYwaZbBXFGP@drtxq0yyyyyyyyyyyyydy-3.rev.dnainternet.fi Signed-off-by: Mark Brown --- drivers/regulator/irq_helpers.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/regulator/irq_helpers.c b/drivers/regulator/irq_helpers.c index fe7ae0f3f46af..5ab1a0befe12f 100644 --- a/drivers/regulator/irq_helpers.c +++ b/drivers/regulator/irq_helpers.c @@ -352,6 +352,9 @@ void *regulator_irq_helper(struct device *dev, h->irq = irq; h->desc = *d; + h->desc.name = devm_kstrdup(dev, d->name, GFP_KERNEL); + if (!h->desc.name) + return ERR_PTR(-ENOMEM); ret = init_rdev_state(dev, h, rdev, common_errs, per_rdev_errs, rdev_amount); From 70ee853eec5693fefd8348a2b049d9cb83362e58 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 8 Apr 2024 11:18:00 +0100 Subject: [PATCH 093/606] regmap: Add regmap_read_bypassed() Add a regmap_read_bypassed() to allow reads from the hardware registers while the regmap is in cache-only mode. A typical use for this is to keep the cache in cache-only mode until the hardware has reached a valid state, but one or more status registers must be polled to determine when this state is reached. For example, firmware download on the cs35l56 can take several seconds if there are multiple amps sharing limited bus bandwidth. This is too long to block in probe() so it is done as a background task. The device must be soft-reset to reboot the firmware and during this time the registers are not accessible, so the cache should be in cache-only. But the driver must poll a register to detect when reboot has completed. Signed-off-by: Richard Fitzgerald Fixes: 8a731fd37f8b ("ASoC: cs35l56: Move utility functions to shared file") Link: https://msgid.link/r/20240408101803.43183-2-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 37 ++++++++++++++++++++++++++++++++++++ include/linux/regmap.h | 8 ++++++++ 2 files changed, 45 insertions(+) diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 5cb425f6f02d4..0a34dd3c4f38d 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -2838,6 +2838,43 @@ int regmap_read(struct regmap *map, unsigned int reg, unsigned int *val) } EXPORT_SYMBOL_GPL(regmap_read); +/** + * regmap_read_bypassed() - Read a value from a single register direct + * from the device, bypassing the cache + * + * @map: Register map to read from + * @reg: Register to be read from + * @val: Pointer to store read value + * + * A value of zero will be returned on success, a negative errno will + * be returned in error cases. + */ +int regmap_read_bypassed(struct regmap *map, unsigned int reg, unsigned int *val) +{ + int ret; + bool bypass, cache_only; + + if (!IS_ALIGNED(reg, map->reg_stride)) + return -EINVAL; + + map->lock(map->lock_arg); + + bypass = map->cache_bypass; + cache_only = map->cache_only; + map->cache_bypass = true; + map->cache_only = false; + + ret = _regmap_read(map, reg, val); + + map->cache_bypass = bypass; + map->cache_only = cache_only; + + map->unlock(map->lock_arg); + + return ret; +} +EXPORT_SYMBOL_GPL(regmap_read_bypassed); + /** * regmap_raw_read() - Read raw data from the device * diff --git a/include/linux/regmap.h b/include/linux/regmap.h index b743241cfb7ca..d470303b1bbbb 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -1230,6 +1230,7 @@ int regmap_multi_reg_write_bypassed(struct regmap *map, int regmap_raw_write_async(struct regmap *map, unsigned int reg, const void *val, size_t val_len); int regmap_read(struct regmap *map, unsigned int reg, unsigned int *val); +int regmap_read_bypassed(struct regmap *map, unsigned int reg, unsigned int *val); int regmap_raw_read(struct regmap *map, unsigned int reg, void *val, size_t val_len); int regmap_noinc_read(struct regmap *map, unsigned int reg, @@ -1739,6 +1740,13 @@ static inline int regmap_read(struct regmap *map, unsigned int reg, return -EINVAL; } +static inline int regmap_read_bypassed(struct regmap *map, unsigned int reg, + unsigned int *val) +{ + WARN_ONCE(1, "regmap API is disabled"); + return -EINVAL; +} + static inline int regmap_raw_read(struct regmap *map, unsigned int reg, void *val, size_t val_len) { From 73580ec607dfe125b140ed30c7c0a074db78c558 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 8 Apr 2024 11:18:01 +0100 Subject: [PATCH 094/606] ALSA: hda: cs35l56: Exit cache-only after cs35l56_wait_for_firmware_boot() Adds calls to disable regmap cache-only after a successful return from cs35l56_wait_for_firmware_boot(). This is to prepare for a change in the shared ASoC module that will leave regmap in cache-only mode after cs35l56_system_reset(). This is to prevent register accesses going to the hardware while it is rebooting. Signed-off-by: Richard Fitzgerald Link: https://msgid.link/r/20240408101803.43183-3-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/pci/hda/cs35l56_hda.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/pci/hda/cs35l56_hda.c b/sound/pci/hda/cs35l56_hda.c index 1a3f84599cb58..558c1f38fe971 100644 --- a/sound/pci/hda/cs35l56_hda.c +++ b/sound/pci/hda/cs35l56_hda.c @@ -644,6 +644,8 @@ static int cs35l56_hda_fw_load(struct cs35l56_hda *cs35l56) ret = cs35l56_wait_for_firmware_boot(&cs35l56->base); if (ret) goto err_powered_up; + + regcache_cache_only(cs35l56->base.regmap, false); } /* Disable auto-hibernate so that runtime_pm has control */ @@ -1002,6 +1004,8 @@ int cs35l56_hda_common_probe(struct cs35l56_hda *cs35l56, int hid, int id) if (ret) goto err; + regcache_cache_only(cs35l56->base.regmap, false); + ret = cs35l56_set_patch(&cs35l56->base); if (ret) goto err; From d4884fd48a44f3d7f0d4d7399b663b69c000233d Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 8 Apr 2024 11:18:02 +0100 Subject: [PATCH 095/606] ASoC: cs35l56: Fix unintended bus access while resetting amp Use the new regmap_read_bypassed() so that the regmap can be left in cache-only mode while it is booting, but the driver can still read boot-status and chip-id information during this time. This fixes race conditions where some writes could be issued to the silicon while it is still rebooting, before the driver has determined that the boot is complete. This is typically prevented by putting regmap into cache-only until the hardware is ready. But this assumes that the driver does not need to access device registers to determine when it is "ready". For cs35l56 this involves polling a register and the original implementation relied on having special handlers to block racing callbacks until dsp_work() is complete. However, some cases were missed, most notably the ASP DAI functions. The regmap_read_bypassed() function allows the fix for this to be simplified to putting regmap into cache-only during the reset. The initial boot stages (poll HALO_STATE and read the chip ID) are all done bypassed. Only when the amp is seen to be booted is the cache-only revoked. Changes are: - cs35l56_system_reset() now leaves the regmap in cache-only status. - cs35l56_wait_for_firmware_boot() polls using regmap_read_bypassed(). - cs35l56_init() revokes cache-only either via cs35l56_hw_init() or when firmware has rebooted after a soft reset. - cs35l56_hw_init() exits cache-only after it has determined that the amp has booted. - cs35l56_sdw_init() doesn't disable cache-only, since this must be deferred to cs35l56_init(). - cs35l56_runtime_resume_common() waits for firmware boot before exiting cache-only. These changes cover three situations where the registers are not accessible: 1) SoundWire first-time enumeration. The regmap is kept in cache-only until the chip is fully booted. The original code had to exit cache-only to read chip status in cs35l56_init() and cs35l56_hw_init() but this is now deferred to after the firmware has rebooted. In this case cs35l56_sdw_probe() leaves regmap in cache-only (unchanged behaviour) and cs35l56_hw_init() exits cache-only after the firmware is booted and the chip identified. 2) Soft reset during first-time initialization. cs35l56_init() calls cs35l56_system_reset(), which puts regmap into cache-only. On I2C/SPI cs35l56_init() then flows through to call cs35l56_wait_for_firmware_boot() and exit cache-only. On SoundWire the re-enumeration will enter cs35l56_init() again, which then drops down to call cs35l56_wait_for_firmware_boot() and exit cache-only. 3) Soft reset after firmware download. dsp_work() calls cs35l56_system_reset(), which puts regmap into cache-only. After this the flow is the same as (2). Signed-off-by: Richard Fitzgerald Fixes: 8a731fd37f8b ("ASoC: cs35l56: Move utility functions to shared file") Link: https://msgid.link/r/20240408101803.43183-4-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l56-sdw.c | 2 -- sound/soc/codecs/cs35l56-shared.c | 20 ++++++++++++-------- sound/soc/codecs/cs35l56.c | 2 ++ 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/sound/soc/codecs/cs35l56-sdw.c b/sound/soc/codecs/cs35l56-sdw.c index 14a5f86019aad..70ff55c1517fe 100644 --- a/sound/soc/codecs/cs35l56-sdw.c +++ b/sound/soc/codecs/cs35l56-sdw.c @@ -188,8 +188,6 @@ static void cs35l56_sdw_init(struct sdw_slave *peripheral) goto out; } - regcache_cache_only(cs35l56->base.regmap, false); - ret = cs35l56_init(cs35l56); if (ret < 0) { regcache_cache_only(cs35l56->base.regmap, true); diff --git a/sound/soc/codecs/cs35l56-shared.c b/sound/soc/codecs/cs35l56-shared.c index 08cac58e3ab22..a83317db75edc 100644 --- a/sound/soc/codecs/cs35l56-shared.c +++ b/sound/soc/codecs/cs35l56-shared.c @@ -307,10 +307,10 @@ int cs35l56_wait_for_firmware_boot(struct cs35l56_base *cs35l56_base) reg = CS35L56_DSP1_HALO_STATE; /* - * This can't be a regmap_read_poll_timeout() because cs35l56 will NAK - * I2C until it has booted which would terminate the poll + * The regmap must remain in cache-only until the chip has + * booted, so use a bypassed read of the status register. */ - poll_ret = read_poll_timeout(regmap_read, read_ret, + poll_ret = read_poll_timeout(regmap_read_bypassed, read_ret, (val < 0xFFFF) && (val >= CS35L56_HALO_STATE_BOOT_DONE), CS35L56_HALO_STATE_POLL_US, CS35L56_HALO_STATE_TIMEOUT_US, @@ -362,7 +362,8 @@ void cs35l56_system_reset(struct cs35l56_base *cs35l56_base, bool is_soundwire) return; cs35l56_wait_control_port_ready(); - regcache_cache_only(cs35l56_base->regmap, false); + + /* Leave in cache-only. This will be revoked when the chip has rebooted. */ } EXPORT_SYMBOL_NS_GPL(cs35l56_system_reset, SND_SOC_CS35L56_SHARED); @@ -577,14 +578,14 @@ int cs35l56_runtime_resume_common(struct cs35l56_base *cs35l56_base, bool is_sou cs35l56_issue_wake_event(cs35l56_base); out_sync: - regcache_cache_only(cs35l56_base->regmap, false); - ret = cs35l56_wait_for_firmware_boot(cs35l56_base); if (ret) { dev_err(cs35l56_base->dev, "Hibernate wake failed: %d\n", ret); goto err; } + regcache_cache_only(cs35l56_base->regmap, false); + ret = cs35l56_mbox_send(cs35l56_base, CS35L56_MBOX_CMD_PREVENT_AUTO_HIBERNATE); if (ret) goto err; @@ -757,7 +758,7 @@ int cs35l56_hw_init(struct cs35l56_base *cs35l56_base) * devices so the REVID needs to be determined before waiting for the * firmware to boot. */ - ret = regmap_read(cs35l56_base->regmap, CS35L56_REVID, &revid); + ret = regmap_read_bypassed(cs35l56_base->regmap, CS35L56_REVID, &revid); if (ret < 0) { dev_err(cs35l56_base->dev, "Get Revision ID failed\n"); return ret; @@ -768,7 +769,7 @@ int cs35l56_hw_init(struct cs35l56_base *cs35l56_base) if (ret) return ret; - ret = regmap_read(cs35l56_base->regmap, CS35L56_DEVID, &devid); + ret = regmap_read_bypassed(cs35l56_base->regmap, CS35L56_DEVID, &devid); if (ret < 0) { dev_err(cs35l56_base->dev, "Get Device ID failed\n"); return ret; @@ -787,6 +788,9 @@ int cs35l56_hw_init(struct cs35l56_base *cs35l56_base) cs35l56_base->type = devid & 0xFF; + /* Silicon is now identified and booted so exit cache-only */ + regcache_cache_only(cs35l56_base->regmap, false); + ret = regmap_read(cs35l56_base->regmap, CS35L56_DSP_RESTRICT_STS1, &secured); if (ret) { dev_err(cs35l56_base->dev, "Get Secure status failed\n"); diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index 8d2f021fb3628..5a4e0e479414b 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -1531,6 +1531,8 @@ int cs35l56_init(struct cs35l56_private *cs35l56) return ret; dev_dbg(cs35l56->base.dev, "Firmware rebooted after soft reset\n"); + + regcache_cache_only(cs35l56->base.regmap, false); } /* Disable auto-hibernate so that runtime_pm has control */ From dfd2ffb373999630a14d7ff614440f1c2fcc704c Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 8 Apr 2024 11:18:03 +0100 Subject: [PATCH 096/606] ASoC: cs35l56: Prevent overwriting firmware ASP config Only populate the ASP1 config registers in the regmap cache if the ASP DAI is used. This prevents regcache_sync() from overwriting these registers with their defaults when the firmware owns control of these registers. On a SoundWire system the ASP could be owned by the firmware to share reference audio with the firmware on other cs35l56. Or it can be used as a normal codec-codec interface owned by the driver. The driver must not overwrite the registers if the firmware has control of them. The original implementation for this in commit 07f7d6e7a124 ("ASoC: cs35l56: Fix for initializing ASP1 mixer registers") was to still provide defaults for these registers, assuming that if they were never reconfigured from defaults then regcache_sync() would not write them out because they are not dirty. Unfortunately regcache_sync() is not that smart. If the chip has not reset (so the driver has not called regcache_mark_dirty()) a regcache_sync() could write out registers that are not dirty. To avoid accidental overwriting of the ASP registers, they are removed from the table of defaults and instead are populated with defaults only if one of the ASP DAI configuration functions is called. So if the DAI has never been configured, the firmware is assumed to have ownership of these registers, and the regmap cache will not contain any entries for them. Signed-off-by: Richard Fitzgerald Fixes: 07f7d6e7a124 ("ASoC: cs35l56: Fix for initializing ASP1 mixer registers") Link: https://msgid.link/r/20240408101803.43183-5-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/cs35l56.h | 2 + sound/soc/codecs/cs35l56-shared.c | 63 ++++++++++++++++++++----------- sound/soc/codecs/cs35l56.c | 24 +++++++++++- 3 files changed, 67 insertions(+), 22 deletions(-) diff --git a/include/sound/cs35l56.h b/include/sound/cs35l56.h index e0629699b5633..1a3c6f66f6205 100644 --- a/include/sound/cs35l56.h +++ b/include/sound/cs35l56.h @@ -267,6 +267,7 @@ struct cs35l56_base { bool fw_patched; bool secured; bool can_hibernate; + bool fw_owns_asp1; bool cal_data_valid; s8 cal_index; struct cirrus_amp_cal_data cal_data; @@ -283,6 +284,7 @@ extern const char * const cs35l56_tx_input_texts[CS35L56_NUM_INPUT_SRC]; extern const unsigned int cs35l56_tx_input_values[CS35L56_NUM_INPUT_SRC]; int cs35l56_set_patch(struct cs35l56_base *cs35l56_base); +int cs35l56_init_asp1_regs_for_driver_control(struct cs35l56_base *cs35l56_base); int cs35l56_force_sync_asp1_registers_from_cache(struct cs35l56_base *cs35l56_base); int cs35l56_mbox_send(struct cs35l56_base *cs35l56_base, unsigned int command); int cs35l56_firmware_shutdown(struct cs35l56_base *cs35l56_base); diff --git a/sound/soc/codecs/cs35l56-shared.c b/sound/soc/codecs/cs35l56-shared.c index a83317db75edc..ec1d95e570615 100644 --- a/sound/soc/codecs/cs35l56-shared.c +++ b/sound/soc/codecs/cs35l56-shared.c @@ -40,16 +40,11 @@ EXPORT_SYMBOL_NS_GPL(cs35l56_set_patch, SND_SOC_CS35L56_SHARED); static const struct reg_default cs35l56_reg_defaults[] = { /* no defaults for OTP_MEM - first read populates cache */ - { CS35L56_ASP1_ENABLES1, 0x00000000 }, - { CS35L56_ASP1_CONTROL1, 0x00000028 }, - { CS35L56_ASP1_CONTROL2, 0x18180200 }, - { CS35L56_ASP1_CONTROL3, 0x00000002 }, - { CS35L56_ASP1_FRAME_CONTROL1, 0x03020100 }, - { CS35L56_ASP1_FRAME_CONTROL5, 0x00020100 }, - { CS35L56_ASP1_DATA_CONTROL1, 0x00000018 }, - { CS35L56_ASP1_DATA_CONTROL5, 0x00000018 }, - - /* no defaults for ASP1TX mixer */ + /* + * No defaults for ASP1 control or ASP1TX mixer. See + * cs35l56_populate_asp1_register_defaults() and + * cs35l56_sync_asp1_mixer_widgets_with_firmware(). + */ { CS35L56_SWIRE_DP3_CH1_INPUT, 0x00000018 }, { CS35L56_SWIRE_DP3_CH2_INPUT, 0x00000019 }, @@ -210,6 +205,36 @@ static bool cs35l56_volatile_reg(struct device *dev, unsigned int reg) } } +static const struct reg_sequence cs35l56_asp1_defaults[] = { + REG_SEQ0(CS35L56_ASP1_ENABLES1, 0x00000000), + REG_SEQ0(CS35L56_ASP1_CONTROL1, 0x00000028), + REG_SEQ0(CS35L56_ASP1_CONTROL2, 0x18180200), + REG_SEQ0(CS35L56_ASP1_CONTROL3, 0x00000002), + REG_SEQ0(CS35L56_ASP1_FRAME_CONTROL1, 0x03020100), + REG_SEQ0(CS35L56_ASP1_FRAME_CONTROL5, 0x00020100), + REG_SEQ0(CS35L56_ASP1_DATA_CONTROL1, 0x00000018), + REG_SEQ0(CS35L56_ASP1_DATA_CONTROL5, 0x00000018), +}; + +/* + * The firmware can have control of the ASP so we don't provide regmap + * with defaults for these registers, to prevent a regcache_sync() from + * overwriting the firmware settings. But if the machine driver hooks up + * the ASP it means the driver is taking control of the ASP, so then the + * registers are populated with the defaults. + */ +int cs35l56_init_asp1_regs_for_driver_control(struct cs35l56_base *cs35l56_base) +{ + if (!cs35l56_base->fw_owns_asp1) + return 0; + + cs35l56_base->fw_owns_asp1 = false; + + return regmap_multi_reg_write(cs35l56_base->regmap, cs35l56_asp1_defaults, + ARRAY_SIZE(cs35l56_asp1_defaults)); +} +EXPORT_SYMBOL_NS_GPL(cs35l56_init_asp1_regs_for_driver_control, SND_SOC_CS35L56_SHARED); + /* * The firmware boot sequence can overwrite the ASP1 config registers so that * they don't match regmap's view of their values. Rewrite the values from the @@ -217,19 +242,15 @@ static bool cs35l56_volatile_reg(struct device *dev, unsigned int reg) */ int cs35l56_force_sync_asp1_registers_from_cache(struct cs35l56_base *cs35l56_base) { - struct reg_sequence asp1_regs[] = { - { .reg = CS35L56_ASP1_ENABLES1 }, - { .reg = CS35L56_ASP1_CONTROL1 }, - { .reg = CS35L56_ASP1_CONTROL2 }, - { .reg = CS35L56_ASP1_CONTROL3 }, - { .reg = CS35L56_ASP1_FRAME_CONTROL1 }, - { .reg = CS35L56_ASP1_FRAME_CONTROL5 }, - { .reg = CS35L56_ASP1_DATA_CONTROL1 }, - { .reg = CS35L56_ASP1_DATA_CONTROL5 }, - }; + struct reg_sequence asp1_regs[ARRAY_SIZE(cs35l56_asp1_defaults)]; int i, ret; - /* Read values from regmap cache into a write sequence */ + if (cs35l56_base->fw_owns_asp1) + return 0; + + memcpy(asp1_regs, cs35l56_asp1_defaults, sizeof(asp1_regs)); + + /* Read current values from regmap cache into the write sequence */ for (i = 0; i < ARRAY_SIZE(asp1_regs); ++i) { ret = regmap_read(cs35l56_base->regmap, asp1_regs[i].reg, &asp1_regs[i].def); if (ret) diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index 5a4e0e479414b..6331b8c6136ea 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -454,9 +454,14 @@ static int cs35l56_asp_dai_set_fmt(struct snd_soc_dai *codec_dai, unsigned int f { struct cs35l56_private *cs35l56 = snd_soc_component_get_drvdata(codec_dai->component); unsigned int val; + int ret; dev_dbg(cs35l56->base.dev, "%s: %#x\n", __func__, fmt); + ret = cs35l56_init_asp1_regs_for_driver_control(&cs35l56->base); + if (ret) + return ret; + switch (fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) { case SND_SOC_DAIFMT_CBC_CFC: break; @@ -530,6 +535,11 @@ static int cs35l56_asp_dai_set_tdm_slot(struct snd_soc_dai *dai, unsigned int tx unsigned int rx_mask, int slots, int slot_width) { struct cs35l56_private *cs35l56 = snd_soc_component_get_drvdata(dai->component); + int ret; + + ret = cs35l56_init_asp1_regs_for_driver_control(&cs35l56->base); + if (ret) + return ret; if ((slots == 0) || (slot_width == 0)) { dev_dbg(cs35l56->base.dev, "tdm config cleared\n"); @@ -578,6 +588,11 @@ static int cs35l56_asp_dai_hw_params(struct snd_pcm_substream *substream, struct cs35l56_private *cs35l56 = snd_soc_component_get_drvdata(dai->component); unsigned int rate = params_rate(params); u8 asp_width, asp_wl; + int ret; + + ret = cs35l56_init_asp1_regs_for_driver_control(&cs35l56->base); + if (ret) + return ret; asp_wl = params_width(params); if (cs35l56->asp_slot_width) @@ -634,7 +649,11 @@ static int cs35l56_asp_dai_set_sysclk(struct snd_soc_dai *dai, int clk_id, unsigned int freq, int dir) { struct cs35l56_private *cs35l56 = snd_soc_component_get_drvdata(dai->component); - int freq_id; + int freq_id, ret; + + ret = cs35l56_init_asp1_regs_for_driver_control(&cs35l56->base); + if (ret) + return ret; if (freq == 0) { cs35l56->sysclk_set = false; @@ -1403,6 +1422,9 @@ int cs35l56_common_probe(struct cs35l56_private *cs35l56) cs35l56->base.cal_index = -1; cs35l56->speaker_id = -ENOENT; + /* Assume that the firmware owns ASP1 until we know different */ + cs35l56->base.fw_owns_asp1 = true; + dev_set_drvdata(cs35l56->base.dev, cs35l56); cs35l56_fill_supply_names(cs35l56->supplies); From fed6d9a8e6a60ecf6506d0ea004040fbaa109927 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 7 Apr 2024 19:50:48 +0200 Subject: [PATCH 097/606] pinctrl: baytrail: Fix selecting gpio pinctrl state For all the "score" pin-groups all the intel_pingroup-s to select the non GPIO function are re-used for byt_score_gpio_groups[]. But this is incorrect since a pin-group includes the mode setting, which for the non GPIO functions generally is 1, where as to select the GPIO function mode must be set to 0. So the GPIO function needs separate intel_pingroup-s with their own mode value of 0. Add a new PIN_GROUP_GPIO macro which adds a foo_gpio entry to each pin-group defined this way and update byt_score_gpio_groups[] to point to the new foo_gpio entries. The "sus" usb_oc_grp usb_ulpi_grp and pcu_spi_grp pin-groups are special because these have a non 0 mode value to select the GPIO functions and these already have matching foo_gpio pin-groups, leave these are unchanged. The pmu_clk "sus" groups added in commit 2f46d7f7e959 ("pinctrl: baytrail: Add pinconf group + function for the pmu_clk") do need to use the new PIN_GROUP_GPIO macro. Fixes: 2f46d7f7e959 ("pinctrl: baytrail: Add pinconf group + function for the pmu_clk") Signed-off-by: Hans de Goede Signed-off-by: Andy Shevchenko --- drivers/pinctrl/intel/pinctrl-baytrail.c | 74 ++++++++++++------------ drivers/pinctrl/intel/pinctrl-intel.h | 4 ++ 2 files changed, 42 insertions(+), 36 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-baytrail.c b/drivers/pinctrl/intel/pinctrl-baytrail.c index ac97724c59bae..1edb6041fb424 100644 --- a/drivers/pinctrl/intel/pinctrl-baytrail.c +++ b/drivers/pinctrl/intel/pinctrl-baytrail.c @@ -278,33 +278,33 @@ static const unsigned int byt_score_plt_clk5_pins[] = { 101 }; static const unsigned int byt_score_smbus_pins[] = { 51, 52, 53 }; static const struct intel_pingroup byt_score_groups[] = { - PIN_GROUP("uart1_grp", byt_score_uart1_pins, 1), - PIN_GROUP("uart2_grp", byt_score_uart2_pins, 1), - PIN_GROUP("pwm0_grp", byt_score_pwm0_pins, 1), - PIN_GROUP("pwm1_grp", byt_score_pwm1_pins, 1), - PIN_GROUP("ssp2_grp", byt_score_ssp2_pins, 1), - PIN_GROUP("sio_spi_grp", byt_score_sio_spi_pins, 1), - PIN_GROUP("i2c5_grp", byt_score_i2c5_pins, 1), - PIN_GROUP("i2c6_grp", byt_score_i2c6_pins, 1), - PIN_GROUP("i2c4_grp", byt_score_i2c4_pins, 1), - PIN_GROUP("i2c3_grp", byt_score_i2c3_pins, 1), - PIN_GROUP("i2c2_grp", byt_score_i2c2_pins, 1), - PIN_GROUP("i2c1_grp", byt_score_i2c1_pins, 1), - PIN_GROUP("i2c0_grp", byt_score_i2c0_pins, 1), - PIN_GROUP("ssp0_grp", byt_score_ssp0_pins, 1), - PIN_GROUP("ssp1_grp", byt_score_ssp1_pins, 1), - PIN_GROUP("sdcard_grp", byt_score_sdcard_pins, byt_score_sdcard_mux_values), - PIN_GROUP("sdio_grp", byt_score_sdio_pins, 1), - PIN_GROUP("emmc_grp", byt_score_emmc_pins, 1), - PIN_GROUP("lpc_grp", byt_score_ilb_lpc_pins, 1), - PIN_GROUP("sata_grp", byt_score_sata_pins, 1), - PIN_GROUP("plt_clk0_grp", byt_score_plt_clk0_pins, 1), - PIN_GROUP("plt_clk1_grp", byt_score_plt_clk1_pins, 1), - PIN_GROUP("plt_clk2_grp", byt_score_plt_clk2_pins, 1), - PIN_GROUP("plt_clk3_grp", byt_score_plt_clk3_pins, 1), - PIN_GROUP("plt_clk4_grp", byt_score_plt_clk4_pins, 1), - PIN_GROUP("plt_clk5_grp", byt_score_plt_clk5_pins, 1), - PIN_GROUP("smbus_grp", byt_score_smbus_pins, 1), + PIN_GROUP_GPIO("uart1_grp", byt_score_uart1_pins, 1), + PIN_GROUP_GPIO("uart2_grp", byt_score_uart2_pins, 1), + PIN_GROUP_GPIO("pwm0_grp", byt_score_pwm0_pins, 1), + PIN_GROUP_GPIO("pwm1_grp", byt_score_pwm1_pins, 1), + PIN_GROUP_GPIO("ssp2_grp", byt_score_ssp2_pins, 1), + PIN_GROUP_GPIO("sio_spi_grp", byt_score_sio_spi_pins, 1), + PIN_GROUP_GPIO("i2c5_grp", byt_score_i2c5_pins, 1), + PIN_GROUP_GPIO("i2c6_grp", byt_score_i2c6_pins, 1), + PIN_GROUP_GPIO("i2c4_grp", byt_score_i2c4_pins, 1), + PIN_GROUP_GPIO("i2c3_grp", byt_score_i2c3_pins, 1), + PIN_GROUP_GPIO("i2c2_grp", byt_score_i2c2_pins, 1), + PIN_GROUP_GPIO("i2c1_grp", byt_score_i2c1_pins, 1), + PIN_GROUP_GPIO("i2c0_grp", byt_score_i2c0_pins, 1), + PIN_GROUP_GPIO("ssp0_grp", byt_score_ssp0_pins, 1), + PIN_GROUP_GPIO("ssp1_grp", byt_score_ssp1_pins, 1), + PIN_GROUP_GPIO("sdcard_grp", byt_score_sdcard_pins, byt_score_sdcard_mux_values), + PIN_GROUP_GPIO("sdio_grp", byt_score_sdio_pins, 1), + PIN_GROUP_GPIO("emmc_grp", byt_score_emmc_pins, 1), + PIN_GROUP_GPIO("lpc_grp", byt_score_ilb_lpc_pins, 1), + PIN_GROUP_GPIO("sata_grp", byt_score_sata_pins, 1), + PIN_GROUP_GPIO("plt_clk0_grp", byt_score_plt_clk0_pins, 1), + PIN_GROUP_GPIO("plt_clk1_grp", byt_score_plt_clk1_pins, 1), + PIN_GROUP_GPIO("plt_clk2_grp", byt_score_plt_clk2_pins, 1), + PIN_GROUP_GPIO("plt_clk3_grp", byt_score_plt_clk3_pins, 1), + PIN_GROUP_GPIO("plt_clk4_grp", byt_score_plt_clk4_pins, 1), + PIN_GROUP_GPIO("plt_clk5_grp", byt_score_plt_clk5_pins, 1), + PIN_GROUP_GPIO("smbus_grp", byt_score_smbus_pins, 1), }; static const char * const byt_score_uart_groups[] = { @@ -332,12 +332,14 @@ static const char * const byt_score_plt_clk_groups[] = { }; static const char * const byt_score_smbus_groups[] = { "smbus_grp" }; static const char * const byt_score_gpio_groups[] = { - "uart1_grp", "uart2_grp", "pwm0_grp", "pwm1_grp", "ssp0_grp", - "ssp1_grp", "ssp2_grp", "sio_spi_grp", "i2c0_grp", "i2c1_grp", - "i2c2_grp", "i2c3_grp", "i2c4_grp", "i2c5_grp", "i2c6_grp", - "sdcard_grp", "sdio_grp", "emmc_grp", "lpc_grp", "sata_grp", - "plt_clk0_grp", "plt_clk1_grp", "plt_clk2_grp", "plt_clk3_grp", - "plt_clk4_grp", "plt_clk5_grp", "smbus_grp", + "uart1_grp_gpio", "uart2_grp_gpio", "pwm0_grp_gpio", + "pwm1_grp_gpio", "ssp0_grp_gpio", "ssp1_grp_gpio", "ssp2_grp_gpio", + "sio_spi_grp_gpio", "i2c0_grp_gpio", "i2c1_grp_gpio", "i2c2_grp_gpio", + "i2c3_grp_gpio", "i2c4_grp_gpio", "i2c5_grp_gpio", "i2c6_grp_gpio", + "sdcard_grp_gpio", "sdio_grp_gpio", "emmc_grp_gpio", "lpc_grp_gpio", + "sata_grp_gpio", "plt_clk0_grp_gpio", "plt_clk1_grp_gpio", + "plt_clk2_grp_gpio", "plt_clk3_grp_gpio", "plt_clk4_grp_gpio", + "plt_clk5_grp_gpio", "smbus_grp_gpio", }; static const struct intel_function byt_score_functions[] = { @@ -456,8 +458,8 @@ static const struct intel_pingroup byt_sus_groups[] = { PIN_GROUP("usb_oc_grp_gpio", byt_sus_usb_over_current_pins, byt_sus_usb_over_current_gpio_mode_values), PIN_GROUP("usb_ulpi_grp_gpio", byt_sus_usb_ulpi_pins, byt_sus_usb_ulpi_gpio_mode_values), PIN_GROUP("pcu_spi_grp_gpio", byt_sus_pcu_spi_pins, byt_sus_pcu_spi_gpio_mode_values), - PIN_GROUP("pmu_clk1_grp", byt_sus_pmu_clk1_pins, 1), - PIN_GROUP("pmu_clk2_grp", byt_sus_pmu_clk2_pins, 1), + PIN_GROUP_GPIO("pmu_clk1_grp", byt_sus_pmu_clk1_pins, 1), + PIN_GROUP_GPIO("pmu_clk2_grp", byt_sus_pmu_clk2_pins, 1), }; static const char * const byt_sus_usb_groups[] = { @@ -469,7 +471,7 @@ static const char * const byt_sus_pmu_clk_groups[] = { }; static const char * const byt_sus_gpio_groups[] = { "usb_oc_grp_gpio", "usb_ulpi_grp_gpio", "pcu_spi_grp_gpio", - "pmu_clk1_grp", "pmu_clk2_grp", + "pmu_clk1_grp_gpio", "pmu_clk2_grp_gpio", }; static const struct intel_function byt_sus_functions[] = { diff --git a/drivers/pinctrl/intel/pinctrl-intel.h b/drivers/pinctrl/intel/pinctrl-intel.h index fde65e18cd145..6981e2fab93f3 100644 --- a/drivers/pinctrl/intel/pinctrl-intel.h +++ b/drivers/pinctrl/intel/pinctrl-intel.h @@ -179,6 +179,10 @@ struct intel_community { .modes = __builtin_choose_expr(__builtin_constant_p((m)), NULL, (m)), \ } +#define PIN_GROUP_GPIO(n, p, m) \ + PIN_GROUP(n, p, m), \ + PIN_GROUP(n "_gpio", p, 0) + #define FUNCTION(n, g) \ { \ .func = PINCTRL_PINFUNCTION((n), (g), ARRAY_SIZE(g)), \ From 5d10a157ebe02ac9b8abacfd529f8b045e8aa41b Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 7 Apr 2024 19:50:49 +0200 Subject: [PATCH 098/606] pinctrl: baytrail: Add pinconf group for uart3 GPIO_S0_SC57 / GPIO_S0_SC61 can be muxed to PCU_UART_TXD / PCU_UART_RXD, add a pinconf group for this. On Bay Trail board schematics using these pins as UART these are called UART3_TXD / UART3_RXD, name the pinconf group "uart3_grp" to be consistent with the schematics. Signed-off-by: Hans de Goede Signed-off-by: Andy Shevchenko --- drivers/pinctrl/intel/pinctrl-baytrail.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-baytrail.c b/drivers/pinctrl/intel/pinctrl-baytrail.c index 1edb6041fb424..4e87f5b875c0e 100644 --- a/drivers/pinctrl/intel/pinctrl-baytrail.c +++ b/drivers/pinctrl/intel/pinctrl-baytrail.c @@ -231,6 +231,7 @@ static const unsigned int byt_score_pins_map[BYT_NGPIO_SCORE] = { /* SCORE groups */ static const unsigned int byt_score_uart1_pins[] = { 70, 71, 72, 73 }; static const unsigned int byt_score_uart2_pins[] = { 74, 75, 76, 77 }; +static const unsigned int byt_score_uart3_pins[] = { 57, 61 }; static const unsigned int byt_score_pwm0_pins[] = { 94 }; static const unsigned int byt_score_pwm1_pins[] = { 95 }; @@ -280,6 +281,7 @@ static const unsigned int byt_score_smbus_pins[] = { 51, 52, 53 }; static const struct intel_pingroup byt_score_groups[] = { PIN_GROUP_GPIO("uart1_grp", byt_score_uart1_pins, 1), PIN_GROUP_GPIO("uart2_grp", byt_score_uart2_pins, 1), + PIN_GROUP_GPIO("uart3_grp", byt_score_uart3_pins, 1), PIN_GROUP_GPIO("pwm0_grp", byt_score_pwm0_pins, 1), PIN_GROUP_GPIO("pwm1_grp", byt_score_pwm1_pins, 1), PIN_GROUP_GPIO("ssp2_grp", byt_score_ssp2_pins, 1), @@ -308,7 +310,7 @@ static const struct intel_pingroup byt_score_groups[] = { }; static const char * const byt_score_uart_groups[] = { - "uart1_grp", "uart2_grp", + "uart1_grp", "uart2_grp", "uart3_grp", }; static const char * const byt_score_pwm_groups[] = { "pwm0_grp", "pwm1_grp", @@ -332,7 +334,7 @@ static const char * const byt_score_plt_clk_groups[] = { }; static const char * const byt_score_smbus_groups[] = { "smbus_grp" }; static const char * const byt_score_gpio_groups[] = { - "uart1_grp_gpio", "uart2_grp_gpio", "pwm0_grp_gpio", + "uart1_grp_gpio", "uart2_grp_gpio", "uart3_grp_gpio", "pwm0_grp_gpio", "pwm1_grp_gpio", "ssp0_grp_gpio", "ssp1_grp_gpio", "ssp2_grp_gpio", "sio_spi_grp_gpio", "i2c0_grp_gpio", "i2c1_grp_gpio", "i2c2_grp_gpio", "i2c3_grp_gpio", "i2c4_grp_gpio", "i2c5_grp_gpio", "i2c6_grp_gpio", From 7c1c73bf84c50b641449f9811e2196cdc3ca4a1b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 26 Mar 2024 21:38:58 +0100 Subject: [PATCH 099/606] wifi: mac80211: check EHT/TTLM action frame length Check the EHT action frame length before accessing the action code, if it's not present then the frame cannot be valid. Reported-by: syzbot+75af45a00cf13243ba39@syzkaller.appspotmail.com Closes: https://lore.kernel.org/r/0000000000006c06870614886611@google.com/ Fixes: 8f500fbc6c65 ("wifi: mac80211: process and save negotiated TID to Link mapping request") Link: https://msgid.link/20240326213858.19c84f34349f.I71b439f016b28f65284bb7646fe36343b74cbc9a@changeid Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index c1f8501384056..685185dc04f97 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -3780,6 +3780,10 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) } break; case WLAN_CATEGORY_PROTECTED_EHT: + if (len < offsetofend(typeof(*mgmt), + u.action.u.ttlm_req.action_code)) + break; + switch (mgmt->u.action.u.ttlm_req.action_code) { case WLAN_PROTECTED_EHT_ACTION_TTLM_REQ: if (sdata->vif.type != NL80211_IFTYPE_STATION) From ab9177d83c040eba58387914077ebca56f14fae6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 26 Mar 2024 22:08:54 +0100 Subject: [PATCH 100/606] wifi: mac80211: don't use rate mask for scanning The rate mask is intended for use during operation, and can be set to only have masks for the currently active band. As such, it cannot be used for scanning which can be on other bands as well. Simply ignore the rate masks during scanning to avoid warnings from incorrect settings. Reported-by: syzbot+fdc5123366fb9c3fdc6d@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=fdc5123366fb9c3fdc6d Co-developed-by: Dmitry Antipov Signed-off-by: Dmitry Antipov Tested-by: Dmitry Antipov Link: https://msgid.link/20240326220854.9594cbb418ca.I7f86c0ba1f98cf7e27c2bacf6c2d417200ecea5c@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 3 +++ net/mac80211/rate.c | 6 +++++- net/mac80211/scan.c | 1 + net/mac80211/tx.c | 13 +++++++++---- 4 files changed, 18 insertions(+), 5 deletions(-) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 353488ab94a29..2d7f87bc5324b 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -953,6 +953,8 @@ enum mac80211_tx_info_flags { * of their QoS TID or other priority field values. * @IEEE80211_TX_CTRL_MCAST_MLO_FIRST_TX: first MLO TX, used mostly internally * for sequence number assignment + * @IEEE80211_TX_CTRL_SCAN_TX: Indicates that this frame is transmitted + * due to scanning, not in normal operation on the interface. * @IEEE80211_TX_CTRL_MLO_LINK: If not @IEEE80211_LINK_UNSPECIFIED, this * frame should be transmitted on the specific link. This really is * only relevant for frames that do not have data present, and is @@ -973,6 +975,7 @@ enum mac80211_tx_control_flags { IEEE80211_TX_CTRL_NO_SEQNO = BIT(7), IEEE80211_TX_CTRL_DONT_REORDER = BIT(8), IEEE80211_TX_CTRL_MCAST_MLO_FIRST_TX = BIT(9), + IEEE80211_TX_CTRL_SCAN_TX = BIT(10), IEEE80211_TX_CTRL_MLO_LINK = 0xf0000000, }; diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 23404b275457a..4dc1def695486 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -877,6 +877,7 @@ void ieee80211_get_tx_rates(struct ieee80211_vif *vif, struct ieee80211_sub_if_data *sdata; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ieee80211_supported_band *sband; + u32 mask = ~0; rate_control_fill_sta_table(sta, info, dest, max_rates); @@ -889,9 +890,12 @@ void ieee80211_get_tx_rates(struct ieee80211_vif *vif, if (ieee80211_is_tx_data(skb)) rate_control_apply_mask(sdata, sta, sband, dest, max_rates); + if (!(info->control.flags & IEEE80211_TX_CTRL_SCAN_TX)) + mask = sdata->rc_rateidx_mask[info->band]; + if (dest[0].idx < 0) __rate_control_send_low(&sdata->local->hw, sband, sta, info, - sdata->rc_rateidx_mask[info->band]); + mask); if (sta) rate_fixup_ratelist(vif, sband, info, dest, max_rates); diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 0429e59ba387c..73850312580f7 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -648,6 +648,7 @@ static void ieee80211_send_scan_probe_req(struct ieee80211_sub_if_data *sdata, cpu_to_le16(IEEE80211_SN_TO_SEQ(sn)); } IEEE80211_SKB_CB(skb)->flags |= tx_flags; + IEEE80211_SKB_CB(skb)->control.flags |= IEEE80211_TX_CTRL_SCAN_TX; ieee80211_tx_skb_tid_band(sdata, skb, 7, channel->band); } } diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 6bf223e6cd1a5..cfd0a62d0152b 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -698,11 +698,16 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) txrc.bss_conf = &tx->sdata->vif.bss_conf; txrc.skb = tx->skb; txrc.reported_rate.idx = -1; - txrc.rate_idx_mask = tx->sdata->rc_rateidx_mask[info->band]; - if (tx->sdata->rc_has_mcs_mask[info->band]) - txrc.rate_idx_mcs_mask = - tx->sdata->rc_rateidx_mcs_mask[info->band]; + if (unlikely(info->control.flags & IEEE80211_TX_CTRL_SCAN_TX)) { + txrc.rate_idx_mask = ~0; + } else { + txrc.rate_idx_mask = tx->sdata->rc_rateidx_mask[info->band]; + + if (tx->sdata->rc_has_mcs_mask[info->band]) + txrc.rate_idx_mcs_mask = + tx->sdata->rc_rateidx_mcs_mask[info->band]; + } txrc.bss = (tx->sdata->vif.type == NL80211_IFTYPE_AP || tx->sdata->vif.type == NL80211_IFTYPE_MESH_POINT || From d12b9779cc9ba29d65fbfc728eb8a037871dd331 Mon Sep 17 00:00:00 2001 From: Richard Kinder Date: Thu, 28 Mar 2024 11:57:25 +1100 Subject: [PATCH 101/606] wifi: mac80211: ensure beacon is non-S1G prior to extracting the beacon timestamp field Logic inside ieee80211_rx_mgmt_beacon accesses the mgmt->u.beacon.timestamp field without first checking whether the beacon received is non-S1G format. Fix the problem by checking the beacon is non-S1G format to avoid access of the mgmt->u.beacon.timestamp field. Signed-off-by: Richard Kinder Link: https://msgid.link/20240328005725.85355-1-richard.kinder@gmail.com Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 96b70006b7fc0..db7128f6c901e 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -6193,7 +6193,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, link->u.mgd.dtim_period = elems->dtim_period; link->u.mgd.have_beacon = true; ifmgd->assoc_data->need_beacon = false; - if (ieee80211_hw_check(&local->hw, TIMING_BEACON_ONLY)) { + if (ieee80211_hw_check(&local->hw, TIMING_BEACON_ONLY) && + !ieee80211_is_s1g_beacon(hdr->frame_control)) { link->conf->sync_tsf = le64_to_cpu(mgmt->u.beacon.timestamp); link->conf->sync_device_ts = From 9ef369973cd2c97cce3388d2c0c7e3c056656e8a Mon Sep 17 00:00:00 2001 From: Igor Artemiev Date: Fri, 5 Apr 2024 18:24:30 +0300 Subject: [PATCH 102/606] wifi: cfg80211: fix the order of arguments for trace events of the tx_rx_evt class The declarations of the tx_rx_evt class and the rdev_set_antenna event use the wrong order of arguments in the TP_ARGS macro. Fix the order of arguments in the TP_ARGS macro. Found by Linux Verification Center (linuxtesting.org) with SVACE. Signed-off-by: Igor Artemiev Link: https://msgid.link/20240405152431.270267-1-Igor.A.Artemiev@mcst.ru Signed-off-by: Johannes Berg --- net/wireless/trace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/wireless/trace.h b/net/wireless/trace.h index cbbf347c6b2e0..df013c98b80df 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1758,7 +1758,7 @@ TRACE_EVENT(rdev_return_void_tx_rx, DECLARE_EVENT_CLASS(tx_rx_evt, TP_PROTO(struct wiphy *wiphy, u32 tx, u32 rx), - TP_ARGS(wiphy, rx, tx), + TP_ARGS(wiphy, tx, rx), TP_STRUCT__entry( WIPHY_ENTRY __field(u32, tx) @@ -1775,7 +1775,7 @@ DECLARE_EVENT_CLASS(tx_rx_evt, DEFINE_EVENT(tx_rx_evt, rdev_set_antenna, TP_PROTO(struct wiphy *wiphy, u32 tx, u32 rx), - TP_ARGS(wiphy, rx, tx) + TP_ARGS(wiphy, tx, rx) ); DECLARE_EVENT_CLASS(wiphy_netdev_id_evt, From b61bb5bc2c1cd00bb53db42f705735db6e8700f0 Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Thu, 4 Apr 2024 10:31:55 +0200 Subject: [PATCH 103/606] mtd: rawnand: qcom: Fix broken OP_RESET_DEVICE command in qcom_misc_cmd_type_exec() While migrating to exec_ops in commit a82990c8a409 ("mtd: rawnand: qcom: Add read/read_start ops in exec_op path"), OP_RESET_DEVICE command handling got broken unintentionally. Right now for the OP_RESET_DEVICE command, qcom_misc_cmd_type_exec() will simply return 0 without handling it. Even, if that gets fixed, an unnecessary FLASH_STATUS read descriptor command is being added in the middle and that seems to be causing the command to fail on IPQ806x devices. So let's fix the above two issues to make OP_RESET_DEVICE command working again. Fixes: a82990c8a409 ("mtd: rawnand: qcom: Add read/read_start ops in exec_op path") Cc: stable@vger.kernel.org Reviewed-by: Manivannan Sadhasivam Signed-off-by: Christian Marangi Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20240404083157.940-1-ansuelsmth@gmail.com --- drivers/mtd/nand/raw/qcom_nandc.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/mtd/nand/raw/qcom_nandc.c b/drivers/mtd/nand/raw/qcom_nandc.c index b079605c84d38..b8cff9240b286 100644 --- a/drivers/mtd/nand/raw/qcom_nandc.c +++ b/drivers/mtd/nand/raw/qcom_nandc.c @@ -2815,7 +2815,7 @@ static int qcom_misc_cmd_type_exec(struct nand_chip *chip, const struct nand_sub host->cfg0_raw & ~(7 << CW_PER_PAGE)); nandc_set_reg(chip, NAND_DEV0_CFG1, host->cfg1_raw); instrs = 3; - } else { + } else if (q_op.cmd_reg != OP_RESET_DEVICE) { return 0; } @@ -2830,9 +2830,8 @@ static int qcom_misc_cmd_type_exec(struct nand_chip *chip, const struct nand_sub nandc_set_reg(chip, NAND_EXEC_CMD, 1); write_reg_dma(nandc, NAND_FLASH_CMD, instrs, NAND_BAM_NEXT_SGL); - (q_op.cmd_reg == OP_BLOCK_ERASE) ? write_reg_dma(nandc, NAND_DEV0_CFG0, - 2, NAND_BAM_NEXT_SGL) : read_reg_dma(nandc, - NAND_FLASH_STATUS, 1, NAND_BAM_NEXT_SGL); + if (q_op.cmd_reg == OP_BLOCK_ERASE) + write_reg_dma(nandc, NAND_DEV0_CFG0, 2, NAND_BAM_NEXT_SGL); write_reg_dma(nandc, NAND_EXEC_CMD, 1, NAND_BAM_NEXT_SGL); read_reg_dma(nandc, NAND_FLASH_STATUS, 1, NAND_BAM_NEXT_SGL); From 21c9fb611c25d5cd038f6fe485232e7884bb0b3d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 5 Apr 2024 16:30:04 +0200 Subject: [PATCH 104/606] mtd: diskonchip: work around ubsan link failure I ran into a randconfig build failure with UBSAN using gcc-13.2: arm-linux-gnueabi-ld: error: unplaced orphan section `.bss..Lubsan_data31' from `drivers/mtd/nand/raw/diskonchip.o' I'm not entirely sure what is going on here, but I suspect this has something to do with the check for the end of the doc_locations[] array that contains an (unsigned long)0xffffffff element, which is compared against the signed (int)0xffffffff. If this is the case, we should get a runtime check for undefined behavior, but we instead get an unexpected build-time error. I would have expected this to work fine on 32-bit architectures despite the signed integer overflow, though on 64-bit architectures this likely won't ever work. Changing the contition to instead check for the size of the array makes the code safe everywhere and avoids the ubsan check that leads to the link error. The loop code goes back to before 2.6.12. Cc: stable@vger.kernel.org Signed-off-by: Arnd Bergmann Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20240405143015.717429-1-arnd@kernel.org --- drivers/mtd/nand/raw/diskonchip.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/nand/raw/diskonchip.c b/drivers/mtd/nand/raw/diskonchip.c index 5243fab9face0..8db7fc4245711 100644 --- a/drivers/mtd/nand/raw/diskonchip.c +++ b/drivers/mtd/nand/raw/diskonchip.c @@ -53,7 +53,7 @@ static unsigned long doc_locations[] __initdata = { 0xe8000, 0xea000, 0xec000, 0xee000, #endif #endif - 0xffffffff }; +}; static struct mtd_info *doclist = NULL; @@ -1554,7 +1554,7 @@ static int __init init_nanddoc(void) if (ret < 0) return ret; } else { - for (i = 0; (doc_locations[i] != 0xffffffff); i++) { + for (i = 0; i < ARRAY_SIZE(doc_locations); i++) { doc_probe(doc_locations[i]); } } From abe6acfa7d7b666d785eae706bd34b63f3c2b11f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=BCnther=20Noack?= Date: Fri, 5 Apr 2024 21:40:29 +0000 Subject: [PATCH 105/606] fs: Return ENOTTY directly if FS_IOC_GETUUID or FS_IOC_GETFSSYSFSPATH fail MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These IOCTL commands should be implemented by setting attributes on the superblock, rather than in the IOCTL hooks in struct file_operations. By returning -ENOTTY instead of -ENOIOCTLCMD, we instruct the fs/ioctl.c logic to return -ENOTTY immediately, rather than attempting to call f_op->unlocked_ioctl() or f_op->compat_ioctl() as a fallback. Why this is safe: Before this change, fs/ioctl.c would unsuccessfully attempt calling the IOCTL hooks, and then return -ENOTTY. By returning -ENOTTY directly, we return the same error code immediately, but save ourselves the fallback attempt. Motivation: This simplifies the logic for these IOCTL commands and lets us reason about the side effects of these IOCTLs more easily. It will be possible to permit these IOCTLs under LSM IOCTL policies, without having to worry about them getting dispatched to problematic device drivers (which sometimes do work before looking at the IOCTL command number). Link: https://lore.kernel.org/all/cnwpkeovzbumhprco7q2c2y6zxzmxfpwpwe3tyy6c3gg2szgqd@vfzjaw5v5imr/ Cc: Kent Overstreet Cc: Christian Brauner Cc: Jan Kara Cc: Dave Chinner Cc: Darrick J. Wong Cc: Theodore Ts'o Cc: Josef Bacik Signed-off-by: Günther Noack Link: https://lore.kernel.org/r/20240405214040.101396-2-gnoack@google.com Acked-by: Kent Overstreet Signed-off-by: Christian Brauner --- fs/ioctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ioctl.c b/fs/ioctl.c index 1d5abfdf0f22a..fb0628e680c40 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -769,7 +769,7 @@ static int ioctl_getfsuuid(struct file *file, void __user *argp) struct fsuuid2 u = { .len = sb->s_uuid_len, }; if (!sb->s_uuid_len) - return -ENOIOCTLCMD; + return -ENOTTY; memcpy(&u.uuid[0], &sb->s_uuid, sb->s_uuid_len); @@ -781,7 +781,7 @@ static int ioctl_get_fs_sysfs_path(struct file *file, void __user *argp) struct super_block *sb = file_inode(file)->i_sb; if (!strlen(sb->s_sysfs_name)) - return -ENOIOCTLCMD; + return -ENOTTY; struct fs_sysfs_path u = {}; From 965e49cdf8c19f21b8308adeded3a8139cff5c84 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 9 Apr 2024 14:00:33 +0300 Subject: [PATCH 106/606] ASoC: SOF: ipc4-pcm: Use consistent name for snd_sof_pcm_stream pointer Throughout the file the pointer for snd_sof_pcm_stream is named either 'stream' or (wrongly) 'spcm' which confuses the reader. Use 'sps' for the pointer name as it is the most common name used in SOF codebase. Signed-off-by: Peter Ujfalusi Reviewed-by: Pierre-Louis Bossart Reviewed-by: Ranjani Sridharan Link: https://msgid.link/r/20240409110036.9411-2-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/ipc4-pcm.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/sound/soc/sof/ipc4-pcm.c b/sound/soc/sof/ipc4-pcm.c index e915f9f87a6c3..f989cc2992bff 100644 --- a/sound/soc/sof/ipc4-pcm.c +++ b/sound/soc/sof/ipc4-pcm.c @@ -764,7 +764,7 @@ static int sof_ipc4_pcm_setup(struct snd_sof_dev *sdev, struct snd_sof_pcm *spcm return 0; } -static void sof_ipc4_build_time_info(struct snd_sof_dev *sdev, struct snd_sof_pcm_stream *spcm) +static void sof_ipc4_build_time_info(struct snd_sof_dev *sdev, struct snd_sof_pcm_stream *sps) { struct sof_ipc4_copier *host_copier = NULL; struct sof_ipc4_copier *dai_copier = NULL; @@ -775,7 +775,7 @@ static void sof_ipc4_build_time_info(struct snd_sof_dev *sdev, struct snd_sof_pc int i; /* find host & dai to locate info in memory window */ - for_each_dapm_widgets(spcm->list, i, widget) { + for_each_dapm_widgets(sps->list, i, widget) { struct snd_sof_widget *swidget = widget->dobj.private; if (!swidget) @@ -795,7 +795,7 @@ static void sof_ipc4_build_time_info(struct snd_sof_dev *sdev, struct snd_sof_pc return; } - info = spcm->private; + info = sps->private; info->host_copier = host_copier; info->dai_copier = dai_copier; info->llp_offset = offsetof(struct sof_ipc4_fw_registers, llp_gpdma_reading_slots) + @@ -864,7 +864,7 @@ static int sof_ipc4_pcm_hw_params(struct snd_soc_component *component, static int sof_ipc4_get_stream_start_offset(struct snd_sof_dev *sdev, struct snd_pcm_substream *substream, - struct snd_sof_pcm_stream *stream, + struct snd_sof_pcm_stream *sps, struct sof_ipc4_timestamp_info *time_info) { struct sof_ipc4_copier *host_copier = time_info->host_copier; @@ -918,7 +918,7 @@ static int sof_ipc4_pcm_pointer(struct snd_soc_component *component, struct sof_ipc4_timestamp_info *time_info; struct sof_ipc4_llp_reading_slot llp; snd_pcm_uframes_t head_cnt, tail_cnt; - struct snd_sof_pcm_stream *stream; + struct snd_sof_pcm_stream *sps; u64 dai_cnt, host_cnt, host_ptr; struct snd_sof_pcm *spcm; int ret; @@ -927,8 +927,8 @@ static int sof_ipc4_pcm_pointer(struct snd_soc_component *component, if (!spcm) return -EOPNOTSUPP; - stream = &spcm->stream[substream->stream]; - time_info = stream->private; + sps = &spcm->stream[substream->stream]; + time_info = sps->private; if (!time_info) return -EOPNOTSUPP; @@ -938,7 +938,7 @@ static int sof_ipc4_pcm_pointer(struct snd_soc_component *component, * the statistics is complete. And it will not change after the first initiailization. */ if (time_info->stream_start_offset == SOF_IPC4_INVALID_STREAM_POSITION) { - ret = sof_ipc4_get_stream_start_offset(sdev, substream, stream, time_info); + ret = sof_ipc4_get_stream_start_offset(sdev, substream, sps, time_info); if (ret < 0) return -EOPNOTSUPP; } @@ -1030,15 +1030,15 @@ static snd_pcm_sframes_t sof_ipc4_pcm_delay(struct snd_soc_component *component, { struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream); struct sof_ipc4_timestamp_info *time_info; - struct snd_sof_pcm_stream *stream; + struct snd_sof_pcm_stream *sps; struct snd_sof_pcm *spcm; spcm = snd_sof_find_spcm_dai(component, rtd); if (!spcm) return 0; - stream = &spcm->stream[substream->stream]; - time_info = stream->private; + sps = &spcm->stream[substream->stream]; + time_info = sps->private; /* * Report the stored delay value calculated in the pointer callback. * In the unlikely event that the calculation was skipped/aborted, the From 36e980050b0733829e4e0f97b97f7907ba9f00bb Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 9 Apr 2024 14:00:34 +0300 Subject: [PATCH 107/606] ASoC: SOF: ipc4-pcm: Use consistent name for sof_ipc4_timestamp_info pointer The pointer to sof_ipc4_timestamp_info named most of the time as 'time_info' only to be named as 'stream_info' or 'info' in two function. Use the consistent name of 'time_info' throughout the file. Signed-off-by: Peter Ujfalusi Reviewed-by: Pierre-Louis Bossart Reviewed-by: Ranjani Sridharan Link: https://msgid.link/r/20240409110036.9411-3-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/ipc4-pcm.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/sound/soc/sof/ipc4-pcm.c b/sound/soc/sof/ipc4-pcm.c index f989cc2992bff..74b0d0d002709 100644 --- a/sound/soc/sof/ipc4-pcm.c +++ b/sound/soc/sof/ipc4-pcm.c @@ -721,7 +721,7 @@ static int sof_ipc4_pcm_setup(struct snd_sof_dev *sdev, struct snd_sof_pcm *spcm { struct snd_sof_pcm_stream_pipeline_list *pipeline_list; struct sof_ipc4_fw_data *ipc4_data = sdev->private; - struct sof_ipc4_timestamp_info *stream_info; + struct sof_ipc4_timestamp_info *time_info; bool support_info = true; u32 abi_version; u32 abi_offset; @@ -752,13 +752,13 @@ static int sof_ipc4_pcm_setup(struct snd_sof_dev *sdev, struct snd_sof_pcm *spcm if (!support_info) continue; - stream_info = kzalloc(sizeof(*stream_info), GFP_KERNEL); - if (!stream_info) { + time_info = kzalloc(sizeof(*time_info), GFP_KERNEL); + if (!time_info) { sof_ipc4_pcm_free(sdev, spcm); return -ENOMEM; } - spcm->stream[stream].private = stream_info; + spcm->stream[stream].private = time_info; } return 0; @@ -769,7 +769,7 @@ static void sof_ipc4_build_time_info(struct snd_sof_dev *sdev, struct snd_sof_pc struct sof_ipc4_copier *host_copier = NULL; struct sof_ipc4_copier *dai_copier = NULL; struct sof_ipc4_llp_reading_slot llp_slot; - struct sof_ipc4_timestamp_info *info; + struct sof_ipc4_timestamp_info *time_info; struct snd_soc_dapm_widget *widget; struct snd_sof_dai *dai; int i; @@ -795,44 +795,44 @@ static void sof_ipc4_build_time_info(struct snd_sof_dev *sdev, struct snd_sof_pc return; } - info = sps->private; - info->host_copier = host_copier; - info->dai_copier = dai_copier; - info->llp_offset = offsetof(struct sof_ipc4_fw_registers, llp_gpdma_reading_slots) + - sdev->fw_info_box.offset; + time_info = sps->private; + time_info->host_copier = host_copier; + time_info->dai_copier = dai_copier; + time_info->llp_offset = offsetof(struct sof_ipc4_fw_registers, + llp_gpdma_reading_slots) + sdev->fw_info_box.offset; /* find llp slot used by current dai */ for (i = 0; i < SOF_IPC4_MAX_LLP_GPDMA_READING_SLOTS; i++) { - sof_mailbox_read(sdev, info->llp_offset, &llp_slot, sizeof(llp_slot)); + sof_mailbox_read(sdev, time_info->llp_offset, &llp_slot, sizeof(llp_slot)); if (llp_slot.node_id == dai_copier->data.gtw_cfg.node_id) break; - info->llp_offset += sizeof(llp_slot); + time_info->llp_offset += sizeof(llp_slot); } if (i < SOF_IPC4_MAX_LLP_GPDMA_READING_SLOTS) return; /* if no llp gpdma slot is used, check aggregated sdw slot */ - info->llp_offset = offsetof(struct sof_ipc4_fw_registers, llp_sndw_reading_slots) + - sdev->fw_info_box.offset; + time_info->llp_offset = offsetof(struct sof_ipc4_fw_registers, + llp_sndw_reading_slots) + sdev->fw_info_box.offset; for (i = 0; i < SOF_IPC4_MAX_LLP_SNDW_READING_SLOTS; i++) { - sof_mailbox_read(sdev, info->llp_offset, &llp_slot, sizeof(llp_slot)); + sof_mailbox_read(sdev, time_info->llp_offset, &llp_slot, sizeof(llp_slot)); if (llp_slot.node_id == dai_copier->data.gtw_cfg.node_id) break; - info->llp_offset += sizeof(llp_slot); + time_info->llp_offset += sizeof(llp_slot); } if (i < SOF_IPC4_MAX_LLP_SNDW_READING_SLOTS) return; /* check EVAD slot */ - info->llp_offset = offsetof(struct sof_ipc4_fw_registers, llp_evad_reading_slot) + - sdev->fw_info_box.offset; - sof_mailbox_read(sdev, info->llp_offset, &llp_slot, sizeof(llp_slot)); + time_info->llp_offset = offsetof(struct sof_ipc4_fw_registers, + llp_evad_reading_slot) + sdev->fw_info_box.offset; + sof_mailbox_read(sdev, time_info->llp_offset, &llp_slot, sizeof(llp_slot)); if (llp_slot.node_id != dai_copier->data.gtw_cfg.node_id) - info->llp_offset = 0; + time_info->llp_offset = 0; } static int sof_ipc4_pcm_hw_params(struct snd_soc_component *component, From 551af3280c16166244425bbb1d73028f3a907e1f Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 9 Apr 2024 14:00:35 +0300 Subject: [PATCH 108/606] ASoC: SOF: ipc4-pcm: Introduce generic sof_ipc4_pcm_stream_priv Using the sof_ipc4_timestamp_info struct directly as sps->private data is too restrictive, add a new generic sof_ipc4_pcm_stream_priv struct containing the time_info to allow new information to be stored in a generic way. Signed-off-by: Peter Ujfalusi Reviewed-by: Pierre-Louis Bossart Reviewed-by: Ranjani Sridharan Link: https://msgid.link/r/20240409110036.9411-4-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/ipc4-pcm.c | 43 ++++++++++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 8 deletions(-) diff --git a/sound/soc/sof/ipc4-pcm.c b/sound/soc/sof/ipc4-pcm.c index 74b0d0d002709..34ce6bb7f37da 100644 --- a/sound/soc/sof/ipc4-pcm.c +++ b/sound/soc/sof/ipc4-pcm.c @@ -37,6 +37,22 @@ struct sof_ipc4_timestamp_info { snd_pcm_sframes_t delay; }; +/** + * struct sof_ipc4_pcm_stream_priv - IPC4 specific private data + * @time_info: pointer to time info struct if it is supported, otherwise NULL + */ +struct sof_ipc4_pcm_stream_priv { + struct sof_ipc4_timestamp_info *time_info; +}; + +static inline struct sof_ipc4_timestamp_info * +sof_ipc4_sps_to_time_info(struct snd_sof_pcm_stream *sps) +{ + struct sof_ipc4_pcm_stream_priv *stream_priv = sps->private; + + return stream_priv->time_info; +} + static int sof_ipc4_set_multi_pipeline_state(struct snd_sof_dev *sdev, u32 state, struct ipc4_pipeline_set_state_data *trigger_list) { @@ -452,7 +468,7 @@ static int sof_ipc4_trigger_pipelines(struct snd_soc_component *component, * Invalidate the stream_start_offset to make sure that it is * going to be updated if the stream resumes */ - time_info = spcm->stream[substream->stream].private; + time_info = sof_ipc4_sps_to_time_info(&spcm->stream[substream->stream]); if (time_info) time_info->stream_start_offset = SOF_IPC4_INVALID_STREAM_POSITION; @@ -706,12 +722,16 @@ static int sof_ipc4_pcm_dai_link_fixup(struct snd_soc_pcm_runtime *rtd, static void sof_ipc4_pcm_free(struct snd_sof_dev *sdev, struct snd_sof_pcm *spcm) { struct snd_sof_pcm_stream_pipeline_list *pipeline_list; + struct sof_ipc4_pcm_stream_priv *stream_priv; int stream; for_each_pcm_streams(stream) { pipeline_list = &spcm->stream[stream].pipeline_list; kfree(pipeline_list->pipelines); pipeline_list->pipelines = NULL; + + stream_priv = spcm->stream[stream].private; + kfree(stream_priv->time_info); kfree(spcm->stream[stream].private); spcm->stream[stream].private = NULL; } @@ -721,6 +741,7 @@ static int sof_ipc4_pcm_setup(struct snd_sof_dev *sdev, struct snd_sof_pcm *spcm { struct snd_sof_pcm_stream_pipeline_list *pipeline_list; struct sof_ipc4_fw_data *ipc4_data = sdev->private; + struct sof_ipc4_pcm_stream_priv *stream_priv; struct sof_ipc4_timestamp_info *time_info; bool support_info = true; u32 abi_version; @@ -749,6 +770,14 @@ static int sof_ipc4_pcm_setup(struct snd_sof_dev *sdev, struct snd_sof_pcm *spcm return -ENOMEM; } + stream_priv = kzalloc(sizeof(*stream_priv), GFP_KERNEL); + if (!stream_priv) { + sof_ipc4_pcm_free(sdev, spcm); + return -ENOMEM; + } + + spcm->stream[stream].private = stream_priv; + if (!support_info) continue; @@ -758,7 +787,7 @@ static int sof_ipc4_pcm_setup(struct snd_sof_dev *sdev, struct snd_sof_pcm *spcm return -ENOMEM; } - spcm->stream[stream].private = time_info; + stream_priv->time_info = time_info; } return 0; @@ -795,7 +824,7 @@ static void sof_ipc4_build_time_info(struct snd_sof_dev *sdev, struct snd_sof_pc return; } - time_info = sps->private; + time_info = sof_ipc4_sps_to_time_info(sps); time_info->host_copier = host_copier; time_info->dai_copier = dai_copier; time_info->llp_offset = offsetof(struct sof_ipc4_fw_registers, @@ -849,7 +878,7 @@ static int sof_ipc4_pcm_hw_params(struct snd_soc_component *component, if (!spcm) return -EINVAL; - time_info = spcm->stream[substream->stream].private; + time_info = sof_ipc4_sps_to_time_info(&spcm->stream[substream->stream]); /* delay calculation is not supported by current fw_reg ABI */ if (!time_info) return 0; @@ -928,7 +957,7 @@ static int sof_ipc4_pcm_pointer(struct snd_soc_component *component, return -EOPNOTSUPP; sps = &spcm->stream[substream->stream]; - time_info = sps->private; + time_info = sof_ipc4_sps_to_time_info(sps); if (!time_info) return -EOPNOTSUPP; @@ -1030,15 +1059,13 @@ static snd_pcm_sframes_t sof_ipc4_pcm_delay(struct snd_soc_component *component, { struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream); struct sof_ipc4_timestamp_info *time_info; - struct snd_sof_pcm_stream *sps; struct snd_sof_pcm *spcm; spcm = snd_sof_find_spcm_dai(component, rtd); if (!spcm) return 0; - sps = &spcm->stream[substream->stream]; - time_info = sps->private; + time_info = sof_ipc4_sps_to_time_info(&spcm->stream[substream->stream]); /* * Report the stored delay value calculated in the pointer callback. * In the unlikely event that the calculation was skipped/aborted, the From 7211814f2adcf376b8db6321447a9725c33b6ae7 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 9 Apr 2024 14:00:36 +0300 Subject: [PATCH 109/606] ASoC: SOF: ipc4-pcm: Do not reset the ChainDMA if it has not been allocated The ChainDMA operation differs from normal pipelines that it is only created when the stream started, in fact a PCM using ChainDMA has no pipelines, modules. To reset a ChainDMA, it needs to be first allocated in firmware. When PulseAudio/PipeWire starts, they will probe the PCMs by opening them, check hw_params and then close the PCM without starting audio. Unconditionally resetting the ChainDMA can result the following error: ipc tx : 0xe040000|0x0: GLB_CHAIN_DMA ipc tx reply: 0x2e000007|0x0: GLB_CHAIN_DMA FW reported error: 7 - Unsupported operation requested ipc error for msg 0xe040000|0x0 sof_pcm_stream_free: pcm_ops hw_free failed -22 Add a new chain_dma_allocated flag to sof_ipc4_pcm_stream_priv to store the ChainDMA allocation state and use this flag to skip sending the reset if the ChainDMA is not allocated. Signed-off-by: Peter Ujfalusi Reviewed-by: Pierre-Louis Bossart Reviewed-by: Ranjani Sridharan Link: https://msgid.link/r/20240409110036.9411-5-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/ipc4-pcm.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/sound/soc/sof/ipc4-pcm.c b/sound/soc/sof/ipc4-pcm.c index 34ce6bb7f37da..4594470ed08b1 100644 --- a/sound/soc/sof/ipc4-pcm.c +++ b/sound/soc/sof/ipc4-pcm.c @@ -40,9 +40,12 @@ struct sof_ipc4_timestamp_info { /** * struct sof_ipc4_pcm_stream_priv - IPC4 specific private data * @time_info: pointer to time info struct if it is supported, otherwise NULL + * @chain_dma_allocated: indicates the ChainDMA allocation state */ struct sof_ipc4_pcm_stream_priv { struct sof_ipc4_timestamp_info *time_info; + + bool chain_dma_allocated; }; static inline struct sof_ipc4_timestamp_info * @@ -269,14 +272,17 @@ sof_ipc4_update_pipeline_state(struct snd_sof_dev *sdev, int state, int cmd, */ static int sof_ipc4_chain_dma_trigger(struct snd_sof_dev *sdev, - int direction, + struct snd_sof_pcm *spcm, int direction, struct snd_sof_pcm_stream_pipeline_list *pipeline_list, int state, int cmd) { struct sof_ipc4_fw_data *ipc4_data = sdev->private; + struct sof_ipc4_pcm_stream_priv *stream_priv; bool allocate, enable, set_fifo_size; struct sof_ipc4_msg msg = {{ 0 }}; - int i; + int ret, i; + + stream_priv = spcm->stream[direction].private; switch (state) { case SOF_IPC4_PIPE_RUNNING: /* Allocate and start chained dma */ @@ -297,6 +303,11 @@ static int sof_ipc4_chain_dma_trigger(struct snd_sof_dev *sdev, set_fifo_size = false; break; case SOF_IPC4_PIPE_RESET: /* Disable and free chained DMA. */ + + /* ChainDMA can only be reset if it has been allocated */ + if (!stream_priv->chain_dma_allocated) + return 0; + allocate = false; enable = false; set_fifo_size = false; @@ -354,7 +365,12 @@ static int sof_ipc4_chain_dma_trigger(struct snd_sof_dev *sdev, if (enable) msg.primary |= SOF_IPC4_GLB_CHAIN_DMA_ENABLE_MASK; - return sof_ipc_tx_message_no_reply(sdev->ipc, &msg, 0); + ret = sof_ipc_tx_message_no_reply(sdev->ipc, &msg, 0); + /* Update the ChainDMA allocation state */ + if (!ret) + stream_priv->chain_dma_allocated = allocate; + + return ret; } static int sof_ipc4_trigger_pipelines(struct snd_soc_component *component, @@ -394,7 +410,7 @@ static int sof_ipc4_trigger_pipelines(struct snd_soc_component *component, * trigger function that handles the rest for the substream. */ if (pipeline->use_chain_dma) - return sof_ipc4_chain_dma_trigger(sdev, substream->stream, + return sof_ipc4_chain_dma_trigger(sdev, spcm, substream->stream, pipeline_list, state, cmd); /* allocate memory for the pipeline data */ From 305539a25a1c9929b058381aac6104bd939c0fee Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 8 Apr 2024 14:41:45 -0500 Subject: [PATCH 110/606] ASoC: SOF: Intel: add default firmware library path for LNL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The commit cd6f2a2e6346 ("ASoC: SOF: Intel: Set the default firmware library path for IPC4") added the default_lib_path field for all platforms, but this was missed when LunarLake was later introduced. Fixes: 64a63d9914a5 ("ASoC: SOF: Intel: LNL: Add support for Lunarlake platform") Signed-off-by: Pierre-Louis Bossart Reviewed-by: Péter Ujfalusi Reviewed-by: Bard Liao Link: https://msgid.link/r/20240408194147.28919-2-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/pci-lnl.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/sof/intel/pci-lnl.c b/sound/soc/sof/intel/pci-lnl.c index b26ffe767fab5..b14e508f1f315 100644 --- a/sound/soc/sof/intel/pci-lnl.c +++ b/sound/soc/sof/intel/pci-lnl.c @@ -35,6 +35,9 @@ static const struct sof_dev_desc lnl_desc = { .default_fw_path = { [SOF_IPC_TYPE_4] = "intel/sof-ipc4/lnl", }, + .default_lib_path = { + [SOF_IPC_TYPE_4] = "intel/sof-ipc4-lib/lnl", + }, .default_tplg_path = { [SOF_IPC_TYPE_4] = "intel/sof-ipc4-tplg", }, From 90a2353080eedec855d63f6aadfda14104ee9b06 Mon Sep 17 00:00:00 2001 From: Ranjani Sridharan Date: Mon, 8 Apr 2024 14:41:46 -0500 Subject: [PATCH 111/606] ASoC: SOF: pcm: Restrict DSP D0i3 during S0ix to IPC3 Introduce a new field in struct sof_ipc_pcm_ops that can be used to restrict DSP D0i3 during S0ix suspend to IPC3. With IPC4, all streams must be stopped before S0ix suspend. Reviewed-by: Uday M Bhat Reviewed-by: Bard Liao Signed-off-by: Ranjani Sridharan Signed-off-by: Pierre-Louis Bossart Link: https://msgid.link/r/20240408194147.28919-3-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/ipc3-pcm.c | 1 + sound/soc/sof/pcm.c | 13 ++++++------- sound/soc/sof/sof-audio.h | 2 ++ 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/sound/soc/sof/ipc3-pcm.c b/sound/soc/sof/ipc3-pcm.c index 35769dd7905eb..af0bf354cb209 100644 --- a/sound/soc/sof/ipc3-pcm.c +++ b/sound/soc/sof/ipc3-pcm.c @@ -434,4 +434,5 @@ const struct sof_ipc_pcm_ops ipc3_pcm_ops = { .trigger = sof_ipc3_pcm_trigger, .dai_link_fixup = sof_ipc3_pcm_dai_link_fixup, .reset_hw_params_during_stop = true, + .d0i3_supported_in_s0ix = true, }; diff --git a/sound/soc/sof/pcm.c b/sound/soc/sof/pcm.c index f03cee94bce62..8804e00e7251b 100644 --- a/sound/soc/sof/pcm.c +++ b/sound/soc/sof/pcm.c @@ -325,14 +325,13 @@ static int sof_pcm_trigger(struct snd_soc_component *component, ipc_first = true; break; case SNDRV_PCM_TRIGGER_SUSPEND: - if (sdev->system_suspend_target == SOF_SUSPEND_S0IX && + /* + * If DSP D0I3 is allowed during S0iX, set the suspend_ignored flag for + * D0I3-compatible streams to keep the firmware pipeline running + */ + if (pcm_ops && pcm_ops->d0i3_supported_in_s0ix && + sdev->system_suspend_target == SOF_SUSPEND_S0IX && spcm->stream[substream->stream].d0i3_compatible) { - /* - * trap the event, not sending trigger stop to - * prevent the FW pipelines from being stopped, - * and mark the flag to ignore the upcoming DAPM - * PM events. - */ spcm->stream[substream->stream].suspend_ignored = true; return 0; } diff --git a/sound/soc/sof/sof-audio.h b/sound/soc/sof/sof-audio.h index 86bbb531e142c..499b6084b5263 100644 --- a/sound/soc/sof/sof-audio.h +++ b/sound/soc/sof/sof-audio.h @@ -116,6 +116,7 @@ struct snd_sof_dai_config_data { * triggers. The FW keeps the host DMA running in this case and * therefore the host must do the same and should stop the DMA during * hw_free. + * @d0i3_supported_in_s0ix: Allow DSP D0I3 during S0iX */ struct sof_ipc_pcm_ops { int (*hw_params)(struct snd_soc_component *component, struct snd_pcm_substream *substream, @@ -135,6 +136,7 @@ struct sof_ipc_pcm_ops { bool reset_hw_params_during_stop; bool ipc_first_on_start; bool platform_stop_during_hw_free; + bool d0i3_supported_in_s0ix; }; /** From 17f4041244e66a417c646c8a90bc6747d5f1de1e Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 8 Apr 2024 14:41:47 -0500 Subject: [PATCH 112/606] ASoC: SOF: debug: show firmware/topology prefix/names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The SOF driver has multiple profiles to select firmware/topology prefix/names depending on the platform and ipc_type, and each of those fields can be overridden with kernel parameters. This results in some cases in confusion on what configuration is actually used in a given test. We currently log the firmware and topology names in the kernel logs, but there's been an ask to add the information in debugfs to simplify test scripts used by developers and CI. This isn't meant to be a stable ABI used by apps, changes will be allowed as needed. Closes: https://github.com/thesofproject/linux/issues/3867 Signed-off-by: Pierre-Louis Bossart Reviewed-by: Péter Ujfalusi Reviewed-by: Bard Liao Link: https://msgid.link/r/20240408194147.28919-4-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/debug.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/sound/soc/sof/debug.c b/sound/soc/sof/debug.c index 7c8aafca8fdef..7275437ea8d8a 100644 --- a/sound/soc/sof/debug.c +++ b/sound/soc/sof/debug.c @@ -330,14 +330,32 @@ EXPORT_SYMBOL_GPL(snd_sof_dbg_memory_info_init); int snd_sof_dbg_init(struct snd_sof_dev *sdev) { + struct snd_sof_pdata *plat_data = sdev->pdata; struct snd_sof_dsp_ops *ops = sof_ops(sdev); const struct snd_sof_debugfs_map *map; + struct dentry *fw_profile; int i; int err; /* use "sof" as top level debugFS dir */ sdev->debugfs_root = debugfs_create_dir("sof", NULL); + /* expose firmware/topology prefix/names for test purposes */ + fw_profile = debugfs_create_dir("fw_profile", sdev->debugfs_root); + + debugfs_create_str("fw_path", 0444, fw_profile, + (char **)&plat_data->fw_filename_prefix); + debugfs_create_str("fw_lib_path", 0444, fw_profile, + (char **)&plat_data->fw_lib_prefix); + debugfs_create_str("tplg_path", 0444, fw_profile, + (char **)&plat_data->tplg_filename_prefix); + debugfs_create_str("fw_name", 0444, fw_profile, + (char **)&plat_data->fw_filename); + debugfs_create_str("tplg_name", 0444, fw_profile, + (char **)&plat_data->tplg_filename); + debugfs_create_u32("ipc_type", 0444, fw_profile, + (u32 *)&plat_data->ipc_type); + /* init dfsentry list */ INIT_LIST_HEAD(&sdev->dfsentry_list); From ed09f81eeaa8f9265e1787282cb283f10285c259 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Sat, 6 Apr 2024 15:01:09 +0200 Subject: [PATCH 113/606] firmware: qcom: uefisecapp: Fix memory related IO errors and crashes It turns out that while the QSEECOM APP_SEND command has specific fields for request and response buffers, uefisecapp expects them both to be in a single memory region. Failure to adhere to this has (so far) resulted in either no response being written to the response buffer (causing an EIO to be emitted down the line), the SCM call to fail with EINVAL (i.e., directly from TZ/firmware), or the device to be hard-reset. While this issue can be triggered deterministically, in the current form it seems to happen rather sporadically (which is why it has gone unnoticed during earlier testing). This is likely due to the two kzalloc() calls (for request and response) being directly after each other. Which means that those likely return consecutive regions most of the time, especially when not much else is going on in the system. Fix this by allocating a single memory region for both request and response buffers, properly aligning both structs inside it. This unfortunately also means that the qcom_scm_qseecom_app_send() interface needs to be restructured, as it should no longer map the DMA regions separately. Therefore, move the responsibility of DMA allocation (or mapping) to the caller. Fixes: 759e7a2b62eb ("firmware: Add support for Qualcomm UEFI Secure Application") Cc: stable@vger.kernel.org # 6.7 Tested-by: Johan Hovold Reviewed-by: Johan Hovold Signed-off-by: Maximilian Luz Tested-by: Konrad Dybcio # X13s Link: https://lore.kernel.org/r/20240406130125.1047436-1-luzmaximilian@gmail.com Signed-off-by: Bjorn Andersson --- .../firmware/qcom/qcom_qseecom_uefisecapp.c | 137 ++++++++++++------ drivers/firmware/qcom/qcom_scm.c | 37 +---- include/linux/firmware/qcom/qcom_qseecom.h | 55 ++++++- include/linux/firmware/qcom/qcom_scm.h | 10 +- 4 files changed, 153 insertions(+), 86 deletions(-) diff --git a/drivers/firmware/qcom/qcom_qseecom_uefisecapp.c b/drivers/firmware/qcom/qcom_qseecom_uefisecapp.c index 32188f098ef34..bc550ad0dbe0c 100644 --- a/drivers/firmware/qcom/qcom_qseecom_uefisecapp.c +++ b/drivers/firmware/qcom/qcom_qseecom_uefisecapp.c @@ -221,6 +221,19 @@ struct qsee_rsp_uefi_query_variable_info { * alignment of 8 bytes (64 bits) for GUIDs. Our definition of efi_guid_t, * however, has an alignment of 4 byte (32 bits). So far, this seems to work * fine here. See also the comment on the typedef of efi_guid_t. + * + * Note: It looks like uefisecapp is quite picky about how the memory passed to + * it is structured and aligned. In particular the request/response setup used + * for QSEE_CMD_UEFI_GET_VARIABLE. While qcom_qseecom_app_send(), in theory, + * accepts separate buffers/addresses for the request and response parts, in + * practice, however, it seems to expect them to be both part of a larger + * contiguous block. We initially allocated separate buffers for the request + * and response but this caused the QSEE_CMD_UEFI_GET_VARIABLE command to + * either not write any response to the response buffer or outright crash the + * device. Therefore, we now allocate a single contiguous block of DMA memory + * for both and properly align the data using the macros below. In particular, + * request and response structs are aligned at 8 byte (via __reqdata_offs()), + * following the driver that this has been reverse-engineered from. */ #define qcuefi_buf_align_fields(fields...) \ ({ \ @@ -244,6 +257,12 @@ struct qsee_rsp_uefi_query_variable_info { #define __array_offs(type, count, offset) \ __field_impl(sizeof(type) * (count), __alignof__(type), offset) +#define __array_offs_aligned(type, count, align, offset) \ + __field_impl(sizeof(type) * (count), align, offset) + +#define __reqdata_offs(size, offset) \ + __array_offs_aligned(u8, size, 8, offset) + #define __array(type, count) __array_offs(type, count, NULL) #define __field_offs(type, offset) __array_offs(type, 1, offset) #define __field(type) __array_offs(type, 1, NULL) @@ -277,10 +296,15 @@ static efi_status_t qsee_uefi_get_variable(struct qcuefi_client *qcuefi, const e unsigned long buffer_size = *data_size; efi_status_t efi_status = EFI_SUCCESS; unsigned long name_length; + dma_addr_t cmd_buf_dma; + size_t cmd_buf_size; + void *cmd_buf; size_t guid_offs; size_t name_offs; size_t req_size; size_t rsp_size; + size_t req_offs; + size_t rsp_offs; ssize_t status; if (!name || !guid) @@ -304,17 +328,19 @@ static efi_status_t qsee_uefi_get_variable(struct qcuefi_client *qcuefi, const e __array(u8, buffer_size) ); - req_data = kzalloc(req_size, GFP_KERNEL); - if (!req_data) { + cmd_buf_size = qcuefi_buf_align_fields( + __reqdata_offs(req_size, &req_offs) + __reqdata_offs(rsp_size, &rsp_offs) + ); + + cmd_buf = qseecom_dma_alloc(qcuefi->client, cmd_buf_size, &cmd_buf_dma, GFP_KERNEL); + if (!cmd_buf) { efi_status = EFI_OUT_OF_RESOURCES; goto out; } - rsp_data = kzalloc(rsp_size, GFP_KERNEL); - if (!rsp_data) { - efi_status = EFI_OUT_OF_RESOURCES; - goto out_free_req; - } + req_data = cmd_buf + req_offs; + rsp_data = cmd_buf + rsp_offs; req_data->command_id = QSEE_CMD_UEFI_GET_VARIABLE; req_data->data_size = buffer_size; @@ -332,7 +358,9 @@ static efi_status_t qsee_uefi_get_variable(struct qcuefi_client *qcuefi, const e memcpy(((void *)req_data) + req_data->guid_offset, guid, req_data->guid_size); - status = qcom_qseecom_app_send(qcuefi->client, req_data, req_size, rsp_data, rsp_size); + status = qcom_qseecom_app_send(qcuefi->client, + cmd_buf_dma + req_offs, req_size, + cmd_buf_dma + rsp_offs, rsp_size); if (status) { efi_status = EFI_DEVICE_ERROR; goto out_free; @@ -407,9 +435,7 @@ static efi_status_t qsee_uefi_get_variable(struct qcuefi_client *qcuefi, const e memcpy(data, ((void *)rsp_data) + rsp_data->data_offset, rsp_data->data_size); out_free: - kfree(rsp_data); -out_free_req: - kfree(req_data); + qseecom_dma_free(qcuefi->client, cmd_buf_size, cmd_buf, cmd_buf_dma); out: return efi_status; } @@ -422,10 +448,15 @@ static efi_status_t qsee_uefi_set_variable(struct qcuefi_client *qcuefi, const e struct qsee_rsp_uefi_set_variable *rsp_data; efi_status_t efi_status = EFI_SUCCESS; unsigned long name_length; + dma_addr_t cmd_buf_dma; + size_t cmd_buf_size; + void *cmd_buf; size_t name_offs; size_t guid_offs; size_t data_offs; size_t req_size; + size_t req_offs; + size_t rsp_offs; ssize_t status; if (!name || !guid) @@ -450,17 +481,19 @@ static efi_status_t qsee_uefi_set_variable(struct qcuefi_client *qcuefi, const e __array_offs(u8, data_size, &data_offs) ); - req_data = kzalloc(req_size, GFP_KERNEL); - if (!req_data) { + cmd_buf_size = qcuefi_buf_align_fields( + __reqdata_offs(req_size, &req_offs) + __reqdata_offs(sizeof(*rsp_data), &rsp_offs) + ); + + cmd_buf = qseecom_dma_alloc(qcuefi->client, cmd_buf_size, &cmd_buf_dma, GFP_KERNEL); + if (!cmd_buf) { efi_status = EFI_OUT_OF_RESOURCES; goto out; } - rsp_data = kzalloc(sizeof(*rsp_data), GFP_KERNEL); - if (!rsp_data) { - efi_status = EFI_OUT_OF_RESOURCES; - goto out_free_req; - } + req_data = cmd_buf + req_offs; + rsp_data = cmd_buf + rsp_offs; req_data->command_id = QSEE_CMD_UEFI_SET_VARIABLE; req_data->attributes = attributes; @@ -483,8 +516,9 @@ static efi_status_t qsee_uefi_set_variable(struct qcuefi_client *qcuefi, const e if (data_size) memcpy(((void *)req_data) + req_data->data_offset, data, req_data->data_size); - status = qcom_qseecom_app_send(qcuefi->client, req_data, req_size, rsp_data, - sizeof(*rsp_data)); + status = qcom_qseecom_app_send(qcuefi->client, + cmd_buf_dma + req_offs, req_size, + cmd_buf_dma + rsp_offs, sizeof(*rsp_data)); if (status) { efi_status = EFI_DEVICE_ERROR; goto out_free; @@ -507,9 +541,7 @@ static efi_status_t qsee_uefi_set_variable(struct qcuefi_client *qcuefi, const e } out_free: - kfree(rsp_data); -out_free_req: - kfree(req_data); + qseecom_dma_free(qcuefi->client, cmd_buf_size, cmd_buf, cmd_buf_dma); out: return efi_status; } @@ -521,10 +553,15 @@ static efi_status_t qsee_uefi_get_next_variable(struct qcuefi_client *qcuefi, struct qsee_req_uefi_get_next_variable *req_data; struct qsee_rsp_uefi_get_next_variable *rsp_data; efi_status_t efi_status = EFI_SUCCESS; + dma_addr_t cmd_buf_dma; + size_t cmd_buf_size; + void *cmd_buf; size_t guid_offs; size_t name_offs; size_t req_size; size_t rsp_size; + size_t req_offs; + size_t rsp_offs; ssize_t status; if (!name_size || !name || !guid) @@ -545,17 +582,19 @@ static efi_status_t qsee_uefi_get_next_variable(struct qcuefi_client *qcuefi, __array(*name, *name_size / sizeof(*name)) ); - req_data = kzalloc(req_size, GFP_KERNEL); - if (!req_data) { + cmd_buf_size = qcuefi_buf_align_fields( + __reqdata_offs(req_size, &req_offs) + __reqdata_offs(rsp_size, &rsp_offs) + ); + + cmd_buf = qseecom_dma_alloc(qcuefi->client, cmd_buf_size, &cmd_buf_dma, GFP_KERNEL); + if (!cmd_buf) { efi_status = EFI_OUT_OF_RESOURCES; goto out; } - rsp_data = kzalloc(rsp_size, GFP_KERNEL); - if (!rsp_data) { - efi_status = EFI_OUT_OF_RESOURCES; - goto out_free_req; - } + req_data = cmd_buf + req_offs; + rsp_data = cmd_buf + rsp_offs; req_data->command_id = QSEE_CMD_UEFI_GET_NEXT_VARIABLE; req_data->guid_offset = guid_offs; @@ -572,7 +611,9 @@ static efi_status_t qsee_uefi_get_next_variable(struct qcuefi_client *qcuefi, goto out_free; } - status = qcom_qseecom_app_send(qcuefi->client, req_data, req_size, rsp_data, rsp_size); + status = qcom_qseecom_app_send(qcuefi->client, + cmd_buf_dma + req_offs, req_size, + cmd_buf_dma + rsp_offs, rsp_size); if (status) { efi_status = EFI_DEVICE_ERROR; goto out_free; @@ -645,9 +686,7 @@ static efi_status_t qsee_uefi_get_next_variable(struct qcuefi_client *qcuefi, } out_free: - kfree(rsp_data); -out_free_req: - kfree(req_data); + qseecom_dma_free(qcuefi->client, cmd_buf_size, cmd_buf, cmd_buf_dma); out: return efi_status; } @@ -659,26 +698,34 @@ static efi_status_t qsee_uefi_query_variable_info(struct qcuefi_client *qcuefi, struct qsee_req_uefi_query_variable_info *req_data; struct qsee_rsp_uefi_query_variable_info *rsp_data; efi_status_t efi_status = EFI_SUCCESS; + dma_addr_t cmd_buf_dma; + size_t cmd_buf_size; + void *cmd_buf; + size_t req_offs; + size_t rsp_offs; int status; - req_data = kzalloc(sizeof(*req_data), GFP_KERNEL); - if (!req_data) { + cmd_buf_size = qcuefi_buf_align_fields( + __reqdata_offs(sizeof(*req_data), &req_offs) + __reqdata_offs(sizeof(*rsp_data), &rsp_offs) + ); + + cmd_buf = qseecom_dma_alloc(qcuefi->client, cmd_buf_size, &cmd_buf_dma, GFP_KERNEL); + if (!cmd_buf) { efi_status = EFI_OUT_OF_RESOURCES; goto out; } - rsp_data = kzalloc(sizeof(*rsp_data), GFP_KERNEL); - if (!rsp_data) { - efi_status = EFI_OUT_OF_RESOURCES; - goto out_free_req; - } + req_data = cmd_buf + req_offs; + rsp_data = cmd_buf + rsp_offs; req_data->command_id = QSEE_CMD_UEFI_QUERY_VARIABLE_INFO; req_data->attributes = attr; req_data->length = sizeof(*req_data); - status = qcom_qseecom_app_send(qcuefi->client, req_data, sizeof(*req_data), rsp_data, - sizeof(*rsp_data)); + status = qcom_qseecom_app_send(qcuefi->client, + cmd_buf_dma + req_offs, sizeof(*req_data), + cmd_buf_dma + rsp_offs, sizeof(*rsp_data)); if (status) { efi_status = EFI_DEVICE_ERROR; goto out_free; @@ -711,9 +758,7 @@ static efi_status_t qsee_uefi_query_variable_info(struct qcuefi_client *qcuefi, *max_variable_size = rsp_data->max_variable_size; out_free: - kfree(rsp_data); -out_free_req: - kfree(req_data); + qseecom_dma_free(qcuefi->client, cmd_buf_size, cmd_buf, cmd_buf_dma); out: return efi_status; } diff --git a/drivers/firmware/qcom/qcom_scm.c b/drivers/firmware/qcom/qcom_scm.c index 520de9b5633ab..90283f160a228 100644 --- a/drivers/firmware/qcom/qcom_scm.c +++ b/drivers/firmware/qcom/qcom_scm.c @@ -1576,9 +1576,9 @@ EXPORT_SYMBOL_GPL(qcom_scm_qseecom_app_get_id); /** * qcom_scm_qseecom_app_send() - Send to and receive data from a given QSEE app. * @app_id: The ID of the target app. - * @req: Request buffer sent to the app (must be DMA-mappable). + * @req: DMA address of the request buffer sent to the app. * @req_size: Size of the request buffer. - * @rsp: Response buffer, written to by the app (must be DMA-mappable). + * @rsp: DMA address of the response buffer, written to by the app. * @rsp_size: Size of the response buffer. * * Sends a request to the QSEE app associated with the given ID and read back @@ -1589,33 +1589,13 @@ EXPORT_SYMBOL_GPL(qcom_scm_qseecom_app_get_id); * * Return: Zero on success, nonzero on failure. */ -int qcom_scm_qseecom_app_send(u32 app_id, void *req, size_t req_size, void *rsp, - size_t rsp_size) +int qcom_scm_qseecom_app_send(u32 app_id, dma_addr_t req, size_t req_size, + dma_addr_t rsp, size_t rsp_size) { struct qcom_scm_qseecom_resp res = {}; struct qcom_scm_desc desc = {}; - dma_addr_t req_phys; - dma_addr_t rsp_phys; int status; - /* Map request buffer */ - req_phys = dma_map_single(__scm->dev, req, req_size, DMA_TO_DEVICE); - status = dma_mapping_error(__scm->dev, req_phys); - if (status) { - dev_err(__scm->dev, "qseecom: failed to map request buffer\n"); - return status; - } - - /* Map response buffer */ - rsp_phys = dma_map_single(__scm->dev, rsp, rsp_size, DMA_FROM_DEVICE); - status = dma_mapping_error(__scm->dev, rsp_phys); - if (status) { - dma_unmap_single(__scm->dev, req_phys, req_size, DMA_TO_DEVICE); - dev_err(__scm->dev, "qseecom: failed to map response buffer\n"); - return status; - } - - /* Set up SCM call data */ desc.owner = QSEECOM_TZ_OWNER_TZ_APPS; desc.svc = QSEECOM_TZ_SVC_APP_ID_PLACEHOLDER; desc.cmd = QSEECOM_TZ_CMD_APP_SEND; @@ -1623,18 +1603,13 @@ int qcom_scm_qseecom_app_send(u32 app_id, void *req, size_t req_size, void *rsp, QCOM_SCM_RW, QCOM_SCM_VAL, QCOM_SCM_RW, QCOM_SCM_VAL); desc.args[0] = app_id; - desc.args[1] = req_phys; + desc.args[1] = req; desc.args[2] = req_size; - desc.args[3] = rsp_phys; + desc.args[3] = rsp; desc.args[4] = rsp_size; - /* Perform call */ status = qcom_scm_qseecom_call(&desc, &res); - /* Unmap buffers */ - dma_unmap_single(__scm->dev, rsp_phys, rsp_size, DMA_FROM_DEVICE); - dma_unmap_single(__scm->dev, req_phys, req_size, DMA_TO_DEVICE); - if (status) return status; diff --git a/include/linux/firmware/qcom/qcom_qseecom.h b/include/linux/firmware/qcom/qcom_qseecom.h index 5c28298a98bec..366243ee96096 100644 --- a/include/linux/firmware/qcom/qcom_qseecom.h +++ b/include/linux/firmware/qcom/qcom_qseecom.h @@ -10,6 +10,7 @@ #define __QCOM_QSEECOM_H #include +#include #include #include @@ -24,12 +25,57 @@ struct qseecom_client { u32 app_id; }; +/** + * qseecom_scm_dev() - Get the SCM device associated with the QSEECOM client. + * @client: The QSEECOM client device. + * + * Returns the SCM device under which the provided QSEECOM client device + * operates. This function is intended to be used for DMA allocations. + */ +static inline struct device *qseecom_scm_dev(struct qseecom_client *client) +{ + return client->aux_dev.dev.parent->parent; +} + +/** + * qseecom_dma_alloc() - Allocate DMA memory for a QSEECOM client. + * @client: The QSEECOM client to allocate the memory for. + * @size: The number of bytes to allocate. + * @dma_handle: Pointer to where the DMA address should be stored. + * @gfp: Allocation flags. + * + * Wrapper function for dma_alloc_coherent(), allocating DMA memory usable for + * TZ/QSEECOM communication. Refer to dma_alloc_coherent() for details. + */ +static inline void *qseecom_dma_alloc(struct qseecom_client *client, size_t size, + dma_addr_t *dma_handle, gfp_t gfp) +{ + return dma_alloc_coherent(qseecom_scm_dev(client), size, dma_handle, gfp); +} + +/** + * dma_free_coherent() - Free QSEECOM DMA memory. + * @client: The QSEECOM client for which the memory has been allocated. + * @size: The number of bytes allocated. + * @cpu_addr: Virtual memory address to free. + * @dma_handle: DMA memory address to free. + * + * Wrapper function for dma_free_coherent(), freeing memory previously + * allocated with qseecom_dma_alloc(). Refer to dma_free_coherent() for + * details. + */ +static inline void qseecom_dma_free(struct qseecom_client *client, size_t size, + void *cpu_addr, dma_addr_t dma_handle) +{ + return dma_free_coherent(qseecom_scm_dev(client), size, cpu_addr, dma_handle); +} + /** * qcom_qseecom_app_send() - Send to and receive data from a given QSEE app. * @client: The QSEECOM client associated with the target app. - * @req: Request buffer sent to the app (must be DMA-mappable). + * @req: DMA address of the request buffer sent to the app. * @req_size: Size of the request buffer. - * @rsp: Response buffer, written to by the app (must be DMA-mappable). + * @rsp: DMA address of the response buffer, written to by the app. * @rsp_size: Size of the response buffer. * * Sends a request to the QSEE app associated with the given client and read @@ -43,8 +89,9 @@ struct qseecom_client { * * Return: Zero on success, nonzero on failure. */ -static inline int qcom_qseecom_app_send(struct qseecom_client *client, void *req, size_t req_size, - void *rsp, size_t rsp_size) +static inline int qcom_qseecom_app_send(struct qseecom_client *client, + dma_addr_t req, size_t req_size, + dma_addr_t rsp, size_t rsp_size) { return qcom_scm_qseecom_app_send(client->app_id, req, req_size, rsp, rsp_size); } diff --git a/include/linux/firmware/qcom/qcom_scm.h b/include/linux/firmware/qcom/qcom_scm.h index ccaf288460546..aaa19f93ac430 100644 --- a/include/linux/firmware/qcom/qcom_scm.h +++ b/include/linux/firmware/qcom/qcom_scm.h @@ -118,8 +118,8 @@ bool qcom_scm_lmh_dcvsh_available(void); #ifdef CONFIG_QCOM_QSEECOM int qcom_scm_qseecom_app_get_id(const char *app_name, u32 *app_id); -int qcom_scm_qseecom_app_send(u32 app_id, void *req, size_t req_size, void *rsp, - size_t rsp_size); +int qcom_scm_qseecom_app_send(u32 app_id, dma_addr_t req, size_t req_size, + dma_addr_t rsp, size_t rsp_size); #else /* CONFIG_QCOM_QSEECOM */ @@ -128,9 +128,9 @@ static inline int qcom_scm_qseecom_app_get_id(const char *app_name, u32 *app_id) return -EINVAL; } -static inline int qcom_scm_qseecom_app_send(u32 app_id, void *req, - size_t req_size, void *rsp, - size_t rsp_size) +static inline int qcom_scm_qseecom_app_send(u32 app_id, + dma_addr_t req, size_t req_size, + dma_addr_t rsp, size_t rsp_size) { return -EINVAL; } From d3cf8a17498dd9104c04ad28eeac3ef3339f9f9f Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Tue, 9 Apr 2024 16:44:38 +0200 Subject: [PATCH 114/606] regulator: mt6360: De-capitalize devicetree regulator subnodes The MT6360 regulator binding, the example in the MT6360 mfd binding, and the devicetree users of those bindings are rightfully declaring MT6360 regulator subnodes with non-capital names, and luckily without using the deprecated regulator-compatible property. With this driver declaring capitalized BUCKx/LDOx as of_match string for the node names, obviously no regulator gets probed: fix that by changing the MT6360_REGULATOR_DESC macro to add a "match" parameter which gets assigned to the of_match. Fixes: d321571d5e4c ("regulator: mt6360: Add support for MT6360 regulator") Signed-off-by: AngeloGioacchino Del Regno Link: https://msgid.link/r/20240409144438.410060-1-angelogioacchino.delregno@collabora.com Signed-off-by: Mark Brown --- drivers/regulator/mt6360-regulator.c | 32 +++++++++++++++++----------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/drivers/regulator/mt6360-regulator.c b/drivers/regulator/mt6360-regulator.c index ad6587a378d09..24cc9fc94e900 100644 --- a/drivers/regulator/mt6360-regulator.c +++ b/drivers/regulator/mt6360-regulator.c @@ -319,15 +319,15 @@ static unsigned int mt6360_regulator_of_map_mode(unsigned int hw_mode) } } -#define MT6360_REGULATOR_DESC(_name, _sname, ereg, emask, vreg, vmask, \ - mreg, mmask, streg, stmask, vranges, \ - vcnts, offon_delay, irq_tbls) \ +#define MT6360_REGULATOR_DESC(match, _name, _sname, ereg, emask, vreg, \ + vmask, mreg, mmask, streg, stmask, \ + vranges, vcnts, offon_delay, irq_tbls) \ { \ .desc = { \ .name = #_name, \ .supply_name = #_sname, \ .id = MT6360_REGULATOR_##_name, \ - .of_match = of_match_ptr(#_name), \ + .of_match = of_match_ptr(match), \ .regulators_node = of_match_ptr("regulator"), \ .of_map_mode = mt6360_regulator_of_map_mode, \ .owner = THIS_MODULE, \ @@ -351,21 +351,29 @@ static unsigned int mt6360_regulator_of_map_mode(unsigned int hw_mode) } static const struct mt6360_regulator_desc mt6360_regulator_descs[] = { - MT6360_REGULATOR_DESC(BUCK1, BUCK1_VIN, 0x117, 0x40, 0x110, 0xff, 0x117, 0x30, 0x117, 0x04, + MT6360_REGULATOR_DESC("buck1", BUCK1, BUCK1_VIN, + 0x117, 0x40, 0x110, 0xff, 0x117, 0x30, 0x117, 0x04, buck_vout_ranges, 256, 0, buck1_irq_tbls), - MT6360_REGULATOR_DESC(BUCK2, BUCK2_VIN, 0x127, 0x40, 0x120, 0xff, 0x127, 0x30, 0x127, 0x04, + MT6360_REGULATOR_DESC("buck2", BUCK2, BUCK2_VIN, + 0x127, 0x40, 0x120, 0xff, 0x127, 0x30, 0x127, 0x04, buck_vout_ranges, 256, 0, buck2_irq_tbls), - MT6360_REGULATOR_DESC(LDO6, LDO_VIN3, 0x137, 0x40, 0x13B, 0xff, 0x137, 0x30, 0x137, 0x04, + MT6360_REGULATOR_DESC("ldo6", LDO6, LDO_VIN3, + 0x137, 0x40, 0x13B, 0xff, 0x137, 0x30, 0x137, 0x04, ldo_vout_ranges1, 256, 0, ldo6_irq_tbls), - MT6360_REGULATOR_DESC(LDO7, LDO_VIN3, 0x131, 0x40, 0x135, 0xff, 0x131, 0x30, 0x131, 0x04, + MT6360_REGULATOR_DESC("ldo7", LDO7, LDO_VIN3, + 0x131, 0x40, 0x135, 0xff, 0x131, 0x30, 0x131, 0x04, ldo_vout_ranges1, 256, 0, ldo7_irq_tbls), - MT6360_REGULATOR_DESC(LDO1, LDO_VIN1, 0x217, 0x40, 0x21B, 0xff, 0x217, 0x30, 0x217, 0x04, + MT6360_REGULATOR_DESC("ldo1", LDO1, LDO_VIN1, + 0x217, 0x40, 0x21B, 0xff, 0x217, 0x30, 0x217, 0x04, ldo_vout_ranges2, 256, 0, ldo1_irq_tbls), - MT6360_REGULATOR_DESC(LDO2, LDO_VIN1, 0x211, 0x40, 0x215, 0xff, 0x211, 0x30, 0x211, 0x04, + MT6360_REGULATOR_DESC("ldo2", LDO2, LDO_VIN1, + 0x211, 0x40, 0x215, 0xff, 0x211, 0x30, 0x211, 0x04, ldo_vout_ranges2, 256, 0, ldo2_irq_tbls), - MT6360_REGULATOR_DESC(LDO3, LDO_VIN1, 0x205, 0x40, 0x209, 0xff, 0x205, 0x30, 0x205, 0x04, + MT6360_REGULATOR_DESC("ldo3", LDO3, LDO_VIN1, + 0x205, 0x40, 0x209, 0xff, 0x205, 0x30, 0x205, 0x04, ldo_vout_ranges2, 256, 100, ldo3_irq_tbls), - MT6360_REGULATOR_DESC(LDO5, LDO_VIN2, 0x20B, 0x40, 0x20F, 0x7f, 0x20B, 0x30, 0x20B, 0x04, + MT6360_REGULATOR_DESC("ldo5", LDO5, LDO_VIN2, + 0x20B, 0x40, 0x20F, 0x7f, 0x20B, 0x30, 0x20B, 0x04, ldo_vout_ranges3, 128, 100, ldo5_irq_tbls), }; From 4b9a474c7c820391c0913d64431ae9e1f52a5143 Mon Sep 17 00:00:00 2001 From: "end.to.start" Date: Mon, 8 Apr 2024 18:24:54 +0300 Subject: [PATCH 115/606] ASoC: acp: Support microphone from device Acer 315-24p This patch adds microphone detection for the Acer 315-24p, after which a microphone appears on the device and starts working Signed-off-by: end.to.start Link: https://msgid.link/r/20240408152454.45532-1-end.to.start@mail.ru Signed-off-by: Mark Brown --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 69c68d8e7a6b5..1760b5d42460a 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -430,6 +430,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_BOARD_NAME, "MRID6"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "MDC"), + DMI_MATCH(DMI_BOARD_NAME, "Herbag_MDU"), + } + }, { .driver_data = &acp6x_card, .matches = { From 103abab975087e1f01b76fcb54c91dbb65dbc249 Mon Sep 17 00:00:00 2001 From: Derek Fang Date: Mon, 8 Apr 2024 17:10:56 +0800 Subject: [PATCH 116/606] ASoC: rt5645: Fix the electric noise due to the CBJ contacts floating The codec leaves tie combo jack's sleeve/ring2 to floating status default. It would cause electric noise while connecting the active speaker jack during boot or shutdown. This patch requests a gpio to control the additional jack circuit to tie the contacts to the ground or floating. Signed-off-by: Derek Fang Link: https://msgid.link/r/20240408091057.14165-1-derek.fang@realtek.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5645.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index e3ba04484813a..d0d24a53df746 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -444,6 +444,7 @@ struct rt5645_priv { struct regmap *regmap; struct i2c_client *i2c; struct gpio_desc *gpiod_hp_det; + struct gpio_desc *gpiod_cbj_sleeve; struct snd_soc_jack *hp_jack; struct snd_soc_jack *mic_jack; struct snd_soc_jack *btn_jack; @@ -3186,6 +3187,9 @@ static int rt5645_jack_detect(struct snd_soc_component *component, int jack_inse regmap_update_bits(rt5645->regmap, RT5645_IN1_CTRL2, RT5645_CBJ_MN_JD, 0); + if (rt5645->gpiod_cbj_sleeve) + gpiod_set_value(rt5645->gpiod_cbj_sleeve, 1); + msleep(600); regmap_read(rt5645->regmap, RT5645_IN1_CTRL3, &val); val &= 0x7; @@ -3202,6 +3206,8 @@ static int rt5645_jack_detect(struct snd_soc_component *component, int jack_inse snd_soc_dapm_disable_pin(dapm, "Mic Det Power"); snd_soc_dapm_sync(dapm); rt5645->jack_type = SND_JACK_HEADPHONE; + if (rt5645->gpiod_cbj_sleeve) + gpiod_set_value(rt5645->gpiod_cbj_sleeve, 0); } if (rt5645->pdata.level_trigger_irq) regmap_update_bits(rt5645->regmap, RT5645_IRQ_CTRL2, @@ -3229,6 +3235,9 @@ static int rt5645_jack_detect(struct snd_soc_component *component, int jack_inse if (rt5645->pdata.level_trigger_irq) regmap_update_bits(rt5645->regmap, RT5645_IRQ_CTRL2, RT5645_JD_1_1_MASK, RT5645_JD_1_1_INV); + + if (rt5645->gpiod_cbj_sleeve) + gpiod_set_value(rt5645->gpiod_cbj_sleeve, 0); } return rt5645->jack_type; @@ -4012,6 +4021,16 @@ static int rt5645_i2c_probe(struct i2c_client *i2c) return ret; } + rt5645->gpiod_cbj_sleeve = devm_gpiod_get_optional(&i2c->dev, "cbj-sleeve", + GPIOD_OUT_LOW); + + if (IS_ERR(rt5645->gpiod_cbj_sleeve)) { + ret = PTR_ERR(rt5645->gpiod_cbj_sleeve); + dev_info(&i2c->dev, "failed to initialize gpiod, ret=%d\n", ret); + if (ret != -ENOENT) + return ret; + } + for (i = 0; i < ARRAY_SIZE(rt5645->supplies); i++) rt5645->supplies[i].supply = rt5645_supply_names[i]; @@ -4259,6 +4278,9 @@ static void rt5645_i2c_remove(struct i2c_client *i2c) cancel_delayed_work_sync(&rt5645->jack_detect_work); cancel_delayed_work_sync(&rt5645->rcclock_work); + if (rt5645->gpiod_cbj_sleeve) + gpiod_set_value(rt5645->gpiod_cbj_sleeve, 0); + regulator_bulk_disable(ARRAY_SIZE(rt5645->supplies), rt5645->supplies); } @@ -4274,6 +4296,9 @@ static void rt5645_i2c_shutdown(struct i2c_client *i2c) 0); msleep(20); regmap_write(rt5645->regmap, RT5645_RESET, 0); + + if (rt5645->gpiod_cbj_sleeve) + gpiod_set_value(rt5645->gpiod_cbj_sleeve, 0); } static int __maybe_unused rt5645_sys_suspend(struct device *dev) From 306b38e3fa727d22454a148a364123709e356600 Mon Sep 17 00:00:00 2001 From: Derek Fang Date: Mon, 8 Apr 2024 17:10:57 +0800 Subject: [PATCH 117/606] ASoC: dt-bindings: rt5645: add cbj sleeve gpio property Add an optional gpio property to control external CBJ circuits to avoid some electric noise caused by sleeve/ring2 contacts floating. Signed-off-by: Derek Fang Link: https://msgid.link/r/20240408091057.14165-2-derek.fang@realtek.com Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/sound/rt5645.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/devicetree/bindings/sound/rt5645.txt b/Documentation/devicetree/bindings/sound/rt5645.txt index 41a62fd2ae1ff..c1fa379f5f3ea 100644 --- a/Documentation/devicetree/bindings/sound/rt5645.txt +++ b/Documentation/devicetree/bindings/sound/rt5645.txt @@ -20,6 +20,11 @@ Optional properties: a GPIO spec for the external headphone detect pin. If jd-mode = 0, we will get the JD status by getting the value of hp-detect-gpios. +- cbj-sleeve-gpios: + a GPIO spec to control the external combo jack circuit to tie the sleeve/ring2 + contacts to the ground or floating. It could avoid some electric noise from the + active speaker jacks. + - realtek,in2-differential Boolean. Indicate MIC2 input are differential, rather than single-ended. @@ -68,6 +73,7 @@ codec: rt5650@1a { compatible = "realtek,rt5650"; reg = <0x1a>; hp-detect-gpios = <&gpio 19 0>; + cbj-sleeve-gpios = <&gpio 20 0>; interrupt-parent = <&gpio>; interrupts = <7 IRQ_TYPE_EDGE_FALLING>; realtek,dmic-en = "true"; From cb9946971d7cb717b726710e1a9fa4ded00b9135 Mon Sep 17 00:00:00 2001 From: Jack Yu Date: Tue, 9 Apr 2024 06:47:43 +0000 Subject: [PATCH 118/606] ASoC: rt722-sdca: modify channel number to support 4 channels Channel numbers of dmic supports 4 channels, modify channels_max regarding to this issue. Signed-off-by: Jack Yu Link: https://msgid.link/r/6a9b1d1fb2ea4f04b2157799f04053b1@realtek.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt722-sdca.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/rt722-sdca.c b/sound/soc/codecs/rt722-sdca.c index e0ea3a23f7cc6..43bec8dd2ff7a 100644 --- a/sound/soc/codecs/rt722-sdca.c +++ b/sound/soc/codecs/rt722-sdca.c @@ -1330,7 +1330,7 @@ static struct snd_soc_dai_driver rt722_sdca_dai[] = { .capture = { .stream_name = "DP6 DMic Capture", .channels_min = 1, - .channels_max = 2, + .channels_max = 4, .rates = RT722_STEREO_RATES, .formats = RT722_FORMATS, }, From 140e0762ca055d1aa84b17847cde5d9e47f56f76 Mon Sep 17 00:00:00 2001 From: Jack Yu Date: Tue, 9 Apr 2024 06:47:34 +0000 Subject: [PATCH 119/606] ASoC: rt722-sdca: add headset microphone vrefo setting Add vrefo settings to fix jd and headset mic recording issue. Signed-off-by: Jack Yu Link: https://msgid.link/r/727219ed45d3485ba8f4646700aaa8a8@realtek.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt722-sdca.c | 25 +++++++++++++++++++------ sound/soc/codecs/rt722-sdca.h | 3 +++ 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/sound/soc/codecs/rt722-sdca.c b/sound/soc/codecs/rt722-sdca.c index 43bec8dd2ff7a..e5bd9ef812de1 100644 --- a/sound/soc/codecs/rt722-sdca.c +++ b/sound/soc/codecs/rt722-sdca.c @@ -1439,9 +1439,12 @@ static void rt722_sdca_jack_preset(struct rt722_sdca_priv *rt722) int loop_check, chk_cnt = 100, ret; unsigned int calib_status = 0; - /* Read eFuse */ - rt722_sdca_index_write(rt722, RT722_VENDOR_SPK_EFUSE, RT722_DC_CALIB_CTRL, - 0x4808); + /* Config analog bias */ + rt722_sdca_index_write(rt722, RT722_VENDOR_REG, RT722_ANALOG_BIAS_CTL3, + 0xa081); + /* GE related settings */ + rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL, RT722_GE_RELATED_CTL2, + 0xa009); /* Button A, B, C, D bypass mode */ rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL, RT722_UMP_HID_CTL4, 0xcf00); @@ -1475,9 +1478,6 @@ static void rt722_sdca_jack_preset(struct rt722_sdca_priv *rt722) if ((calib_status & 0x0040) == 0x0) break; } - /* Release HP-JD, EN_CBJ_TIE_GL/R open, en_osw gating auto done bit */ - rt722_sdca_index_write(rt722, RT722_VENDOR_REG, RT722_DIGITAL_MISC_CTRL4, - 0x0010); /* Set ADC09 power entity floating control */ rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL, RT722_ADC0A_08_PDE_FLOAT_CTL, 0x2a12); @@ -1490,8 +1490,21 @@ static void rt722_sdca_jack_preset(struct rt722_sdca_priv *rt722) /* Set DAC03 and HP power entity floating control */ rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL, RT722_DAC03_HP_PDE_FLOAT_CTL, 0x4040); + rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL, RT722_ENT_FLOAT_CTRL_1, + 0x4141); + rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL, RT722_FLOAT_CTRL_1, + 0x0101); /* Fine tune PDE40 latency */ regmap_write(rt722->regmap, 0x2f58, 0x07); + regmap_write(rt722->regmap, 0x2f03, 0x06); + /* MIC VRefo */ + rt722_sdca_index_update_bits(rt722, RT722_VENDOR_REG, + RT722_COMBO_JACK_AUTO_CTL1, 0x0200, 0x0200); + rt722_sdca_index_update_bits(rt722, RT722_VENDOR_REG, + RT722_VREFO_GAT, 0x4000, 0x4000); + /* Release HP-JD, EN_CBJ_TIE_GL/R open, en_osw gating auto done bit */ + rt722_sdca_index_write(rt722, RT722_VENDOR_REG, RT722_DIGITAL_MISC_CTRL4, + 0x0010); } int rt722_sdca_io_init(struct device *dev, struct sdw_slave *slave) diff --git a/sound/soc/codecs/rt722-sdca.h b/sound/soc/codecs/rt722-sdca.h index 44af8901352eb..2464361a7958c 100644 --- a/sound/soc/codecs/rt722-sdca.h +++ b/sound/soc/codecs/rt722-sdca.h @@ -69,6 +69,7 @@ struct rt722_sdca_dmic_kctrl_priv { #define RT722_COMBO_JACK_AUTO_CTL2 0x46 #define RT722_COMBO_JACK_AUTO_CTL3 0x47 #define RT722_DIGITAL_MISC_CTRL4 0x4a +#define RT722_VREFO_GAT 0x63 #define RT722_FSM_CTL 0x67 #define RT722_SDCA_INTR_REC 0x82 #define RT722_SW_CONFIG1 0x8a @@ -127,6 +128,8 @@ struct rt722_sdca_dmic_kctrl_priv { #define RT722_UMP_HID_CTL6 0x66 #define RT722_UMP_HID_CTL7 0x67 #define RT722_UMP_HID_CTL8 0x68 +#define RT722_FLOAT_CTRL_1 0x70 +#define RT722_ENT_FLOAT_CTRL_1 0x76 /* Parameter & Verb control 01 (0x1a)(NID:20h) */ #define RT722_HIDDEN_REG_SW_RESET (0x1 << 14) From 6065e736f82c817c9a597a31ee67f0ce4628e948 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Mon, 26 Feb 2024 16:34:46 -0800 Subject: [PATCH 120/606] riscv: Fix TASK_SIZE on 64-bit NOMMU On NOMMU, userspace memory can come from anywhere in physical RAM. The current definition of TASK_SIZE is wrong if any RAM exists above 4G, causing spurious failures in the userspace access routines. Fixes: 6bd33e1ece52 ("riscv: add nommu support") Fixes: c3f896dcf1e4 ("mm: switch the test_vmalloc module to use __vmalloc_node") Signed-off-by: Samuel Holland Reviewed-by: Jisheng Zhang Reviewed-by: Bo Gan Link: https://lore.kernel.org/r/20240227003630.3634533-2-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 0c94260b5d0c1..a564a39e5676f 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -882,7 +882,7 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) #define PAGE_SHARED __pgprot(0) #define PAGE_KERNEL __pgprot(0) #define swapper_pg_dir NULL -#define TASK_SIZE 0xffffffffUL +#define TASK_SIZE _AC(-1, UL) #define VMALLOC_START _AC(0, UL) #define VMALLOC_END TASK_SIZE From aea702dde7e9876fb00571a2602f25130847bf0f Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Mon, 26 Feb 2024 16:34:47 -0800 Subject: [PATCH 121/606] riscv: Fix loading 64-bit NOMMU kernels past the start of RAM commit 3335068f8721 ("riscv: Use PUD/P4D/PGD pages for the linear mapping") added logic to allow using RAM below the kernel load address. However, this does not work for NOMMU, where PAGE_OFFSET is fixed to the kernel load address. Since that range of memory corresponds to PFNs below ARCH_PFN_OFFSET, mm initialization runs off the beginning of mem_map and corrupts adjacent kernel memory. Fix this by restoring the previous behavior for NOMMU kernels. Fixes: 3335068f8721 ("riscv: Use PUD/P4D/PGD pages for the linear mapping") Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20240227003630.3634533-3-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/page.h | 2 +- arch/riscv/mm/init.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index 57e887bfa34cb..94b3d6930fc37 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -89,7 +89,7 @@ typedef struct page *pgtable_t; #define PTE_FMT "%08lx" #endif -#ifdef CONFIG_64BIT +#if defined(CONFIG_64BIT) && defined(CONFIG_MMU) /* * We override this value as its generic definition uses __pa too early in * the boot process (before kernel_map.va_pa_offset is set). diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 32cad6a65ccd2..b3e63dec3ab5c 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -232,7 +232,7 @@ static void __init setup_bootmem(void) * In 64-bit, any use of __va/__pa before this point is wrong as we * did not know the start of DRAM before. */ - if (IS_ENABLED(CONFIG_64BIT)) + if (IS_ENABLED(CONFIG_64BIT) && IS_ENABLED(CONFIG_MMU)) kernel_map.va_pa_offset = PAGE_OFFSET - phys_ram_base; /* From c2b6d3a2bbf6352f7cddff2abe81dc4af4887672 Mon Sep 17 00:00:00 2001 From: Dragan Simic Date: Sun, 7 Apr 2024 19:56:24 +0200 Subject: [PATCH 122/606] arm64: dts: rockchip: Designate the system power controller on QuartzPro64 Designate the primary RK806 PMIC on the Pine64 QuartzPro64 as the system power controller, so the board shuts down properly on poweroff(8). Fixes: 152d3d070a9c ("arm64: dts: rockchip: Add QuartzPro64 SBC device tree") Signed-off-by: Dragan Simic Link: https://lore.kernel.org/r/c602dfb3972a0844f2a87b6245bdc5c3378c5989.1712512497.git.dsimic@manjaro.org Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts b/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts index 67414d72e2b6e..22bbfbe729c11 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts @@ -456,6 +456,7 @@ <&rk806_dvs2_null>, <&rk806_dvs3_null>; pinctrl-names = "default"; spi-max-frequency = <1000000>; + system-power-controller; vcc1-supply = <&vcc4v0_sys>; vcc2-supply = <&vcc4v0_sys>; From 08cd20bdecd9cfde5c1aec6146fa22ca753efea1 Mon Sep 17 00:00:00 2001 From: Muhammed Efe Cetin Date: Sun, 7 Apr 2024 20:32:10 +0300 Subject: [PATCH 123/606] arm64: dts: rockchip: mark system power controller and fix typo on orangepi-5-plus Mark the PMIC as system power controller, so the board will shut-down properly and fix the typo on rk806_dvs1_null pins property. Fixes: 236d225e1ee7 ("arm64: dts: rockchip: Add board device tree for rk3588-orangepi-5-plus") Signed-off-by: Muhammed Efe Cetin Reviewed-by: Dragan Simic Link: https://lore.kernel.org/r/20240407173210.372585-1-efectn@6tel.net Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts b/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts index 1b606ea5b6cf2..1a604429fb266 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts @@ -485,6 +485,7 @@ pinctrl-0 = <&pmic_pins>, <&rk806_dvs1_null>, <&rk806_dvs2_null>, <&rk806_dvs3_null>; spi-max-frequency = <1000000>; + system-power-controller; vcc1-supply = <&vcc5v0_sys>; vcc2-supply = <&vcc5v0_sys>; @@ -506,7 +507,7 @@ #gpio-cells = <2>; rk806_dvs1_null: dvs1-null-pins { - pins = "gpio_pwrctrl2"; + pins = "gpio_pwrctrl1"; function = "pin_fun0"; }; From 29148d841edea9335141fae86a0742f539fe1327 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 7 Apr 2024 12:28:51 +0200 Subject: [PATCH 124/606] arm64: dts: rockchip: drop redundant pcie-reset-suspend in Scarlet Dumo There is no "pcie-reset-suspend" property in the PCI bindings or Linux driver, so assume this was copied from downstream. Drop the property, but leave the comment, because it might be useful for someone. This fixes dtbs_check warning: rk3399-gru-scarlet-dumo.dtb: pcie@f8000000: Unevaluated properties are not allowed ('pcie-reset-suspend' was unexpected) Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240407102854.38672-1-krzysztof.kozlowski@linaro.org Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi index 6d1e3ca863925..d5e035823eb5e 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi @@ -689,7 +689,6 @@ camera: &i2c7 { ep-gpios = <&gpio0 3 GPIO_ACTIVE_HIGH>; /* PERST# asserted in S3 */ - pcie-reset-suspend = <1>; vpcie3v3-supply = <&wlan_3v3>; vpcie1v8-supply = <&pp1800_pcie>; From cd0793fc3b03985d90f24232056853ef79ff555e Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 7 Apr 2024 12:28:53 +0200 Subject: [PATCH 125/606] arm64: dts: rockchip: drop redundant disable-gpios in Lubancat 1 There is no "disable-gpios" property in the PCI bindings or Linux driver, so assume this was copied from downstream. This property looks like some real hardware, just described wrongly. Rockchip PCIe controller (DesignWare based) does not define any other GPIO-s property, except reset-gpios which is already there, so not sure what would be the real property for this GPIO. This fixes dtbs_check warning: rk3566-lubancat-1.dtb: pcie@fe260000: Unevaluated properties are not allowed ('disable-gpios' was unexpected) Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240407102854.38672-3-krzysztof.kozlowski@linaro.org Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3566-lubancat-1.dts | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3566-lubancat-1.dts b/arch/arm64/boot/dts/rockchip/rk3566-lubancat-1.dts index 6ecdf5d283390..c1194d1e438d0 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-lubancat-1.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-lubancat-1.dts @@ -447,7 +447,6 @@ &pcie2x1 { reset-gpios = <&gpio0 RK_PB6 GPIO_ACTIVE_HIGH>; - disable-gpios = <&gpio0 RK_PA6 GPIO_ACTIVE_HIGH>; vpcie3v3-supply = <&vcc3v3_pcie>; status = "okay"; }; From d892a6f34adc371ee0dbaa5ba684d02c4431f2e3 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 7 Apr 2024 12:28:54 +0200 Subject: [PATCH 126/606] arm64: dts: rockchip: drop redundant disable-gpios in Lubancat 2 There is no "disable-gpios" property in the PCI bindings or Linux driver, so assume this was copied from downstream. This property looks like some real hardware, just described wrongly. Rockchip PCIe controller (DesignWare based) does not define any other GPIO-s property, except reset-gpios which is already there, so not sure what would be the real property for this GPIO. This fixes dtbs_check warning: rk3568-lubancat-2.dtb: pcie@fe260000: Unevaluated properties are not allowed ('disable-gpios' was unexpected) Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240407102854.38672-4-krzysztof.kozlowski@linaro.org Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3568-lubancat-2.dts | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3568-lubancat-2.dts b/arch/arm64/boot/dts/rockchip/rk3568-lubancat-2.dts index a8a4cc190eb32..a3112d5df2008 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-lubancat-2.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-lubancat-2.dts @@ -523,7 +523,6 @@ &pcie2x1 { reset-gpios = <&gpio3 RK_PC1 GPIO_ACTIVE_HIGH>; - disable-gpios = <&gpio3 RK_PC2 GPIO_ACTIVE_HIGH>; vpcie3v3-supply = <&vcc3v3_mini_pcie>; status = "okay"; }; From d41201c90f825f19a46afbfb502f22f612d8ccc4 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 1 Apr 2024 15:49:58 -0500 Subject: [PATCH 127/606] dt-bindings: rockchip: grf: Add missing type to 'pcie-phy' node 'pcie-phy' is missing any type. Add 'type: object' to indicate it's a node. Signed-off-by: Rob Herring Reviewed-by: Heiko Stuebner Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20240401204959.1698106-1-robh@kernel.org Signed-off-by: Heiko Stuebner --- Documentation/devicetree/bindings/soc/rockchip/grf.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/soc/rockchip/grf.yaml b/Documentation/devicetree/bindings/soc/rockchip/grf.yaml index 0b87c266760c6..79798c7474768 100644 --- a/Documentation/devicetree/bindings/soc/rockchip/grf.yaml +++ b/Documentation/devicetree/bindings/soc/rockchip/grf.yaml @@ -171,6 +171,7 @@ allOf: unevaluatedProperties: false pcie-phy: + type: object description: Documentation/devicetree/bindings/phy/rockchip-pcie-phy.txt From 433d54818f64a2fe0562f8c04c7a81f562368515 Mon Sep 17 00:00:00 2001 From: Jose Ignacio Tornos Martinez Date: Tue, 5 Mar 2024 15:32:18 +0100 Subject: [PATCH 128/606] arm64: dts: rockchip: regulator for sd needs to be always on for BPI-R2Pro With default dts configuration for BPI-R2Pro, the regulator for sd card is powered off when reboot is commanded, and the only solution to detect the sd card again, and therefore, allow rebooting from there, is to do a hardware reset. Configure the regulator for sd to be always on for BPI-R2Pro in order to avoid this issue. Fixes: f901aaadaa2a ("arm64: dts: rockchip: Add Bananapi R2 Pro") Signed-off-by: Jose Ignacio Tornos Martinez Link: https://lore.kernel.org/r/20240305143222.189413-1-jtornosm@redhat.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts b/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts index 03d6d920446ab..c87fad2c34cba 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts @@ -416,6 +416,8 @@ vccio_sd: LDO_REG5 { regulator-name = "vccio_sd"; + regulator-always-on; + regulator-boot-on; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <3300000>; From 452d8950db3e839aba1bb13bc5378f4bac11fa04 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 3 Apr 2024 10:06:27 +0200 Subject: [PATCH 129/606] power: rt9455: hide unused rt9455_boost_voltage_values The rt9455_boost_voltage_values[] array is only used when USB PHY support is enabled, causing a W=1 warning otherwise: drivers/power/supply/rt9455_charger.c:200:18: error: 'rt9455_boost_voltage_values' defined but not used [-Werror=unused-const-variable=] Enclose the definition in the same #ifdef as the references to it. Fixes: e86d69dd786e ("power_supply: Add support for Richtek RT9455 battery charger") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20240403080702.3509288-10-arnd@kernel.org Signed-off-by: Sebastian Reichel --- drivers/power/supply/rt9455_charger.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/power/supply/rt9455_charger.c b/drivers/power/supply/rt9455_charger.c index c345a77f9f78c..e4dbacd50a437 100644 --- a/drivers/power/supply/rt9455_charger.c +++ b/drivers/power/supply/rt9455_charger.c @@ -192,6 +192,7 @@ static const int rt9455_voreg_values[] = { 4450000, 4450000, 4450000, 4450000, 4450000, 4450000, 4450000, 4450000 }; +#if IS_ENABLED(CONFIG_USB_PHY) /* * When the charger is in boost mode, REG02[7:2] represent boost output * voltage. @@ -207,6 +208,7 @@ static const int rt9455_boost_voltage_values[] = { 5600000, 5600000, 5600000, 5600000, 5600000, 5600000, 5600000, 5600000, 5600000, 5600000, 5600000, 5600000, 5600000, 5600000, 5600000, 5600000, }; +#endif /* REG07[3:0] (VMREG) in uV */ static const int rt9455_vmreg_values[] = { From f141dde5dc51ecab18e8b12b76eb416cda0d6798 Mon Sep 17 00:00:00 2001 From: Matthew Sakai Date: Fri, 5 Apr 2024 21:26:21 +0200 Subject: [PATCH 130/606] dm vdo murmurhash: remove unneeded semicolon Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202404050327.4ebVLBD3-lkp@intel.com/ Signed-off-by: Matthew Sakai Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-vdo/murmurhash3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-vdo/murmurhash3.c b/drivers/md/dm-vdo/murmurhash3.c index 01d2743444ec6..3a989efae1420 100644 --- a/drivers/md/dm-vdo/murmurhash3.c +++ b/drivers/md/dm-vdo/murmurhash3.c @@ -137,7 +137,7 @@ void murmurhash3_128(const void *key, const int len, const u32 seed, void *out) break; default: break; - }; + } } /* finalization */ From ddd3f34c10002e41ed3cd89c9bd8f1d05a22506a Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 10 Apr 2024 19:26:14 +0200 Subject: [PATCH 131/606] regulator: qcom-refgen: fix module autoloading Add MODULE_DEVICE_TABLE(), so the module could be properly autoloaded based on the alias from of_device_id table. Signed-off-by: Krzysztof Kozlowski Reviewed-by: Konrad Dybcio Link: https://msgid.link/r/20240410172615.255424-1-krzk@kernel.org Signed-off-by: Mark Brown --- drivers/regulator/qcom-refgen-regulator.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/regulator/qcom-refgen-regulator.c b/drivers/regulator/qcom-refgen-regulator.c index 656fe330d38f0..063e12c08e75f 100644 --- a/drivers/regulator/qcom-refgen-regulator.c +++ b/drivers/regulator/qcom-refgen-regulator.c @@ -140,6 +140,7 @@ static const struct of_device_id qcom_refgen_match_table[] = { { .compatible = "qcom,sm8250-refgen-regulator", .data = &sm8250_refgen_desc }, { } }; +MODULE_DEVICE_TABLE(of, qcom_refgen_match_table); static struct platform_driver qcom_refgen_driver = { .probe = qcom_refgen_probe, From 68adb581a39ae63a0ed082c47f01fbbe515efa0e Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 10 Apr 2024 19:26:15 +0200 Subject: [PATCH 132/606] regulator: vqmmc-ipq4019: fix module autoloading Add MODULE_DEVICE_TABLE(), so the module could be properly autoloaded based on the alias from of_device_id table. Signed-off-by: Krzysztof Kozlowski Reviewed-by: Konrad Dybcio Link: https://msgid.link/r/20240410172615.255424-2-krzk@kernel.org Signed-off-by: Mark Brown --- drivers/regulator/vqmmc-ipq4019-regulator.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/regulator/vqmmc-ipq4019-regulator.c b/drivers/regulator/vqmmc-ipq4019-regulator.c index 086da36abc0b4..4955616517ce9 100644 --- a/drivers/regulator/vqmmc-ipq4019-regulator.c +++ b/drivers/regulator/vqmmc-ipq4019-regulator.c @@ -84,6 +84,7 @@ static const struct of_device_id regulator_ipq4019_of_match[] = { { .compatible = "qcom,vqmmc-ipq4019-regulator", }, {}, }; +MODULE_DEVICE_TABLE(of, regulator_ipq4019_of_match); static struct platform_driver ipq4019_regulator_driver = { .probe = ipq4019_regulator_probe, From 9617cd6f24b294552a817f80f5225431ef67b540 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Sat, 6 Apr 2024 17:09:25 +0800 Subject: [PATCH 133/606] block: fix module reference leakage from bdev_open_by_dev error path At the time bdev_may_open() is called, module reference is grabbed already, hence module reference should be released if bdev_may_open() failed. This problem is found by code review. Fixes: ed5cc702d311 ("block: Add config option to not allow writing to mounted devices") Signed-off-by: Yu Kuai Link: https://lore.kernel.org/r/20240406090930.2252838-22-yukuai1@huaweicloud.com Signed-off-by: Christian Brauner --- block/bdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/bdev.c b/block/bdev.c index b8e32d933a636..3caf24e665596 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -873,7 +873,7 @@ int bdev_open(struct block_device *bdev, blk_mode_t mode, void *holder, goto abort_claiming; ret = -EBUSY; if (!bdev_may_open(bdev, mode)) - goto abort_claiming; + goto put_module; if (bdev_is_partition(bdev)) ret = blkdev_get_part(bdev, mode); else From eefb831d2e4dd58d58002a2ef75ff989e073230d Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Thu, 11 Apr 2024 15:26:48 +0100 Subject: [PATCH 134/606] ASoC: cs35l41: Update DSP1RX5/6 Sources for DSP config Currently, all ASoC systems are set to use VPMON for DSP1RX5_SRC, however, this is required only for internal boost systems. External boost systems require VBSTMON instead of VPMON to be the input to DSP1RX5_SRC. Shared Boost Active acts like Internal boost (requires VPMON). Shared Boost Passive acts like External boost (requires VBSTMON) All systems require DSP1RX6_SRC to be set to VBSTMON. Signed-off-by: Stefan Binding Reviewed-by: Richard Fitzgerald Link: https://msgid.link/r/20240411142648.650921-1-sbinding@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l41.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/sound/soc/codecs/cs35l41.c b/sound/soc/codecs/cs35l41.c index dfb4ce53491bb..f8e57a2fc3e32 100644 --- a/sound/soc/codecs/cs35l41.c +++ b/sound/soc/codecs/cs35l41.c @@ -1094,6 +1094,7 @@ static int cs35l41_handle_pdata(struct device *dev, struct cs35l41_hw_cfg *hw_cf static int cs35l41_dsp_init(struct cs35l41_private *cs35l41) { struct wm_adsp *dsp; + uint32_t dsp1rx5_src; int ret; dsp = &cs35l41->dsp; @@ -1113,16 +1114,29 @@ static int cs35l41_dsp_init(struct cs35l41_private *cs35l41) return ret; } - ret = regmap_write(cs35l41->regmap, CS35L41_DSP1_RX5_SRC, - CS35L41_INPUT_SRC_VPMON); + switch (cs35l41->hw_cfg.bst_type) { + case CS35L41_INT_BOOST: + case CS35L41_SHD_BOOST_ACTV: + dsp1rx5_src = CS35L41_INPUT_SRC_VPMON; + break; + case CS35L41_EXT_BOOST: + case CS35L41_SHD_BOOST_PASS: + dsp1rx5_src = CS35L41_INPUT_SRC_VBSTMON; + break; + default: + dev_err(cs35l41->dev, "wm_halo_init failed - Invalid Boost Type: %d\n", + cs35l41->hw_cfg.bst_type); + goto err_dsp; + } + + ret = regmap_write(cs35l41->regmap, CS35L41_DSP1_RX5_SRC, dsp1rx5_src); if (ret < 0) { - dev_err(cs35l41->dev, "Write INPUT_SRC_VPMON failed: %d\n", ret); + dev_err(cs35l41->dev, "Write DSP1RX5_SRC: %d failed: %d\n", dsp1rx5_src, ret); goto err_dsp; } - ret = regmap_write(cs35l41->regmap, CS35L41_DSP1_RX6_SRC, - CS35L41_INPUT_SRC_CLASSH); + ret = regmap_write(cs35l41->regmap, CS35L41_DSP1_RX6_SRC, CS35L41_INPUT_SRC_VBSTMON); if (ret < 0) { - dev_err(cs35l41->dev, "Write INPUT_SRC_CLASSH failed: %d\n", ret); + dev_err(cs35l41->dev, "Write CS35L41_INPUT_SRC_VBSTMON failed: %d\n", ret); goto err_dsp; } ret = regmap_write(cs35l41->regmap, CS35L41_DSP1_RX7_SRC, From 0bc2e80b9be51712b19e919db5abc97a418f8292 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Wed, 10 Apr 2024 08:57:14 +0800 Subject: [PATCH 135/606] nvme: fix warn output about shared namespaces without CONFIG_NVME_MULTIPATH Move the stray '.' that is currently at the end of the line after newline '\n' to before newline character which is the right position. Fixes: ce8d78616a6b ("nvme: warn about shared namespaces without CONFIG_NVME_MULTIPATH") Signed-off-by: Yi Zhang Reviewed-by: Chaitanya Kulkarni Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 27281a9a8951d..e4bec200ebeb2 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3681,7 +3681,7 @@ static int nvme_init_ns_head(struct nvme_ns *ns, struct nvme_ns_info *info) "Found shared namespace %d, but multipathing not supported.\n", info->nsid); dev_warn_once(ctrl->device, - "Support for shared namespaces without CONFIG_NVME_MULTIPATH is deprecated and will be removed in Linux 6.0\n."); + "Support for shared namespaces without CONFIG_NVME_MULTIPATH is deprecated and will be removed in Linux 6.0.\n"); } } From e1c9216bec2793d051f83d77d93d3d6a899d06d1 Mon Sep 17 00:00:00 2001 From: Cristian Ciocaltea Date: Tue, 9 Apr 2024 01:29:25 +0300 Subject: [PATCH 136/606] phy: phy-rockchip-samsung-hdptx: Select CONFIG_RATIONAL Ensure CONFIG_RATIONAL is selected in order to fix the following link error with some kernel configurations: drivers/phy/rockchip/phy-rockchip-samsung-hdptx.o: in function `rk_hdptx_ropll_tmds_cmn_config': phy-rockchip-samsung-hdptx.c:(.text+0x950): undefined reference to `rational_best_approximation' Fixes: 553be2830c5f ("phy: rockchip: Add Samsung HDMI/eDP Combo PHY driver") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202404090540.2l1TEkDF-lkp@intel.com/ Signed-off-by: Cristian Ciocaltea Reviewed-by: Heiko Stuebner Link: https://lore.kernel.org/r/20240408222926.32708-1-cristian.ciocaltea@collabora.com Signed-off-by: Vinod Koul --- drivers/phy/rockchip/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/phy/rockchip/Kconfig b/drivers/phy/rockchip/Kconfig index a34f67bb7e61a..b60a4b60451e5 100644 --- a/drivers/phy/rockchip/Kconfig +++ b/drivers/phy/rockchip/Kconfig @@ -87,6 +87,7 @@ config PHY_ROCKCHIP_SAMSUNG_HDPTX tristate "Rockchip Samsung HDMI/eDP Combo PHY driver" depends on (ARCH_ROCKCHIP || COMPILE_TEST) && OF select GENERIC_PHY + select RATIONAL help Enable this to support the Rockchip HDMI/eDP Combo PHY with Samsung IP block. From 025a6f7448f7bb5f4fceb62498ee33d89ae266bb Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 8 Apr 2024 11:30:23 +0200 Subject: [PATCH 137/606] phy: qcom: qmp-combo: fix VCO div offset on v5_5nm and v6 Commit 5abed58a8bde ("phy: qcom: qmp-combo: Fix VCO div offset on v3") fixed a regression introduced in 6.5 by making sure that the correct offset is used for the DP_PHY_VCO_DIV register on v3 hardware. Unfortunately, that fix instead broke DisplayPort on v5_5nm and v6 hardware as it failed to add the corresponding offsets also to those register tables. Fixes: 815891eee668 ("phy: qcom-qmp-combo: Introduce orientation variable") Fixes: 5abed58a8bde ("phy: qcom: qmp-combo: Fix VCO div offset on v3") Cc: stable@vger.kernel.org # 6.5: 5abed58a8bde Cc: Stephen Boyd Cc: Abhinav Kumar Cc: Dmitry Baryshkov Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Reviewed-by: Stephen Boyd Reviewed-by: Abhinav Kumar Link: https://lore.kernel.org/r/20240408093023.506-1-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 2 ++ drivers/phy/qualcomm/phy-qcom-qmp-dp-phy-v5.h | 1 + drivers/phy/qualcomm/phy-qcom-qmp-dp-phy-v6.h | 1 + 3 files changed, 4 insertions(+) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 2a6f70b3e25ff..c21cdb8dbfe74 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -153,6 +153,7 @@ static const unsigned int qmp_v5_5nm_usb3phy_regs_layout[QPHY_LAYOUT_SIZE] = { [QPHY_COM_BIAS_EN_CLKBUFLR_EN] = QSERDES_V5_COM_BIAS_EN_CLKBUFLR_EN, [QPHY_DP_PHY_STATUS] = QSERDES_V5_DP_PHY_STATUS, + [QPHY_DP_PHY_VCO_DIV] = QSERDES_V5_DP_PHY_VCO_DIV, [QPHY_TX_TX_POL_INV] = QSERDES_V5_5NM_TX_TX_POL_INV, [QPHY_TX_TX_DRV_LVL] = QSERDES_V5_5NM_TX_TX_DRV_LVL, @@ -177,6 +178,7 @@ static const unsigned int qmp_v6_usb3phy_regs_layout[QPHY_LAYOUT_SIZE] = { [QPHY_COM_BIAS_EN_CLKBUFLR_EN] = QSERDES_V6_COM_PLL_BIAS_EN_CLK_BUFLR_EN, [QPHY_DP_PHY_STATUS] = QSERDES_V6_DP_PHY_STATUS, + [QPHY_DP_PHY_VCO_DIV] = QSERDES_V6_DP_PHY_VCO_DIV, [QPHY_TX_TX_POL_INV] = QSERDES_V6_TX_TX_POL_INV, [QPHY_TX_TX_DRV_LVL] = QSERDES_V6_TX_TX_DRV_LVL, diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-dp-phy-v5.h b/drivers/phy/qualcomm/phy-qcom-qmp-dp-phy-v5.h index f5cfacf9be964..181057421c113 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-dp-phy-v5.h +++ b/drivers/phy/qualcomm/phy-qcom-qmp-dp-phy-v5.h @@ -7,6 +7,7 @@ #define QCOM_PHY_QMP_DP_PHY_V5_H_ /* Only for QMP V5 PHY - DP PHY registers */ +#define QSERDES_V5_DP_PHY_VCO_DIV 0x070 #define QSERDES_V5_DP_PHY_AUX_INTERRUPT_STATUS 0x0d8 #define QSERDES_V5_DP_PHY_STATUS 0x0dc diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-dp-phy-v6.h b/drivers/phy/qualcomm/phy-qcom-qmp-dp-phy-v6.h index 01a20d3be4b81..fa967a1af058f 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-dp-phy-v6.h +++ b/drivers/phy/qualcomm/phy-qcom-qmp-dp-phy-v6.h @@ -7,6 +7,7 @@ #define QCOM_PHY_QMP_DP_PHY_V6_H_ /* Only for QMP V6 PHY - DP PHY registers */ +#define QSERDES_V6_DP_PHY_VCO_DIV 0x070 #define QSERDES_V6_DP_PHY_AUX_INTERRUPT_STATUS 0x0e0 #define QSERDES_V6_DP_PHY_STATUS 0x0e4 From bf6e4ee5c43690e4c5a8a057bbcd4ff986bed052 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 6 Apr 2024 16:08:21 +0200 Subject: [PATCH 138/606] phy: ti: tusb1210: Resolve charger-det crash if charger psy is unregistered The power_supply frame-work is not really designed for there to be long living in kernel references to power_supply devices. Specifically unregistering a power_supply while some other code has a reference to it triggers a WARN in power_supply_unregister(): WARN_ON(atomic_dec_return(&psy->use_cnt)); Folllowed by the power_supply still getting removed and the backing data freed anyway, leaving the tusb1210 charger-detect code with a dangling reference, resulting in a crash the next time tusb1210_get_online() is called. Fix this by only holding the reference in tusb1210_get_online() freeing it at the end of the function. Note this still leaves a theoretical race window, but it avoids the issue when manually rmmod-ing the charger chip driver during development. Fixes: 48969a5623ed ("phy: ti: tusb1210: Add charger detection") Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20240406140821.18624-1-hdegoede@redhat.com Signed-off-by: Vinod Koul --- drivers/phy/ti/phy-tusb1210.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/phy/ti/phy-tusb1210.c b/drivers/phy/ti/phy-tusb1210.c index 13cd614e12a1d..751fecd466e3e 100644 --- a/drivers/phy/ti/phy-tusb1210.c +++ b/drivers/phy/ti/phy-tusb1210.c @@ -69,7 +69,6 @@ struct tusb1210 { struct delayed_work chg_det_work; struct notifier_block psy_nb; struct power_supply *psy; - struct power_supply *charger; #endif }; @@ -236,19 +235,24 @@ static const char * const tusb1210_chargers[] = { static bool tusb1210_get_online(struct tusb1210 *tusb) { + struct power_supply *charger = NULL; union power_supply_propval val; - int i; + bool online = false; + int i, ret; - for (i = 0; i < ARRAY_SIZE(tusb1210_chargers) && !tusb->charger; i++) - tusb->charger = power_supply_get_by_name(tusb1210_chargers[i]); + for (i = 0; i < ARRAY_SIZE(tusb1210_chargers) && !charger; i++) + charger = power_supply_get_by_name(tusb1210_chargers[i]); - if (!tusb->charger) + if (!charger) return false; - if (power_supply_get_property(tusb->charger, POWER_SUPPLY_PROP_ONLINE, &val)) - return false; + ret = power_supply_get_property(charger, POWER_SUPPLY_PROP_ONLINE, &val); + if (ret == 0) + online = val.intval; + + power_supply_put(charger); - return val.intval; + return online; } static void tusb1210_chg_det_work(struct work_struct *work) @@ -473,9 +477,6 @@ static void tusb1210_remove_charger_detect(struct tusb1210 *tusb) cancel_delayed_work_sync(&tusb->chg_det_work); power_supply_unregister(tusb->psy); } - - if (tusb->charger) - power_supply_put(tusb->charger); } #else static void tusb1210_probe_charger_detect(struct tusb1210 *tusb) { } From 3cba9cfcc1520a2307a29f6fab887bcfc121c417 Mon Sep 17 00:00:00 2001 From: Abdelrahman Morsy Date: Tue, 2 Apr 2024 14:14:06 +0200 Subject: [PATCH 139/606] HID: mcp-2221: cancel delayed_work only when CONFIG_IIO is enabled If the device is unplugged and CONFIG_IIO is not supported, this will result in a warning message at kernel/workqueue. Only cancel delayed work in mcp2221_remove(), when CONFIG_IIO is enabled. Signed-off-by: Abdelrahman Morsy Signed-off-by: Jiri Kosina --- drivers/hid/hid-mcp2221.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/hid-mcp2221.c b/drivers/hid/hid-mcp2221.c index f9cceaeffd081..da5ea5a23b087 100644 --- a/drivers/hid/hid-mcp2221.c +++ b/drivers/hid/hid-mcp2221.c @@ -944,9 +944,11 @@ static void mcp2221_hid_unregister(void *ptr) /* This is needed to be sure hid_hw_stop() isn't called twice by the subsystem */ static void mcp2221_remove(struct hid_device *hdev) { +#if IS_REACHABLE(CONFIG_IIO) struct mcp2221 *mcp = hid_get_drvdata(hdev); cancel_delayed_work_sync(&mcp->init_work); +#endif } #if IS_REACHABLE(CONFIG_IIO) From 37eacb9f6e89fb399a79e952bc9c78eb3e16290e Mon Sep 17 00:00:00 2001 From: Anton Protopopov Date: Fri, 12 Apr 2024 16:11:00 +0200 Subject: [PATCH 140/606] bpf: Fix a verifier verbose message Long ago a map file descriptor in a pseudo ldimm64 instruction could only be present as an immediate value insn[0].imm, and thus this value was used in a verbose verifier message printed when the file descriptor wasn't valid. Since addition of BPF_PSEUDO_MAP_IDX_VALUE/BPF_PSEUDO_MAP_IDX the insn[0].imm field can also contain an index pointing to the file descriptor in the attr.fd_array array. However, if the file descriptor is invalid, the verifier still prints the verbose message containing value of insn[0].imm. Patch the verifier message to always print the actual file descriptor value. Fixes: 387544bfa291 ("bpf: Introduce fd_idx") Signed-off-by: Anton Protopopov Signed-off-by: Daniel Borkmann Acked-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20240412141100.3562942-1-aspsk@isovalent.com --- kernel/bpf/verifier.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 98188379d5c77..aa24beb65393c 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -18289,8 +18289,7 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env) f = fdget(fd); map = __bpf_map_get(f); if (IS_ERR(map)) { - verbose(env, "fd %d is not pointing to valid bpf_map\n", - insn[0].imm); + verbose(env, "fd %d is not pointing to valid bpf_map\n", fd); return PTR_ERR(map); } From f011688162ec4c492c12ee7cced74c097270baa2 Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Mon, 19 Feb 2024 15:33:27 +0100 Subject: [PATCH 141/606] arm64: dts: qcom: Fix type of "wdog" IRQs for remoteprocs The code in qcom_q6v5_init() requests the "wdog" IRQ as IRQF_TRIGGER_RISING. If dt defines the interrupt type as LEVEL_HIGH then the driver will have issues getting the IRQ again after probe deferral with an error like: irq: type mismatch, failed to map hwirq-14 for interrupt-controller@b220000! Fix that by updating the devicetrees to use IRQ_TYPE_EDGE_RISING for these interrupts, as is already used in most dt's. Also the driver was already using the interrupts with that type. Fixes: 3658e411efcb ("arm64: dts: qcom: sc7280: Add ADSP node") Fixes: df62402e5ff9 ("arm64: dts: qcom: sc7280: Add CDSP node") Fixes: 152d1faf1e2f ("arm64: dts: qcom: add SC8280XP platform") Fixes: 8eb5287e8a42 ("arm64: dts: qcom: sm6350: Add CDSP nodes") Fixes: efc33c969f23 ("arm64: dts: qcom: sm6350: Add ADSP nodes") Fixes: fe6fd26aeddf ("arm64: dts: qcom: sm6375: Add ADSP&CDSP") Fixes: 23a8903785b9 ("arm64: dts: qcom: sm8250: Add remoteprocs") Signed-off-by: Luca Weiss Link: https://lore.kernel.org/r/20240219-remoteproc-irqs-v1-1-c5aeb02334bd@fairphone.com [bjorn: Added fixes references] Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/sc7280.dtsi | 4 ++-- arch/arm64/boot/dts/qcom/sc8280xp.dtsi | 6 +++--- arch/arm64/boot/dts/qcom/sm6350.dtsi | 4 ++-- arch/arm64/boot/dts/qcom/sm6375.dtsi | 2 +- arch/arm64/boot/dts/qcom/sm8250.dtsi | 6 +++--- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sc7280.dtsi b/arch/arm64/boot/dts/qcom/sc7280.dtsi index 7e7f0f0fb41ba..41f51d3261110 100644 --- a/arch/arm64/boot/dts/qcom/sc7280.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7280.dtsi @@ -3707,7 +3707,7 @@ compatible = "qcom,sc7280-adsp-pas"; reg = <0 0x03700000 0 0x100>; - interrupts-extended = <&pdc 6 IRQ_TYPE_LEVEL_HIGH>, + interrupts-extended = <&pdc 6 IRQ_TYPE_EDGE_RISING>, <&adsp_smp2p_in 0 IRQ_TYPE_EDGE_RISING>, <&adsp_smp2p_in 1 IRQ_TYPE_EDGE_RISING>, <&adsp_smp2p_in 2 IRQ_TYPE_EDGE_RISING>, @@ -3944,7 +3944,7 @@ compatible = "qcom,sc7280-cdsp-pas"; reg = <0 0x0a300000 0 0x10000>; - interrupts-extended = <&intc GIC_SPI 578 IRQ_TYPE_LEVEL_HIGH>, + interrupts-extended = <&intc GIC_SPI 578 IRQ_TYPE_EDGE_RISING>, <&cdsp_smp2p_in 0 IRQ_TYPE_EDGE_RISING>, <&cdsp_smp2p_in 1 IRQ_TYPE_EDGE_RISING>, <&cdsp_smp2p_in 2 IRQ_TYPE_EDGE_RISING>, diff --git a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi index a5b194813079e..c9058c7fc1a38 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi +++ b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi @@ -2641,7 +2641,7 @@ compatible = "qcom,sc8280xp-adsp-pas"; reg = <0 0x03000000 0 0x100>; - interrupts-extended = <&intc GIC_SPI 162 IRQ_TYPE_LEVEL_HIGH>, + interrupts-extended = <&intc GIC_SPI 162 IRQ_TYPE_EDGE_RISING>, <&smp2p_adsp_in 0 IRQ_TYPE_EDGE_RISING>, <&smp2p_adsp_in 1 IRQ_TYPE_EDGE_RISING>, <&smp2p_adsp_in 2 IRQ_TYPE_EDGE_RISING>, @@ -4977,7 +4977,7 @@ compatible = "qcom,sc8280xp-nsp0-pas"; reg = <0 0x1b300000 0 0x100>; - interrupts-extended = <&intc GIC_SPI 578 IRQ_TYPE_LEVEL_HIGH>, + interrupts-extended = <&intc GIC_SPI 578 IRQ_TYPE_EDGE_RISING>, <&smp2p_nsp0_in 0 IRQ_TYPE_EDGE_RISING>, <&smp2p_nsp0_in 1 IRQ_TYPE_EDGE_RISING>, <&smp2p_nsp0_in 2 IRQ_TYPE_EDGE_RISING>, @@ -5108,7 +5108,7 @@ compatible = "qcom,sc8280xp-nsp1-pas"; reg = <0 0x21300000 0 0x100>; - interrupts-extended = <&intc GIC_SPI 887 IRQ_TYPE_LEVEL_HIGH>, + interrupts-extended = <&intc GIC_SPI 887 IRQ_TYPE_EDGE_RISING>, <&smp2p_nsp1_in 0 IRQ_TYPE_EDGE_RISING>, <&smp2p_nsp1_in 1 IRQ_TYPE_EDGE_RISING>, <&smp2p_nsp1_in 2 IRQ_TYPE_EDGE_RISING>, diff --git a/arch/arm64/boot/dts/qcom/sm6350.dtsi b/arch/arm64/boot/dts/qcom/sm6350.dtsi index 24bcec3366efd..0be053555602c 100644 --- a/arch/arm64/boot/dts/qcom/sm6350.dtsi +++ b/arch/arm64/boot/dts/qcom/sm6350.dtsi @@ -1252,7 +1252,7 @@ compatible = "qcom,sm6350-adsp-pas"; reg = <0 0x03000000 0 0x100>; - interrupts-extended = <&pdc 6 IRQ_TYPE_LEVEL_HIGH>, + interrupts-extended = <&pdc 6 IRQ_TYPE_EDGE_RISING>, <&smp2p_adsp_in 0 IRQ_TYPE_EDGE_RISING>, <&smp2p_adsp_in 1 IRQ_TYPE_EDGE_RISING>, <&smp2p_adsp_in 2 IRQ_TYPE_EDGE_RISING>, @@ -1511,7 +1511,7 @@ compatible = "qcom,sm6350-cdsp-pas"; reg = <0 0x08300000 0 0x10000>; - interrupts-extended = <&intc GIC_SPI 578 IRQ_TYPE_LEVEL_HIGH>, + interrupts-extended = <&intc GIC_SPI 578 IRQ_TYPE_EDGE_RISING>, <&smp2p_cdsp_in 0 IRQ_TYPE_EDGE_RISING>, <&smp2p_cdsp_in 1 IRQ_TYPE_EDGE_RISING>, <&smp2p_cdsp_in 2 IRQ_TYPE_EDGE_RISING>, diff --git a/arch/arm64/boot/dts/qcom/sm6375.dtsi b/arch/arm64/boot/dts/qcom/sm6375.dtsi index 4386f8a9c636c..f40509d91bbda 100644 --- a/arch/arm64/boot/dts/qcom/sm6375.dtsi +++ b/arch/arm64/boot/dts/qcom/sm6375.dtsi @@ -1561,7 +1561,7 @@ compatible = "qcom,sm6375-adsp-pas"; reg = <0 0x0a400000 0 0x100>; - interrupts-extended = <&intc GIC_SPI 282 IRQ_TYPE_LEVEL_HIGH>, + interrupts-extended = <&intc GIC_SPI 282 IRQ_TYPE_EDGE_RISING>, <&smp2p_adsp_in 0 IRQ_TYPE_EDGE_RISING>, <&smp2p_adsp_in 1 IRQ_TYPE_EDGE_RISING>, <&smp2p_adsp_in 2 IRQ_TYPE_EDGE_RISING>, diff --git a/arch/arm64/boot/dts/qcom/sm8250.dtsi b/arch/arm64/boot/dts/qcom/sm8250.dtsi index 39bd8f0eba1e6..7f2333c9d17d6 100644 --- a/arch/arm64/boot/dts/qcom/sm8250.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8250.dtsi @@ -3062,7 +3062,7 @@ compatible = "qcom,sm8250-slpi-pas"; reg = <0 0x05c00000 0 0x4000>; - interrupts-extended = <&pdc 9 IRQ_TYPE_LEVEL_HIGH>, + interrupts-extended = <&pdc 9 IRQ_TYPE_EDGE_RISING>, <&smp2p_slpi_in 0 IRQ_TYPE_EDGE_RISING>, <&smp2p_slpi_in 1 IRQ_TYPE_EDGE_RISING>, <&smp2p_slpi_in 2 IRQ_TYPE_EDGE_RISING>, @@ -3766,7 +3766,7 @@ compatible = "qcom,sm8250-cdsp-pas"; reg = <0 0x08300000 0 0x10000>; - interrupts-extended = <&intc GIC_SPI 578 IRQ_TYPE_LEVEL_HIGH>, + interrupts-extended = <&intc GIC_SPI 578 IRQ_TYPE_EDGE_RISING>, <&smp2p_cdsp_in 0 IRQ_TYPE_EDGE_RISING>, <&smp2p_cdsp_in 1 IRQ_TYPE_EDGE_RISING>, <&smp2p_cdsp_in 2 IRQ_TYPE_EDGE_RISING>, @@ -5928,7 +5928,7 @@ compatible = "qcom,sm8250-adsp-pas"; reg = <0 0x17300000 0 0x100>; - interrupts-extended = <&pdc 6 IRQ_TYPE_LEVEL_HIGH>, + interrupts-extended = <&pdc 6 IRQ_TYPE_EDGE_RISING>, <&smp2p_adsp_in 0 IRQ_TYPE_EDGE_RISING>, <&smp2p_adsp_in 1 IRQ_TYPE_EDGE_RISING>, <&smp2p_adsp_in 2 IRQ_TYPE_EDGE_RISING>, From cb939b9b35426852896790aba2f18f46df34e596 Mon Sep 17 00:00:00 2001 From: Rajendra Nayak Date: Sun, 17 Mar 2024 18:59:18 +0530 Subject: [PATCH 142/606] arm64: dts: qcom: x1e80100: Fix the compatible for cluster idle states The compatible's for the cluster/domain idle states of x1e80100 are wrong, fix it. Fixes: af16b00578a7 ("arm64: dts: qcom: Add base X1E80100 dtsi and the QCP dts") Signed-off-by: Rajendra Nayak Reviewed-by: Abel Vesa Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20240317132918.1068817-1-quic_rjendra@quicinc.com Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/x1e80100.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/x1e80100.dtsi b/arch/arm64/boot/dts/qcom/x1e80100.dtsi index 8e517f76189e1..6b40082bac68c 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100.dtsi +++ b/arch/arm64/boot/dts/qcom/x1e80100.dtsi @@ -284,7 +284,7 @@ domain-idle-states { CLUSTER_CL4: cluster-sleep-0 { - compatible = "arm,idle-state"; + compatible = "domain-idle-state"; idle-state-name = "l2-ret"; arm,psci-suspend-param = <0x01000044>; entry-latency-us = <350>; @@ -293,7 +293,7 @@ }; CLUSTER_CL5: cluster-sleep-1 { - compatible = "arm,idle-state"; + compatible = "domain-idle-state"; idle-state-name = "ret-pll-off"; arm,psci-suspend-param = <0x01000054>; entry-latency-us = <2200>; From 8b8ec83a1d7d3b6605d9163d2e306971295a4ce8 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 6 Mar 2024 10:56:50 +0100 Subject: [PATCH 143/606] arm64: dts: qcom: sc8280xp: add missing PCIe minimum OPP Add the missing PCIe CX performance level votes to avoid relying on other drivers (e.g. USB or UFS) to maintain the nominal performance level required for Gen3 speeds. Fixes: 813e83157001 ("arm64: dts: qcom: sc8280xp/sa8540p: add PCIe2-4 nodes") Cc: stable@vger.kernel.org # 6.2 Reviewed-by: Konrad Dybcio Reviewed-by: Manivannan Sadhasivam Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20240306095651.4551-5-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/sc8280xp.dtsi | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi index c9058c7fc1a38..d0f82e12289e1 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi +++ b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi @@ -1774,6 +1774,7 @@ reset-names = "pci"; power-domains = <&gcc PCIE_4_GDSC>; + required-opps = <&rpmhpd_opp_nom>; phys = <&pcie4_phy>; phy-names = "pciephy"; @@ -1872,6 +1873,7 @@ reset-names = "pci"; power-domains = <&gcc PCIE_3B_GDSC>; + required-opps = <&rpmhpd_opp_nom>; phys = <&pcie3b_phy>; phy-names = "pciephy"; @@ -1970,6 +1972,7 @@ reset-names = "pci"; power-domains = <&gcc PCIE_3A_GDSC>; + required-opps = <&rpmhpd_opp_nom>; phys = <&pcie3a_phy>; phy-names = "pciephy"; @@ -2071,6 +2074,7 @@ reset-names = "pci"; power-domains = <&gcc PCIE_2B_GDSC>; + required-opps = <&rpmhpd_opp_nom>; phys = <&pcie2b_phy>; phy-names = "pciephy"; @@ -2169,6 +2173,7 @@ reset-names = "pci"; power-domains = <&gcc PCIE_2A_GDSC>; + required-opps = <&rpmhpd_opp_nom>; phys = <&pcie2a_phy>; phy-names = "pciephy"; From ecc3ac293ed15ac2536e9fde2810154486f84010 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Mon, 18 Mar 2024 12:49:03 +0530 Subject: [PATCH 144/606] arm64: dts: qcom: sm8450: Fix the msi-map entries While adding the GIC ITS MSI support, it was found that the msi-map entries needed to be swapped to receive MSIs from the endpoint. But later it was identified that the swapping was needed due to a bug in the Qualcomm PCIe controller driver. And since the bug is now fixed with commit bf79e33cdd89 ("PCI: qcom: Enable BDF to SID translation properly"), let's fix the msi-map entries also to reflect the actual mapping in the hardware. Cc: stable@vger.kernel.org # 6.3: bf79e33cdd89 ("PCI: qcom: Enable BDF to SID translation properly") Fixes: ff384ab56f16 ("arm64: dts: qcom: sm8450: Use GIC-ITS for PCIe0 and PCIe1") Signed-off-by: Manivannan Sadhasivam Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20240318-pci-bdf-sid-fix-v1-1-acca6c5d9cf1@linaro.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/sm8450.dtsi | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sm8450.dtsi b/arch/arm64/boot/dts/qcom/sm8450.dtsi index b86be34a912b9..024d2653cc307 100644 --- a/arch/arm64/boot/dts/qcom/sm8450.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8450.dtsi @@ -1777,12 +1777,8 @@ ranges = <0x01000000 0x0 0x00000000 0x0 0x60200000 0x0 0x100000>, <0x02000000 0x0 0x60300000 0x0 0x60300000 0x0 0x3d00000>; - /* - * MSIs for BDF (1:0.0) only works with Device ID 0x5980. - * Hence, the IDs are swapped. - */ - msi-map = <0x0 &gic_its 0x5981 0x1>, - <0x100 &gic_its 0x5980 0x1>; + msi-map = <0x0 &gic_its 0x5980 0x1>, + <0x100 &gic_its 0x5981 0x1>; msi-map-mask = <0xff00>; interrupts = , , @@ -1900,12 +1896,8 @@ ranges = <0x01000000 0x0 0x00000000 0x0 0x40200000 0x0 0x100000>, <0x02000000 0x0 0x40300000 0x0 0x40300000 0x0 0x1fd00000>; - /* - * MSIs for BDF (1:0.0) only works with Device ID 0x5a00. - * Hence, the IDs are swapped. - */ - msi-map = <0x0 &gic_its 0x5a01 0x1>, - <0x100 &gic_its 0x5a00 0x1>; + msi-map = <0x0 &gic_its 0x5a00 0x1>, + <0x100 &gic_its 0x5a01 0x1>; msi-map-mask = <0xff00>; interrupts = , , From 98a953fa2f4095b9777dbf59a3ed2ac3c0bf55cb Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Mon, 18 Mar 2024 12:49:04 +0530 Subject: [PATCH 145/606] arm64: dts: qcom: sm8550: Fix the msi-map entries While adding the GIC ITS MSI support, it was found that the msi-map entries needed to be swapped to receive MSIs from the endpoint. But later it was identified that the swapping was needed due to a bug in the Qualcomm PCIe controller driver. And since the bug is now fixed with commit bf79e33cdd89 ("PCI: qcom: Enable BDF to SID translation properly"), let's fix the msi-map entries also to reflect the actual mapping in the hardware. Fixes: 114990ce3edf ("arm64: dts: qcom: sm8550: Use GIC-ITS for PCIe0 and PCIe1") Signed-off-by: Manivannan Sadhasivam Acked-by: Neil Armstrong Tested-by: Neil Armstrong # on SM8550-QRD Link: https://lore.kernel.org/r/20240318-pci-bdf-sid-fix-v1-2-acca6c5d9cf1@linaro.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/sm8550.dtsi | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sm8550.dtsi b/arch/arm64/boot/dts/qcom/sm8550.dtsi index 3904348075f67..3348bc06db488 100644 --- a/arch/arm64/boot/dts/qcom/sm8550.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8550.dtsi @@ -1755,9 +1755,8 @@ <&gem_noc MASTER_APPSS_PROC 0 &cnoc_main SLAVE_PCIE_0 0>; interconnect-names = "pcie-mem", "cpu-pcie"; - /* Entries are reversed due to the unusual ITS DeviceID encoding */ - msi-map = <0x0 &gic_its 0x1401 0x1>, - <0x100 &gic_its 0x1400 0x1>; + msi-map = <0x0 &gic_its 0x1400 0x1>, + <0x100 &gic_its 0x1401 0x1>; iommu-map = <0x0 &apps_smmu 0x1400 0x1>, <0x100 &apps_smmu 0x1401 0x1>; @@ -1867,9 +1866,8 @@ <&gem_noc MASTER_APPSS_PROC 0 &cnoc_main SLAVE_PCIE_1 0>; interconnect-names = "pcie-mem", "cpu-pcie"; - /* Entries are reversed due to the unusual ITS DeviceID encoding */ - msi-map = <0x0 &gic_its 0x1481 0x1>, - <0x100 &gic_its 0x1480 0x1>; + msi-map = <0x0 &gic_its 0x1480 0x1>, + <0x100 &gic_its 0x1481 0x1>; iommu-map = <0x0 &apps_smmu 0x1480 0x1>, <0x100 &apps_smmu 0x1481 0x1>; From 6d3bd106ad60383e156f85401c44bf0e56ed6bfc Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Mon, 18 Mar 2024 12:49:05 +0530 Subject: [PATCH 146/606] arm64: dts: qcom: sm8650: Fix the msi-map entries While adding the GIC ITS MSI support, it was found that the msi-map entries needed to be swapped to receive MSIs from the endpoint. But later it was identified that the swapping was needed due to a bug in the Qualcomm PCIe controller driver. And since the bug is now fixed with commit bf79e33cdd89 ("PCI: qcom: Enable BDF to SID translation properly"), let's fix the msi-map entries also to reflect the actual mapping in the hardware. Fixes: a33a532b3b1e ("arm64: dts: qcom: sm8650: Use GIC-ITS for PCIe0 and PCIe1") Signed-off-by: Manivannan Sadhasivam Acked-by: Neil Armstrong Tested-by: Neil Armstrong # on SM8650-QRD Link: https://lore.kernel.org/r/20240318-pci-bdf-sid-fix-v1-3-acca6c5d9cf1@linaro.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/sm8650.dtsi | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sm8650.dtsi b/arch/arm64/boot/dts/qcom/sm8650.dtsi index ba72d8f384207..eb117866e59ff 100644 --- a/arch/arm64/boot/dts/qcom/sm8650.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8650.dtsi @@ -2274,9 +2274,8 @@ interrupt-map-mask = <0 0 0 0x7>; #interrupt-cells = <1>; - /* Entries are reversed due to the unusual ITS DeviceID encoding */ - msi-map = <0x0 &gic_its 0x1401 0x1>, - <0x100 &gic_its 0x1400 0x1>; + msi-map = <0x0 &gic_its 0x1400 0x1>, + <0x100 &gic_its 0x1401 0x1>; msi-map-mask = <0xff00>; linux,pci-domain = <0>; @@ -2402,9 +2401,8 @@ interrupt-map-mask = <0 0 0 0x7>; #interrupt-cells = <1>; - /* Entries are reversed due to the unusual ITS DeviceID encoding */ - msi-map = <0x0 &gic_its 0x1481 0x1>, - <0x100 &gic_its 0x1480 0x1>; + msi-map = <0x0 &gic_its 0x1480 0x1>, + <0x100 &gic_its 0x1481 0x1>; msi-map-mask = <0xff00>; linux,pci-domain = <1>; From ecda8309098402f878c96184f29a1b7ec682d772 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Thu, 28 Mar 2024 03:21:57 +0100 Subject: [PATCH 147/606] arm64: dts: qcom: sc8180x: Fix ss_phy_irq for secondary USB controller The ACPI DSDT of the Surface Pro X (SQ2) specifies the interrupts for the secondary UBS controller as Name (_CRS, ResourceTemplate () { Interrupt (ResourceConsumer, Level, ActiveHigh, Shared, ,, ) { 0x000000AA, } Interrupt (ResourceConsumer, Level, ActiveHigh, SharedAndWake, ,, ) { 0x000000A7, // hs_phy_irq: &intc GIC_SPI 136 } Interrupt (ResourceConsumer, Level, ActiveHigh, SharedAndWake, ,, ) { 0x00000228, // ss_phy_irq: &pdc 40 } Interrupt (ResourceConsumer, Edge, ActiveHigh, SharedAndWake, ,, ) { 0x0000020A, // dm_hs_phy_irq: &pdc 10 } Interrupt (ResourceConsumer, Edge, ActiveHigh, SharedAndWake, ,, ) { 0x0000020B, // dp_hs_phy_irq: &pdc 11 } }) Generally, the interrupts above 0x200 map to the PDC interrupts (as used in the devicetree) as ACPI_NUMBER - 0x200. Note that this lines up with dm_hs_phy_irq and dp_hs_phy_irq (as well as the interrupts for the primary USB controller). Based on the snippet above, ss_phy_irq should therefore be PDC 40 (= 0x28) and not PDC 7. The latter is according to ACPI instead used as ss_phy_irq for port 0 of the multiport USB controller). Fix this by setting ss_phy_irq to '&pdc 40'. Fixes: b080f53a8f44 ("arm64: dts: qcom: sc8180x: Add remoteprocs, wifi and usb nodes") Signed-off-by: Maximilian Luz Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20240328022224.336938-1-luzmaximilian@gmail.com Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/sc8180x.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sc8180x.dtsi b/arch/arm64/boot/dts/qcom/sc8180x.dtsi index 32afc78d5b769..053f7861c3cec 100644 --- a/arch/arm64/boot/dts/qcom/sc8180x.dtsi +++ b/arch/arm64/boot/dts/qcom/sc8180x.dtsi @@ -2701,7 +2701,7 @@ resets = <&gcc GCC_USB30_SEC_BCR>; power-domains = <&gcc USB30_SEC_GDSC>; interrupts-extended = <&intc GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>, - <&pdc 7 IRQ_TYPE_LEVEL_HIGH>, + <&pdc 40 IRQ_TYPE_LEVEL_HIGH>, <&pdc 10 IRQ_TYPE_EDGE_BOTH>, <&pdc 11 IRQ_TYPE_EDGE_BOTH>; interrupt-names = "hs_phy_irq", "ss_phy_irq", From 7d045025a24b6336d444d359bd4312f351d017f9 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 20 Mar 2024 21:43:03 +0200 Subject: [PATCH 148/606] gpio: tangier: Use correct type for the IRQ chip data IRQ chip data contains a pointer to the GPIO chip. Luckily we have the pointers the same, but strictly speaking it's not guaranteed. Even though, still better to fix this. Fixes: ccf6fd6dcc86 ("gpio: merrifield: Introduce GPIO driver to support Merrifield") Signed-off-by: Andy Shevchenko --- drivers/gpio/gpio-tangier.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpio/gpio-tangier.c b/drivers/gpio/gpio-tangier.c index b75e0b12087ac..4b29abafecf6a 100644 --- a/drivers/gpio/gpio-tangier.c +++ b/drivers/gpio/gpio-tangier.c @@ -195,7 +195,8 @@ static int tng_gpio_set_config(struct gpio_chip *chip, unsigned int offset, static void tng_irq_ack(struct irq_data *d) { - struct tng_gpio *priv = irq_data_get_irq_chip_data(d); + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + struct tng_gpio *priv = gpiochip_get_data(gc); irq_hw_number_t gpio = irqd_to_hwirq(d); void __iomem *gisr; u8 shift; @@ -227,7 +228,8 @@ static void tng_irq_unmask_mask(struct tng_gpio *priv, u32 gpio, bool unmask) static void tng_irq_mask(struct irq_data *d) { - struct tng_gpio *priv = irq_data_get_irq_chip_data(d); + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + struct tng_gpio *priv = gpiochip_get_data(gc); irq_hw_number_t gpio = irqd_to_hwirq(d); tng_irq_unmask_mask(priv, gpio, false); @@ -236,7 +238,8 @@ static void tng_irq_mask(struct irq_data *d) static void tng_irq_unmask(struct irq_data *d) { - struct tng_gpio *priv = irq_data_get_irq_chip_data(d); + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + struct tng_gpio *priv = gpiochip_get_data(gc); irq_hw_number_t gpio = irqd_to_hwirq(d); gpiochip_enable_irq(&priv->chip, gpio); From 0064db9ce4aa7cc794e6f4aed60dee0f94fc9bcf Mon Sep 17 00:00:00 2001 From: David Lechner Date: Fri, 12 Apr 2024 17:52:48 -0500 Subject: [PATCH 149/606] spi: axi-spi-engine: fix version format string The version format string in the AXI SPI Engine driver was probably intended to print the version number in the same format as the DT compatible string (e.g. 1.00.a). However, the version just uses semantic versioning so formatting the patch number as a character is not correct and would result in printing control characters for patch numbers less than 32. Fixes: b1353d1c1d45 ("spi: Add Analog Devices AXI SPI Engine controller support") Signed-off-by: David Lechner Link: https://lore.kernel.org/r/20240412-axi-spi-engine-version-printf-v1-1-95e1e842c1a6@baylibre.com Signed-off-by: Mark Brown --- drivers/spi/spi-axi-spi-engine.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-axi-spi-engine.c b/drivers/spi/spi-axi-spi-engine.c index 7cc219d78551a..e358ac5b45097 100644 --- a/drivers/spi/spi-axi-spi-engine.c +++ b/drivers/spi/spi-axi-spi-engine.c @@ -623,7 +623,7 @@ static int spi_engine_probe(struct platform_device *pdev) version = readl(spi_engine->base + ADI_AXI_REG_VERSION); if (ADI_AXI_PCORE_VER_MAJOR(version) != 1) { - dev_err(&pdev->dev, "Unsupported peripheral version %u.%u.%c\n", + dev_err(&pdev->dev, "Unsupported peripheral version %u.%u.%u\n", ADI_AXI_PCORE_VER_MAJOR(version), ADI_AXI_PCORE_VER_MINOR(version), ADI_AXI_PCORE_VER_PATCH(version)); From d2d73a6dd17365c43e109263841f7c26da55cfb0 Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Fri, 12 Apr 2024 12:50:26 +0200 Subject: [PATCH 150/606] mtd: limit OTP NVMEM cell parse to non-NAND devices MTD OTP logic is very fragile on parsing NVMEM cell and can be problematic with some specific kind of devices. The problem was discovered by e87161321a40 ("mtd: rawnand: macronix: OTP access for MX30LFxG18AC") where OTP support was added to a NAND device. With the case of NAND devices, it does require a node where ECC info are declared and all the fixed partitions, and this cause the OTP codepath to parse this node as OTP NVMEM cells, making probe fail and the NAND device registration fail. MTD OTP parsing should have been limited to always using compatible to prevent this error by using node with compatible "otp-user" or "otp-factory". NVMEM across the years had various iteration on how cells could be declared in DT, in some old implementation, no_of_node should have been enabled but now add_legacy_fixed_of_cells should be used to disable NVMEM to parse child node as NVMEM cell. To fix this and limit any regression with other MTD that makes use of declaring OTP as direct child of the dev node, disable add_legacy_fixed_of_cells if we detect the MTD type is Nand. With the following logic, the OTP NVMEM entry is correctly created with no cells and the MTD Nand is correctly probed and partitions are correctly exposed. Fixes: 4b361cfa8624 ("mtd: core: add OTP nvmem provider support") Cc: # v6.7+ Signed-off-by: Christian Marangi Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20240412105030.1598-1-ansuelsmth@gmail.com --- drivers/mtd/mtdcore.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index 5887feb347a4e..0de87bc638405 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -900,7 +900,7 @@ static struct nvmem_device *mtd_otp_nvmem_register(struct mtd_info *mtd, config.name = compatible; config.id = NVMEM_DEVID_AUTO; config.owner = THIS_MODULE; - config.add_legacy_fixed_of_cells = true; + config.add_legacy_fixed_of_cells = !mtd_type_is_nand(mtd); config.type = NVMEM_TYPE_OTP; config.root_only = true; config.ignore_wp = true; From 1e0fb113646182e073539db96016b00cfeb18ecc Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Wed, 10 Apr 2024 10:44:05 +0200 Subject: [PATCH 151/606] power: supply: mt6360_charger: Fix of_match for usb-otg-vbus regulator The of_match shall correspond to the name of the regulator subnode, or the deprecated `regulator-compatible` property must be used: failing to do so, the regulator won't probe (and the driver will as well not probe). Since the devicetree binding for this driver is actually correct and wants DTs to use the "usb-otg-vbus-regulator" subnode name, fix this driver by aligning the `of_match` string to what the DT binding wants. Fixes: 0402e8ebb8b8 ("power: supply: mt6360_charger: add MT6360 charger support") Signed-off-by: AngeloGioacchino Del Regno Reviewed-by: Chen-Yu Tsai Link: https://lore.kernel.org/r/20240410084405.1389378-1-angelogioacchino.delregno@collabora.com Signed-off-by: Sebastian Reichel --- drivers/power/supply/mt6360_charger.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/power/supply/mt6360_charger.c b/drivers/power/supply/mt6360_charger.c index 1305cba61edd4..aca123783efcc 100644 --- a/drivers/power/supply/mt6360_charger.c +++ b/drivers/power/supply/mt6360_charger.c @@ -588,7 +588,7 @@ static const struct regulator_ops mt6360_chg_otg_ops = { }; static const struct regulator_desc mt6360_otg_rdesc = { - .of_match = "usb-otg-vbus", + .of_match = "usb-otg-vbus-regulator", .name = "usb-otg-vbus", .ops = &mt6360_chg_otg_ops, .owner = THIS_MODULE, From abbb99301e9d2c91567e1893dbe34f2f8b52ea9a Mon Sep 17 00:00:00 2001 From: Thorsten Leemhuis Date: Tue, 9 Apr 2024 09:30:44 +0200 Subject: [PATCH 152/606] docs: verify/bisect: use git switch, tag kernel, and various fixes Various small improvements and fixes: * Use the more modern 'git switch' instead of 'git checkout', which makes it more obvious what's happening (among others due to the --discard-changes parameter that is more clear than --force). * Provide a hint how a mainline version number and one from a stable series look like. * When trying to validate the bisection result with a revert, add a special tag to facilitate the identification. * Sync version numbers used in various examples for consistency: stick to 6.0.13, 6.0.15, and 6.1.5. * Fix a few typos and oddities. Signed-off-by: Thorsten Leemhuis Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/85029aa004447b0eeb5043fb014630f2acafacec.1712647788.git.linux@leemhuis.info --- .../verify-bugs-and-bisect-regressions.rst | 117 ++++++++++-------- 1 file changed, 67 insertions(+), 50 deletions(-) diff --git a/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst b/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst index d3504826f4015..c999e40c79ab7 100644 --- a/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst +++ b/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst @@ -38,8 +38,8 @@ aspects, all of which might be essential in your present case.]* **In case you want to check if a bug is present in code currently supported by developers**, execute just the *preparations* and *segment 1*; while doing so, consider the newest Linux kernel you regularly use to be the 'working' kernel. -In the following example that's assumed to be 6.0.13, which is why the sources -of 6.0 will be used to prepare the .config file. +In the following example that's assumed to be 6.0, which is why its sources +will be used to prepare the .config file. **In case you face a regression**, follow the steps at least till the end of *segment 2*. Then you can submit a preliminary report -- or continue with @@ -61,7 +61,7 @@ will be considered the 'good' release and used to prepare the .config file. cd ~/linux/ git remote add -t master stable \ https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git - git checkout --detach v6.0 + git switch --detach v6.0 # * Hint: if you used an existing clone, ensure no stale .config is around. make olddefconfig # * Ensure the former command picked the .config of the 'working' kernel. @@ -87,7 +87,7 @@ will be considered the 'good' release and used to prepare the .config file. a) Checking out latest mainline code:: cd ~/linux/ - git checkout --force --detach mainline/master + git switch --discard-changes --detach mainline/master b) Build, install, and boot a kernel:: @@ -125,7 +125,7 @@ will be considered the 'good' release and used to prepare the .config file. a) Start by checking out the sources of the 'good' version:: cd ~/linux/ - git checkout --force --detach v6.0 + git switch --discard-changes --detach v6.0 b) Build, install, and boot a kernel as described earlier in *segment 1, section b* -- just feel free to skip the 'du' commands, as you have a rough @@ -157,11 +157,12 @@ will be considered the 'good' release and used to prepare the .config file. works with the newly built kernel. If it does, tell Git by executing ``git bisect good``; if it does not, run ``git bisect bad`` instead. - All three commands will make Git checkout another commit; then re-execute + All three commands will make Git check out another commit; then re-execute this step (e.g. build, install, boot, and test a kernel to then tell Git the outcome). Do so again and again until Git shows which commit broke things. If you run short of disk space during this process, check the - "Supplementary tasks" section below. + section 'Supplementary tasks: cleanup during and after the process' + below. d) Once your finished the bisection, put a few things away:: @@ -172,12 +173,15 @@ will be considered the 'good' release and used to prepare the .config file. e) Try to verify the bisection result:: - git checkout --force --detach mainline/master + git switch --discard-changes --detach mainline/master git revert --no-edit cafec0cacaca0 + cp ~/kernel-config-working .config + ./scripts/config --set-str CONFIG_LOCALVERSION '-local-cafec0cacaca0-reverted' This is optional, as some commits are impossible to revert. But if the second command worked flawlessly, build, install, and boot one more kernel - kernel, which should not show the regression. + kernel; just this time skip the first command copying the base .config file + over, as that already has been taken care off. * **Supplementary tasks**: cleanup during and after the process. @@ -208,7 +212,7 @@ Step-by-step guide on how to verify bugs and bisect regressions =============================================================== This guide describes how to set up your own Linux kernels for investigating bugs -or regressions you intent to report. How far you want to follow the instructions +or regressions you intend to report. How far you want to follow the instructions depends on your issue: Execute all steps till the end of *segment 1* to **verify if your kernel problem @@ -240,12 +244,17 @@ to get things rolling again. For further details on how to report Linux kernel issues or regressions check out Documentation/admin-guide/reporting-issues.rst, which works in conjunction with this document. It among others explains why you need to verify bugs with -the latest 'mainline' kernel, even if you face a problem with a kernel from a -'stable/longterm' series; for users facing a regression it also explains that -sending a preliminary report after finishing segment 2 might be wise, as the -regression and its culprit might be known already. For further details on -what actually qualifies as a regression check out -Documentation/admin-guide/reporting-regressions.rst. +the latest 'mainline' kernel (e.g. versions like 6.0, 6.1-rc1, or 6.1-rc6), +even if you face a problem with a kernel from a 'stable/longterm' series +(say 6.0.13). + +For users facing a regression that document also explains why sending a +preliminary report after segment 2 might be wise, as the regression and its +culprit might be known already. For further details on what actually qualifies +as a regression check out Documentation/admin-guide/reporting-regressions.rst. + +If you run into any problems while following this guide or have ideas how to +improve it, :ref:`please let the kernel developers know `. .. _introprep_bissbs: @@ -286,7 +295,7 @@ Preparations: set up everything to build your own kernels Do you follow this guide to verify if a bug is present in the code developers care for? Then consider the mainline release your 'working' kernel (the newest one you regularly use) is based on to be the 'good' version; if your 'working' - kernel for example is 6.0.11, then your 'good' kernel is 6.0. + kernel for example is 6.0.13, then your 'good' kernel is 6.0. In case you face a regression, it depends on the version range where the regression was introduced: @@ -295,14 +304,14 @@ Preparations: set up everything to build your own kernels 6.1-rc1? Then henceforth regard 6.0 as the last known 'good' version and 6.1-rc1 as the first 'bad' one. - * Some function stopped working when updating from 6.0.11 to 6.1.4? Then for - the time being consider 6.0 as the last 'good' version and 6.1.4 as + * Some function stopped working when updating from 6.0.13 to 6.1.5? Then for + the time being consider 6.0 as the last 'good' version and 6.1.5 as the 'bad' one. Note, at this point it is merely assumed that 6.0 is fine; this assumption will be checked in segment 2. - * A feature you used in 6.0.11 does not work at all or worse in 6.1.13? In + * A feature you used in 6.0.13 does not work at all or worse in 6.1.15? In that case you want to bisect within a stable/longterm series: consider - 6.0.11 as the last known 'good' version and 6.0.13 as the first 'bad' + 6.0.13 as the last known 'good' version and 6.0.15 as the first 'bad' one. Note, in this case you still want to compile and test a mainline kernel as explained in segment 1: the outcome will determine if you need to report your issue to the regular developers or the stable team. @@ -367,7 +376,7 @@ Preparations: set up everything to build your own kernels * Start preparing a kernel build configuration (the '.config' file). Before doing so, ensure you are still running the 'working' kernel an earlier - step told you to boot; if you are unsure, check the current kernel release + step told you to boot; if you are unsure, check the current kernelrelease identifier using ``uname -r``. Afterwards check out the source code for the version earlier established as @@ -375,7 +384,7 @@ Preparations: set up everything to build your own kernels the version number in this and all later Git commands needs to be prefixed with a 'v':: - git checkout --detach v6.0 + git switch --discard-changes --detach v6.0 Now create a build configuration file:: @@ -505,7 +514,7 @@ be a waste of time. [:ref:`details`] * Check out the latest Linux codebase:: cd ~/linux/ - git checkout --force --detach mainline/master + git switch --discard-changes --detach mainline/master [:ref:`details`] @@ -617,7 +626,7 @@ be a waste of time. [:ref:`details`] cd ~/linux/ git remote set-branches --add stable linux-6.0.y git fetch stable - git checkout --force --detach linux-6.0.y + git switch --discard-changes --detach linux-6.0.y Now use the checked out code to build and install another kernel using the commands the earlier steps already described in more detail:: @@ -669,7 +678,7 @@ otherwise would be a waste of time. [:ref:`details`] 'good' (once again assumed to be 6.0 here):: cd ~/linux/ - git checkout --detach v6.0 + git switch --discard-changes --detach v6.0 Now use the checked out code to configure, build, and install another kernel using the commands the previous subsection explained in more detail:: @@ -703,7 +712,7 @@ Segment 3: perform the bisection and validate the result With all the preparations and precaution builds taken care of, you are now ready to begin the bisection. This will make you build quite a few kernels -- usually about 15 in case you encountered a regression when updating to a newer series -(say from 6.0.11 to 6.1.3). But do not worry, due to the trimmed build +(say from 6.0.13 to 6.1.5). But do not worry, due to the trimmed build configuration created earlier this works a lot faster than many people assume: overall on average it will often just take about 10 to 15 minutes to compile each kernel on commodity x86 machines. @@ -745,7 +754,7 @@ each kernel on commodity x86 machines. If compilation fails for some reason, run ``git bisect skip`` and restart executing the stack of commands from the beginning. - In case you skipped the "test latest codebase" step in the guide, check its + In case you skipped the 'test latest codebase' step in the guide, check its description as for why the 'df [...]' and 'make -s kernelrelease [...]' commands are here. @@ -823,16 +832,16 @@ each kernel on commodity x86 machines. Begin by checking out the latest codebase depending on the range you bisected: * Did you face a regression within a stable/longterm series (say between - 6.0.11 and 6.0.13) that does not happen in mainline? Then check out the + 6.0.13 and 6.0.15) that does not happen in mainline? Then check out the latest codebase for the affected series like this:: git fetch stable - git checkout --force --detach linux-6.0.y + git switch --discard-changes --detach linux-6.0.y * In all other cases check out latest mainline:: git fetch mainline - git checkout --force --detach mainline/master + git switch --discard-changes --detach mainline/master If you bisected a regression within a stable/longterm series that also happens in mainline, there is one more thing to do: look up the mainline @@ -846,21 +855,27 @@ each kernel on commodity x86 machines. git revert --no-edit cafec0cacaca0 - If that fails, give up trying and move on to the next step. But if it works, - build a kernel again using the familiar command sequence:: + If that fails, give up trying and move on to the next step; if it works, + adjust the tag to facilitate the identification and prevent accidentally + overwriting another kernel:: cp ~/kernel-config-working .config + ./scripts/config --set-str CONFIG_LOCALVERSION '-local-cafec0cacaca0-reverted' + + Build a kernel using the familiar command sequence, just without copying the + the base .config over:: + make olddefconfig && - make -j $(nproc --all) && + make -j $(nproc --all) # * Check if the free space suffices holding another kernel: df -h /boot/ /lib/modules/ sudo make modules_install command -v installkernel && sudo make install - Make -s kernelrelease | tee -a ~/kernels-built + make -s kernelrelease | tee -a ~/kernels-built reboot - Now check one last time if the feature that made you perform a bisection work - with that kernel. + Now check one last time if the feature that made you perform a bisection works + with that kernel: if everything went well, it should not show the regression. [:ref:`details`] @@ -934,10 +949,12 @@ This concludes the step-by-step guide. Did you run into trouble following any of the above steps not cleared up by the reference section below? Did you spot errors? Or do you have ideas how to -improve the guide? Then please take a moment and let the maintainer of this +improve the guide? + +If any of that applies, please take a moment and let the maintainer of this document know by email (Thorsten Leemhuis ), ideally while CCing the Linux docs mailing list (linux-doc@vger.kernel.org). Such feedback is -vital to improve this document further, which is in everybody's interest, as it +vital to improve this text further, which is in everybody's interest, as it will enable more people to master the task described here -- and hopefully also improve similar guides inspired by this one. @@ -1059,18 +1076,18 @@ Bisection range Establishing the range of commits to be checked is mostly straightforward, except when a regression occurred when switching from a release of one stable -series to a release of a later series (e.g. from 6.0.11 to 6.1.4). In that case +series to a release of a later series (e.g. from 6.0.13 to 6.1.5). In that case Git will need some hand holding, as there is no straight line of descent. That's because with the release of 6.0 mainline carried on to 6.1 while the stable series 6.0.y branched to the side. It's therefore theoretically possible -that the issue you face with 6.1.4 only worked in 6.0.11, as it was fixed by a +that the issue you face with 6.1.5 only worked in 6.0.13, as it was fixed by a commit that went into one of the 6.0.y releases, but never hit mainline or the 6.1.y series. Thankfully that normally should not happen due to the way the stable/longterm maintainers maintain the code. It's thus pretty safe to assume 6.0 as a 'good' kernel. That assumption will be tested anyway, as that kernel will be built and tested in the segment '2' of this guide; Git would force you -to do this as well, if you tried bisecting between 6.0.11 and 6.1.13. +to do this as well, if you tried bisecting between 6.0.13 and 6.1.15. [:ref:`back to step-by-step guide `] @@ -1117,7 +1134,7 @@ These commands install a few packages that are often, but not always needed. You for example might want to skip installing the development headers for ncurses, which you will only need in case you later might want to adjust the kernel build configuration using make the targets 'menuconfig' or 'nconfig'; likewise omit -the headers of Qt6 is you do not plan to adjust the .config using 'xconfig'. +the headers of Qt6 if you do not plan to adjust the .config using 'xconfig'. You furthermore might need additional libraries and their development headers for tasks not covered in this guide -- for example when building utilities from @@ -1184,7 +1201,7 @@ First, execute the following command to retrieve the latest mainline codebase:: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git Now deepen your clone's history to the second predecessor of the mainline -release of your 'good' version. In case the latter are 6.0 or 6.0.11, 5.19 would +release of your 'good' version. In case the latter are 6.0 or 6.0.13, 5.19 would be the first predecessor and 5.18 the second -- hence deepen the history up to that version:: @@ -1490,7 +1507,7 @@ highly recommended for these reasons: Your report might be ignored if you send it to the wrong party -- and even when you get a reply there is a decent chance that developers tell you to - evaluate which of the two cases it is before they take a closer look. + evaluate which of the two cases it is before they take a closer look. [:ref:`back to step-by-step guide `] @@ -1552,8 +1569,8 @@ by modifying your search terms or using another line from the error messages. In the end, most issues you run into have likely been encountered and reported by others already. That includes issues where the cause is not your -system, but lies in the code. If you run into one of those, you might thus find a -solution (e.g. a patch) or workaround for your issue, too. +system, but lies in the code. If you run into one of those, you might thus find +a solution (e.g. a patch) or workaround for your issue, too. Package your kernel up ~~~~~~~~~~~~~~~~~~~~~~ @@ -1767,8 +1784,8 @@ multitude of reasons why this might happen. Some ideas where to look: Note, if you found and fixed problems with the .config file, you want to use it to build another kernel from the latest codebase, as your earlier tests with -mainline and the latest version from an affected stable/longterm series were most -likely flawed. +mainline and the latest version from an affected stable/longterm series were +most likely flawed. [:ref:`back to step-by-step guide `] @@ -1911,7 +1928,7 @@ Now remove the boot entry for the kernel from your bootloader's configuration; the steps to do that vary quite a bit between Linux distributions. Note, be careful with wildcards like '*' when deleting files or directories -for kernels manually: you might accidentally remove files of a 6.0.11 kernel +for kernels manually: you might accidentally remove files of a 6.0.13 kernel when all you want is to remove 6.0 or 6.0.1. [:ref:`back to step-by-step guide `] From 932c9a5398a7b41cb8e7a0264e5470133b373e11 Mon Sep 17 00:00:00 2001 From: Thorsten Leemhuis Date: Tue, 9 Apr 2024 09:30:45 +0200 Subject: [PATCH 153/606] docs: verify/bisect: add and fetch stable branches ahead of time Add and fetch all required stable branches ahead of time. This fixes a bug, as readers that wanted to bisect a regression within a stable or longterm series otherwise did not have them available at the right time. This way also matches the flow somewhat better and avoids some "if you haven't already added it" phrases that otherwise become necessary in future changes. Signed-off-by: Thorsten Leemhuis Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/57dcf312959476abe6151bf3d35eb79e3e9a83d1.1712647788.git.linux@leemhuis.info --- .../verify-bugs-and-bisect-regressions.rst | 31 +++++++++++-------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst b/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst index c999e40c79ab7..06278501a4bdc 100644 --- a/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst +++ b/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst @@ -136,8 +136,7 @@ will be considered the 'good' release and used to prepare the .config file. * **Segment 3**: perform and validate the bisection. - a) In case your 'broken' version is a stable/longterm release, add the Git - branch holding it:: + a) Retrieve the sources for your 'bad' version:: git remote set-branches --add stable linux-6.1.y git fetch stable @@ -371,6 +370,21 @@ Preparations: set up everything to build your own kernels [:ref:`details`] +.. _stablesources_bissbs: + +* Retrieve the sources for any stable or longterm series you might need. + + Is the version you earlier established as 'bad' a stable or longterm release? + Then download the code for the series it belongs to ('linux-6.1.y' in this + example):: + + git remote set-branches --add stable linux-6.1.y + git fetch stable + + If the version earlier established as 'good' is from a different stable or + longterm series (say 6.0.13), repeat the previous step, but this time for the + branch holding the series the 'good' version belongs to (e.g. linux-6.0.y). + .. _oldconfig_bissbs: * Start preparing a kernel build configuration (the '.config' file). @@ -620,12 +634,10 @@ be a waste of time. [:ref:`details`] reproduce it with the mainline kernel you just built? One that according to the `front page of kernel.org `_ is still supported? Then check if the latest codebase for the particular series might already fix the - problem. To do so, add the stable series Git branch for your 'good' kernel - (again, this here is assumed to be 6.0) and check out the latest version:: + problem. To do so, check out that series latest version (again, this here is + assumed to be 6.0):: cd ~/linux/ - git remote set-branches --add stable linux-6.0.y - git fetch stable git switch --discard-changes --detach linux-6.0.y Now use the checked out code to build and install another kernel using the @@ -717,13 +729,6 @@ configuration created earlier this works a lot faster than many people assume: overall on average it will often just take about 10 to 15 minutes to compile each kernel on commodity x86 machines. -* In case your 'bad' version is a stable/longterm release (say 6.1.5), add its - stable branch, unless you already did so earlier:: - - cd ~/linux/ - git remote set-branches --add stable linux-6.1.y - git fetch stable - .. _bisectstart_bissbs: * Start the bisection and tell Git about the versions earlier established as From 453de3207ff3534dd7165a32a73dd28cc9e8f14f Mon Sep 17 00:00:00 2001 From: Thorsten Leemhuis Date: Tue, 9 Apr 2024 09:30:46 +0200 Subject: [PATCH 154/606] docs: verify/bisect: proper headlines and more spacing Various small improvements and fixes: * Separate ref links from their target with a space for better readability. * Add a proper heading for the note at the end of the step-by-step guide. * Use proper 3rd and 4th level headlines in the reference section and add short intros for the 2nd level headlines that lacked one. Signed-off-by: Thorsten Leemhuis Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/f59f0f235a2192ed93899a7338153e4cb71075f0.1712647788.git.linux@leemhuis.info --- .../verify-bugs-and-bisect-regressions.rst | 194 ++++++++++-------- 1 file changed, 113 insertions(+), 81 deletions(-) diff --git a/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst b/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst index 06278501a4bdc..355c2cea5230d 100644 --- a/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst +++ b/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst @@ -29,7 +29,7 @@ The essence of the process (aka 'TL;DR') ======================================== *[If you are new to building or bisecting Linux, ignore this section and head -over to the* ":ref:`step-by-step guide`" *below. It utilizes +over to the* ':ref:`step-by-step guide `' *below. It utilizes the same commands as this section while describing them in brief fashion. The steps are nevertheless easy to follow and together with accompanying entries in a reference section mention many alternatives, pitfalls, and additional @@ -224,15 +224,15 @@ report; instead of the latter your could also head straight on and follow *segment 3* to **perform a bisection** for a full-fledged regression report developers are obliged to act upon. - :ref:`Preparations: set up everything to build your own kernels.` + :ref:`Preparations: set up everything to build your own kernels `. - :ref:`Segment 1: try to reproduce the problem with the latest codebase.` + :ref:`Segment 1: try to reproduce the problem with the latest codebase `. - :ref:`Segment 2: check if the kernels you build work fine.` + :ref:`Segment 2: check if the kernels you build work fine `. - :ref:`Segment 3: perform a bisection and validate the result.` + :ref:`Segment 3: perform a bisection and validate the result `. - :ref:`Supplementary tasks: cleanup during and after following this guide.` + :ref:`Supplementary tasks: cleanup during and after following this guide `. The steps in each segment illustrate the important aspects of the process, while a comprehensive reference section holds additional details for almost all of the @@ -260,12 +260,14 @@ improve it, :ref:`please let the kernel developers know `. Preparations: set up everything to build your own kernels --------------------------------------------------------- +The following steps lay the groundwork for all further tasks. + .. _backup_bissbs: * Create a fresh backup and put system repair and restore tools at hand, just to be prepared for the unlikely case of something going sideways. - [:ref:`details`] + [:ref:`details `] .. _vanilla_bissbs: @@ -273,7 +275,7 @@ Preparations: set up everything to build your own kernels builds them automatically. That includes but is not limited to DKMS, openZFS, VirtualBox, and Nvidia's graphics drivers (including the GPLed kernel module). - [:ref:`details`] + [:ref:`details `] .. _secureboot_bissbs: @@ -284,7 +286,7 @@ Preparations: set up everything to build your own kernels their restrictions through a process initiated by ``mokutil --disable-validation``. - [:ref:`details`] + [:ref:`details `] .. _rangecheck_bissbs: @@ -319,13 +321,13 @@ Preparations: set up everything to build your own kernels throughout this guide will refer to the last kernel that has been working fine.* - [:ref:`details`] + [:ref:`details `] .. _bootworking_bissbs: * Boot into the 'working' kernel and briefly use the apparently broken feature. - [:ref:`details`] + [:ref:`details `] .. _diskspace_bissbs: @@ -335,7 +337,7 @@ Preparations: set up everything to build your own kernels debug symbols: both explain approaches reducing the amount of space, which should allow you to master these tasks with about 4 Gigabytes free space. - [:ref:`details`] + [:ref:`details `] .. _buildrequires_bissbs: @@ -345,7 +347,7 @@ Preparations: set up everything to build your own kernels reference section shows how to quickly install those on various popular Linux distributions. - [:ref:`details`] + [:ref:`details `] .. _sources_bissbs: @@ -368,7 +370,7 @@ Preparations: set up everything to build your own kernels git remote add -t master stable \ https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git - [:ref:`details`] + [:ref:`details `] .. _stablesources_bissbs: @@ -421,7 +423,7 @@ Preparations: set up everything to build your own kernels 'make olddefconfig' again and check if it now picked up the right config file as base. - [:ref:`details`] + [:ref:`details `] .. _localmodconfig_bissbs: @@ -455,7 +457,7 @@ Preparations: set up everything to build your own kernels spending much effort on, as long as it boots and allows to properly test the feature that causes trouble. - [:ref:`details`] + [:ref:`details `] .. _tagging_bissbs: @@ -465,7 +467,7 @@ Preparations: set up everything to build your own kernels ./scripts/config --set-str CONFIG_LOCALVERSION '-local' ./scripts/config -e CONFIG_LOCALVERSION_AUTO - [:ref:`details`] + [:ref:`details `] .. _debugsymbols_bissbs: @@ -484,7 +486,7 @@ Preparations: set up everything to build your own kernels ./scripts/config -d DEBUG_INFO -d DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT \ -d DEBUG_INFO_DWARF4 -d DEBUG_INFO_DWARF5 -e CONFIG_DEBUG_INFO_NONE - [:ref:`details`] + [:ref:`details `] .. _configmods_bissbs: @@ -494,14 +496,14 @@ Preparations: set up everything to build your own kernels * Are you running Debian? Then you want to avoid known problems by performing additional adjustments explained in the reference section. - [:ref:`details`]. + [:ref:`details `]. * If you want to influence other aspects of the configuration, do so now using your preferred tool. Note, to use make targets like 'menuconfig' or 'nconfig', you will need to install the development files of ncurses; for 'xconfig' you likewise need the Qt5 or Qt6 headers. - [:ref:`details`]. + [:ref:`details `]. .. _saveconfig_bissbs: @@ -511,7 +513,7 @@ Preparations: set up everything to build your own kernels make olddefconfig cp .config ~/kernel-config-working - [:ref:`details`] + [:ref:`details `] .. _introlatestcheck_bissbs: @@ -521,7 +523,7 @@ Segment 1: try to reproduce the problem with the latest codebase The following steps verify if the problem occurs with the code currently supported by developers. In case you face a regression, it also checks that the problem is not caused by some .config change, as reporting the issue then would -be a waste of time. [:ref:`details`] +be a waste of time. [:ref:`details `] .. _checkoutmaster_bissbs: @@ -530,7 +532,7 @@ be a waste of time. [:ref:`details`] cd ~/linux/ git switch --discard-changes --detach mainline/master - [:ref:`details`] + [:ref:`details `] .. _build_bissbs: @@ -545,7 +547,7 @@ be a waste of time. [:ref:`details`] reference section for alternatives, which obviously will require other steps to install as well. - [:ref:`details`] + [:ref:`details `] .. _install_bissbs: @@ -578,7 +580,7 @@ be a waste of time. [:ref:`details`] down: if you will build more kernels as described in segment 2 and 3, you will have to perform those again after executing ``command -v installkernel [...]``. - [:ref:`details`] + [:ref:`details `] .. _storagespace_bissbs: @@ -591,7 +593,7 @@ be a waste of time. [:ref:`details`] Write down or remember those two values for later: they enable you to prevent running out of disk space accidentally during a bisection. - [:ref:`details`] + [:ref:`details `] .. _kernelrelease_bissbs: @@ -618,7 +620,7 @@ be a waste of time. [:ref:`details`] If that command does not return '0', check the reference section, as the cause for this might interfere with your testing. - [:ref:`details`] + [:ref:`details `] .. _recheckbroken_bissbs: @@ -626,7 +628,7 @@ be a waste of time. [:ref:`details`] out the instructions in the reference section to ensure nothing went sideways during your tests. - [:ref:`details`] + [:ref:`details `] .. _recheckstablebroken_bissbs: @@ -662,12 +664,12 @@ be a waste of time. [:ref:`details`] Now verify if this kernel is showing the problem. - [:ref:`details`] + [:ref:`details `] Do you follow this guide to verify if a problem is present in the code currently supported by Linux kernel developers? Then you are done at this point. If you later want to remove the kernel you just built, check out -:ref:`Supplementary tasks: cleanup during and after following this guide`. +:ref:`Supplementary tasks: cleanup during and after following this guide `. In case you face a regression, move on and execute at least the next segment as well. @@ -679,7 +681,7 @@ Segment 2: check if the kernels you build work fine In case of a regression, you now want to ensure the trimmed configuration file you created earlier works as expected; a bisection with the .config file -otherwise would be a waste of time. [:ref:`details`] +otherwise would be a waste of time. [:ref:`details `] .. _recheckworking_bissbs: @@ -714,7 +716,7 @@ otherwise would be a waste of time. [:ref:`details`] Now check if this kernel works as expected; if not, consult the reference section for further instructions. - [:ref:`details`] + [:ref:`details `] .. _introbisect_bissbs: @@ -739,7 +741,7 @@ each kernel on commodity x86 machines. git bisect good v6.0 git bisect bad v6.1.5 - [:ref:`details`] + [:ref:`details `] .. _bisectbuild_bissbs: @@ -768,7 +770,7 @@ each kernel on commodity x86 machines. totally normal to see release identifiers like '6.0-rc1-local-gcafec0cacaca0' if you bisect between versions 6.1 and 6.2 for example. - [:ref:`details`] + [:ref:`details `] .. _bisecttest_bissbs: @@ -808,7 +810,7 @@ each kernel on commodity x86 machines. might need to scroll up to see the message mentioning the culprit; alternatively, run ``git bisect log > ~/bisection-log``. - [:ref:`details`] + [:ref:`details `] .. _bisectlog_bissbs: @@ -820,7 +822,7 @@ each kernel on commodity x86 machines. cp .config ~/bisection-config-culprit git bisect reset - [:ref:`details`] + [:ref:`details `] .. _revert_bissbs: @@ -882,7 +884,7 @@ each kernel on commodity x86 machines. Now check one last time if the feature that made you perform a bisection works with that kernel: if everything went well, it should not show the regression. - [:ref:`details`] + [:ref:`details `] .. _introclosure_bissbs: @@ -923,7 +925,7 @@ space might run out. kernel image and related files behind; in that case remove them as described in the reference section. - [:ref:`details`] + [:ref:`details `] .. _finishingtouch_bissbs: @@ -946,11 +948,15 @@ space might run out. the version considered 'good', and the last three or four you compiled during the actual bisection process. - [:ref:`details`] + [:ref:`details `] + .. _submit_improvements: -This concludes the step-by-step guide. +Conclusion +---------- + +You have reached the end of the step-by-step guide. Did you run into trouble following any of the above steps not cleared up by the reference section below? Did you spot errors? Or do you have ideas how to @@ -970,10 +976,20 @@ Reference section for the step-by-step guide This section holds additional information for almost all the items in the above step-by-step guide. +Preparations for building your own kernels +------------------------------------------ + + *The steps in this section lay the groundwork for all further tests.* + [:ref:`... `] + +The steps in all later sections of this guide depend on those described here. + +[:ref:`back to step-by-step guide `]. + .. _backup_bisref: Prepare for emergencies ------------------------ +~~~~~~~~~~~~~~~~~~~~~~~ *Create a fresh backup and put system repair and restore tools at hand.* [:ref:`... `] @@ -988,7 +1004,7 @@ for something going sideways, even if that should not happen. .. _vanilla_bisref: Remove anything related to externally maintained kernel modules ---------------------------------------------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *Remove all software that depends on externally developed kernel drivers or builds them automatically.* [:ref:`...`] @@ -1006,7 +1022,7 @@ explains in more detail. .. _secureboot_bisref: Deal with techniques like Secure Boot -------------------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *On platforms with 'Secure Boot' or similar techniques, prepare everything to ensure the system will permit your self-compiled kernel to boot later.* @@ -1043,7 +1059,7 @@ Afterwards, permit MokManager to reboot the machine. .. _bootworking_bisref: Boot the last kernel that was working -------------------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *Boot into the last working kernel and briefly recheck if the feature that regressed really works.* [:ref:`...`] @@ -1056,7 +1072,7 @@ the right thing. .. _diskspace_bisref: Space requirements ------------------- +~~~~~~~~~~~~~~~~~~ *Ensure to have enough free space for building Linux.* [:ref:`... `] @@ -1074,7 +1090,7 @@ space by quite a few gigabytes. .. _rangecheck_bisref: Bisection range ---------------- +~~~~~~~~~~~~~~~ *Determine the kernel versions considered 'good' and 'bad' throughout this guide.* [:ref:`...`] @@ -1099,7 +1115,7 @@ to do this as well, if you tried bisecting between 6.0.13 and 6.1.15. .. _buildrequires_bisref: Install build requirements --------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~ *Install all software required to build a Linux kernel.* [:ref:`...`] @@ -1150,7 +1166,7 @@ the kernel's tools/ directory. .. _sources_bisref: Download the sources using Git ------------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *Retrieve the Linux mainline sources.* [:ref:`...`] @@ -1170,7 +1186,7 @@ work better for you: .. _sources_bundle_bisref: Downloading Linux mainline sources using a bundle -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +""""""""""""""""""""""""""""""""""""""""""""""""" Use the following commands to retrieve the Linux mainline sources using a bundle:: @@ -1241,7 +1257,7 @@ Note, shallow clones have a few peculiar characteristics: .. _oldconfig_bisref: Start defining the build configuration for your kernel ------------------------------------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *Start preparing a kernel build configuration (the '.config' file).* [:ref:`... `] @@ -1301,7 +1317,7 @@ that file to the build machine and store it as ~/linux/.config; afterwards run .. _localmodconfig_bisref: Trim the build configuration for your kernel --------------------------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *Disable any kernel modules apparently superfluous for your setup.* [:ref:`... `] @@ -1350,7 +1366,7 @@ step-by-step guide mentions:: .. _tagging_bisref: Tag the kernels about to be build ---------------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *Ensure all the kernels you will build are clearly identifiable using a special tag and a unique version identifier.* [:ref:`... `] @@ -1366,7 +1382,7 @@ confusing during the bisection. .. _debugsymbols_bisref: Decide to enable or disable debug symbols ------------------------------------------ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *Decide how to handle debug symbols.* [:ref:`... `] @@ -1395,7 +1411,7 @@ explains this process in more detail. .. _configmods_bisref: Adjust build configuration --------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~ *Check if you may want or need to adjust some other kernel configuration options:* @@ -1406,7 +1422,7 @@ kernel configuration options. .. _configmods_distros_bisref: Distro specific adjustments -~~~~~~~~~~~~~~~~~~~~~~~~~~~ +""""""""""""""""""""""""""" *Are you running* [:ref:`... `] @@ -1431,7 +1447,7 @@ when following this guide on a few commodity distributions. .. _configmods_individual_bisref: Individual adjustments -~~~~~~~~~~~~~~~~~~~~~~ +"""""""""""""""""""""" *If you want to influence the other aspects of the configuration, do so now.* [:ref:`... `] @@ -1448,13 +1464,13 @@ is missing. .. _saveconfig_bisref: Put the .config file aside --------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~ *Reprocess the .config after the latest changes and store it in a safe place.* [:ref:`... `] Put the .config you prepared aside, as you want to copy it back to the build -directory every time during this guide before you start building another +directory every time during this guide before you start building another kernel. That's because going back and forth between different versions can alter .config files in odd ways; those occasionally cause side effects that could confuse testing or in some cases render the result of your bisection @@ -1464,8 +1480,8 @@ meaningless. .. _introlatestcheck_bisref: -Try to reproduce the regression ------------------------------------------ +Try to reproduce the problem with the latest codebase +----------------------------------------------------- *Verify the regression is not caused by some .config change and check if it still occurs with the latest codebase.* [:ref:`... `] @@ -1519,21 +1535,21 @@ highly recommended for these reasons: .. _checkoutmaster_bisref: Check out the latest Linux codebase ------------------------------------ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *Check out the latest Linux codebase.* - [:ref:`... `] + [:ref:`... `] In case you later want to recheck if an ever newer codebase might fix the problem, remember to run that ``git fetch --shallow-exclude [...]`` command again mentioned earlier to update your local Git repository. -[:ref:`back to step-by-step guide `] +[:ref:`back to step-by-step guide `] .. _build_bisref: Build your kernel ------------------ +~~~~~~~~~~~~~~~~~ *Build the image and the modules of your first kernel using the config file you prepared.* [:ref:`... `] @@ -1543,7 +1559,7 @@ yourself. Another subsection explains how to directly package your kernel up as deb, rpm or tar file. Dealing with build errors -~~~~~~~~~~~~~~~~~~~~~~~~~ +""""""""""""""""""""""""" When a build error occurs, it might be caused by some aspect of your machine's setup that often can be fixed quickly; other times though the problem lies in @@ -1578,7 +1594,7 @@ system, but lies in the code. If you run into one of those, you might thus find a solution (e.g. a patch) or workaround for your issue, too. Package your kernel up -~~~~~~~~~~~~~~~~~~~~~~ +"""""""""""""""""""""" The step-by-step guide uses the default make targets (e.g. 'bzImage' and 'modules' on x86) to build the image and the modules of your kernel, which later @@ -1609,7 +1625,7 @@ distribution's kernel packages. .. _install_bisref: Put the kernel in place ------------------------ +~~~~~~~~~~~~~~~~~~~~~~~ *Install the kernel you just built.* [:ref:`... `] @@ -1652,7 +1668,7 @@ process. Afterwards add your kernel to your bootloader configuration and reboot. .. _storagespace_bisref: Storage requirements per kernel -------------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *Check how much storage space the kernel, its modules, and other related files like the initramfs consume.* [:ref:`... `] @@ -1673,7 +1689,7 @@ need to look in different places. .. _tainted_bisref: Check if your newly built kernel considers itself 'tainted' ------------------------------------------------------------ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *Check if the kernel marked itself as 'tainted'.* [:ref:`... `] @@ -1692,7 +1708,7 @@ interest, as your testing might be flawed otherwise. .. _recheckbroken_bisref: Check the kernel built from a recent mainline codebase ------------------------------------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *Verify if your bug occurs with the newly built kernel.* [:ref:`... `] @@ -1718,7 +1734,7 @@ the kernel you built from the latest codebase. These are the most frequent: .. _recheckstablebroken_bisref: Check the kernel built from the latest stable/longterm codebase ---------------------------------------------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *Are you facing a regression within a stable/longterm release, but failed to reproduce it with the kernel you just built using the latest mainline sources? @@ -1763,7 +1779,7 @@ ensure the kernel version you assumed to be 'good' earlier in the process (e.g. .. _recheckworking_bisref: Build your own version of the 'good' kernel -------------------------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *Build your own variant of the working kernel and check if the feature that regressed works as expected with it.* [:ref:`... `] @@ -1794,10 +1810,20 @@ most likely flawed. [:ref:`back to step-by-step guide `] +Perform a bisection and validate the result +------------------------------------------- + + *With all the preparations and precaution builds taken care of, you are now + ready to begin the bisection.* [:ref:`... `] + +The steps in this segment perform and validate the bisection. + +[:ref:`back to step-by-step guide `]. + .. _bisectstart_bisref: Start the bisection -------------------- +~~~~~~~~~~~~~~~~~~~ *Start the bisection and tell Git about the versions earlier established as 'good' and 'bad'.* [:ref:`... `] @@ -1811,7 +1837,7 @@ for you to test. .. _bisectbuild_bisref: Build a kernel from the bisection point ---------------------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *Build, install, and boot a kernel from the code Git checked out using the same commands you used earlier.* [:ref:`... `] @@ -1839,7 +1865,7 @@ There are two things worth of note here: .. _bisecttest_bisref: Bisection checkpoint --------------------- +~~~~~~~~~~~~~~~~~~~~ *Check if the feature that regressed works in the kernel you just built.* [:ref:`... `] @@ -1853,7 +1879,7 @@ will be for nothing. .. _bisectlog_bisref: Put the bisection log away --------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~ *Store Git's bisection log and the current .config file in a safe place.* [:ref:`... `] @@ -1873,7 +1899,7 @@ ask for it after you report the regression. .. _revert_bisref: Try reverting the culprit -------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~ *Try reverting the culprit on top of the latest codebase to see if this fixes your regression.* [:ref:`... `] @@ -1891,14 +1917,20 @@ succeeds, test that kernel version instead. [:ref:`back to step-by-step guide `] +Cleanup steps during and after following this guide +--------------------------------------------------- -Supplementary tasks: cleanup during and after the bisection ------------------------------------------------------------ + *During and after following this guide you might want or need to remove some + of the kernels you installed.* [:ref:`... `] + +The steps in this section describe clean-up procedures. + +[:ref:`back to step-by-step guide `]. .. _makeroom_bisref: Cleaning up during the bisection --------------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ *To remove one of the kernels you installed, look up its 'kernelrelease' identifier.* [:ref:`... `] @@ -1939,7 +1971,7 @@ when all you want is to remove 6.0 or 6.0.1. [:ref:`back to step-by-step guide `] Cleaning up after the bisection -------------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. _finishingtouch_bisref: From a421835a2a327f2b3472dcb755adb57d0f82e478 Mon Sep 17 00:00:00 2001 From: Thorsten Leemhuis Date: Tue, 9 Apr 2024 09:30:47 +0200 Subject: [PATCH 155/606] docs: verify/bisect: explain testing reverts, patches and newer code Rename 'Supplementary tasks' to 'Complementary tasks' while introducing a section 'Optional tasks: test reverts, patches, or later versions': the latter is something readers occasionally will have to do after reporting a bug and thus is best covered here. Signed-off-by: Thorsten Leemhuis Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/dacf26a4c48e9e8f04ecbc77e0a74c9b2a6a1103.1712647788.git.linux@leemhuis.info --- .../verify-bugs-and-bisect-regressions.rst | 128 ++++++++++++++++-- 1 file changed, 115 insertions(+), 13 deletions(-) diff --git a/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst b/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst index 355c2cea5230d..1987c827211fc 100644 --- a/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst +++ b/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst @@ -160,7 +160,7 @@ will be considered the 'good' release and used to prepare the .config file. this step (e.g. build, install, boot, and test a kernel to then tell Git the outcome). Do so again and again until Git shows which commit broke things. If you run short of disk space during this process, check the - section 'Supplementary tasks: cleanup during and after the process' + section 'Complementary tasks: cleanup during and after the process' below. d) Once your finished the bisection, put a few things away:: @@ -182,7 +182,7 @@ will be considered the 'good' release and used to prepare the .config file. kernel; just this time skip the first command copying the base .config file over, as that already has been taken care off. -* **Supplementary tasks**: cleanup during and after the process. +* **Complementary tasks**: cleanup during and after the process. a) To avoid running out of disk space during a bisection, you might need to remove some kernels you built earlier. You most likely want to keep those @@ -205,6 +205,18 @@ will be considered the 'good' release and used to prepare the .config file. the kernels you built earlier and later you might want to keep around for a week or two. +* **Optional task**: test a debug patch or a proposed fix later:: + + git fetch mainline + git switch --discard-changes --detach mainline/master + git apply /tmp/foobars-proposed-fix-v1.patch + cp ~/kernel-config-working .config + ./scripts/config --set-str CONFIG_LOCALVERSION '-local-foobars-fix-v1' + + Build, install, and boot a kernel as described in *segment 1, section b* -- + but this time omit the first command copying the build configuration over, + as that has been taken care of already. + .. _introguide_bissbs: Step-by-step guide on how to verify bugs and bisect regressions @@ -232,7 +244,9 @@ developers are obliged to act upon. :ref:`Segment 3: perform a bisection and validate the result `. - :ref:`Supplementary tasks: cleanup during and after following this guide `. + :ref:`Complementary tasks: cleanup during and after following this guide `. + + :ref:`Optional tasks: test reverts, patches, or later versions `. The steps in each segment illustrate the important aspects of the process, while a comprehensive reference section holds additional details for almost all of the @@ -669,7 +683,7 @@ be a waste of time. [:ref:`details `] Do you follow this guide to verify if a problem is present in the code currently supported by Linux kernel developers? Then you are done at this point. If you later want to remove the kernel you just built, check out -:ref:`Supplementary tasks: cleanup during and after following this guide `. +:ref:`Complementary tasks: cleanup during and after following this guide `. In case you face a regression, move on and execute at least the next segment as well. @@ -888,7 +902,7 @@ each kernel on commodity x86 machines. .. _introclosure_bissbs: -Supplementary tasks: cleanup during and after the bisection +Complementary tasks: cleanup during and after the bisection ----------------------------------------------------------- During and after following this guide you might want or need to remove some of @@ -950,6 +964,81 @@ space might run out. [:ref:`details `] +.. _introoptional_bissbs: + +Optional: test reverts, patches, or later versions +-------------------------------------------------- + +While or after reporting a bug, you might want or potentially will be asked to +test reverts, debug patches, proposed fixes, or other versions. In that case +follow these instructions. + +* Update your Git clone and check out the latest code. + + * In case you want to test mainline, fetch its latest changes before checking + its code out:: + + git fetch mainline + git switch --discard-changes --detach mainline/master + + * In case you want to test a stable or longterm kernel, first add the branch + holding the series you are interested in (6.2 in the example), unless you + already did so earlier:: + + git remote set-branches --add stable linux-6.2.y + + Then fetch the latest changes and check out the latest version from the + series:: + + git fetch stable + git switch --discard-changes --detach stable/linux-6.2.y + +* Copy your kernel build configuration over:: + + cp ~/kernel-config-working .config + +* Your next step depends on what you want to do: + + * In case you just want to test the latest codebase, head to the next step, + you are already all set. + + * In case you want to test if a revert fixes an issue, revert one or multiple + changes by specifying their commit ids:: + + git revert --no-edit cafec0cacaca0 + + Now give that kernel a special tag to facilitates its identification and + prevent accidentally overwriting another kernel:: + + ./scripts/config --set-str CONFIG_LOCALVERSION '-local-cafec0cacaca0-reverted' + + * In case you want to test a patch, store the patch in a file like + '/tmp/foobars-proposed-fix-v1.patch' and apply it like this:: + + git apply /tmp/foobars-proposed-fix-v1.patch + + In case of multiple patches, repeat this step with the others. + + Now give that kernel a special tag to facilitates its identification and + prevent accidentally overwriting another kernel:: + + ./scripts/config --set-str CONFIG_LOCALVERSION '-local-foobars-fix-v1' + +* Build a kernel using the familiar commands, just without copying the kernel + build configuration over, as that has been taken care of already:: + + make olddefconfig && + make -j $(nproc --all) + # * Check if the free space suffices holding another kernel: + df -h /boot/ /lib/modules/ + sudo make modules_install + command -v installkernel && sudo make install + make -s kernelrelease | tee -a ~/kernels-built + reboot + +* Now verify you booted the newly built kernel and check it. + +[:ref:`details `] .. _submit_improvements: @@ -1986,20 +2075,33 @@ build artifacts and the Linux sources, but will leave the Git repository (~/linux/.git/) behind -- a simple ``git reset --hard`` thus will bring the sources back. -Removing the repository as well would likely be unwise at this point: there is a -decent chance developers will ask you to build another kernel to perform -additional tests. This is often required to debug an issue or check proposed -fixes. Before doing so you want to run the ``git fetch mainline`` command again -followed by ``git checkout mainline/master`` to bring your clone up to date and -checkout the latest codebase. Then apply the patch using ``git apply -`` or ``git am `` and build yet another kernel using the -familiar commands. +Removing the repository as well would likely be unwise at this point: there +is a decent chance developers will ask you to build another kernel to +perform additional tests -- like testing a debug patch or a proposed fix. +Details on how to perform those can be found in the section :ref:`Optional +tasks: test reverts, patches, or later versions `. Additional tests are also the reason why you want to keep the ~/kernel-config-working file around for a few weeks. [:ref:`back to step-by-step guide `] +.. _introoptional_bisref: + +Test reverts, patches, or later versions +---------------------------------------- + + *While or after reporting a bug, you might want or potentially will be asked + to test reverts, patches, proposed fixes, or other versions.* + [:ref:`... `] + +All the commands used in this section should be pretty straight forward, so +there is not much to add except one thing: when setting a kernel tag as +instructed, ensure it is not much longer than the one used in the example, as +problems will arise if the kernelrelease identifier exceeds 63 characters. + +[:ref:`back to step-by-step guide `]. + Additional reading material =========================== From 2bcfd71e8dfca5047f9fbcc2e2ba62c5bb39aa3a Mon Sep 17 00:00:00 2001 From: Thorsten Leemhuis Date: Tue, 9 Apr 2024 09:30:48 +0200 Subject: [PATCH 156/606] docs: verify/bisect: describe how to use a build host Describe how to build kernels on another system (with and without cross-compiling), as building locally can be quite painfully on some slow systems. This is done in an add-on section, as it would make the step-by-step guide to complicated if this special case would be described there. Signed-off-by: Thorsten Leemhuis Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/288160cb4769e46a3280250ca71da0abc4aa002d.1712647788.git.linux@leemhuis.info --- .../verify-bugs-and-bisect-regressions.rst | 78 ++++++++++++++++++- 1 file changed, 74 insertions(+), 4 deletions(-) diff --git a/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst b/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst index 1987c827211fc..6193c79764273 100644 --- a/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst +++ b/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst @@ -276,6 +276,10 @@ Preparations: set up everything to build your own kernels The following steps lay the groundwork for all further tasks. +Note: the instructions assume you are building and testing on the same +machine; if you want to compile the kernel on another system, check +:ref:`Build kernels on a different machine ` below. + .. _backup_bissbs: * Create a fresh backup and put system repair and restore tools at hand, just @@ -2103,11 +2107,77 @@ problems will arise if the kernelrelease identifier exceeds 63 characters. [:ref:`back to step-by-step guide `]. -Additional reading material -=========================== +Additional information +====================== + +.. _buildhost_bis: + +Build kernels on a different machine +------------------------------------ + +To compile kernels on another system, slightly alter the step-by-step guide's +instructions: + +* Start following the guide on the machine where you want to install and test + the kernels later. + +* After executing ':ref:`Boot into the working kernel and briefly use the + apparently broken feature `', save the list of loaded + modules to a file using ``lsmod > ~/test-machine-lsmod``. Then locate the + build configuration for the running kernel (see ':ref:`Start defining the + build configuration for your kernel `' for hints on where + to find it) and store it as '~/test-machine-config-working'. Transfer both + files to the home directory of your build host. + +* Continue the guide on the build host (e.g. with ':ref:`Ensure to have enough + free space for building [...] `'). + +* When you reach ':ref:`Start preparing a kernel build configuration[...] + `': before running ``make olddefconfig`` for the first time, + execute the following command to base your configuration on the one from the + test machine's 'working' kernel:: + + cp ~/test-machine-config-working ~/linux/.config + +* During the next step to ':ref:`disable any apparently superfluous kernel + modules `' use the following command instead:: + + yes '' | make localmodconfig LSMOD=~/lsmod_foo-machine localmodconfig + +* Continue the guide, but ignore the instructions outlining how to compile, + install, and reboot into a kernel every time they come up. Instead build + like this:: -Further sources ---------------- + cp ~/kernel-config-working .config + make olddefconfig && + make -j $(nproc --all) targz-pkg + + This will generate a gzipped tar file whose name is printed in the last + line shown; for example, a kernel with the kernelrelease identifier + '6.0.0-rc1-local-g928a87efa423' built for x86 machines usually will + be stored as '~/linux/linux-6.0.0-rc1-local-g928a87efa423-x86.tar.gz'. + + Copy that file to your test machine's home directory. + +* Switch to the test machine to check if you have enough space to hold another + kernel. Then extract the file you transferred:: + + sudo tar -xvzf ~/linux-6.0.0-rc1-local-g928a87efa423-x86.tar.gz -C / + + Afterwards :ref:`generate the initramfs and add the kernel to your boot + loader's configuration `; on some distributions the following + command will take care of both these tasks:: + + sudo /sbin/installkernel 6.0.0-rc1-local-g928a87efa423 /boot/vmlinuz-6.0.0-rc1-local-g928a87efa423 + + Now reboot and ensure you started the intended kernel. + +This approach even works when building for another architecture: just install +cross-compilers and add the appropriate parameters to every invocation of make +(e.g. ``make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- [...]``). + +Additional reading material +--------------------------- * The `man page for 'git bisect' `_ and `fighting regressions with 'git bisect' `_ From 8d939ae349343b55984ea821164e2be526d48cd1 Mon Sep 17 00:00:00 2001 From: Thorsten Leemhuis Date: Tue, 9 Apr 2024 09:30:49 +0200 Subject: [PATCH 157/606] docs: verify/bisect: stable regressions: first stable, then mainline Rearrange the instructions so that readers facing a regression within a stable or longterm series first test its latest release before testing mainline. This is less scary for some people. It also reduces the chance that something goes sideways for readers that compile their first kernel, as mainline can cause slightly more trouble. Signed-off-by: Thorsten Leemhuis Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/efd3cb9c68db450091021326bf9c334553df0ec2.1712647788.git.linux@leemhuis.info --- .../verify-bugs-and-bisect-regressions.rst | 91 +++++++++++-------- 1 file changed, 51 insertions(+), 40 deletions(-) diff --git a/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst b/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst index 6193c79764273..c389d4fd7599d 100644 --- a/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst +++ b/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst @@ -309,31 +309,32 @@ machine; if you want to compile the kernel on another system, check .. _rangecheck_bissbs: * Determine the kernel versions considered 'good' and 'bad' throughout this - guide. + guide: - Do you follow this guide to verify if a bug is present in the code developers - care for? Then consider the mainline release your 'working' kernel (the newest - one you regularly use) is based on to be the 'good' version; if your 'working' - kernel for example is 6.0.13, then your 'good' kernel is 6.0. + * Do you follow this guide to verify if a bug is present in the code the + primary developers care for? Then consider the version of the newest kernel + you regularly use currently as 'good' (e.g. 6.0, 6.0.13, or 6.1-rc2). - In case you face a regression, it depends on the version range where the - regression was introduced: + * Do you face a regression, e.g. something broke or works worse after + switching to a newer kernel version? In that case it depends on the version + range during which the problem appeared: - * Something which used to work in Linux 6.0 broke when switching to Linux - 6.1-rc1? Then henceforth regard 6.0 as the last known 'good' version - and 6.1-rc1 as the first 'bad' one. + * Something regressed when updating from a stable/longterm release + (say 6.0.13) to a newer mainline series (like 6.1-rc7 or 6.1) or a + stable/longterm version based on one (say 6.1.5)? Then consider the + mainline release your working kernel is based on to be the 'good' + version (e.g. 6.0) and the first version to be broken as the 'bad' one + (e.g. 6.1-rc7, 6.1, or 6.1.5). Note, at this point it is merely assumed + that 6.0 is fine; this hypothesis will be checked in segment 2. - * Some function stopped working when updating from 6.0.13 to 6.1.5? Then for - the time being consider 6.0 as the last 'good' version and 6.1.5 as - the 'bad' one. Note, at this point it is merely assumed that 6.0 is fine; - this assumption will be checked in segment 2. + * Something regressed when switching from one mainline version (say 6.0) to + a later one (like 6.1-rc1) or a stable/longterm release based on it + (say 6.1.5)? Then regard the last working version (e.g. 6.0) as 'good' and + the first broken (e.g. 6.1-rc1 or 6.1.5) as 'bad'. - * A feature you used in 6.0.13 does not work at all or worse in 6.1.15? In - that case you want to bisect within a stable/longterm series: consider - 6.0.13 as the last known 'good' version and 6.0.15 as the first 'bad' - one. Note, in this case you still want to compile and test a mainline kernel - as explained in segment 1: the outcome will determine if you need to report - your issue to the regular developers or the stable team. + * Something regressed when updating within a stable/longterm series (say + from 6.0.13 to 6.0.15)? Then consider those versions as 'good' and 'bad' + (e.g. 6.0.13 and 6.0.15), as you need to bisect within that series. *Note, do not confuse 'good' version with 'working' kernel; the latter term throughout this guide will refer to the last kernel that has been working @@ -392,19 +393,13 @@ machine; if you want to compile the kernel on another system, check .. _stablesources_bissbs: -* Retrieve the sources for any stable or longterm series you might need. - - Is the version you earlier established as 'bad' a stable or longterm release? - Then download the code for the series it belongs to ('linux-6.1.y' in this - example):: +* Is one of the versions you earlier established as 'good' or 'bad' a stable or + longterm release (say 6.1.5)? Then download the code for the series it belongs + to ('linux-6.1.y' in this example):: git remote set-branches --add stable linux-6.1.y git fetch stable - If the version earlier established as 'good' is from a different stable or - longterm series (say 6.0.13), repeat the previous step, but this time for the - branch holding the series the 'good' version belongs to (e.g. linux-6.0.y). - .. _oldconfig_bissbs: * Start preparing a kernel build configuration (the '.config' file). @@ -545,10 +540,24 @@ be a waste of time. [:ref:`details `] .. _checkoutmaster_bissbs: -* Check out the latest Linux codebase:: +* Check out the latest Linux codebase. - cd ~/linux/ - git switch --discard-changes --detach mainline/master + * Are your 'good' and 'bad' versions from the same stable or longterm series? + Then check the `front page of kernel.org `_: if it + lists a release from that series without an '[EOL]' tag, checkout the series + latest version ('linux-6.1.y' in the following example):: + + cd ~/linux/ + git switch --discard-changes --detach stable/linux-6.1.y + + Your series is unsupported, if is not listed or carrying a 'end of life' + tag. In that case you might want to check if a successor series (say + linux-6.2.y) or mainline (see next point) fix the bug. + + * In all other cases, run:: + + cd ~/linux/ + git switch --discard-changes --detach mainline/master [:ref:`details `] @@ -650,15 +659,15 @@ be a waste of time. [:ref:`details `] .. _recheckstablebroken_bissbs: -* Are you facing a problem within a stable/longterm series, but failed to - reproduce it with the mainline kernel you just built? One that according to - the `front page of kernel.org `_ is still supported? Then - check if the latest codebase for the particular series might already fix the - problem. To do so, check out that series latest version (again, this here is - assumed to be 6.0):: +* Did you just built a stable or longterm kernel? And were you able to reproduce + the regression with it? Then you should test the latest mainline codebase as + well, because the result determines which developers the bug must be submitted + to. + + To prepare that test, check out current mainline:: cd ~/linux/ - git switch --discard-changes --detach linux-6.0.y + git switch --discard-changes --detach mainline/master Now use the checked out code to build and install another kernel using the commands the earlier steps already described in more detail:: @@ -680,7 +689,9 @@ be a waste of time. [:ref:`details `] uname -r cat /proc/sys/kernel/tainted - Now verify if this kernel is showing the problem. + Now verify if this kernel is showing the problem. If it does, then you need + to report the bug to the primary developers; if it does not, report it to the + stable team. See Documentation/admin-guide/reporting-issues.rst for details. [:ref:`details `] From 7e91ed763dc07437777bd012af7a2bd4493731ff Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Fri, 13 Oct 2023 20:17:12 +0200 Subject: [PATCH 158/606] clk: sunxi-ng: h6: Reparent CPUX during PLL CPUX rate change While PLL CPUX clock rate change when CPU is running from it works in vast majority of cases, now and then it causes instability. This leads to system crashes and other undefined behaviour. After a lot of testing (30+ hours) while also doing a lot of frequency switches, we can't observe any instability issues anymore when doing reparenting to stable clock like 24 MHz oscillator. Fixes: 524353ea480b ("clk: sunxi-ng: add support for the Allwinner H6 CCU") Reported-by: Chad Wagner Link: https://forum.libreelec.tv/thread/27295-orange-pi-3-lts-freezes/ Tested-by: Chad Wagner Reviewed-by: Chen-Yu Tsai Link: https://lore.kernel.org/r/20231013181712.2128037-1-jernej.skrabec@gmail.com Signed-off-by: Jernej Skrabec --- drivers/clk/sunxi-ng/ccu-sun50i-h6.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/clk/sunxi-ng/ccu-sun50i-h6.c b/drivers/clk/sunxi-ng/ccu-sun50i-h6.c index 42568c6161814..892df807275c8 100644 --- a/drivers/clk/sunxi-ng/ccu-sun50i-h6.c +++ b/drivers/clk/sunxi-ng/ccu-sun50i-h6.c @@ -1181,11 +1181,18 @@ static const u32 usb2_clk_regs[] = { SUN50I_H6_USB3_CLK_REG, }; +static struct ccu_mux_nb sun50i_h6_cpu_nb = { + .common = &cpux_clk.common, + .cm = &cpux_clk.mux, + .delay_us = 1, + .bypass_index = 0, /* index of 24 MHz oscillator */ +}; + static int sun50i_h6_ccu_probe(struct platform_device *pdev) { void __iomem *reg; + int i, ret; u32 val; - int i; reg = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(reg)) @@ -1252,7 +1259,15 @@ static int sun50i_h6_ccu_probe(struct platform_device *pdev) val |= BIT(24); writel(val, reg + SUN50I_H6_HDMI_CEC_CLK_REG); - return devm_sunxi_ccu_probe(&pdev->dev, reg, &sun50i_h6_ccu_desc); + ret = devm_sunxi_ccu_probe(&pdev->dev, reg, &sun50i_h6_ccu_desc); + if (ret) + return ret; + + /* Reparent CPU during PLL CPUX rate changes */ + ccu_mux_notifier_register(pll_cpux_clk.common.hw.clk, + &sun50i_h6_cpu_nb); + + return 0; } static const struct of_device_id sun50i_h6_ccu_ids[] = { From b914ec33b391ec766545a41f0cfc0de3e0b388d7 Mon Sep 17 00:00:00 2001 From: Frank Oltmanns Date: Sun, 10 Mar 2024 14:21:11 +0100 Subject: [PATCH 159/606] clk: sunxi-ng: common: Support minimum and maximum rate The Allwinner SoC's typically have an upper and lower limit for their clocks' rates. Up until now, support for that has been implemented separately for each clock type. Implement that functionality in the sunxi-ng's common part making use of the CCF rate liming capabilities, so that it is available for all clock types. Suggested-by: Maxime Ripard Signed-off-by: Frank Oltmanns Cc: stable@vger.kernel.org Reviewed-by: Jernej Skrabec Acked-by: Maxime Ripard Link: https://lore.kernel.org/r/20240310-pinephone-pll-fixes-v4-1-46fc80c83637@oltmanns.dev Signed-off-by: Jernej Skrabec --- drivers/clk/sunxi-ng/ccu_common.c | 19 +++++++++++++++++++ drivers/clk/sunxi-ng/ccu_common.h | 3 +++ 2 files changed, 22 insertions(+) diff --git a/drivers/clk/sunxi-ng/ccu_common.c b/drivers/clk/sunxi-ng/ccu_common.c index 8babce55302f5..ac0091b4ce242 100644 --- a/drivers/clk/sunxi-ng/ccu_common.c +++ b/drivers/clk/sunxi-ng/ccu_common.c @@ -44,6 +44,16 @@ bool ccu_is_better_rate(struct ccu_common *common, unsigned long current_rate, unsigned long best_rate) { + unsigned long min_rate, max_rate; + + clk_hw_get_rate_range(&common->hw, &min_rate, &max_rate); + + if (current_rate > max_rate) + return false; + + if (current_rate < min_rate) + return false; + if (common->features & CCU_FEATURE_CLOSEST_RATE) return abs(current_rate - target_rate) < abs(best_rate - target_rate); @@ -122,6 +132,7 @@ static int sunxi_ccu_probe(struct sunxi_ccu *ccu, struct device *dev, for (i = 0; i < desc->hw_clks->num ; i++) { struct clk_hw *hw = desc->hw_clks->hws[i]; + struct ccu_common *common = hw_to_ccu_common(hw); const char *name; if (!hw) @@ -136,6 +147,14 @@ static int sunxi_ccu_probe(struct sunxi_ccu *ccu, struct device *dev, pr_err("Couldn't register clock %d - %s\n", i, name); goto err_clk_unreg; } + + if (common->max_rate) + clk_hw_set_rate_range(hw, common->min_rate, + common->max_rate); + else + WARN(common->min_rate, + "No max_rate, ignoring min_rate of clock %d - %s\n", + i, name); } ret = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, diff --git a/drivers/clk/sunxi-ng/ccu_common.h b/drivers/clk/sunxi-ng/ccu_common.h index 942a72c094374..329734f8cf42b 100644 --- a/drivers/clk/sunxi-ng/ccu_common.h +++ b/drivers/clk/sunxi-ng/ccu_common.h @@ -31,6 +31,9 @@ struct ccu_common { u16 lock_reg; u32 prediv; + unsigned long min_rate; + unsigned long max_rate; + unsigned long features; spinlock_t *lock; struct clk_hw hw; From 69f16d9b789821183d342719d2ebd4a5ac7178bc Mon Sep 17 00:00:00 2001 From: Frank Oltmanns Date: Sun, 10 Mar 2024 14:21:12 +0100 Subject: [PATCH 160/606] clk: sunxi-ng: a64: Set minimum and maximum rate for PLL-MIPI When the Allwinner A64's TCON0 searches the ideal rate for the connected panel, it may happen that it requests a rate from its parent PLL-MIPI which PLL-MIPI does not support. This happens for example on the Olimex TERES-I laptop where TCON0 requests PLL-MIPI to change to a rate of several GHz which causes the panel to stay blank. It also happens on the pinephone where a rate of less than 500 MHz is requested which causes instabilities on some phones. Set the minimum and maximum rate of Allwinner A64's PLL-MIPI according to the Allwinner User Manual. Fixes: ca1170b69968 ("clk: sunxi-ng: a64: force select PLL_MIPI in TCON0 mux") Reported-by: Diego Roversi Closes: https://groups.google.com/g/linux-sunxi/c/Rh-Uqqa66bw Tested-by: Diego Roversi Cc: stable@vger.kernel.org Reviewed-by: Maxime Ripard Signed-off-by: Frank Oltmanns Reviewed-by: Jernej Skrabec Link: https://lore.kernel.org/r/20240310-pinephone-pll-fixes-v4-2-46fc80c83637@oltmanns.dev Signed-off-by: Jernej Skrabec --- drivers/clk/sunxi-ng/ccu-sun50i-a64.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clk/sunxi-ng/ccu-sun50i-a64.c b/drivers/clk/sunxi-ng/ccu-sun50i-a64.c index 8951ffc14ff52..6a4b2b9ef30a8 100644 --- a/drivers/clk/sunxi-ng/ccu-sun50i-a64.c +++ b/drivers/clk/sunxi-ng/ccu-sun50i-a64.c @@ -182,6 +182,8 @@ static struct ccu_nkm pll_mipi_clk = { &ccu_nkm_ops, CLK_SET_RATE_UNGATE | CLK_SET_RATE_PARENT), .features = CCU_FEATURE_CLOSEST_RATE, + .min_rate = 500000000, + .max_rate = 1400000000, }, }; From cebfbc89ae2552dbb58cd9b8206a5c8e0e6301e9 Mon Sep 17 00:00:00 2001 From: Jack Yu Date: Mon, 15 Apr 2024 06:27:23 +0000 Subject: [PATCH 161/606] ASoC: rt715: add vendor clear control register Add vendor clear control register in readable register's callback function. This prevents an access failure reported in Intel CI tests. Signed-off-by: Jack Yu Closes: https://github.com/thesofproject/linux/issues/4860 Tested-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/6a103ce9134d49d8b3941172c87a7bd4@realtek.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt715-sdw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/rt715-sdw.c b/sound/soc/codecs/rt715-sdw.c index 7e13868ff99f0..f012fe0ded6d2 100644 --- a/sound/soc/codecs/rt715-sdw.c +++ b/sound/soc/codecs/rt715-sdw.c @@ -111,6 +111,7 @@ static bool rt715_readable_register(struct device *dev, unsigned int reg) case 0x839d: case 0x83a7: case 0x83a9: + case 0x752001: case 0x752039: return true; default: From 74871791ffa9562d43567c5ff2ae93def3f39f65 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 25 Mar 2024 09:34:36 +0100 Subject: [PATCH 162/606] ntfs3: serve as alias for the legacy ntfs driver Johan Hovold reported that removing the legacy ntfs driver broke boot for him since his fstab uses the legacy ntfs driver to access firmware from the original Windows partition. Use ntfs3 as an alias for legacy ntfs if CONFIG_NTFS_FS is selected. This is similar to how ext3 is treated. Link: https://lore.kernel.org/r/Zf2zPf5TO5oYt3I3@hovoldconsulting.com Link: https://lore.kernel.org/r/20240325-hinkriegen-zuziehen-d7e2c490427a@brauner Fixes: 7ffa8f3d3023 ("fs: Remove NTFS classic") Tested-by: Johan Hovold Cc: Matthew Wilcox (Oracle) Cc: Johan Hovold Signed-off-by: Christian Brauner --- fs/ntfs3/Kconfig | 9 +++++++++ fs/ntfs3/super.c | 31 +++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/fs/ntfs3/Kconfig b/fs/ntfs3/Kconfig index cdfdf51e55d79..7bc31d69f680d 100644 --- a/fs/ntfs3/Kconfig +++ b/fs/ntfs3/Kconfig @@ -46,3 +46,12 @@ config NTFS3_FS_POSIX_ACL NOTE: this is linux only feature. Windows will ignore these ACLs. If you don't know what Access Control Lists are, say N. + +config NTFS_FS + tristate "NTFS file system support" + select NTFS3_FS + select BUFFER_HEAD + select NLS + help + This config option is here only for backward compatibility. NTFS + filesystem is now handled by the NTFS3 driver. diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index 9df7c20d066f6..8d2e51bae2cbc 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -1798,6 +1798,35 @@ static struct file_system_type ntfs_fs_type = { .kill_sb = ntfs3_kill_sb, .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP, }; + +#if IS_ENABLED(CONFIG_NTFS_FS) +static struct file_system_type ntfs_legacy_fs_type = { + .owner = THIS_MODULE, + .name = "ntfs", + .init_fs_context = ntfs_init_fs_context, + .parameters = ntfs_fs_parameters, + .kill_sb = ntfs3_kill_sb, + .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP, +}; +MODULE_ALIAS_FS("ntfs"); + +static inline void register_as_ntfs_legacy(void) +{ + int err = register_filesystem(&ntfs_legacy_fs_type); + if (err) + pr_warn("ntfs3: Failed to register legacy ntfs filesystem driver: %d\n", err); +} + +static inline void unregister_as_ntfs_legacy(void) +{ + unregister_filesystem(&ntfs_legacy_fs_type); +} +#else +static inline void register_as_ntfs_legacy(void) {} +static inline void unregister_as_ntfs_legacy(void) {} +#endif + + // clang-format on static int __init init_ntfs_fs(void) @@ -1832,6 +1861,7 @@ static int __init init_ntfs_fs(void) goto out1; } + register_as_ntfs_legacy(); err = register_filesystem(&ntfs_fs_type); if (err) goto out; @@ -1849,6 +1879,7 @@ static void __exit exit_ntfs_fs(void) rcu_barrier(); kmem_cache_destroy(ntfs_inode_cachep); unregister_filesystem(&ntfs_fs_type); + unregister_as_ntfs_legacy(); ntfs3_exit_bitmap(); #ifdef CONFIG_PROC_FS From 7430764f5a85d30314aeef2d5438dff1fb0b1d68 Mon Sep 17 00:00:00 2001 From: Devyn Liu Date: Tue, 16 Apr 2024 09:58:39 +0800 Subject: [PATCH 163/606] spi: hisi-kunpeng: Delete the dump interface of data registers in debugfs Due to the reading of FIFO during the dump of data registers in debugfs, if SPI transmission is in progress, it will be affected and may result in transmission failure. Therefore, the dump interface of data registers in debugfs is removed. Fixes: 2b2142f247eb ("spi: hisi-kunpeng: Add debugfs support") Signed-off-by: Devyn Liu Reviewed-by: Jay Fang Link: https://lore.kernel.org/r/20240416015839.3323398-1-liudingyuan@huawei.com Signed-off-by: Mark Brown --- drivers/spi/spi-hisi-kunpeng.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/spi/spi-hisi-kunpeng.c b/drivers/spi/spi-hisi-kunpeng.c index 35ef5e8e2ffd2..77e9738e42f60 100644 --- a/drivers/spi/spi-hisi-kunpeng.c +++ b/drivers/spi/spi-hisi-kunpeng.c @@ -151,8 +151,6 @@ static const struct debugfs_reg32 hisi_spi_regs[] = { HISI_SPI_DBGFS_REG("ENR", HISI_SPI_ENR), HISI_SPI_DBGFS_REG("FIFOC", HISI_SPI_FIFOC), HISI_SPI_DBGFS_REG("IMR", HISI_SPI_IMR), - HISI_SPI_DBGFS_REG("DIN", HISI_SPI_DIN), - HISI_SPI_DBGFS_REG("DOUT", HISI_SPI_DOUT), HISI_SPI_DBGFS_REG("SR", HISI_SPI_SR), HISI_SPI_DBGFS_REG("RISR", HISI_SPI_RISR), HISI_SPI_DBGFS_REG("ISR", HISI_SPI_ISR), From a0cedbcc8852d6c77b00634b81e41f17f29d9404 Mon Sep 17 00:00:00 2001 From: Zeng Heng Date: Mon, 15 Apr 2024 18:53:28 +0800 Subject: [PATCH 164/606] pinctrl: devicetree: fix refcount leak in pinctrl_dt_to_map() If we fail to allocate propname buffer, we need to drop the reference count we just took. Because the pinctrl_dt_free_maps() includes the droping operation, here we call it directly. Fixes: 91d5c5060ee2 ("pinctrl: devicetree: fix null pointer dereferencing in pinctrl_dt_to_map") Suggested-by: Dan Carpenter Signed-off-by: Zeng Heng Reviewed-by: Dan Carpenter Message-ID: <20240415105328.3651441-1-zengheng4@huawei.com> Signed-off-by: Linus Walleij --- drivers/pinctrl/devicetree.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/devicetree.c b/drivers/pinctrl/devicetree.c index df1efc2e52025..6a94ecd6a8dea 100644 --- a/drivers/pinctrl/devicetree.c +++ b/drivers/pinctrl/devicetree.c @@ -220,14 +220,16 @@ int pinctrl_dt_to_map(struct pinctrl *p, struct pinctrl_dev *pctldev) for (state = 0; ; state++) { /* Retrieve the pinctrl-* property */ propname = kasprintf(GFP_KERNEL, "pinctrl-%d", state); - if (!propname) - return -ENOMEM; + if (!propname) { + ret = -ENOMEM; + goto err; + } prop = of_find_property(np, propname, &size); kfree(propname); if (!prop) { if (state == 0) { - of_node_put(np); - return -ENODEV; + ret = -ENODEV; + goto err; } break; } From dc7d7447b56bcc9cf79a9c22e4edad200a298e4c Mon Sep 17 00:00:00 2001 From: Xu Kuohai Date: Tue, 16 Apr 2024 14:42:07 +0800 Subject: [PATCH 165/606] bpf, arm64: Fix incorrect runtime stats When __bpf_prog_enter() returns zero, the arm64 register x20 that stores prog start time is not assigned to zero, causing incorrect runtime stats. To fix it, assign the return value of bpf_prog_enter() to x20 register immediately upon its return. Fixes: efc9909fdce0 ("bpf, arm64: Add bpf trampoline for arm64") Reported-by: Ivan Babrou Signed-off-by: Xu Kuohai Signed-off-by: Daniel Borkmann Tested-by: Ivan Babrou Acked-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20240416064208.2919073-2-xukuohai@huaweicloud.com --- arch/arm64/net/bpf_jit_comp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 122021f9bdfc8..13eac43c632df 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -1844,15 +1844,15 @@ static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l, emit_call(enter_prog, ctx); + /* save return value to callee saved register x20 */ + emit(A64_MOV(1, A64_R(20), A64_R(0)), ctx); + /* if (__bpf_prog_enter(prog) == 0) * goto skip_exec_of_prog; */ branch = ctx->image + ctx->idx; emit(A64_NOP, ctx); - /* save return value to callee saved register x20 */ - emit(A64_MOV(1, A64_R(20), A64_R(0)), ctx); - emit(A64_ADD_I(1, A64_R(0), A64_SP, args_off), ctx); if (!p->jited) emit_addr_mov_i64(A64_R(1), (const u64)p->insnsi, ctx); From 10541b374aa05c8118cc6a529a615882e53f261b Mon Sep 17 00:00:00 2001 From: Xu Kuohai Date: Tue, 16 Apr 2024 14:42:08 +0800 Subject: [PATCH 166/606] riscv, bpf: Fix incorrect runtime stats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When __bpf_prog_enter() returns zero, the s1 register is not set to zero, resulting in incorrect runtime stats. Fix it by setting s1 immediately upon the return of __bpf_prog_enter(). Fixes: 49b5e77ae3e2 ("riscv, bpf: Add bpf trampoline support for RV64") Signed-off-by: Xu Kuohai Signed-off-by: Daniel Borkmann Reviewed-by: Pu Lehui Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/20240416064208.2919073-3-xukuohai@huaweicloud.com --- arch/riscv/net/bpf_jit_comp64.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index 1adf2f39ce59c..ec9d692838fca 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -722,6 +722,9 @@ static int invoke_bpf_prog(struct bpf_tramp_link *l, int args_off, int retval_of if (ret) return ret; + /* store prog start time */ + emit_mv(RV_REG_S1, RV_REG_A0, ctx); + /* if (__bpf_prog_enter(prog) == 0) * goto skip_exec_of_prog; */ @@ -729,9 +732,6 @@ static int invoke_bpf_prog(struct bpf_tramp_link *l, int args_off, int retval_of /* nop reserved for conditional jump */ emit(rv_nop(), ctx); - /* store prog start time */ - emit_mv(RV_REG_S1, RV_REG_A0, ctx); - /* arg1: &args_off */ emit_addi(RV_REG_A0, RV_REG_FP, -args_off, ctx); if (!p->jited) From feafe59c897500e11becd238a30be1c33eb188a2 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Thu, 11 Apr 2024 19:55:15 +0300 Subject: [PATCH 167/606] wifi: ath11k: use RCU when accessing struct inet6_dev::ac_list Commit c3718936ec47 ("ipv6: anycast: complete RCU handling of struct ifacaddr6") converted struct inet6_dev::ac_list to use RCU but missed that ath11k also accesses this list. Now sparse warns: drivers/net/wireless/ath/ath11k/mac.c:9145:21: warning: incorrect type in assignment (different address spaces) drivers/net/wireless/ath/ath11k/mac.c:9145:21: expected struct ifacaddr6 *ifaca6 drivers/net/wireless/ath/ath11k/mac.c:9145:21: got struct ifacaddr6 [noderef] __rcu *ac_list drivers/net/wireless/ath/ath11k/mac.c:9145:53: warning: incorrect type in assignment (different address spaces) drivers/net/wireless/ath/ath11k/mac.c:9145:53: expected struct ifacaddr6 *ifaca6 drivers/net/wireless/ath/ath11k/mac.c:9145:53: got struct ifacaddr6 [noderef] __rcu *aca_next Fix it by using rtnl_dereference(). Also add a note that read_lock_bh() calls rcu_read_lock() which I was not aware of. Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3.6510.37 Fixes: c3718936ec47 ("ipv6: anycast: complete RCU handling of struct ifacaddr6") Signed-off-by: Kalle Valo Acked-by: Jeff Johnson Signed-off-by: Kalle Valo Link: https://msgid.link/20240411165516.4070649-2-kvalo@kernel.org --- drivers/net/wireless/ath/ath11k/mac.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index a6a37d67a50ad..9f4bf41a3d41e 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -9020,6 +9020,7 @@ static void ath11k_mac_op_ipv6_changed(struct ieee80211_hw *hw, offload = &arvif->arp_ns_offload; count = 0; + /* Note: read_lock_bh() calls rcu_read_lock() */ read_lock_bh(&idev->lock); memset(offload->ipv6_addr, 0, sizeof(offload->ipv6_addr)); @@ -9050,7 +9051,8 @@ static void ath11k_mac_op_ipv6_changed(struct ieee80211_hw *hw, } /* get anycast address */ - for (ifaca6 = idev->ac_list; ifaca6; ifaca6 = ifaca6->aca_next) { + for (ifaca6 = rcu_dereference(idev->ac_list); ifaca6; + ifaca6 = rcu_dereference(ifaca6->aca_next)) { if (count >= ATH11K_IPV6_MAX_COUNT) goto generate; From 48ef0ba12e6b77a1ce5d09c580c38855b090ae7c Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 16 Apr 2024 08:56:33 +0800 Subject: [PATCH 168/606] dm: restore synchronous close of device mapper block device 'dmsetup remove' and 'dmsetup remove_all' require synchronous bdev release. Otherwise dm_lock_for_deletion() may return -EBUSY if the open count is > 0, because the open count is dropped in dm_blk_close() which occurs after fput() completes. So if dm_blk_close() is delayed because of asynchronous fput(), this device mapper device is skipped during remove, which is a regression. Fix the issue by using __fput_sync(). Also, DM device removal has long supported being made asynchronous by setting the DMF_DEFERRED_REMOVE flag on the DM device. So leverage using async fput() in close_table_device() if DMF_DEFERRED_REMOVE flag is set. Reported-by: Zhong Changhui Fixes: a28d893eb327 ("md: port block device access to file") Suggested-by: Christian Brauner Signed-off-by: Ming Lei [snitzer: editted commit header, use fput() if DMF_DEFERRED_REMOVE set] Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 56aa2a8b9d715..7d0746b37c8ec 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -765,7 +765,7 @@ static struct table_device *open_table_device(struct mapped_device *md, return td; out_blkdev_put: - fput(bdev_file); + __fput_sync(bdev_file); out_free_td: kfree(td); return ERR_PTR(r); @@ -778,7 +778,13 @@ static void close_table_device(struct table_device *td, struct mapped_device *md { if (md->disk->slave_dir) bd_unlink_disk_holder(td->dm_dev.bdev, md->disk); - fput(td->dm_dev.bdev_file); + + /* Leverage async fput() if DMF_DEFERRED_REMOVE set */ + if (unlikely(test_bit(DMF_DEFERRED_REMOVE, &md->flags))) + fput(td->dm_dev.bdev_file); + else + __fput_sync(td->dm_dev.bdev_file); + put_dax(td->dm_dev.dax_dev); list_del(&td->list); kfree(td); From 61231eb8113ce47991f35024f9c20810b37996bf Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Fri, 29 Mar 2024 10:36:50 +0000 Subject: [PATCH 169/606] ARC: [plat-hsdk]: Remove misplaced interrupt-cells property "gmac" node stands for just an ordinary Ethernet controller, which is by no means a provider of interrupts, i.e. it doesn't serve as an interrupt controller, thus "#interrupt-cells" property doesn't belong to it and so we remove it. Fixes: ------------>8------------ DTC arch/arc/boot/dts/hsdk.dtb arch/arc/boot/dts/hsdk.dts:207.23-235.5: Warning (interrupt_provider): /soc/ethernet@8000: '#interrupt-cells' found, but node is not an interrupt provider arch/arc/boot/dts/hsdk.dtb: Warning (interrupt_map): Failed prerequisite 'interrupt_provider' ------------>8------------ Reported-by: Vineet Gupta Signed-off-by: Alexey Brodkin Signed-off-by: Vineet Gupta --- arch/arc/boot/dts/hsdk.dts | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts index 6691f42550778..41b980df862b1 100644 --- a/arch/arc/boot/dts/hsdk.dts +++ b/arch/arc/boot/dts/hsdk.dts @@ -205,7 +205,6 @@ }; gmac: ethernet@8000 { - #interrupt-cells = <1>; compatible = "snps,dwmac"; reg = <0x8000 0x2000>; interrupts = <10>; From 7044dcff8301b29269016ebd17df27c4736140d2 Mon Sep 17 00:00:00 2001 From: Benno Lossin Date: Mon, 1 Apr 2024 18:52:50 +0000 Subject: [PATCH 170/606] rust: macros: fix soundness issue in `module!` macro MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `module!` macro creates glue code that are called by C to initialize the Rust modules using the `Module::init` function. Part of this glue code are the local functions `__init` and `__exit` that are used to initialize/destroy the Rust module. These functions are safe and also visible to the Rust mod in which the `module!` macro is invoked. This means that they can be called by other safe Rust code. But since they contain `unsafe` blocks that rely on only being called at the right time, this is a soundness issue. Wrap these generated functions inside of two private modules, this guarantees that the public functions cannot be called from the outside. Make the safe functions `unsafe` and add SAFETY comments. Cc: stable@vger.kernel.org Reported-by: Björn Roy Baron Closes: https://github.com/Rust-for-Linux/linux/issues/629 Fixes: 1fbde52bde73 ("rust: add `macros` crate") Signed-off-by: Benno Lossin Reviewed-by: Wedson Almeida Filho Link: https://lore.kernel.org/r/20240401185222.12015-1-benno.lossin@proton.me [ Moved `THIS_MODULE` out of the private-in-private modules since it should remain public, as Dirk Behme noticed [1]. Capitalized comments, avoided newline in non-list SAFETY comments and reworded to add Reported-by and newline. ] Link: https://rust-for-linux.zulipchat.com/#narrow/stream/291565-Help/topic/x/near/433512583 [1] Signed-off-by: Miguel Ojeda --- rust/macros/module.rs | 190 +++++++++++++++++++++++++----------------- 1 file changed, 115 insertions(+), 75 deletions(-) diff --git a/rust/macros/module.rs b/rust/macros/module.rs index 27979e582e4b9..acd0393b50957 100644 --- a/rust/macros/module.rs +++ b/rust/macros/module.rs @@ -199,17 +199,6 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream { /// Used by the printing macros, e.g. [`info!`]. const __LOG_PREFIX: &[u8] = b\"{name}\\0\"; - /// The \"Rust loadable module\" mark. - // - // This may be best done another way later on, e.g. as a new modinfo - // key or a new section. For the moment, keep it simple. - #[cfg(MODULE)] - #[doc(hidden)] - #[used] - static __IS_RUST_MODULE: () = (); - - static mut __MOD: Option<{type_}> = None; - // SAFETY: `__this_module` is constructed by the kernel at load time and will not be // freed until the module is unloaded. #[cfg(MODULE)] @@ -221,81 +210,132 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream { kernel::ThisModule::from_ptr(core::ptr::null_mut()) }}; - // Loadable modules need to export the `{{init,cleanup}}_module` identifiers. - /// # Safety - /// - /// This function must not be called after module initialization, because it may be - /// freed after that completes. - #[cfg(MODULE)] - #[doc(hidden)] - #[no_mangle] - #[link_section = \".init.text\"] - pub unsafe extern \"C\" fn init_module() -> core::ffi::c_int {{ - __init() - }} - - #[cfg(MODULE)] - #[doc(hidden)] - #[no_mangle] - pub extern \"C\" fn cleanup_module() {{ - __exit() - }} + // Double nested modules, since then nobody can access the public items inside. + mod __module_init {{ + mod __module_init {{ + use super::super::{type_}; + + /// The \"Rust loadable module\" mark. + // + // This may be best done another way later on, e.g. as a new modinfo + // key or a new section. For the moment, keep it simple. + #[cfg(MODULE)] + #[doc(hidden)] + #[used] + static __IS_RUST_MODULE: () = (); + + static mut __MOD: Option<{type_}> = None; + + // Loadable modules need to export the `{{init,cleanup}}_module` identifiers. + /// # Safety + /// + /// This function must not be called after module initialization, because it may be + /// freed after that completes. + #[cfg(MODULE)] + #[doc(hidden)] + #[no_mangle] + #[link_section = \".init.text\"] + pub unsafe extern \"C\" fn init_module() -> core::ffi::c_int {{ + // SAFETY: This function is inaccessible to the outside due to the double + // module wrapping it. It is called exactly once by the C side via its + // unique name. + unsafe {{ __init() }} + }} - // Built-in modules are initialized through an initcall pointer - // and the identifiers need to be unique. - #[cfg(not(MODULE))] - #[cfg(not(CONFIG_HAVE_ARCH_PREL32_RELOCATIONS))] - #[doc(hidden)] - #[link_section = \"{initcall_section}\"] - #[used] - pub static __{name}_initcall: extern \"C\" fn() -> core::ffi::c_int = __{name}_init; + #[cfg(MODULE)] + #[doc(hidden)] + #[no_mangle] + pub extern \"C\" fn cleanup_module() {{ + // SAFETY: + // - This function is inaccessible to the outside due to the double + // module wrapping it. It is called exactly once by the C side via its + // unique name, + // - furthermore it is only called after `init_module` has returned `0` + // (which delegates to `__init`). + unsafe {{ __exit() }} + }} - #[cfg(not(MODULE))] - #[cfg(CONFIG_HAVE_ARCH_PREL32_RELOCATIONS)] - core::arch::global_asm!( - r#\".section \"{initcall_section}\", \"a\" - __{name}_initcall: - .long __{name}_init - . - .previous - \"# - ); + // Built-in modules are initialized through an initcall pointer + // and the identifiers need to be unique. + #[cfg(not(MODULE))] + #[cfg(not(CONFIG_HAVE_ARCH_PREL32_RELOCATIONS))] + #[doc(hidden)] + #[link_section = \"{initcall_section}\"] + #[used] + pub static __{name}_initcall: extern \"C\" fn() -> core::ffi::c_int = __{name}_init; + + #[cfg(not(MODULE))] + #[cfg(CONFIG_HAVE_ARCH_PREL32_RELOCATIONS)] + core::arch::global_asm!( + r#\".section \"{initcall_section}\", \"a\" + __{name}_initcall: + .long __{name}_init - . + .previous + \"# + ); + + #[cfg(not(MODULE))] + #[doc(hidden)] + #[no_mangle] + pub extern \"C\" fn __{name}_init() -> core::ffi::c_int {{ + // SAFETY: This function is inaccessible to the outside due to the double + // module wrapping it. It is called exactly once by the C side via its + // placement above in the initcall section. + unsafe {{ __init() }} + }} - #[cfg(not(MODULE))] - #[doc(hidden)] - #[no_mangle] - pub extern \"C\" fn __{name}_init() -> core::ffi::c_int {{ - __init() - }} + #[cfg(not(MODULE))] + #[doc(hidden)] + #[no_mangle] + pub extern \"C\" fn __{name}_exit() {{ + // SAFETY: + // - This function is inaccessible to the outside due to the double + // module wrapping it. It is called exactly once by the C side via its + // unique name, + // - furthermore it is only called after `__{name}_init` has returned `0` + // (which delegates to `__init`). + unsafe {{ __exit() }} + }} - #[cfg(not(MODULE))] - #[doc(hidden)] - #[no_mangle] - pub extern \"C\" fn __{name}_exit() {{ - __exit() - }} + /// # Safety + /// + /// This function must only be called once. + unsafe fn __init() -> core::ffi::c_int {{ + match <{type_} as kernel::Module>::init(&super::super::THIS_MODULE) {{ + Ok(m) => {{ + // SAFETY: No data race, since `__MOD` can only be accessed by this + // module and there only `__init` and `__exit` access it. These + // functions are only called once and `__exit` cannot be called + // before or during `__init`. + unsafe {{ + __MOD = Some(m); + }} + return 0; + }} + Err(e) => {{ + return e.to_errno(); + }} + }} + }} - fn __init() -> core::ffi::c_int {{ - match <{type_} as kernel::Module>::init(&THIS_MODULE) {{ - Ok(m) => {{ + /// # Safety + /// + /// This function must + /// - only be called once, + /// - be called after `__init` has been called and returned `0`. + unsafe fn __exit() {{ + // SAFETY: No data race, since `__MOD` can only be accessed by this module + // and there only `__init` and `__exit` access it. These functions are only + // called once and `__init` was already called. unsafe {{ - __MOD = Some(m); + // Invokes `drop()` on `__MOD`, which should be used for cleanup. + __MOD = None; }} - return 0; - }} - Err(e) => {{ - return e.to_errno(); }} - }} - }} - fn __exit() {{ - unsafe {{ - // Invokes `drop()` on `__MOD`, which should be used for cleanup. - __MOD = None; + {modinfo} }} }} - - {modinfo} ", type_ = info.type_, name = info.name, From 8933cf4651e02853ca679be7b2d978dfcdcc5e0c Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Thu, 4 Apr 2024 15:17:02 +0100 Subject: [PATCH 171/606] rust: make mutually exclusive with CFI_CLANG On RISC-V and arm64, and presumably x86, if CFI_CLANG is enabled, loading a rust module will trigger a kernel panic. Support for sanitisers, including kcfi (CFI_CLANG), is in the works, but for now they're nightly-only options in rustc. Make RUST depend on !CFI_CLANG to prevent configuring a kernel without symmetrical support for kfi. [ Matthew Maurer writes [1]: This patch is fine by me - the last patch needed for KCFI to be functional in Rust just landed upstream last night, so we should revisit this (in the form of enabling it) once we move to `rustc-1.79.0` or later. Ramon de C Valle also gave feedback [2] on the status of KCFI for Rust and created a tracking issue [3] in upstream Rust. - Miguel ] Fixes: 2f7ab1267dc9 ("Kbuild: add Rust support") Cc: stable@vger.kernel.org Signed-off-by: Conor Dooley Acked-by: Nathan Chancellor Link: https://lore.kernel.org/rust-for-linux/CAGSQo024u1gHJgzsO38Xg3c4or+JupoPABQx_+0BLEpPg0cOEA@mail.gmail.com/ [1] Link: https://lore.kernel.org/rust-for-linux/CAOcBZOS2kPyH0Dm7Fuh4GC3=v7nZhyzBj_-dKu3PfAnrHZvaxg@mail.gmail.com/ [2] Link: https://github.com/rust-lang/rust/issues/123479 [3] Link: https://lore.kernel.org/r/20240404-providing-emporium-e652e359c711@spud [ Added feedback from the list, links, and used Cc for the tag. ] Signed-off-by: Miguel Ojeda --- init/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/init/Kconfig b/init/Kconfig index b9a336a3d7d8f..664bedb9a71fb 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1899,6 +1899,7 @@ config RUST bool "Rust support" depends on HAVE_RUST depends on RUST_IS_AVAILABLE + depends on !CFI_CLANG depends on !MODVERSIONS depends on !GCC_PLUGINS depends on !RANDSTRUCT From b3de7b433a323bb80303d77e69f1281bfab0a70b Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 15 Apr 2024 08:11:03 -0500 Subject: [PATCH 172/606] dt-bindings: eeprom: at24: Fix ST M24C64-D compatible schema The schema for the ST M24C64-D compatible string doesn't work. Validation fails as the 'd-wl' suffix is not added to the preceeding schema which defines the entries and vendors. The actual users are incorrect as well because the vendor is listed as Atmel whereas the part is made by ST. As this part doesn't appear to have multiple vendors, move it to its own entry. Fixes: 0997ff1fc143 ("dt-bindings: at24: add ST M24C64-D Additional Write lockable page") Fixes: c761068f484c ("dt-bindings: at24: add ST M24C32-D Additional Write lockable page") Signed-off-by: Rob Herring Reviewed-by: Marek Vasut Acked-by: Conor Dooley Signed-off-by: Bartosz Golaszewski --- Documentation/devicetree/bindings/eeprom/at24.yaml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/eeprom/at24.yaml b/Documentation/devicetree/bindings/eeprom/at24.yaml index 1812ef31d5f1e..3c36cd0510de8 100644 --- a/Documentation/devicetree/bindings/eeprom/at24.yaml +++ b/Documentation/devicetree/bindings/eeprom/at24.yaml @@ -68,14 +68,10 @@ properties: pattern: cs16$ - items: pattern: c32$ - - items: - pattern: c32d-wl$ - items: pattern: cs32$ - items: pattern: c64$ - - items: - pattern: c64d-wl$ - items: pattern: cs64$ - items: @@ -136,6 +132,7 @@ properties: - renesas,r1ex24128 - samsung,s524ad0xd1 - const: atmel,24c128 + - pattern: '^atmel,24c(32|64)d-wl$' # Actual vendor is st label: description: Descriptive name of the EEPROM. From 02bed83d59e37da30b745e30129511b1cc595c92 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 16 Apr 2024 17:55:02 -0400 Subject: [PATCH 173/606] bcachefs: Fix null ptr deref in twf from BCH_IOCTL_FSCK_OFFLINE We need to initialize the stdio redirects before they're used. Signed-off-by: Kent Overstreet --- fs/bcachefs/chardev.c | 4 +++- fs/bcachefs/thread_with_file.c | 15 +++++++++++++-- fs/bcachefs/thread_with_file.h | 3 +++ 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c index 72781aad6ba70..4d14f19f51850 100644 --- a/fs/bcachefs/chardev.c +++ b/fs/bcachefs/chardev.c @@ -232,13 +232,15 @@ static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_a /* We need request_key() to be called before we punt to kthread: */ opt_set(thr->opts, nostart, true); + bch2_thread_with_stdio_init(&thr->thr, &bch2_offline_fsck_ops); + thr->c = bch2_fs_open(devs.data, arg.nr_devs, thr->opts); if (!IS_ERR(thr->c) && thr->c->opts.errors == BCH_ON_ERROR_panic) thr->c->opts.errors = BCH_ON_ERROR_ro; - ret = bch2_run_thread_with_stdio(&thr->thr, &bch2_offline_fsck_ops); + ret = __bch2_run_thread_with_stdio(&thr->thr); out: darray_for_each(devs, i) kfree(*i); diff --git a/fs/bcachefs/thread_with_file.c b/fs/bcachefs/thread_with_file.c index 940db15d6a939..b1af7ac430f66 100644 --- a/fs/bcachefs/thread_with_file.c +++ b/fs/bcachefs/thread_with_file.c @@ -294,16 +294,27 @@ static int thread_with_stdio_fn(void *arg) return 0; } -int bch2_run_thread_with_stdio(struct thread_with_stdio *thr, - const struct thread_with_stdio_ops *ops) +void bch2_thread_with_stdio_init(struct thread_with_stdio *thr, + const struct thread_with_stdio_ops *ops) { stdio_buf_init(&thr->stdio.input); stdio_buf_init(&thr->stdio.output); thr->ops = ops; +} +int __bch2_run_thread_with_stdio(struct thread_with_stdio *thr) +{ return bch2_run_thread_with_file(&thr->thr, &thread_with_stdio_fops, thread_with_stdio_fn); } +int bch2_run_thread_with_stdio(struct thread_with_stdio *thr, + const struct thread_with_stdio_ops *ops) +{ + bch2_thread_with_stdio_init(thr, ops); + + return __bch2_run_thread_with_stdio(thr); +} + int bch2_run_thread_with_stdout(struct thread_with_stdio *thr, const struct thread_with_stdio_ops *ops) { diff --git a/fs/bcachefs/thread_with_file.h b/fs/bcachefs/thread_with_file.h index af54ea8f5b0ff..1d63d14d7dcae 100644 --- a/fs/bcachefs/thread_with_file.h +++ b/fs/bcachefs/thread_with_file.h @@ -63,6 +63,9 @@ struct thread_with_stdio { const struct thread_with_stdio_ops *ops; }; +void bch2_thread_with_stdio_init(struct thread_with_stdio *, + const struct thread_with_stdio_ops *); +int __bch2_run_thread_with_stdio(struct thread_with_stdio *); int bch2_run_thread_with_stdio(struct thread_with_stdio *, const struct thread_with_stdio_ops *); int bch2_run_thread_with_stdout(struct thread_with_stdio *, From 9fd5a48a1e3ff79ed54922668279ccb4d7190a62 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 16 Apr 2024 08:16:02 -0700 Subject: [PATCH 174/606] bcachefs: Fix format specifier in validate_bset_keys() When building for 32-bit platforms, for which size_t is 'unsigned int', there is a warning from a format string in validate_bset_keys(): fs/bcachefs/btree_io.c: In function 'validate_bset_keys': fs/bcachefs/btree_io.c:891:34: error: format '%lu' expects argument of type 'long unsigned int', but argument 12 has type 'unsigned int' [-Werror=format=] 891 | "bad k->u64s %u (min %u max %lu)", k->u64s, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fs/bcachefs/btree_io.c:603:32: note: in definition of macro 'btree_err' 603 | msg, ##__VA_ARGS__); \ | ^~~ fs/bcachefs/btree_io.c:887:21: note: in expansion of macro 'btree_err_on' 887 | if (btree_err_on(!bkeyp_u64s_valid(&b->format, k), | ^~~~~~~~~~~~ fs/bcachefs/btree_io.c:891:64: note: format string is defined here 891 | "bad k->u64s %u (min %u max %lu)", k->u64s, | ~~^ | | | long unsigned int | %u cc1: all warnings being treated as errors BKEY_U64s is size_t so the entire expression is promoted to size_t. Use the '%zu' specifier so that there is no warning regardless of the width of size_t. Fixes: 031ad9e7dbd1 ("bcachefs: Check for packed bkeys that are too big") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202404130747.wH6Dd23p-lkp@intel.com/ Closes: https://lore.kernel.org/oe-kbuild-all/202404131536.HdAMBOVc-lkp@intel.com/ Signed-off-by: Nathan Chancellor Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 9678b2375bedd..debb0edc3455a 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -888,7 +888,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, -BCH_ERR_btree_node_read_err_fixable, c, NULL, b, i, btree_node_bkey_bad_u64s, - "bad k->u64s %u (min %u max %lu)", k->u64s, + "bad k->u64s %u (min %u max %zu)", k->u64s, bkeyp_key_u64s(&b->format, k), U8_MAX - BKEY_U64s + bkeyp_key_u64s(&b->format, k))) goto drop_this_key; From fabb4d49854281027454b0fa305d33f6c9ec4b47 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 15 Apr 2024 22:54:10 -0400 Subject: [PATCH 175/606] bcachefs: node scan: ignore multiple nodes with same seq if interior Interior nodes are not really needed, when we have to scan - but if this pops up for leaf nodes we'll need a real heuristic. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_node_scan.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c index 866bd278439f8..c60794264da28 100644 --- a/fs/bcachefs/btree_node_scan.c +++ b/fs/bcachefs/btree_node_scan.c @@ -302,6 +302,8 @@ static int handle_overwrites(struct bch_fs *c, start->max_key = bpos_predecessor(n->min_key); start->range_updated = true; + } else if (n->level) { + n->overwritten = true; } else { struct printbuf buf = PRINTBUF; From 79055f50a65fe5eb58e9da1f79fb0a4f4bc82fff Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 15 Apr 2024 23:53:12 -0400 Subject: [PATCH 176/606] bcachefs: make sure to release last journal pin in replay This fixes a deadlock when journal replay has many keys to insert that were from fsck, not the journal. Signed-off-by: Kent Overstreet --- fs/bcachefs/recovery.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 0f328aba9760b..be5b476193270 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -249,7 +249,10 @@ int bch2_journal_replay(struct bch_fs *c) struct journal_key *k = *kp; - replay_now_at(j, k->journal_seq); + if (k->journal_seq) + replay_now_at(j, k->journal_seq); + else + replay_now_at(j, j->replay_journal_seq_end); ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc| From 8c75cdcdf869acabfdc7858827099dcde9f24e6c Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 15 Apr 2024 14:18:11 +0200 Subject: [PATCH 177/606] wifi: mac80211: split mesh fast tx cache into local/proxied/forwarded Depending on the origin of the packets (and their SA), 802.11 + mesh headers could be filled in differently. In order to properly deal with that, add a new field to the lookup key, indicating the type (local, proxied or forwarded). This can fix spurious packet drop issues that depend on the order in which nodes/hosts communicate with each other. Fixes: d5edb9ae8d56 ("wifi: mac80211: mesh fast xmit support") Signed-off-by: Felix Fietkau Link: https://msgid.link/20240415121811.13391-1-nbd@nbd.name [use sizeof_field() for key_len] Signed-off-by: Johannes Berg --- net/mac80211/mesh.c | 8 +++++++- net/mac80211/mesh.h | 36 +++++++++++++++++++++++++++++++++--- net/mac80211/mesh_pathtbl.c | 31 ++++++++++++++++++++++--------- net/mac80211/rx.c | 13 ++++++++++--- 4 files changed, 72 insertions(+), 16 deletions(-) diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 32475da98d739..cbc9b5e40cb35 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -747,6 +747,9 @@ bool ieee80211_mesh_xmit_fast(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, u32 ctrl_flags) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + struct ieee80211_mesh_fast_tx_key key = { + .type = MESH_FAST_TX_TYPE_LOCAL + }; struct ieee80211_mesh_fast_tx *entry; struct ieee80211s_hdr *meshhdr; u8 sa[ETH_ALEN] __aligned(2); @@ -782,7 +785,10 @@ bool ieee80211_mesh_xmit_fast(struct ieee80211_sub_if_data *sdata, return false; } - entry = mesh_fast_tx_get(sdata, skb->data); + ether_addr_copy(key.addr, skb->data); + if (!ether_addr_equal(skb->data + ETH_ALEN, sdata->vif.addr)) + key.type = MESH_FAST_TX_TYPE_PROXIED; + entry = mesh_fast_tx_get(sdata, &key); if (!entry) return false; diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index d913ce7ba72ef..3f9664e4e00c6 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -134,10 +134,39 @@ struct mesh_path { #define MESH_FAST_TX_CACHE_THRESHOLD_SIZE 384 #define MESH_FAST_TX_CACHE_TIMEOUT 8000 /* msecs */ +/** + * enum ieee80211_mesh_fast_tx_type - cached mesh fast tx entry type + * + * @MESH_FAST_TX_TYPE_LOCAL: tx from the local vif address as SA + * @MESH_FAST_TX_TYPE_PROXIED: local tx with a different SA (e.g. bridged) + * @MESH_FAST_TX_TYPE_FORWARDED: forwarded from a different mesh point + * @NUM_MESH_FAST_TX_TYPE: number of entry types + */ +enum ieee80211_mesh_fast_tx_type { + MESH_FAST_TX_TYPE_LOCAL, + MESH_FAST_TX_TYPE_PROXIED, + MESH_FAST_TX_TYPE_FORWARDED, + + /* must be last */ + NUM_MESH_FAST_TX_TYPE +}; + + +/** + * struct ieee80211_mesh_fast_tx_key - cached mesh fast tx entry key + * + * @addr: The Ethernet DA for this entry + * @type: cache entry type + */ +struct ieee80211_mesh_fast_tx_key { + u8 addr[ETH_ALEN] __aligned(2); + u16 type; +}; + /** * struct ieee80211_mesh_fast_tx - cached mesh fast tx entry * @rhash: rhashtable pointer - * @addr_key: The Ethernet DA which is the key for this entry + * @key: the lookup key for this cache entry * @fast_tx: base fast_tx data * @hdr: cached mesh and rfc1042 headers * @hdrlen: length of mesh + rfc1042 @@ -148,7 +177,7 @@ struct mesh_path { */ struct ieee80211_mesh_fast_tx { struct rhash_head rhash; - u8 addr_key[ETH_ALEN] __aligned(2); + struct ieee80211_mesh_fast_tx_key key; struct ieee80211_fast_tx fast_tx; u8 hdr[sizeof(struct ieee80211s_hdr) + sizeof(rfc1042_header)]; @@ -334,7 +363,8 @@ void mesh_path_tx_root_frame(struct ieee80211_sub_if_data *sdata); bool mesh_action_is_path_sel(struct ieee80211_mgmt *mgmt); struct ieee80211_mesh_fast_tx * -mesh_fast_tx_get(struct ieee80211_sub_if_data *sdata, const u8 *addr); +mesh_fast_tx_get(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mesh_fast_tx_key *key); bool ieee80211_mesh_xmit_fast(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, u32 ctrl_flags); void mesh_fast_tx_cache(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 91b55d6a68b97..a6b62169f0848 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -37,8 +37,8 @@ static const struct rhashtable_params mesh_rht_params = { static const struct rhashtable_params fast_tx_rht_params = { .nelem_hint = 10, .automatic_shrinking = true, - .key_len = ETH_ALEN, - .key_offset = offsetof(struct ieee80211_mesh_fast_tx, addr_key), + .key_len = sizeof_field(struct ieee80211_mesh_fast_tx, key), + .key_offset = offsetof(struct ieee80211_mesh_fast_tx, key), .head_offset = offsetof(struct ieee80211_mesh_fast_tx, rhash), .hashfn = mesh_table_hash, }; @@ -431,20 +431,21 @@ static void mesh_fast_tx_entry_free(struct mesh_tx_cache *cache, } struct ieee80211_mesh_fast_tx * -mesh_fast_tx_get(struct ieee80211_sub_if_data *sdata, const u8 *addr) +mesh_fast_tx_get(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mesh_fast_tx_key *key) { struct ieee80211_mesh_fast_tx *entry; struct mesh_tx_cache *cache; cache = &sdata->u.mesh.tx_cache; - entry = rhashtable_lookup(&cache->rht, addr, fast_tx_rht_params); + entry = rhashtable_lookup(&cache->rht, key, fast_tx_rht_params); if (!entry) return NULL; if (!(entry->mpath->flags & MESH_PATH_ACTIVE) || mpath_expired(entry->mpath)) { spin_lock_bh(&cache->walk_lock); - entry = rhashtable_lookup(&cache->rht, addr, fast_tx_rht_params); + entry = rhashtable_lookup(&cache->rht, key, fast_tx_rht_params); if (entry) mesh_fast_tx_entry_free(cache, entry); spin_unlock_bh(&cache->walk_lock); @@ -489,18 +490,24 @@ void mesh_fast_tx_cache(struct ieee80211_sub_if_data *sdata, if (!sta) return; + build.key.type = MESH_FAST_TX_TYPE_LOCAL; if ((meshhdr->flags & MESH_FLAGS_AE) == MESH_FLAGS_AE_A5_A6) { /* This is required to keep the mppath alive */ mppath = mpp_path_lookup(sdata, meshhdr->eaddr1); if (!mppath) return; build.mppath = mppath; + if (!ether_addr_equal(meshhdr->eaddr2, sdata->vif.addr)) + build.key.type = MESH_FAST_TX_TYPE_PROXIED; } else if (ieee80211_has_a4(hdr->frame_control)) { mppath = mpath; } else { return; } + if (!ether_addr_equal(hdr->addr4, sdata->vif.addr)) + build.key.type = MESH_FAST_TX_TYPE_FORWARDED; + /* rate limit, in case fast xmit can't be enabled */ if (mppath->fast_tx_check == jiffies) return; @@ -547,7 +554,7 @@ void mesh_fast_tx_cache(struct ieee80211_sub_if_data *sdata, } } - memcpy(build.addr_key, mppath->dst, ETH_ALEN); + memcpy(build.key.addr, mppath->dst, ETH_ALEN); build.timestamp = jiffies; build.fast_tx.band = info->band; build.fast_tx.da_offs = offsetof(struct ieee80211_hdr, addr3); @@ -646,12 +653,18 @@ void mesh_fast_tx_flush_addr(struct ieee80211_sub_if_data *sdata, const u8 *addr) { struct mesh_tx_cache *cache = &sdata->u.mesh.tx_cache; + struct ieee80211_mesh_fast_tx_key key = {}; struct ieee80211_mesh_fast_tx *entry; + int i; + ether_addr_copy(key.addr, addr); spin_lock_bh(&cache->walk_lock); - entry = rhashtable_lookup_fast(&cache->rht, addr, fast_tx_rht_params); - if (entry) - mesh_fast_tx_entry_free(cache, entry); + for (i = 0; i < NUM_MESH_FAST_TX_TYPE; i++) { + key.type = i; + entry = rhashtable_lookup_fast(&cache->rht, &key, fast_tx_rht_params); + if (entry) + mesh_fast_tx_entry_free(cache, entry); + } spin_unlock_bh(&cache->walk_lock); } diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 685185dc04f97..6e24864f9a40b 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2763,7 +2763,10 @@ ieee80211_rx_mesh_fast_forward(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, int hdrlen) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - struct ieee80211_mesh_fast_tx *entry = NULL; + struct ieee80211_mesh_fast_tx_key key = { + .type = MESH_FAST_TX_TYPE_FORWARDED + }; + struct ieee80211_mesh_fast_tx *entry; struct ieee80211s_hdr *mesh_hdr; struct tid_ampdu_tx *tid_tx; struct sta_info *sta; @@ -2772,9 +2775,13 @@ ieee80211_rx_mesh_fast_forward(struct ieee80211_sub_if_data *sdata, mesh_hdr = (struct ieee80211s_hdr *)(skb->data + sizeof(eth)); if ((mesh_hdr->flags & MESH_FLAGS_AE) == MESH_FLAGS_AE_A5_A6) - entry = mesh_fast_tx_get(sdata, mesh_hdr->eaddr1); + ether_addr_copy(key.addr, mesh_hdr->eaddr1); else if (!(mesh_hdr->flags & MESH_FLAGS_AE)) - entry = mesh_fast_tx_get(sdata, skb->data); + ether_addr_copy(key.addr, skb->data); + else + return false; + + entry = mesh_fast_tx_get(sdata, &key); if (!entry) return false; From dbfff5bf9292714f02ace002fea8ce6599ea1145 Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Mon, 15 Apr 2024 11:54:43 +0300 Subject: [PATCH 178/606] wifi: iwlwifi: mvm: remove old PASN station when adding a new one If a PASN station is added, and an old PASN station already exists for the same mac address, remove the old station before adding the new one. Keeping the old station caueses old security context to be used in measurements. Fixes: 0739a7d70e00 ("iwlwifi: mvm: initiator: add option for adding a PASN responder") Signed-off-by: Avraham Stern Signed-off-by: Miri Korenblit Link: https://msgid.link/20240415114847.ef3544a416f2.I4e8c7c8ca22737f4f908ae5cd4fc0b920c703dd3@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c index 4863a3c746406..d84d7e955bb02 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c @@ -53,6 +53,8 @@ int iwl_mvm_ftm_add_pasn_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif, if (!pasn) return -ENOBUFS; + iwl_mvm_ftm_remove_pasn_sta(mvm, addr); + pasn->cipher = iwl_mvm_cipher_to_location_cipher(cipher); switch (pasn->cipher) { From bada85a3f584763deadd201147778c3e791d279c Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Mon, 15 Apr 2024 11:54:44 +0300 Subject: [PATCH 179/606] wifi: iwlwifi: mvm: return uid from iwl_mvm_build_scan_cmd This function is supposed to return a uid on success, and an errno in failure. But it currently returns the return value of the specific cmd version handler, which in turn returns 0 on success and errno otherwise. This means that on success, iwl_mvm_build_scan_cmd will return 0 regardless if the actual uid. Fix this by returning the uid if the handler succeeded. Fixes: 687db6ff5b70 ("iwlwifi: scan: make new scan req versioning flow") Signed-off-by: Miri Korenblit Reviewed-by: Ilan Peer Link: https://msgid.link/20240415114847.5e2d602b3190.I4c4931021be74a67a869384c8f8ee7463e0c7857@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/scan.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c index f3e3986b4c72f..11559563ae381 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c @@ -2813,7 +2813,8 @@ static int iwl_mvm_build_scan_cmd(struct iwl_mvm *mvm, if (ver_handler->version != scan_ver) continue; - return ver_handler->handler(mvm, vif, params, type, uid); + err = ver_handler->handler(mvm, vif, params, type, uid); + return err ? : uid; } err = iwl_mvm_scan_umac(mvm, vif, params, type, uid); From 9a039db9273b44427b3daca88173e57596545ec0 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 17 Apr 2024 10:58:04 +0300 Subject: [PATCH 180/606] ASoC: SOF: Core: Handle error returned by sof_select_ipc_and_paths The patch which fixed the missing remove_late() calls missed a case when sof_select_ipc_and_paths() could return with error and in this case sof_init_environment() would just return with 0. Do not ignore the error code returned by sof_select_ipc_and_paths(). Fixes: 90f8917e7a15 ("ASoC: SOF: Core: Add remove_late() to sof_init_environment failure path") Signed-off-by: Peter Ujfalusi Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20240417075804.10829-1-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/soc/sof/core.c b/sound/soc/sof/core.c index cc84d4c81be9d..238bda5f6b76f 100644 --- a/sound/soc/sof/core.c +++ b/sound/soc/sof/core.c @@ -350,7 +350,9 @@ static int sof_init_environment(struct snd_sof_dev *sdev) } ret = sof_select_ipc_and_paths(sdev); - if (!ret && plat_data->ipc_type != base_profile->ipc_type) { + if (ret) { + goto err_machine_check; + } else if (plat_data->ipc_type != base_profile->ipc_type) { /* IPC type changed, re-initialize the ops */ sof_ops_free(sdev); From 93b36e1d3748c352a70c69aa378715e6572e51d1 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 12 Apr 2024 15:44:04 -0500 Subject: [PATCH 181/606] arm64: dts: rockchip: Fix USB interface compatible string on kobol-helios64 The correct compatible string for a USB interface node begins with "usbif", not "usb". Fix the Rockchip RK3399 based Kobol Helios64 board. Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20240412204405.3703638-1-robh@kernel.org Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts b/arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts index dfb2a0bdea5b7..9586bb12a5d8f 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts @@ -611,7 +611,7 @@ #size-cells = <0>; interface@0 { /* interface 0 of configuration 1 */ - compatible = "usbbda,8156.config1.0"; + compatible = "usbifbda,8156.config1.0"; reg = <0 1>; }; }; From fa845c73497f5e9d2f6f1cf48c3aad05c2fdacb8 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 16 Apr 2024 19:16:45 -0400 Subject: [PATCH 182/606] bcachefs: Fix bch2_dev_btree_bitmap_marked_sectors() shift Fixes: 27c15ed297cb bcachefs: bch_member.btree_allocated_bitmap Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-members.c | 4 ++-- fs/bcachefs/sb-members.h | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c index 522a969345e52..5b8e621ac5eb5 100644 --- a/fs/bcachefs/sb-members.c +++ b/fs/bcachefs/sb-members.c @@ -463,8 +463,8 @@ static void __bch2_dev_btree_bitmap_mark(struct bch_sb_field_members_v2 *mi, uns m->btree_bitmap_shift += resize; } - for (unsigned bit = sectors >> m->btree_bitmap_shift; - bit << m->btree_bitmap_shift < end; + for (unsigned bit = start >> m->btree_bitmap_shift; + (u64) bit << m->btree_bitmap_shift < end; bit++) bitmap |= BIT_ULL(bit); diff --git a/fs/bcachefs/sb-members.h b/fs/bcachefs/sb-members.h index b27c3e4467cf2..5efa64eca5f85 100644 --- a/fs/bcachefs/sb-members.h +++ b/fs/bcachefs/sb-members.h @@ -235,11 +235,11 @@ static inline bool bch2_dev_btree_bitmap_marked_sectors(struct bch_dev *ca, u64 { u64 end = start + sectors; - if (end > 64 << ca->mi.btree_bitmap_shift) + if (end > 64ULL << ca->mi.btree_bitmap_shift) return false; - for (unsigned bit = sectors >> ca->mi.btree_bitmap_shift; - bit << ca->mi.btree_bitmap_shift < end; + for (unsigned bit = start >> ca->mi.btree_bitmap_shift; + (u64) bit << ca->mi.btree_bitmap_shift < end; bit++) if (!(ca->mi.btree_allocated_bitmap & BIT_ULL(bit))) return false; From 605109ff5e43addefdf92d1cfa2a693114430024 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 17 Apr 2024 02:04:23 -0400 Subject: [PATCH 183/606] bcachefs: KEY_TYPE_error is allowed for reflink KEY_TYPE_error is left behind when we have to delete all pointers in an extent in fsck; it allows errors to be correctly returned by reads later. Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_format.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 085987435a5ea..f7fbfccd2b1e4 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -1504,7 +1504,8 @@ enum btree_id_flags { BIT_ULL(KEY_TYPE_stripe)) \ x(reflink, 7, BTREE_ID_EXTENTS|BTREE_ID_DATA, \ BIT_ULL(KEY_TYPE_reflink_v)| \ - BIT_ULL(KEY_TYPE_indirect_inline_data)) \ + BIT_ULL(KEY_TYPE_indirect_inline_data)| \ + BIT_ULL(KEY_TYPE_error)) \ x(subvolumes, 8, 0, \ BIT_ULL(KEY_TYPE_subvolume)) \ x(snapshots, 9, 0, \ From 719aec84b106ba3bd3639eddb2be46c510ef683a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 17 Apr 2024 02:17:21 -0400 Subject: [PATCH 184/606] bcachefs: fix leak in bch2_gc_write_reflink_key Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_gc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index ecbd9598f69fd..791470b0c6545 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -1587,7 +1587,7 @@ static int bch2_gc_write_reflink_key(struct btree_trans *trans, struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k); ret = PTR_ERR_OR_ZERO(new); if (ret) - return ret; + goto out; if (!r->refcount) new->k.type = KEY_TYPE_deleted; @@ -1595,6 +1595,7 @@ static int bch2_gc_write_reflink_key(struct btree_trans *trans, *bkey_refcount(bkey_i_to_s(new)) = cpu_to_le64(r->refcount); ret = bch2_trans_update(trans, iter, new, 0); } +out: fsck_err: printbuf_exit(&buf); return ret; From 0389c09b2fb702ca7924ddf550ce0c8af708b8be Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 17 Apr 2024 17:27:43 -0400 Subject: [PATCH 185/606] bcachefs: Fix bio alloc in check_extent_checksum() if the buffer is virtually mapped it won't be a single bvec Signed-off-by: Kent Overstreet --- fs/bcachefs/backpointers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index fadb1078903d2..a200442010025 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -470,7 +470,7 @@ static int check_extent_checksum(struct btree_trans *trans, goto err; } - bio = bio_alloc(ca->disk_sb.bdev, 1, REQ_OP_READ, GFP_KERNEL); + bio = bio_alloc(ca->disk_sb.bdev, buf_pages(data_buf, bytes), REQ_OP_READ, GFP_KERNEL); bio->bi_iter.bi_sector = p.ptr.offset; bch2_bio_map(bio, data_buf, bytes); ret = submit_bio_wait(bio); From e5a78fdec0114266d3c47df413d2d7955807fad9 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 24 Jan 2023 20:44:38 +0100 Subject: [PATCH 186/606] btrfs: remove colon from messages with state The message format in syslog is usually made of two parts: prefix ":" message Various tools parse the prefix up to the first ":". When there's an additional status of a btrfs filesystem like [5.199782] BTRFS info (device nvme1n1p1: state M): use zstd compression, level 9 where 'M' is for remount, there's one more ":" that does not conform to the format. Remove it. Reviewed-by: Anand Jain Signed-off-by: David Sterba --- fs/btrfs/messages.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/messages.c b/fs/btrfs/messages.c index c96dd66fd0f72..210d9c82e2ae0 100644 --- a/fs/btrfs/messages.c +++ b/fs/btrfs/messages.c @@ -7,7 +7,7 @@ #ifdef CONFIG_PRINTK -#define STATE_STRING_PREFACE ": state " +#define STATE_STRING_PREFACE " state " #define STATE_STRING_BUF_LEN (sizeof(STATE_STRING_PREFACE) + BTRFS_FS_STATE_COUNT + 1) /* From 7192833c4e55b26e8f15ef58577867a1bc808036 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 9 Apr 2024 23:18:52 +0900 Subject: [PATCH 187/606] btrfs: scrub: run relocation repair when/only needed When btrfs scrub finds an error, it reads mirrors to find correct data. If all the errors are fixed, sctx->error_bitmap is cleared for the stripe range. However, in the zoned mode, it runs relocation to repair scrub errors when the bitmap is *not* empty, which is a flipped condition. Also, it runs the relocation even if the scrub is read-only. This was missed by a fix in commit 1f2030ff6e49 ("btrfs: scrub: respect the read-only flag during repair"). The repair is only necessary when there is a repaired sector and should be done on read-write scrub. So, tweak the condition for both regular and zoned case. Fixes: 54765392a1b9 ("btrfs: scrub: introduce helper to queue a stripe for scrub") Fixes: 1f2030ff6e49 ("btrfs: scrub: respect the read-only flag during repair") CC: stable@vger.kernel.org # 6.6+ Reviewed-by: Qu Wenruo Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/scrub.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index fa25004ab04e7..4b22cfe9a98cb 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1012,6 +1012,7 @@ static void scrub_stripe_read_repair_worker(struct work_struct *work) struct btrfs_fs_info *fs_info = sctx->fs_info; int num_copies = btrfs_num_copies(fs_info, stripe->bg->start, stripe->bg->length); + unsigned long repaired; int mirror; int i; @@ -1078,16 +1079,15 @@ static void scrub_stripe_read_repair_worker(struct work_struct *work) * Submit the repaired sectors. For zoned case, we cannot do repair * in-place, but queue the bg to be relocated. */ - if (btrfs_is_zoned(fs_info)) { - if (!bitmap_empty(&stripe->error_bitmap, stripe->nr_sectors)) + bitmap_andnot(&repaired, &stripe->init_error_bitmap, &stripe->error_bitmap, + stripe->nr_sectors); + if (!sctx->readonly && !bitmap_empty(&repaired, stripe->nr_sectors)) { + if (btrfs_is_zoned(fs_info)) { btrfs_repair_one_zone(fs_info, sctx->stripes[0].bg->start); - } else if (!sctx->readonly) { - unsigned long repaired; - - bitmap_andnot(&repaired, &stripe->init_error_bitmap, - &stripe->error_bitmap, stripe->nr_sectors); - scrub_write_sectors(sctx, stripe, repaired, false); - wait_scrub_stripe_io(stripe); + } else { + scrub_write_sectors(sctx, stripe, repaired, false); + wait_scrub_stripe_io(stripe); + } } scrub_stripe_report_errors(sctx, stripe); From 131a821a243f89be312ced9e62ccc37b2cf3846c Mon Sep 17 00:00:00 2001 From: Sweet Tea Dorminy Date: Sat, 6 Apr 2024 04:45:02 -0400 Subject: [PATCH 188/606] btrfs: fallback if compressed IO fails for ENOSPC In commit b4ccace878f4 ("btrfs: refactor submit_compressed_extents()"), if an async extent compressed but failed to find enough space, we changed from falling back to an uncompressed write to just failing the write altogether. The principle was that if there's not enough space to write the compressed version of the data, there can't possibly be enough space to write the larger, uncompressed version of the data. However, this isn't necessarily true: due to fragmentation, there could be enough discontiguous free blocks to write the uncompressed version, but not enough contiguous free blocks to write the smaller but unsplittable compressed version. This has occurred to an internal workload which relied on write()'s return value indicating there was space. While rare, it has happened a few times. Thus, in order to prevent early ENOSPC, re-add a fallback to uncompressed writing. Fixes: b4ccace878f4 ("btrfs: refactor submit_compressed_extents()") CC: stable@vger.kernel.org # 6.1+ Reviewed-by: Qu Wenruo Co-developed-by: Neal Gompa Signed-off-by: Neal Gompa Signed-off-by: Sweet Tea Dorminy Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c65fe5de40220..7fed887e700c4 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1145,13 +1145,13 @@ static void submit_one_async_extent(struct async_chunk *async_chunk, 0, *alloc_hint, &ins, 1, 1); if (ret) { /* - * Here we used to try again by going back to non-compressed - * path for ENOSPC. But we can't reserve space even for - * compressed size, how could it work for uncompressed size - * which requires larger size? So here we directly go error - * path. + * We can't reserve contiguous space for the compressed size. + * Unlikely, but it's possible that we could have enough + * non-contiguous space for the uncompressed size instead. So + * fall back to uncompressed. */ - goto out_free; + submit_uncompressed_range(inode, async_extent, locked_page); + goto done; } /* Here we're doing allocation and writeback of the compressed pages */ @@ -1203,7 +1203,6 @@ static void submit_one_async_extent(struct async_chunk *async_chunk, out_free_reserve: btrfs_dec_block_group_reservations(fs_info, ins.objectid); btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1); -out_free: mapping_set_error(inode->vfs_inode.i_mapping, -EIO); extent_clear_unlock_delalloc(inode, start, end, NULL, EXTENT_LOCKED | EXTENT_DELALLOC | From 6aff4c26ed677b1f464f721fbd3e7767f24a684d Mon Sep 17 00:00:00 2001 From: Patrik Jakobsson Date: Mon, 15 Apr 2024 13:27:31 +0200 Subject: [PATCH 189/606] drm/gma500: Remove lid code Due to a change in the order of initialization, the lid timer got started before proper setup was made. This resulted in a crash during boot. The lid switch is handled by gma500 through a timer that periodically polls the opregion for changes. These types of ACPI events shouldn't be handled by the graphics driver so let's get rid of the lid code. This fixes the crash during boot. Reported-by: Enrico Bartky Fixes: 8f1aaccb04b7 ("drm/gma500: Implement client-based fbdev emulation") Tested-by: Enrico Bartky Signed-off-by: Patrik Jakobsson Reviewed-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20240415112731.31841-1-patrik.r.jakobsson@gmail.com --- drivers/gpu/drm/gma500/Makefile | 1 - drivers/gpu/drm/gma500/psb_device.c | 5 +- drivers/gpu/drm/gma500/psb_drv.h | 9 ---- drivers/gpu/drm/gma500/psb_lid.c | 80 ----------------------------- 4 files changed, 1 insertion(+), 94 deletions(-) delete mode 100644 drivers/gpu/drm/gma500/psb_lid.c diff --git a/drivers/gpu/drm/gma500/Makefile b/drivers/gpu/drm/gma500/Makefile index 4f302cd5e1a6c..58fed80c7392a 100644 --- a/drivers/gpu/drm/gma500/Makefile +++ b/drivers/gpu/drm/gma500/Makefile @@ -34,7 +34,6 @@ gma500_gfx-y += \ psb_intel_lvds.o \ psb_intel_modes.o \ psb_intel_sdvo.o \ - psb_lid.o \ psb_irq.o gma500_gfx-$(CONFIG_ACPI) += opregion.o diff --git a/drivers/gpu/drm/gma500/psb_device.c b/drivers/gpu/drm/gma500/psb_device.c index dcfcd7b89d4a1..6dece8f0e380f 100644 --- a/drivers/gpu/drm/gma500/psb_device.c +++ b/drivers/gpu/drm/gma500/psb_device.c @@ -73,8 +73,7 @@ static int psb_backlight_setup(struct drm_device *dev) } psb_intel_lvds_set_brightness(dev, PSB_MAX_BRIGHTNESS); - /* This must occur after the backlight is properly initialised */ - psb_lid_timer_init(dev_priv); + return 0; } @@ -259,8 +258,6 @@ static int psb_chip_setup(struct drm_device *dev) static void psb_chip_teardown(struct drm_device *dev) { - struct drm_psb_private *dev_priv = to_drm_psb_private(dev); - psb_lid_timer_takedown(dev_priv); gma_intel_teardown_gmbus(dev); } diff --git a/drivers/gpu/drm/gma500/psb_drv.h b/drivers/gpu/drm/gma500/psb_drv.h index c5edfa4aa4ccd..83c17689c454f 100644 --- a/drivers/gpu/drm/gma500/psb_drv.h +++ b/drivers/gpu/drm/gma500/psb_drv.h @@ -162,7 +162,6 @@ #define PSB_NUM_VBLANKS 2 #define PSB_WATCHDOG_DELAY (HZ * 2) -#define PSB_LID_DELAY (HZ / 10) #define PSB_MAX_BRIGHTNESS 100 @@ -491,11 +490,7 @@ struct drm_psb_private { /* Hotplug handling */ struct work_struct hotplug_work; - /* LID-Switch */ - spinlock_t lid_lock; - struct timer_list lid_timer; struct psb_intel_opregion opregion; - u32 lid_last_state; /* Watchdog */ uint32_t apm_reg; @@ -591,10 +586,6 @@ struct psb_ops { int i2c_bus; /* I2C bus identifier for Moorestown */ }; -/* psb_lid.c */ -extern void psb_lid_timer_init(struct drm_psb_private *dev_priv); -extern void psb_lid_timer_takedown(struct drm_psb_private *dev_priv); - /* modesetting */ extern void psb_modeset_init(struct drm_device *dev); extern void psb_modeset_cleanup(struct drm_device *dev); diff --git a/drivers/gpu/drm/gma500/psb_lid.c b/drivers/gpu/drm/gma500/psb_lid.c deleted file mode 100644 index 58a7fe3926360..0000000000000 --- a/drivers/gpu/drm/gma500/psb_lid.c +++ /dev/null @@ -1,80 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/************************************************************************** - * Copyright (c) 2007, Intel Corporation. - * - * Authors: Thomas Hellstrom - **************************************************************************/ - -#include - -#include "psb_drv.h" -#include "psb_intel_reg.h" -#include "psb_reg.h" - -static void psb_lid_timer_func(struct timer_list *t) -{ - struct drm_psb_private *dev_priv = from_timer(dev_priv, t, lid_timer); - struct drm_device *dev = (struct drm_device *)&dev_priv->dev; - struct timer_list *lid_timer = &dev_priv->lid_timer; - unsigned long irq_flags; - u32 __iomem *lid_state = dev_priv->opregion.lid_state; - u32 pp_status; - - if (readl(lid_state) == dev_priv->lid_last_state) - goto lid_timer_schedule; - - if ((readl(lid_state)) & 0x01) { - /*lid state is open*/ - REG_WRITE(PP_CONTROL, REG_READ(PP_CONTROL) | POWER_TARGET_ON); - do { - pp_status = REG_READ(PP_STATUS); - } while ((pp_status & PP_ON) == 0 && - (pp_status & PP_SEQUENCE_MASK) != 0); - - if (REG_READ(PP_STATUS) & PP_ON) { - /*FIXME: should be backlight level before*/ - psb_intel_lvds_set_brightness(dev, 100); - } else { - DRM_DEBUG("LVDS panel never powered up"); - return; - } - } else { - psb_intel_lvds_set_brightness(dev, 0); - - REG_WRITE(PP_CONTROL, REG_READ(PP_CONTROL) & ~POWER_TARGET_ON); - do { - pp_status = REG_READ(PP_STATUS); - } while ((pp_status & PP_ON) == 0); - } - dev_priv->lid_last_state = readl(lid_state); - -lid_timer_schedule: - spin_lock_irqsave(&dev_priv->lid_lock, irq_flags); - if (!timer_pending(lid_timer)) { - lid_timer->expires = jiffies + PSB_LID_DELAY; - add_timer(lid_timer); - } - spin_unlock_irqrestore(&dev_priv->lid_lock, irq_flags); -} - -void psb_lid_timer_init(struct drm_psb_private *dev_priv) -{ - struct timer_list *lid_timer = &dev_priv->lid_timer; - unsigned long irq_flags; - - spin_lock_init(&dev_priv->lid_lock); - spin_lock_irqsave(&dev_priv->lid_lock, irq_flags); - - timer_setup(lid_timer, psb_lid_timer_func, 0); - - lid_timer->expires = jiffies + PSB_LID_DELAY; - - add_timer(lid_timer); - spin_unlock_irqrestore(&dev_priv->lid_lock, irq_flags); -} - -void psb_lid_timer_takedown(struct drm_psb_private *dev_priv) -{ - del_timer_sync(&dev_priv->lid_timer); -} - From 2f7ef5bb4a2f3e481ef05fab946edb97c84f67cf Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 17 Apr 2024 10:45:47 +0200 Subject: [PATCH 190/606] btrfs: fix information leak in btrfs_ioctl_logical_to_ino() Syzbot reported the following information leak for in btrfs_ioctl_logical_to_ino(): BUG: KMSAN: kernel-infoleak in instrument_copy_to_user include/linux/instrumented.h:114 [inline] BUG: KMSAN: kernel-infoleak in _copy_to_user+0xbc/0x110 lib/usercopy.c:40 instrument_copy_to_user include/linux/instrumented.h:114 [inline] _copy_to_user+0xbc/0x110 lib/usercopy.c:40 copy_to_user include/linux/uaccess.h:191 [inline] btrfs_ioctl_logical_to_ino+0x440/0x750 fs/btrfs/ioctl.c:3499 btrfs_ioctl+0x714/0x1260 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:904 [inline] __se_sys_ioctl+0x261/0x450 fs/ioctl.c:890 __x64_sys_ioctl+0x96/0xe0 fs/ioctl.c:890 x64_sys_call+0x1883/0x3b50 arch/x86/include/generated/asm/syscalls_64.h:17 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcf/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f Uninit was created at: __kmalloc_large_node+0x231/0x370 mm/slub.c:3921 __do_kmalloc_node mm/slub.c:3954 [inline] __kmalloc_node+0xb07/0x1060 mm/slub.c:3973 kmalloc_node include/linux/slab.h:648 [inline] kvmalloc_node+0xc0/0x2d0 mm/util.c:634 kvmalloc include/linux/slab.h:766 [inline] init_data_container+0x49/0x1e0 fs/btrfs/backref.c:2779 btrfs_ioctl_logical_to_ino+0x17c/0x750 fs/btrfs/ioctl.c:3480 btrfs_ioctl+0x714/0x1260 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:904 [inline] __se_sys_ioctl+0x261/0x450 fs/ioctl.c:890 __x64_sys_ioctl+0x96/0xe0 fs/ioctl.c:890 x64_sys_call+0x1883/0x3b50 arch/x86/include/generated/asm/syscalls_64.h:17 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcf/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f Bytes 40-65535 of 65536 are uninitialized Memory access of size 65536 starts at ffff888045a40000 This happens, because we're copying a 'struct btrfs_data_container' back to user-space. This btrfs_data_container is allocated in 'init_data_container()' via kvmalloc(), which does not zero-fill the memory. Fix this by using kvzalloc() which zeroes out the memory on allocation. CC: stable@vger.kernel.org # 4.14+ Reported-by: Reviewed-by: Qu Wenruo Reviewed-by: Filipe Manana Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/backref.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index c1e6a5bbeeaff..58110c9686673 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -2776,20 +2776,14 @@ struct btrfs_data_container *init_data_container(u32 total_bytes) size_t alloc_bytes; alloc_bytes = max_t(size_t, total_bytes, sizeof(*data)); - data = kvmalloc(alloc_bytes, GFP_KERNEL); + data = kvzalloc(alloc_bytes, GFP_KERNEL); if (!data) return ERR_PTR(-ENOMEM); - if (total_bytes >= sizeof(*data)) { + if (total_bytes >= sizeof(*data)) data->bytes_left = total_bytes - sizeof(*data); - data->bytes_missing = 0; - } else { + else data->bytes_missing = sizeof(*data) - total_bytes; - data->bytes_left = 0; - } - - data->elem_cnt = 0; - data->elem_missed = 0; return data; } From fe1c6c7acce10baf9521d6dccc17268d91ee2305 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 9 Apr 2024 20:32:34 +0930 Subject: [PATCH 191/606] btrfs: fix wrong block_start calculation for btrfs_drop_extent_map_range() [BUG] During my extent_map cleanup/refactor, with extra sanity checks, extent-map-tests::test_case_7() would not pass the checks. The problem is, after btrfs_drop_extent_map_range(), the resulted extent_map has a @block_start way too large. Meanwhile my btrfs_file_extent_item based members are returning a correct @disk_bytenr/@offset combination. The extent map layout looks like this: 0 16K 32K 48K | PINNED | | Regular | The regular em at [32K, 48K) also has 32K @block_start. Then drop range [0, 36K), which should shrink the regular one to be [36K, 48K). However the @block_start is incorrect, we expect 32K + 4K, but got 52K. [CAUSE] Inside btrfs_drop_extent_map_range() function, if we hit an extent_map that covers the target range but is still beyond it, we need to split that extent map into half: |<-- drop range -->| |<----- existing extent_map --->| And if the extent map is not compressed, we need to forward extent_map::block_start by the difference between the end of drop range and the extent map start. However in that particular case, the difference is calculated using (start + len - em->start). The problem is @start can be modified if the drop range covers any pinned extent. This leads to wrong calculation, and would be caught by my later extent_map sanity checks, which checks the em::block_start against btrfs_file_extent_item::disk_bytenr + btrfs_file_extent_item::offset. This is a regression caused by commit c962098ca4af ("btrfs: fix incorrect splitting in btrfs_drop_extent_map_range"), which removed the @len update for pinned extents. [FIX] Fix it by avoiding using @start completely, and use @end - em->start instead, which @end is exclusive bytenr number. And update the test case to verify the @block_start to prevent such problem from happening. Thankfully this is not going to lead to any data corruption, as IO path does not utilize btrfs_drop_extent_map_range() with @skip_pinned set. So this fix is only here for the sake of consistency/correctness. CC: stable@vger.kernel.org # 6.5+ Fixes: c962098ca4af ("btrfs: fix incorrect splitting in btrfs_drop_extent_map_range") Reviewed-by: Filipe Manana Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/extent_map.c | 2 +- fs/btrfs/tests/extent-map-tests.c | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 445f7716f1e2f..24a048210b157 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -817,7 +817,7 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end, split->block_len = em->block_len; split->orig_start = em->orig_start; } else { - const u64 diff = start + len - em->start; + const u64 diff = end - em->start; split->block_len = split->len; split->block_start += diff; diff --git a/fs/btrfs/tests/extent-map-tests.c b/fs/btrfs/tests/extent-map-tests.c index 253cce7ffecfe..47b5d301038ee 100644 --- a/fs/btrfs/tests/extent-map-tests.c +++ b/fs/btrfs/tests/extent-map-tests.c @@ -847,6 +847,11 @@ static int test_case_7(struct btrfs_fs_info *fs_info) goto out; } + if (em->block_start != SZ_32K + SZ_4K) { + test_err("em->block_start is %llu, expected 36K", em->block_start); + goto out; + } + free_extent_map(em); read_lock(&em_tree->lock); From 4cbb5050bffc49c716381ea2ecb07306dd46f83a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Amadeusz=20S=C5=82awi=C5=84ski?= Date: Thu, 18 Apr 2024 16:26:21 +0200 Subject: [PATCH 192/606] ASoC: Intel: avs: Set name of control as in topology MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When creating controls attached to widgets, there are a lot of rules if they get their name prefixed with widget name or not. Due to that controls ended up with weirdly looking names like "ssp0_fe DSP Volume", while topology set it to "DSP Volume". Fix this by setting no_wname_in_kcontrol_name to true in avs topology widgets which disables unwanted behaviour. Fixes: be2b81b519d7 ("ASoC: Intel: avs: Parse control tuples") Signed-off-by: Amadeusz Sławiński Link: https://lore.kernel.org/r/20240418142621.2487478-1-amadeuszx.slawinski@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/avs/topology.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/intel/avs/topology.c b/sound/soc/intel/avs/topology.c index 13061bd1488bb..42b42903ae9de 100644 --- a/sound/soc/intel/avs/topology.c +++ b/sound/soc/intel/avs/topology.c @@ -1582,6 +1582,8 @@ static int avs_widget_load(struct snd_soc_component *comp, int index, if (!le32_to_cpu(dw->priv.size)) return 0; + w->no_wname_in_kcontrol_name = true; + if (w->ignore_suspend && !AVS_S0IX_SUPPORTED) { dev_info_once(comp->dev, "Device does not support S0IX, check BIOS settings\n"); w->ignore_suspend = false; From d18ca8635db2f88c17acbdf6412f26d4f6aff414 Mon Sep 17 00:00:00 2001 From: Joao Paulo Goncalves Date: Wed, 17 Apr 2024 15:41:38 -0300 Subject: [PATCH 193/606] ASoC: ti: davinci-mcasp: Fix race condition during probe When using davinci-mcasp as CPU DAI with simple-card, there are some conditions that cause simple-card to finish registering a sound card before davinci-mcasp finishes registering all sound components. This creates a non-working sound card from userspace with no problem indication apart from not being able to play/record audio on a PCM stream. The issue arises during simultaneous probe execution of both drivers. Specifically, the simple-card driver, awaiting a CPU DAI, proceeds as soon as davinci-mcasp registers its DAI. However, this process can lead to the client mutex lock (client_mutex in soc-core.c) being held or davinci-mcasp being preempted before PCM DMA registration on davinci-mcasp finishes. This situation occurs when the probes of both drivers run concurrently. Below is the code path for this condition. To solve the issue, defer davinci-mcasp CPU DAI registration to the last step in the audio part of it. This way, simple-card CPU DAI parsing will be deferred until all audio components are registered. Fail Code Path: simple-card.c: probe starts simple-card.c: simple_dai_link_of: simple_parse_node(..,cpu,..) returns EPROBE_DEFER, no CPU DAI yet davinci-mcasp.c: probe starts davinci-mcasp.c: devm_snd_soc_register_component() register CPU DAI simple-card.c: probes again, finish CPU DAI parsing and call devm_snd_soc_register_card() simple-card.c: finish probe davinci-mcasp.c: *dma_pcm_platform_register() register PCM DMA davinci-mcasp.c: probe finish Cc: stable@vger.kernel.org Fixes: 9fbd58cf4ab0 ("ASoC: davinci-mcasp: Choose PCM driver based on configured DMA controller") Signed-off-by: Joao Paulo Goncalves Acked-by: Peter Ujfalusi Reviewed-by: Jai Luthra Link: https://lore.kernel.org/r/20240417184138.1104774-1-jpaulo.silvagoncalves@gmail.com Signed-off-by: Mark Brown --- sound/soc/ti/davinci-mcasp.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sound/soc/ti/davinci-mcasp.c b/sound/soc/ti/davinci-mcasp.c index b892d66f78470..1e760c3155213 100644 --- a/sound/soc/ti/davinci-mcasp.c +++ b/sound/soc/ti/davinci-mcasp.c @@ -2417,12 +2417,6 @@ static int davinci_mcasp_probe(struct platform_device *pdev) mcasp_reparent_fck(pdev); - ret = devm_snd_soc_register_component(&pdev->dev, &davinci_mcasp_component, - &davinci_mcasp_dai[mcasp->op_mode], 1); - - if (ret != 0) - goto err; - ret = davinci_mcasp_get_dma_type(mcasp); switch (ret) { case PCM_EDMA: @@ -2449,6 +2443,12 @@ static int davinci_mcasp_probe(struct platform_device *pdev) goto err; } + ret = devm_snd_soc_register_component(&pdev->dev, &davinci_mcasp_component, + &davinci_mcasp_dai[mcasp->op_mode], 1); + + if (ret != 0) + goto err; + no_audio: ret = davinci_mcasp_init_gpiochip(mcasp); if (ret) { From 6a94cf996f104633bfb8d260eedf96a0dbebb384 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 19 Apr 2024 08:51:47 +0200 Subject: [PATCH 194/606] Revert "wifi: iwlwifi: bump FW API to 90 for BZ/SC devices" Revert the API version bump, the kernel doesn't actually have all the code to deal with that version yet. Fixes: 653a90f6b226 ("wifi: iwlwifi: bump FW API to 90 for BZ/SC devices") Link: https://msgid.link/20240419085147.cd756fadab03.Ibccbb65be8e05b516cae1b9fb27a959662f9f51a@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/cfg/bz.c | 2 +- drivers/net/wireless/intel/iwlwifi/cfg/sc.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/bz.c b/drivers/net/wireless/intel/iwlwifi/cfg/bz.c index 072b0a5827d19..eca1457caa0ca 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/bz.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/bz.c @@ -10,7 +10,7 @@ #include "fw/api/txq.h" /* Highest firmware API version supported */ -#define IWL_BZ_UCODE_API_MAX 90 +#define IWL_BZ_UCODE_API_MAX 89 /* Lowest firmware API version supported */ #define IWL_BZ_UCODE_API_MIN 80 diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/sc.c b/drivers/net/wireless/intel/iwlwifi/cfg/sc.c index 9b79279fd76ca..dbbcb2d0968c0 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/sc.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/sc.c @@ -10,7 +10,7 @@ #include "fw/api/txq.h" /* Highest firmware API version supported */ -#define IWL_SC_UCODE_API_MAX 90 +#define IWL_SC_UCODE_API_MAX 89 /* Lowest firmware API version supported */ #define IWL_SC_UCODE_API_MIN 82 From 89884459a0b9e6ecd62a1ddfdb7708b34ee33649 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 18 Apr 2024 10:52:19 +0200 Subject: [PATCH 195/606] wifi: mac80211: fix idle calculation with multi-link The vif's idle state doesn't automatically go to true when any link removes the channel context, it's only idle when _all_ links no longer have a channel context. Fix that. Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240418105220.90df97557702.I05d2228ce85c203b9f2d6da8538cc16dce46752a@changeid Signed-off-by: Johannes Berg --- net/mac80211/chan.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 80e4b9784131d..ccacaed32817a 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -797,6 +797,7 @@ static int ieee80211_assign_link_chanctx(struct ieee80211_link_data *link, struct ieee80211_local *local = sdata->local; struct ieee80211_chanctx_conf *conf; struct ieee80211_chanctx *curr_ctx = NULL; + bool new_idle; int ret = 0; if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_NAN)) @@ -829,8 +830,6 @@ static int ieee80211_assign_link_chanctx(struct ieee80211_link_data *link, out: rcu_assign_pointer(link->conf->chanctx_conf, conf); - sdata->vif.cfg.idle = !conf; - if (curr_ctx && ieee80211_chanctx_num_assigned(local, curr_ctx) > 0) { ieee80211_recalc_chanctx_chantype(local, curr_ctx); ieee80211_recalc_smps_chanctx(local, curr_ctx); @@ -843,9 +842,27 @@ static int ieee80211_assign_link_chanctx(struct ieee80211_link_data *link, ieee80211_recalc_chanctx_min_def(local, new_ctx, NULL); } - if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE && - sdata->vif.type != NL80211_IFTYPE_MONITOR) - ieee80211_vif_cfg_change_notify(sdata, BSS_CHANGED_IDLE); + if (conf) { + new_idle = false; + } else { + struct ieee80211_link_data *tmp; + + new_idle = true; + for_each_sdata_link(local, tmp) { + if (rcu_access_pointer(tmp->conf->chanctx_conf)) { + new_idle = false; + break; + } + } + } + + if (new_idle != sdata->vif.cfg.idle) { + sdata->vif.cfg.idle = new_idle; + + if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE && + sdata->vif.type != NL80211_IFTYPE_MONITOR) + ieee80211_vif_cfg_change_notify(sdata, BSS_CHANGED_IDLE); + } ieee80211_check_fast_xmit_iface(sdata); From 645acc6f55918feacc4572dd80acbb152b2208d9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 18 Apr 2024 10:52:20 +0200 Subject: [PATCH 196/606] wifi: mac80211: mlme: re-parse with correct mode When doing re-parsing in ieee80211_determine_chan_mode(), the conn->mode is changed, and the whole point of doing the parsing again was to parse as the downgraded mode. However, that didn't actually work, because the setting was copied before and never changed again. Fix that. Fixes: 310c8387c638 ("wifi: mac80211: clean up connection process") Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240418105220.5e0d1fcb5622.Ib0673e0bc90033fd6d387b6a5f107c040eb907cf@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index db7128f6c901e..98cb475a14c82 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -616,7 +616,6 @@ ieee80211_determine_chan_mode(struct ieee80211_sub_if_data *sdata, .from_ap = true, .start = ies->data, .len = ies->len, - .mode = conn->mode, }; struct ieee802_11_elems *elems; struct ieee80211_supported_band *sband; @@ -625,6 +624,7 @@ ieee80211_determine_chan_mode(struct ieee80211_sub_if_data *sdata, int ret; again: + parse_params.mode = conn->mode; elems = ieee802_11_parse_elems_full(&parse_params); if (!elems) return ERR_PTR(-ENOMEM); From 1ac6f60aab36ae3f0520cc7ace02ad32240b8a1f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 18 Apr 2024 10:52:21 +0200 Subject: [PATCH 197/606] wifi: mac80211: mlme: fix memory leak When re-parsing the elements here (with changed mode), free the original ones first to avoid leaking memory. Fixes: 310c8387c638 ("wifi: mac80211: clean up connection process") Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240418105220.458421e3bbff.Icb5b84cba3ea420794cf009cf18ec3d76e434736@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 98cb475a14c82..6fa3752b740ea 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -753,8 +753,10 @@ ieee80211_determine_chan_mode(struct ieee80211_sub_if_data *sdata, } /* the mode can only decrease, so this must terminate */ - if (ap_mode != conn->mode) + if (ap_mode != conn->mode) { + kfree(elems); goto again; + } mlme_link_id_dbg(sdata, link_id, "connecting with %s mode, max bandwidth %d MHz\n", From 2fb5dfe18e8255dbec4d0f8e81297de8e3490285 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 18 Apr 2024 10:52:22 +0200 Subject: [PATCH 198/606] wifi: mac80211: mlme: re-parse if AP mode is less than client If the AP mode ends up being determined less than the client mode, there may be different reasons for this, e.g. AP misconfiguration. If this happens in a way that causes e.g. EHT to be rejected, the elements need to be re-parsed since we'll connect as HE, but not reparsing means that we'll still think it's OK to use multi-link, so we can connect in a non-sensical configuration of advertising only HE on a secondary link. This normally won't happen for the assoc link because that reuses the mode from authentication, and if that's not EHT, multi-link association is rejected. Fix this inconsistency by parsing the elements again if the mode was different from the first parsing attempt. Print the message a bit later to avoid printing "determined AP ... to be HE" twice in cases where ieee80211_determine_ap_chan() returned a lesser mode, rather than the regulatory downgrades below changing it. Fixes: 310c8387c638 ("wifi: mac80211: clean up connection process") Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240418105220.d1f25d92cfe7.Ia21eff6cdcae2f5aca13cf8e742a986af5e70f89@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 6fa3752b740ea..502c34d52fbec 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -632,15 +632,21 @@ ieee80211_determine_chan_mode(struct ieee80211_sub_if_data *sdata, ap_mode = ieee80211_determine_ap_chan(sdata, channel, bss->vht_cap_info, elems, false, conn, &ap_chandef); - mlme_link_id_dbg(sdata, link_id, "determined AP %pM to be %s\n", - cbss->bssid, ieee80211_conn_mode_str(ap_mode)); - /* this should be impossible since parsing depends on our mode */ if (WARN_ON(ap_mode > conn->mode)) { ret = -EINVAL; goto free; } + if (conn->mode != ap_mode) { + conn->mode = ap_mode; + kfree(elems); + goto again; + } + + mlme_link_id_dbg(sdata, link_id, "determined AP %pM to be %s\n", + cbss->bssid, ieee80211_conn_mode_str(ap_mode)); + sband = sdata->local->hw.wiphy->bands[channel->band]; switch (channel->band) { @@ -691,7 +697,6 @@ ieee80211_determine_chan_mode(struct ieee80211_sub_if_data *sdata, break; } - conn->mode = ap_mode; chanreq->oper = ap_chandef; /* wider-bandwidth OFDMA is only done in EHT */ From 801ea33ae82d6a9d954074fbcf8ea9d18f1543a7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 18 Apr 2024 10:52:23 +0200 Subject: [PATCH 199/606] wifi: nl80211: don't free NULL coalescing rule If the parsing fails, we can dereference a NULL pointer here. Cc: stable@vger.kernel.org Fixes: be29b99a9b51 ("cfg80211/nl80211: Add packet coalesce support") Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240418105220.b328f80406e7.Id75d961050deb05b3e4e354e024866f350c68103@changeid Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b4edba6b0b7ba..30ff9a4708134 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -14030,6 +14030,8 @@ static int nl80211_set_coalesce(struct sk_buff *skb, struct genl_info *info) error: for (i = 0; i < new_coalesce.n_rules; i++) { tmp_rule = &new_coalesce.rules[i]; + if (!tmp_rule) + continue; for (j = 0; j < tmp_rule->n_patterns; j++) kfree(tmp_rule->patterns[j].mask); kfree(tmp_rule->patterns); From 2a4e01e5270b9fa9f6e6e0a4c24ac51a758636f9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 18 Apr 2024 10:52:24 +0200 Subject: [PATCH 200/606] wifi: mac80211_hwsim: init peer measurement result If we don't get all the values here, we might pass them to cfg80211 uninitialized. Fix that, even if the input might then not make much sense. Fixes: 2af3b2a631b1 ("mac80211_hwsim: add PMSR report support via virtio") Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240418105220.e1317621c1f9.If7dd447de24d7493d133284db5e9e482e4e299f8@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/virtual/mac80211_hwsim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/virtual/mac80211_hwsim.c b/drivers/net/wireless/virtual/mac80211_hwsim.c index b55fe320633c7..59e1fc0018df3 100644 --- a/drivers/net/wireless/virtual/mac80211_hwsim.c +++ b/drivers/net/wireless/virtual/mac80211_hwsim.c @@ -3899,7 +3899,7 @@ static int hwsim_pmsr_report_nl(struct sk_buff *msg, struct genl_info *info) } nla_for_each_nested(peer, peers, rem) { - struct cfg80211_pmsr_result result; + struct cfg80211_pmsr_result result = {}; err = mac80211_hwsim_parse_pmsr_result(peer, &result, info); if (err) From cb55e08dba3526796e35d24a6d5db4ed6dcb8a4b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 18 Apr 2024 10:52:25 +0200 Subject: [PATCH 201/606] wifi: mac80211: remove link before AP If the AP removal timer is long, we don't really want to remove the link immediately. However, we really should do it _before_ the AP removes it (which happens at or after count reaches 0), so subtract 1 from the countdown when scheduling the timer. This causes the link removal work to run just after the beacon with value 1 is received. If the counter is already zero, do it immediately. This fixes an issue where we do the removal too late and receive a beacon from the AP that's no longer associated with the MLD, but thus removed EHT and ML elements, and then we disconnect instead from the whole MLD, since one of the associated APs changed mode from EHT to HE. Fixes: 8eb8dd2ffbbb ("wifi: mac80211: Support link removal using Reconfiguration ML element") Reviewed-by: Ilan Peer Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240418105220.03ac4a09fa74.Ifb8c8d38e3402721a81ce5981568f47b5c5889cb@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 502c34d52fbec..6f0880ec89da0 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -5844,8 +5844,11 @@ static void ieee80211_ml_reconfiguration(struct ieee80211_sub_if_data *sdata, continue; } - link_delay = link_conf->beacon_int * - link_removal_timeout[link_id]; + if (link_removal_timeout[link_id] < 1) + link_delay = 0; + else + link_delay = link_conf->beacon_int * + (link_removal_timeout[link_id] - 1); if (!delay) delay = link_delay; From c53d8a59351e4347452e263e2e5d7446ec93da83 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 18 Apr 2024 10:52:26 +0200 Subject: [PATCH 202/606] wifi: mac80211: fix unaligned le16 access The AP removal timer field need not be aligned, so the code shouldn't access it directly, but use unaligned loads. Use get_unaligned_le16(), which even is shorter than the current code since it doesn't need a cast. Fixes: 8eb8dd2ffbbb ("wifi: mac80211: Support link removal using Reconfiguration ML element") Reviewed-by: Ilan Peer Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240418105220.356788ba0045.I2b3cdb3644e205d5bb10322c345c0499171cf5d2@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 6f0880ec89da0..3bbb216a0fc8c 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -5819,7 +5819,7 @@ static void ieee80211_ml_reconfiguration(struct ieee80211_sub_if_data *sdata, */ if (control & IEEE80211_MLE_STA_RECONF_CONTROL_AP_REM_TIMER_PRESENT) - link_removal_timeout[link_id] = le16_to_cpu(*(__le16 *)pos); + link_removal_timeout[link_id] = get_unaligned_le16(pos); } removed_links &= sdata->vif.valid_links; From 69197dfc64007b5292cc960581548f41ccd44828 Mon Sep 17 00:00:00 2001 From: Duanqiang Wen Date: Thu, 18 Apr 2024 10:15:56 +0800 Subject: [PATCH 203/606] net: libwx: fix alloc msix vectors failed driver needs queue msix vectors and one misc irq vector, but only queue vectors need irq affinity. when num_online_cpus is less than chip max msix vectors, driver will acquire (num_online_cpus + 1) vecotrs, and call pci_alloc_irq_vectors_affinity functions with affinity params without setting pre_vectors or post_vectors, it will cause return error code -ENOSPC. Misc irq vector is vector 0, driver need to set affinity params .pre_vectors = 1. Fixes: 3f703186113f ("net: libwx: Add irq flow functions") Signed-off-by: Duanqiang Wen Signed-off-by: David S. Miller --- drivers/net/ethernet/wangxun/libwx/wx_lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c index 6dff2c85682d8..6fae161cbcb82 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c @@ -1598,7 +1598,7 @@ static void wx_set_num_queues(struct wx *wx) */ static int wx_acquire_msix_vectors(struct wx *wx) { - struct irq_affinity affd = {0, }; + struct irq_affinity affd = { .pre_vectors = 1 }; int nvecs, i; /* We start by asking for one vector per queue pair */ From f58f45c1e5b92975e91754f5407250085a6ae7cf Mon Sep 17 00:00:00 2001 From: David Bauer Date: Thu, 18 Apr 2024 15:29:08 +0200 Subject: [PATCH 204/606] vxlan: drop packets from invalid src-address The VXLAN driver currently does not check if the inner layer2 source-address is valid. In case source-address snooping/learning is enabled, a entry in the FDB for the invalid address is created with the layer3 address of the tunnel endpoint. If the frame happens to have a non-unicast address set, all this non-unicast traffic is subsequently not flooded to the tunnel network but sent to the learnt host in the FDB. To make matters worse, this FDB entry does not expire. Apply the same filtering for packets as it is done for bridges. This not only drops these invalid packets but avoids them from being learnt into the FDB. Fixes: d342894c5d2f ("vxlan: virtual extensible lan") Suggested-by: Ido Schimmel Signed-off-by: David Bauer Signed-off-by: David S. Miller --- drivers/net/vxlan/vxlan_core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 3495591a5c29b..ba319fc219571 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -1615,6 +1615,10 @@ static bool vxlan_set_mac(struct vxlan_dev *vxlan, if (ether_addr_equal(eth_hdr(skb)->h_source, vxlan->dev->dev_addr)) return false; + /* Ignore packets from invalid src-address */ + if (!is_valid_ether_addr(eth_hdr(skb)->h_source)) + return false; + /* Get address from the outer IP header */ if (vxlan_get_sk_family(vs) == AF_INET) { saddr.sin.sin_addr.s_addr = ip_hdr(skb)->saddr; From a386c30410450ea87cd38070f9feaca49dadce29 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Thu, 4 Apr 2024 10:17:56 +0200 Subject: [PATCH 205/606] drm/atomic-helper: fix parameter order in drm_format_conv_state_copy() call MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Old and new state parameters are swapped, so the old state was cleared instead of the new duplicated state. Fixes: 903674588a48 ("drm/atomic-helper: Add format-conversion state to shadow-plane state") Signed-off-by: Lucas Stach Tested-by: Leonard Göhrs Reviewed-by: Thomas Zimmermann Cc: # v6.8+ Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20240404081756.2714424-1-l.stach@pengutronix.de --- drivers/gpu/drm/drm_gem_atomic_helper.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_gem_atomic_helper.c b/drivers/gpu/drm/drm_gem_atomic_helper.c index e440f458b6633..93337543aac32 100644 --- a/drivers/gpu/drm/drm_gem_atomic_helper.c +++ b/drivers/gpu/drm/drm_gem_atomic_helper.c @@ -224,8 +224,8 @@ __drm_gem_duplicate_shadow_plane_state(struct drm_plane *plane, __drm_atomic_helper_plane_duplicate_state(plane, &new_shadow_plane_state->base); - drm_format_conv_state_copy(&shadow_plane_state->fmtcnv_state, - &new_shadow_plane_state->fmtcnv_state); + drm_format_conv_state_copy(&new_shadow_plane_state->fmtcnv_state, + &shadow_plane_state->fmtcnv_state); } EXPORT_SYMBOL(__drm_gem_duplicate_shadow_plane_state); From dad80c6bff770d25f67ec25fe011730e4a463008 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 4 Apr 2024 16:05:19 +0100 Subject: [PATCH 206/606] cifs: Fix reacquisition of volume cookie on still-live connection During mount, cifs_mount_get_tcon() gets a tcon resource connection record and then attaches an fscache volume cookie to it. However, it does this irrespective of whether or not the tcon returned from cifs_get_tcon() is a new record or one that's already in use. This leads to a warning about a volume cookie collision and a leaked volume cookie because tcon->fscache gets reset. Fix this be adding a mutex and a "we've already tried this" flag and only doing it once for the lifetime of the tcon. [!] Note: Looking at cifs_mount_get_tcon(), a more general solution may actually be required. Reacquiring the volume cookie isn't the only thing that function does: it also partially reinitialises the tcon record without any locking - which may cause live filesystem ops already using the tcon through a previous mount to malfunction. This can be reproduced simply by something like: mount //example.com/test /xfstest.test -o user=shares,pass=xxx,fsc mount //example.com/test /mnt -o user=shares,pass=xxx,fsc Fixes: 70431bfd825d ("cifs: Support fscache indexing rewrite") Signed-off-by: David Howells Acked-by: Paulo Alcantara (Red Hat) cc: Shyam Prasad N cc: linux-cifs@vger.kernel.org cc: linux-fsdevel@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/cifsglob.h | 2 ++ fs/smb/client/fscache.c | 13 +++++++++++++ fs/smb/client/misc.c | 3 +++ 3 files changed, 18 insertions(+) diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index d6669ce4ae87f..fc09d1c0ee070 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -1276,7 +1276,9 @@ struct cifs_tcon { __u32 max_cached_dirs; #ifdef CONFIG_CIFS_FSCACHE u64 resource_id; /* server resource id */ + bool fscache_acquired; /* T if we've tried acquiring a cookie */ struct fscache_volume *fscache; /* cookie for share */ + struct mutex fscache_lock; /* Prevent regetting a cookie */ #endif struct list_head pending_opens; /* list of incomplete opens */ struct cached_fids *cfids; diff --git a/fs/smb/client/fscache.c b/fs/smb/client/fscache.c index 340efce8f0529..113bde8f1e613 100644 --- a/fs/smb/client/fscache.c +++ b/fs/smb/client/fscache.c @@ -43,12 +43,23 @@ int cifs_fscache_get_super_cookie(struct cifs_tcon *tcon) char *key; int ret = -ENOMEM; + if (tcon->fscache_acquired) + return 0; + + mutex_lock(&tcon->fscache_lock); + if (tcon->fscache_acquired) { + mutex_unlock(&tcon->fscache_lock); + return 0; + } + tcon->fscache_acquired = true; + tcon->fscache = NULL; switch (sa->sa_family) { case AF_INET: case AF_INET6: break; default: + mutex_unlock(&tcon->fscache_lock); cifs_dbg(VFS, "Unknown network family '%d'\n", sa->sa_family); return -EINVAL; } @@ -57,6 +68,7 @@ int cifs_fscache_get_super_cookie(struct cifs_tcon *tcon) sharename = extract_sharename(tcon->tree_name); if (IS_ERR(sharename)) { + mutex_unlock(&tcon->fscache_lock); cifs_dbg(FYI, "%s: couldn't extract sharename\n", __func__); return PTR_ERR(sharename); } @@ -90,6 +102,7 @@ int cifs_fscache_get_super_cookie(struct cifs_tcon *tcon) kfree(key); out: kfree(sharename); + mutex_unlock(&tcon->fscache_lock); return ret; } diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c index 7d15a1969b818..ad44f8d66b377 100644 --- a/fs/smb/client/misc.c +++ b/fs/smb/client/misc.c @@ -139,6 +139,9 @@ tcon_info_alloc(bool dir_leases_enabled) atomic_set(&ret_buf->num_local_opens, 0); atomic_set(&ret_buf->num_remote_opens, 0); ret_buf->stats_from_time = ktime_get_real_seconds(); +#ifdef CONFIG_CIFS_FSCACHE + mutex_init(&ret_buf->fscache_lock); +#endif return ret_buf; } From afc23febd51c7e24361e3a9c09f3e892eb0a41ea Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 4 Apr 2024 13:51:36 +0100 Subject: [PATCH 207/606] cifs: Add tracing for the cifs_tcon struct refcounting Add tracing for the refcounting/lifecycle of the cifs_tcon struct, marking different events with different labels and giving each tcon its own debug ID so that the tracelines corresponding to individual tcons can be distinguished. This can be enabled with: echo 1 >/sys/kernel/debug/tracing/events/cifs/smb3_tcon_ref/enable Signed-off-by: David Howells Acked-by: Paulo Alcantara (Red Hat) cc: Shyam Prasad N cc: linux-cifs@vger.kernel.org cc: linux-fsdevel@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/cifsfs.c | 2 + fs/smb/client/cifsglob.h | 1 + fs/smb/client/cifsproto.h | 9 ++-- fs/smb/client/connect.c | 21 ++++---- fs/smb/client/fscache.c | 7 +++ fs/smb/client/misc.c | 10 ++-- fs/smb/client/smb2misc.c | 10 ++-- fs/smb/client/smb2ops.c | 7 ++- fs/smb/client/smb2pdu.c | 8 +-- fs/smb/client/smb2transport.c | 2 + fs/smb/client/trace.h | 92 ++++++++++++++++++++++++++++++++++- 11 files changed, 143 insertions(+), 26 deletions(-) diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index d41eedbff674a..30781789dfd9f 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -739,6 +739,8 @@ static void cifs_umount_begin(struct super_block *sb) spin_lock(&cifs_tcp_ses_lock); spin_lock(&tcon->tc_lock); + trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count, + netfs_trace_tcon_ref_see_umount); if ((tcon->tc_count > 1) || (tcon->status == TID_EXITING)) { /* we have other mounts to same share or we have already tried to umount this and woken up diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index fc09d1c0ee070..6ff35570db813 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -1190,6 +1190,7 @@ struct cifs_fattr { */ struct cifs_tcon { struct list_head tcon_list; + int debug_id; /* Debugging for tracing */ int tc_count; struct list_head rlist; /* reconnect list */ spinlock_t tc_lock; /* protect anything here that is not protected */ diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index 8e0a348f1f660..fbc358c09da3b 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -303,7 +303,7 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx, struct TCP_Server_Info *primary_server); extern void cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect); -extern void cifs_put_tcon(struct cifs_tcon *tcon); +extern void cifs_put_tcon(struct cifs_tcon *tcon, enum smb3_tcon_ref_trace trace); extern void cifs_release_automount_timer(void); @@ -530,8 +530,9 @@ extern int CIFSSMBLogoff(const unsigned int xid, struct cifs_ses *ses); extern struct cifs_ses *sesInfoAlloc(void); extern void sesInfoFree(struct cifs_ses *); -extern struct cifs_tcon *tcon_info_alloc(bool dir_leases_enabled); -extern void tconInfoFree(struct cifs_tcon *); +extern struct cifs_tcon *tcon_info_alloc(bool dir_leases_enabled, + enum smb3_tcon_ref_trace trace); +extern void tconInfoFree(struct cifs_tcon *tcon, enum smb3_tcon_ref_trace trace); extern int cifs_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server, __u32 *pexpected_response_sequence_number); @@ -721,8 +722,6 @@ static inline int cifs_create_options(struct cifs_sb_info *cifs_sb, int options) return options; } -struct super_block *cifs_get_tcon_super(struct cifs_tcon *tcon); -void cifs_put_tcon_super(struct super_block *sb); int cifs_wait_for_server_reconnect(struct TCP_Server_Info *server, bool retry); /* Put references of @ses and its children */ diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 4e35970681bf0..7a16e12f5da87 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -1943,7 +1943,7 @@ cifs_setup_ipc(struct cifs_ses *ses, struct smb3_fs_context *ctx) } /* no need to setup directory caching on IPC share, so pass in false */ - tcon = tcon_info_alloc(false); + tcon = tcon_info_alloc(false, netfs_trace_tcon_ref_new_ipc); if (tcon == NULL) return -ENOMEM; @@ -1960,7 +1960,7 @@ cifs_setup_ipc(struct cifs_ses *ses, struct smb3_fs_context *ctx) if (rc) { cifs_server_dbg(VFS, "failed to connect to IPC (rc=%d)\n", rc); - tconInfoFree(tcon); + tconInfoFree(tcon, netfs_trace_tcon_ref_free_ipc_fail); goto out; } @@ -2043,7 +2043,7 @@ void __cifs_put_smb_ses(struct cifs_ses *ses) * files on session close, as specified in MS-SMB2 3.3.5.6 Receiving an * SMB2 LOGOFF Request. */ - tconInfoFree(tcon); + tconInfoFree(tcon, netfs_trace_tcon_ref_free_ipc); if (do_logoff) { xid = get_xid(); rc = server->ops->logoff(xid, ses); @@ -2432,6 +2432,8 @@ cifs_find_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx) continue; } ++tcon->tc_count; + trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count, + netfs_trace_tcon_ref_get_find); spin_unlock(&tcon->tc_lock); spin_unlock(&cifs_tcp_ses_lock); return tcon; @@ -2441,7 +2443,7 @@ cifs_find_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx) } void -cifs_put_tcon(struct cifs_tcon *tcon) +cifs_put_tcon(struct cifs_tcon *tcon, enum smb3_tcon_ref_trace trace) { unsigned int xid; struct cifs_ses *ses; @@ -2457,6 +2459,7 @@ cifs_put_tcon(struct cifs_tcon *tcon) cifs_dbg(FYI, "%s: tc_count=%d\n", __func__, tcon->tc_count); spin_lock(&cifs_tcp_ses_lock); spin_lock(&tcon->tc_lock); + trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count - 1, trace); if (--tcon->tc_count > 0) { spin_unlock(&tcon->tc_lock); spin_unlock(&cifs_tcp_ses_lock); @@ -2493,7 +2496,7 @@ cifs_put_tcon(struct cifs_tcon *tcon) _free_xid(xid); cifs_fscache_release_super_cookie(tcon); - tconInfoFree(tcon); + tconInfoFree(tcon, netfs_trace_tcon_ref_free); cifs_put_smb_ses(ses); } @@ -2547,7 +2550,7 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx) nohandlecache = ctx->nohandlecache; else nohandlecache = true; - tcon = tcon_info_alloc(!nohandlecache); + tcon = tcon_info_alloc(!nohandlecache, netfs_trace_tcon_ref_new); if (tcon == NULL) { rc = -ENOMEM; goto out_fail; @@ -2737,7 +2740,7 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx) return tcon; out_fail: - tconInfoFree(tcon); + tconInfoFree(tcon, netfs_trace_tcon_ref_free_fail); return ERR_PTR(rc); } @@ -2754,7 +2757,7 @@ cifs_put_tlink(struct tcon_link *tlink) } if (!IS_ERR(tlink_tcon(tlink))) - cifs_put_tcon(tlink_tcon(tlink)); + cifs_put_tcon(tlink_tcon(tlink), netfs_trace_tcon_ref_put_tlink); kfree(tlink); } @@ -3319,7 +3322,7 @@ void cifs_mount_put_conns(struct cifs_mount_ctx *mnt_ctx) int rc = 0; if (mnt_ctx->tcon) - cifs_put_tcon(mnt_ctx->tcon); + cifs_put_tcon(mnt_ctx->tcon, netfs_trace_tcon_ref_put_mnt_ctx); else if (mnt_ctx->ses) cifs_put_smb_ses(mnt_ctx->ses); else if (mnt_ctx->server) diff --git a/fs/smb/client/fscache.c b/fs/smb/client/fscache.c index 113bde8f1e613..1a895e6243ee9 100644 --- a/fs/smb/client/fscache.c +++ b/fs/smb/client/fscache.c @@ -94,6 +94,11 @@ int cifs_fscache_get_super_cookie(struct cifs_tcon *tcon) } pr_err("Cache volume key already in use (%s)\n", key); vcookie = NULL; + trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count, + netfs_trace_tcon_ref_see_fscache_collision); + } else { + trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count, + netfs_trace_tcon_ref_see_fscache_okay); } tcon->fscache = vcookie; @@ -115,6 +120,8 @@ void cifs_fscache_release_super_cookie(struct cifs_tcon *tcon) cifs_fscache_fill_volume_coherency(tcon, &cd); fscache_relinquish_volume(tcon->fscache, &cd, false); tcon->fscache = NULL; + trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count, + netfs_trace_tcon_ref_see_fscache_relinq); } void cifs_fscache_get_inode_cookie(struct inode *inode) diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c index ad44f8d66b377..07c468ddb88a8 100644 --- a/fs/smb/client/misc.c +++ b/fs/smb/client/misc.c @@ -111,9 +111,10 @@ sesInfoFree(struct cifs_ses *buf_to_free) } struct cifs_tcon * -tcon_info_alloc(bool dir_leases_enabled) +tcon_info_alloc(bool dir_leases_enabled, enum smb3_tcon_ref_trace trace) { struct cifs_tcon *ret_buf; + static atomic_t tcon_debug_id; ret_buf = kzalloc(sizeof(*ret_buf), GFP_KERNEL); if (!ret_buf) @@ -130,7 +131,8 @@ tcon_info_alloc(bool dir_leases_enabled) atomic_inc(&tconInfoAllocCount); ret_buf->status = TID_NEW; - ++ret_buf->tc_count; + ret_buf->debug_id = atomic_inc_return(&tcon_debug_id); + ret_buf->tc_count = 1; spin_lock_init(&ret_buf->tc_lock); INIT_LIST_HEAD(&ret_buf->openFileList); INIT_LIST_HEAD(&ret_buf->tcon_list); @@ -142,17 +144,19 @@ tcon_info_alloc(bool dir_leases_enabled) #ifdef CONFIG_CIFS_FSCACHE mutex_init(&ret_buf->fscache_lock); #endif + trace_smb3_tcon_ref(ret_buf->debug_id, ret_buf->tc_count, trace); return ret_buf; } void -tconInfoFree(struct cifs_tcon *tcon) +tconInfoFree(struct cifs_tcon *tcon, enum smb3_tcon_ref_trace trace) { if (tcon == NULL) { cifs_dbg(FYI, "Null buffer passed to tconInfoFree\n"); return; } + trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count, trace); free_cached_dirs(tcon->cfids); atomic_dec(&tconInfoAllocCount); kfree(tcon->nativeFileSystem); diff --git a/fs/smb/client/smb2misc.c b/fs/smb/client/smb2misc.c index cc72be5a93a93..677ef6f99a5be 100644 --- a/fs/smb/client/smb2misc.c +++ b/fs/smb/client/smb2misc.c @@ -767,7 +767,7 @@ smb2_cancelled_close_fid(struct work_struct *work) if (rc) cifs_tcon_dbg(VFS, "Close cancelled mid failed rc:%d\n", rc); - cifs_put_tcon(tcon); + cifs_put_tcon(tcon, netfs_trace_tcon_ref_put_cancelled_close_fid); kfree(cancelled); } @@ -811,6 +811,8 @@ smb2_handle_cancelled_close(struct cifs_tcon *tcon, __u64 persistent_fid, if (tcon->tc_count <= 0) { struct TCP_Server_Info *server = NULL; + trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count, + netfs_trace_tcon_ref_see_cancelled_close); WARN_ONCE(tcon->tc_count < 0, "tcon refcount is negative"); spin_unlock(&cifs_tcp_ses_lock); @@ -823,12 +825,14 @@ smb2_handle_cancelled_close(struct cifs_tcon *tcon, __u64 persistent_fid, return 0; } tcon->tc_count++; + trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count, + netfs_trace_tcon_ref_get_cancelled_close); spin_unlock(&cifs_tcp_ses_lock); rc = __smb2_handle_cancelled_cmd(tcon, SMB2_CLOSE_HE, 0, persistent_fid, volatile_fid); if (rc) - cifs_put_tcon(tcon); + cifs_put_tcon(tcon, netfs_trace_tcon_ref_put_cancelled_close); return rc; } @@ -856,7 +860,7 @@ smb2_handle_cancelled_mid(struct mid_q_entry *mid, struct TCP_Server_Info *serve rsp->PersistentFileId, rsp->VolatileFileId); if (rc) - cifs_put_tcon(tcon); + cifs_put_tcon(tcon, netfs_trace_tcon_ref_put_cancelled_mid); return rc; } diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 78c94d0350fe9..28f0b7d19d534 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -2915,8 +2915,11 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses, tcon = list_first_entry_or_null(&ses->tcon_list, struct cifs_tcon, tcon_list); - if (tcon) + if (tcon) { tcon->tc_count++; + trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count, + netfs_trace_tcon_ref_get_dfs_refer); + } spin_unlock(&cifs_tcp_ses_lock); } @@ -2980,6 +2983,8 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses, /* ipc tcons are not refcounted */ spin_lock(&cifs_tcp_ses_lock); tcon->tc_count--; + trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count, + netfs_trace_tcon_ref_dec_dfs_refer); /* tc_count can never go negative */ WARN_ON(tcon->tc_count < 0); spin_unlock(&cifs_tcp_ses_lock); diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 86c647a947ccd..a5efce03cb58e 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -4138,6 +4138,8 @@ void smb2_reconnect_server(struct work_struct *work) list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { if (tcon->need_reconnect || tcon->need_reopen_files) { tcon->tc_count++; + trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count, + netfs_trace_tcon_ref_get_reconnect_server); list_add_tail(&tcon->rlist, &tmp_list); tcon_selected = true; } @@ -4176,14 +4178,14 @@ void smb2_reconnect_server(struct work_struct *work) if (tcon->ipc) cifs_put_smb_ses(tcon->ses); else - cifs_put_tcon(tcon); + cifs_put_tcon(tcon, netfs_trace_tcon_ref_put_reconnect_server); } if (!ses_exist) goto done; /* allocate a dummy tcon struct used for reconnect */ - tcon = tcon_info_alloc(false); + tcon = tcon_info_alloc(false, netfs_trace_tcon_ref_new_reconnect_server); if (!tcon) { resched = true; list_for_each_entry_safe(ses, ses2, &tmp_ses_list, rlist) { @@ -4206,7 +4208,7 @@ void smb2_reconnect_server(struct work_struct *work) list_del_init(&ses->rlist); cifs_put_smb_ses(ses); } - tconInfoFree(tcon); + tconInfoFree(tcon, netfs_trace_tcon_ref_free_reconnect_server); done: cifs_dbg(FYI, "Reconnecting tcons and channels finished\n"); diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c index 1d6e54f7879e6..02135a6053051 100644 --- a/fs/smb/client/smb2transport.c +++ b/fs/smb/client/smb2transport.c @@ -189,6 +189,8 @@ smb2_find_smb_sess_tcon_unlocked(struct cifs_ses *ses, __u32 tid) if (tcon->tid != tid) continue; ++tcon->tc_count; + trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count, + netfs_trace_tcon_ref_get_find_sess_tcon); return tcon; } diff --git a/fs/smb/client/trace.h b/fs/smb/client/trace.h index 5e83cb9da9028..604e52876cd2d 100644 --- a/fs/smb/client/trace.h +++ b/fs/smb/client/trace.h @@ -3,6 +3,9 @@ * Copyright (C) 2018, Microsoft Corporation. * * Author(s): Steve French + * + * Please use this 3-part article as a reference for writing new tracepoints: + * https://lwn.net/Articles/379903/ */ #undef TRACE_SYSTEM #define TRACE_SYSTEM cifs @@ -15,9 +18,70 @@ #include /* - * Please use this 3-part article as a reference for writing new tracepoints: - * https://lwn.net/Articles/379903/ + * Specify enums for tracing information. + */ +#define smb3_tcon_ref_traces \ + EM(netfs_trace_tcon_ref_dec_dfs_refer, "DEC DfsRef") \ + EM(netfs_trace_tcon_ref_free, "FRE ") \ + EM(netfs_trace_tcon_ref_free_fail, "FRE Fail ") \ + EM(netfs_trace_tcon_ref_free_ipc, "FRE Ipc ") \ + EM(netfs_trace_tcon_ref_free_ipc_fail, "FRE Ipc-F ") \ + EM(netfs_trace_tcon_ref_free_reconnect_server, "FRE Reconn") \ + EM(netfs_trace_tcon_ref_get_cancelled_close, "GET Cn-Cls") \ + EM(netfs_trace_tcon_ref_get_dfs_refer, "GET DfsRef") \ + EM(netfs_trace_tcon_ref_get_find, "GET Find ") \ + EM(netfs_trace_tcon_ref_get_find_sess_tcon, "GET FndSes") \ + EM(netfs_trace_tcon_ref_get_reconnect_server, "GET Reconn") \ + EM(netfs_trace_tcon_ref_new, "NEW ") \ + EM(netfs_trace_tcon_ref_new_ipc, "NEW Ipc ") \ + EM(netfs_trace_tcon_ref_new_reconnect_server, "NEW Reconn") \ + EM(netfs_trace_tcon_ref_put_cancelled_close, "PUT Cn-Cls") \ + EM(netfs_trace_tcon_ref_put_cancelled_close_fid, "PUT Cn-Fid") \ + EM(netfs_trace_tcon_ref_put_cancelled_mid, "PUT Cn-Mid") \ + EM(netfs_trace_tcon_ref_put_mnt_ctx, "PUT MntCtx") \ + EM(netfs_trace_tcon_ref_put_reconnect_server, "PUT Reconn") \ + EM(netfs_trace_tcon_ref_put_tlink, "PUT Tlink ") \ + EM(netfs_trace_tcon_ref_see_cancelled_close, "SEE Cn-Cls") \ + EM(netfs_trace_tcon_ref_see_fscache_collision, "SEE FV-CO!") \ + EM(netfs_trace_tcon_ref_see_fscache_okay, "SEE FV-Ok ") \ + EM(netfs_trace_tcon_ref_see_fscache_relinq, "SEE FV-Rlq") \ + E_(netfs_trace_tcon_ref_see_umount, "SEE Umount") + +#undef EM +#undef E_ + +/* + * Define those tracing enums. + */ +#ifndef __SMB3_DECLARE_TRACE_ENUMS_ONCE_ONLY +#define __SMB3_DECLARE_TRACE_ENUMS_ONCE_ONLY + +#define EM(a, b) a, +#define E_(a, b) a + +enum smb3_tcon_ref_trace { smb3_tcon_ref_traces } __mode(byte); + +#undef EM +#undef E_ +#endif + +/* + * Export enum symbols via userspace. + */ +#define EM(a, b) TRACE_DEFINE_ENUM(a); +#define E_(a, b) TRACE_DEFINE_ENUM(a); + +smb3_tcon_ref_traces; + +#undef EM +#undef E_ + +/* + * Now redefine the EM() and E_() macros to map the enums to the strings that + * will be printed in the output. */ +#define EM(a, b) { a, b }, +#define E_(a, b) { a, b } /* For logging errors in read or write */ DECLARE_EVENT_CLASS(smb3_rw_err_class, @@ -1125,6 +1189,30 @@ DEFINE_SMB3_CREDIT_EVENT(waitff_credits); DEFINE_SMB3_CREDIT_EVENT(overflow_credits); DEFINE_SMB3_CREDIT_EVENT(set_credits); + +TRACE_EVENT(smb3_tcon_ref, + TP_PROTO(unsigned int tcon_debug_id, int ref, + enum smb3_tcon_ref_trace trace), + TP_ARGS(tcon_debug_id, ref, trace), + TP_STRUCT__entry( + __field(unsigned int, tcon) + __field(int, ref) + __field(enum smb3_tcon_ref_trace, trace) + ), + TP_fast_assign( + __entry->tcon = tcon_debug_id; + __entry->ref = ref; + __entry->trace = trace; + ), + TP_printk("TC=%08x %s r=%u", + __entry->tcon, + __print_symbolic(__entry->trace, smb3_tcon_ref_traces), + __entry->ref) + ); + + +#undef EM +#undef E_ #endif /* _CIFS_TRACE_H */ #undef TRACE_INCLUDE_PATH From 18d86965e31f9be4d477da0744a7cdc9815858de Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Fri, 19 Apr 2024 12:05:07 -0300 Subject: [PATCH 208/606] smb: client: fix rename(2) regression against samba After commit 2c7d399e551c ("smb: client: reuse file lease key in compound operations") the client started reusing lease keys for rename, unlink and set path size operations to prevent it from breaking its own leases and thus causing unnecessary lease breaks to same connection. The implementation relies on positive dentries and cifsInodeInfo::lease_granted to decide whether reusing lease keys for the compound requests. cifsInodeInfo::lease_granted was introduced by commit 0ab95c2510b6 ("Defer close only when lease is enabled.") to indicate whether lease caching is granted for a specific file, but that can only happen until file is open, so cifsInodeInfo::lease_granted was left uninitialised in ->alloc_inode and then client started sending random lease keys for files that hadn't any leases. This fixes the following test case against samba: mount.cifs //srv/share /mnt/1 -o ...,nosharesock mount.cifs //srv/share /mnt/2 -o ...,nosharesock touch /mnt/1/foo; tail -f /mnt/1/foo & pid=$! mv /mnt/2/foo /mnt/2/bar # fails with -EIO kill $pid Fixes: 0ab95c2510b6 ("Defer close only when lease is enabled.") Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French --- fs/smb/client/cifsfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index 30781789dfd9f..39277c37185ca 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -389,6 +389,7 @@ cifs_alloc_inode(struct super_block *sb) * server, can not assume caching of file data or metadata. */ cifs_set_oplock_level(cifs_inode, 0); + cifs_inode->lease_granted = false; cifs_inode->flags = 0; spin_lock_init(&cifs_inode->writers_lock); cifs_inode->writers = 0; From 32ac501957e5f68fe0e4bf88fb4db75cfb8f6566 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Fri, 19 Apr 2024 15:00:12 +0100 Subject: [PATCH 209/606] ASoC: codecs: wsa881x: set clk_stop_mode1 flag WSA881x codecs do not retain the state while clock is stopped, so mark this with clk_stop_mode1 flag. Fixes: a0aab9e1404a ("ASoC: codecs: add wsa881x amplifier support") Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20240419140012.91384-1-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown --- sound/soc/codecs/wsa881x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/wsa881x.c b/sound/soc/codecs/wsa881x.c index 3c025dabaf7a4..1253695bebd86 100644 --- a/sound/soc/codecs/wsa881x.c +++ b/sound/soc/codecs/wsa881x.c @@ -1155,6 +1155,7 @@ static int wsa881x_probe(struct sdw_slave *pdev, pdev->prop.sink_ports = GENMASK(WSA881X_MAX_SWR_PORTS, 0); pdev->prop.sink_dpn_prop = wsa_sink_dpn_prop; pdev->prop.scp_int1_mask = SDW_SCP_INT1_BUS_CLASH | SDW_SCP_INT1_PARITY; + pdev->prop.clk_stop_mode1 = true; gpiod_direction_output(wsa881x->sd_n, !wsa881x->sd_n_val); wsa881x->regmap = devm_regmap_init_sdw(pdev, &wsa881x_regmap_config); From c119f4ede3fa90a9463f50831761c28f989bfb20 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Thu, 11 Apr 2024 23:02:15 +0900 Subject: [PATCH 210/606] ksmbd: fix slab-out-of-bounds in smb2_allocate_rsp_buf If ->ProtocolId is SMB2_TRANSFORM_PROTO_NUM, smb2 request size validation could be skipped. if request size is smaller than sizeof(struct smb2_query_info_req), slab-out-of-bounds read can happen in smb2_allocate_rsp_buf(). This patch allocate response buffer after decrypting transform request. smb3_decrypt_req() will validate transform request size and avoid slab-out-of-bound in smb2_allocate_rsp_buf(). Reported-by: Norbert Szetei Cc: stable@vger.kernel.org Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/server.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/fs/smb/server/server.c b/fs/smb/server/server.c index c0788188aa82f..c67fbc8d6683e 100644 --- a/fs/smb/server/server.c +++ b/fs/smb/server/server.c @@ -167,20 +167,17 @@ static void __handle_ksmbd_work(struct ksmbd_work *work, int rc; bool is_chained = false; - if (conn->ops->allocate_rsp_buf(work)) - return; - if (conn->ops->is_transform_hdr && conn->ops->is_transform_hdr(work->request_buf)) { rc = conn->ops->decrypt_req(work); - if (rc < 0) { - conn->ops->set_rsp_status(work, STATUS_DATA_ERROR); - goto send; - } - + if (rc < 0) + return; work->encrypted = true; } + if (conn->ops->allocate_rsp_buf(work)) + return; + rc = conn->ops->init_rsp_hdr(work); if (rc) { /* either uid or tid is not correct */ From 17cf0c2794bdb6f39671265aa18aea5c22ee8c4a Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Fri, 12 Apr 2024 09:45:00 +0900 Subject: [PATCH 211/606] ksmbd: validate request buffer size in smb2_allocate_rsp_buf() The response buffer should be allocated in smb2_allocate_rsp_buf before validating request. But the fields in payload as well as smb2 header is used in smb2_allocate_rsp_buf(). This patch add simple buffer size validation to avoid potencial out-of-bounds in request buffer. Cc: stable@vger.kernel.org Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 5723bbf372d7c..ee4b2875a021c 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -535,6 +535,10 @@ int smb2_allocate_rsp_buf(struct ksmbd_work *work) if (cmd == SMB2_QUERY_INFO_HE) { struct smb2_query_info_req *req; + if (get_rfc1002_len(work->request_buf) < + offsetof(struct smb2_query_info_req, OutputBufferLength)) + return -EINVAL; + req = smb2_get_msg(work->request_buf); if ((req->InfoType == SMB2_O_INFO_FILE && (req->FileInfoClass == FILE_FULL_EA_INFORMATION || From 4973b04d3ea577db80c501c5f14e68ec69fe1794 Mon Sep 17 00:00:00 2001 From: Marios Makassikis Date: Mon, 15 Apr 2024 15:12:48 +0200 Subject: [PATCH 212/606] ksmbd: clear RENAME_NOREPLACE before calling vfs_rename File overwrite case is explicitly handled, so it is not necessary to pass RENAME_NOREPLACE to vfs_rename. Clearing the flag fixes rename operations when the share is a ntfs-3g mount. The latter uses an older version of fuse with no support for flags in the ->rename op. Cc: stable@vger.kernel.org Signed-off-by: Marios Makassikis Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/vfs.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index 22f0f3db3ac92..51b1b0bed616e 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -754,10 +754,15 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path, goto out4; } + /* + * explicitly handle file overwrite case, for compatibility with + * filesystems that may not support rename flags (e.g: fuse) + */ if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry)) { err = -EEXIST; goto out4; } + flags &= ~(RENAME_NOREPLACE); if (old_child == trap) { err = -EINVAL; From 0268a7cc7fdc47d90b6c18859de7718d5059f6f1 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Fri, 19 Apr 2024 23:46:34 +0900 Subject: [PATCH 213/606] ksmbd: common: use struct_group_attr instead of struct_group for network_open_info 4byte padding cause the connection issue with the applications of MacOS. smb2_close response size increases by 4 bytes by padding, And the smb client of MacOS check it and stop the connection. This patch use struct_group_attr instead of struct_group for network_open_info to use __packed to avoid padding. Fixes: 0015eb6e1238 ("smb: client, common: fix fortify warnings") Cc: stable@vger.kernel.org Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/common/smb2pdu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h index 1b594307c9d5a..202ff91281560 100644 --- a/fs/smb/common/smb2pdu.h +++ b/fs/smb/common/smb2pdu.h @@ -711,7 +711,7 @@ struct smb2_close_rsp { __le16 StructureSize; /* 60 */ __le16 Flags; __le32 Reserved; - struct_group(network_open_info, + struct_group_attr(network_open_info, __packed, __le64 CreationTime; __le64 LastAccessTime; __le64 LastWriteTime; From e9d8c2f95ab8acaf3f4d4a53682a4afa3c263692 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sat, 20 Apr 2024 09:17:58 +0900 Subject: [PATCH 214/606] ksmbd: add continuous availability share parameter If capabilities of the share is not SMB2_SHARE_CAP_CONTINUOUS_AVAILABILITY, ksmbd should not grant a persistent handle to the client. This patch add continuous availability share parameter to control it. Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/ksmbd_netlink.h | 35 ++++++++++++++++++----------------- fs/smb/server/smb2pdu.c | 11 +++++++++-- 2 files changed, 27 insertions(+), 19 deletions(-) diff --git a/fs/smb/server/ksmbd_netlink.h b/fs/smb/server/ksmbd_netlink.h index 686b321c5a8bb..f4e55199938d5 100644 --- a/fs/smb/server/ksmbd_netlink.h +++ b/fs/smb/server/ksmbd_netlink.h @@ -340,23 +340,24 @@ enum KSMBD_TREE_CONN_STATUS { /* * Share config flags. */ -#define KSMBD_SHARE_FLAG_INVALID (0) -#define KSMBD_SHARE_FLAG_AVAILABLE BIT(0) -#define KSMBD_SHARE_FLAG_BROWSEABLE BIT(1) -#define KSMBD_SHARE_FLAG_WRITEABLE BIT(2) -#define KSMBD_SHARE_FLAG_READONLY BIT(3) -#define KSMBD_SHARE_FLAG_GUEST_OK BIT(4) -#define KSMBD_SHARE_FLAG_GUEST_ONLY BIT(5) -#define KSMBD_SHARE_FLAG_STORE_DOS_ATTRS BIT(6) -#define KSMBD_SHARE_FLAG_OPLOCKS BIT(7) -#define KSMBD_SHARE_FLAG_PIPE BIT(8) -#define KSMBD_SHARE_FLAG_HIDE_DOT_FILES BIT(9) -#define KSMBD_SHARE_FLAG_INHERIT_OWNER BIT(10) -#define KSMBD_SHARE_FLAG_STREAMS BIT(11) -#define KSMBD_SHARE_FLAG_FOLLOW_SYMLINKS BIT(12) -#define KSMBD_SHARE_FLAG_ACL_XATTR BIT(13) -#define KSMBD_SHARE_FLAG_UPDATE BIT(14) -#define KSMBD_SHARE_FLAG_CROSSMNT BIT(15) +#define KSMBD_SHARE_FLAG_INVALID (0) +#define KSMBD_SHARE_FLAG_AVAILABLE BIT(0) +#define KSMBD_SHARE_FLAG_BROWSEABLE BIT(1) +#define KSMBD_SHARE_FLAG_WRITEABLE BIT(2) +#define KSMBD_SHARE_FLAG_READONLY BIT(3) +#define KSMBD_SHARE_FLAG_GUEST_OK BIT(4) +#define KSMBD_SHARE_FLAG_GUEST_ONLY BIT(5) +#define KSMBD_SHARE_FLAG_STORE_DOS_ATTRS BIT(6) +#define KSMBD_SHARE_FLAG_OPLOCKS BIT(7) +#define KSMBD_SHARE_FLAG_PIPE BIT(8) +#define KSMBD_SHARE_FLAG_HIDE_DOT_FILES BIT(9) +#define KSMBD_SHARE_FLAG_INHERIT_OWNER BIT(10) +#define KSMBD_SHARE_FLAG_STREAMS BIT(11) +#define KSMBD_SHARE_FLAG_FOLLOW_SYMLINKS BIT(12) +#define KSMBD_SHARE_FLAG_ACL_XATTR BIT(13) +#define KSMBD_SHARE_FLAG_UPDATE BIT(14) +#define KSMBD_SHARE_FLAG_CROSSMNT BIT(15) +#define KSMBD_SHARE_FLAG_CONTINUOUS_AVAILABILITY BIT(16) /* * Tree connect request flags. diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index ee4b2875a021c..355824151c2d8 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -1988,7 +1988,12 @@ int smb2_tree_connect(struct ksmbd_work *work) write_unlock(&sess->tree_conns_lock); rsp->StructureSize = cpu_to_le16(16); out_err1: - rsp->Capabilities = 0; + if (server_conf.flags & KSMBD_GLOBAL_FLAG_DURABLE_HANDLE && + test_share_config_flag(share, + KSMBD_SHARE_FLAG_CONTINUOUS_AVAILABILITY)) + rsp->Capabilities = SMB2_SHARE_CAP_CONTINUOUS_AVAILABILITY; + else + rsp->Capabilities = 0; rsp->Reserved = 0; /* default manual caching */ rsp->ShareFlags = SMB2_SHAREFLAG_MANUAL_CACHING; @@ -3502,7 +3507,9 @@ int smb2_open(struct ksmbd_work *work) memcpy(fp->client_guid, conn->ClientGUID, SMB2_CLIENT_GUID_SIZE); if (dh_info.type == DURABLE_REQ_V2 || dh_info.type == DURABLE_REQ) { - if (dh_info.type == DURABLE_REQ_V2 && dh_info.persistent) + if (dh_info.type == DURABLE_REQ_V2 && dh_info.persistent && + test_share_config_flag(work->tcon->share_conf, + KSMBD_SHARE_FLAG_CONTINUOUS_AVAILABILITY)) fp->is_persistent = true; else fp->is_durable = true; From 680d11f6e5427b6af1321932286722d24a8b16c1 Mon Sep 17 00:00:00 2001 From: Yick Xie Date: Fri, 19 Apr 2024 01:06:10 +0800 Subject: [PATCH 215/606] udp: preserve the connected status if only UDP cmsg If "udp_cmsg_send()" returned 0 (i.e. only UDP cmsg), "connected" should not be set to 0. Otherwise it stops the connected socket from using the cached route. Fixes: 2e8de8576343 ("udp: add gso segment cmsg") Signed-off-by: Yick Xie Cc: stable@vger.kernel.org Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/20240418170610.867084-1-yick.xie@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv4/udp.c | 5 +++-- net/ipv6/udp.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c02bf011d4a6f..420905be5f30c 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1123,16 +1123,17 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (msg->msg_controllen) { err = udp_cmsg_send(sk, msg, &ipc.gso_size); - if (err > 0) + if (err > 0) { err = ip_cmsg_send(sk, msg, &ipc, sk->sk_family == AF_INET6); + connected = 0; + } if (unlikely(err < 0)) { kfree(ipc.opt); return err; } if (ipc.opt) free = 1; - connected = 0; } if (!ipc.opt) { struct ip_options_rcu *inet_opt; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 8b1dd7f512491..1a4cccdd40c9c 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1474,9 +1474,11 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipc6.opt = opt; err = udp_cmsg_send(sk, msg, &ipc6.gso_size); - if (err > 0) + if (err > 0) { err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, fl6, &ipc6); + connected = false; + } if (err < 0) { fl6_sock_release(flowlabel); return err; @@ -1488,7 +1490,6 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) } if (!(opt->opt_nflen|opt->opt_flen)) opt = NULL; - connected = false; } if (!opt) { opt = txopt_get(np); From 9f898fc2c31fbf0ac5ecd289f528a716464cb005 Mon Sep 17 00:00:00 2001 From: Justin Chen Date: Thu, 18 Apr 2024 11:05:41 -0700 Subject: [PATCH 216/606] net: bcmasp: fix memory leak when bringing down interface When bringing down the TX rings we flush the rings but forget to reclaimed the flushed packets. This leads to a memory leak since we do not free the dma mapped buffers. This also leads to tx control block corruption when bringing down the interface for power management. Fixes: 490cb412007d ("net: bcmasp: Add support for ASP2.0 Ethernet controller") Signed-off-by: Justin Chen Acked-by: Florian Fainelli Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240418180541.2271719-1-justin.chen@broadcom.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/broadcom/asp2/bcmasp_intf.c | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c index 72ea97c5d5d42..82768b0e90262 100644 --- a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c +++ b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c @@ -436,10 +436,8 @@ static void umac_init(struct bcmasp_intf *intf) umac_wl(intf, 0x800, UMC_RX_MAX_PKT_SZ); } -static int bcmasp_tx_poll(struct napi_struct *napi, int budget) +static int bcmasp_tx_reclaim(struct bcmasp_intf *intf) { - struct bcmasp_intf *intf = - container_of(napi, struct bcmasp_intf, tx_napi); struct bcmasp_intf_stats64 *stats = &intf->stats64; struct device *kdev = &intf->parent->pdev->dev; unsigned long read, released = 0; @@ -482,10 +480,16 @@ static int bcmasp_tx_poll(struct napi_struct *napi, int budget) DESC_RING_COUNT); } - /* Ensure all descriptors have been written to DRAM for the hardware - * to see updated contents. - */ - wmb(); + return released; +} + +static int bcmasp_tx_poll(struct napi_struct *napi, int budget) +{ + struct bcmasp_intf *intf = + container_of(napi, struct bcmasp_intf, tx_napi); + int released = 0; + + released = bcmasp_tx_reclaim(intf); napi_complete(&intf->tx_napi); @@ -797,6 +801,7 @@ static void bcmasp_init_tx(struct bcmasp_intf *intf) intf->tx_spb_dma_read = intf->tx_spb_dma_addr; intf->tx_spb_index = 0; intf->tx_spb_clean_index = 0; + memset(intf->tx_cbs, 0, sizeof(struct bcmasp_tx_cb) * DESC_RING_COUNT); /* Make sure channels are disabled */ tx_spb_ctrl_wl(intf, 0x0, TX_SPB_CTRL_ENABLE); @@ -885,6 +890,8 @@ static void bcmasp_netif_deinit(struct net_device *dev) } while (timeout-- > 0); tx_spb_dma_wl(intf, 0x0, TX_SPB_DMA_FIFO_CTRL); + bcmasp_tx_reclaim(intf); + umac_enable_set(intf, UMC_CMD_TX_EN, 0); phy_stop(dev->phydev); From 976c44af48141cd8595601c0af2a19a43c5b228b Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 18 Apr 2024 15:46:06 +0200 Subject: [PATCH 217/606] mlxsw: core: Unregister EMAD trap using FORWARD action The device's manual (PRM - Programmer's Reference Manual) classifies the trap that is used to deliver EMAD responses as an "event trap". Among other things, it means that the only actions that can be associated with the trap are TRAP and FORWARD (NOP). Currently, during driver de-initialization the driver unregisters the trap by setting its action to DISCARD, which violates the above guideline. Future firmware versions will prevent such misuses by returning an error. This does not prevent the driver from working, but an error will be printed to the kernel log during module removal / devlink reload: mlxsw_spectrum 0000:03:00.0: Reg cmd access status failed (status=7(bad parameter)) mlxsw_spectrum 0000:03:00.0: Reg cmd access failed (reg_id=7003(hpkt),type=write) Suppress the error message by aligning the driver to the manual and use a FORWARD (NOP) action when unregistering the trap. Fixes: 4ec14b7634b2 ("mlxsw: Add interface to access registers and process events") Cc: Jiri Pirko Cc: Amit Cohen Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Reviewed-by: Simon Horman Signed-off-by: Petr Machata Link: https://lore.kernel.org/r/753a89e14008fde08cb4a2c1e5f537b81d8eb2d6.1713446092.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index e4d7739bd7c88..4a79c0d7e7ad8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -849,7 +849,7 @@ static void mlxsw_emad_rx_listener_func(struct sk_buff *skb, u16 local_port, static const struct mlxsw_listener mlxsw_emad_rx_listener = MLXSW_RXL(mlxsw_emad_rx_listener_func, ETHEMAD, TRAP_TO_CPU, false, - EMAD, DISCARD); + EMAD, FORWARD); static int mlxsw_emad_tlv_enable(struct mlxsw_core *mlxsw_core) { From 7e2050a8366315aeaf0316b3d362e67cf58f3ea8 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 18 Apr 2024 15:46:07 +0200 Subject: [PATCH 218/606] mlxsw: core_env: Fix driver initialization with old firmware The driver queries the Management Capabilities Mask (MCAM) register during initialization to understand if it can read up to 128 bytes from transceiver modules. However, not all firmware versions support this register, leading to the driver failing to load. Fix by treating an error in the register query as an indication that the feature is not supported. Fixes: 1f4aea1f72da ("mlxsw: core_env: Read transceiver module EEPROM in 128 bytes chunks") Reported-by: Tim 'mithro' Ansell Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/0afa8b2e8bac178f5f88211344429176dcc72281.1713446092.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlxsw/core_env.c | 20 ++++++------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.c b/drivers/net/ethernet/mellanox/mlxsw/core_env.c index 53b150b7ae4e7..6c06b05927608 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_env.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.c @@ -1357,24 +1357,20 @@ static struct mlxsw_linecards_event_ops mlxsw_env_event_ops = { .got_inactive = mlxsw_env_got_inactive, }; -static int mlxsw_env_max_module_eeprom_len_query(struct mlxsw_env *mlxsw_env) +static void mlxsw_env_max_module_eeprom_len_query(struct mlxsw_env *mlxsw_env) { char mcam_pl[MLXSW_REG_MCAM_LEN]; - bool mcia_128b_supported; + bool mcia_128b_supported = false; int err; mlxsw_reg_mcam_pack(mcam_pl, MLXSW_REG_MCAM_FEATURE_GROUP_ENHANCED_FEATURES); err = mlxsw_reg_query(mlxsw_env->core, MLXSW_REG(mcam), mcam_pl); - if (err) - return err; - - mlxsw_reg_mcam_unpack(mcam_pl, MLXSW_REG_MCAM_MCIA_128B, - &mcia_128b_supported); + if (!err) + mlxsw_reg_mcam_unpack(mcam_pl, MLXSW_REG_MCAM_MCIA_128B, + &mcia_128b_supported); mlxsw_env->max_eeprom_len = mcia_128b_supported ? 128 : 48; - - return 0; } int mlxsw_env_init(struct mlxsw_core *mlxsw_core, @@ -1445,15 +1441,11 @@ int mlxsw_env_init(struct mlxsw_core *mlxsw_core, if (err) goto err_type_set; - err = mlxsw_env_max_module_eeprom_len_query(env); - if (err) - goto err_eeprom_len_query; - + mlxsw_env_max_module_eeprom_len_query(env); env->line_cards[0]->active = true; return 0; -err_eeprom_len_query: err_type_set: mlxsw_env_module_event_disable(env, 0); err_mlxsw_env_module_event_enable: From 773501d01e6bc3f2557882a25679392d982d5f3e Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 18 Apr 2024 15:46:08 +0200 Subject: [PATCH 219/606] mlxsw: pci: Fix driver initialization with old firmware The driver queries the Management Capabilities Mask (MCAM) register during initialization to understand if a new and deeper reset flow is supported. However, not all firmware versions support this register, leading to the driver failing to load. Fix by treating an error in the register query as an indication that the feature is not supported. Fixes: f257c73e5356 ("mlxsw: pci: Add support for new reset flow") Reported-by: Tim 'mithro' Ansell Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Reviewed-by: Simon Horman Reviewed-by: Kalesh AP Signed-off-by: Petr Machata Link: https://lore.kernel.org/r/ee968c49d53bac96a4c66d1b09ebbd097d81aca5.1713446092.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/pci.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index af99bf17eb36d..f42a1b1c93687 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -1530,7 +1530,7 @@ mlxsw_pci_reset(struct mlxsw_pci *mlxsw_pci, const struct pci_device_id *id) { struct pci_dev *pdev = mlxsw_pci->pdev; char mcam_pl[MLXSW_REG_MCAM_LEN]; - bool pci_reset_supported; + bool pci_reset_supported = false; u32 sys_status; int err; @@ -1548,11 +1548,9 @@ mlxsw_pci_reset(struct mlxsw_pci *mlxsw_pci, const struct pci_device_id *id) mlxsw_reg_mcam_pack(mcam_pl, MLXSW_REG_MCAM_FEATURE_GROUP_ENHANCED_FEATURES); err = mlxsw_reg_query(mlxsw_pci->core, MLXSW_REG(mcam), mcam_pl); - if (err) - return err; - - mlxsw_reg_mcam_unpack(mcam_pl, MLXSW_REG_MCAM_PCI_RESET, - &pci_reset_supported); + if (!err) + mlxsw_reg_mcam_unpack(mcam_pl, MLXSW_REG_MCAM_PCI_RESET, + &pci_reset_supported); if (pci_reset_supported) { pci_dbg(pdev, "Starting PCI reset flow\n"); From fcdbc1d7a4b638e5d5668de461f320386f3002aa Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 17 Apr 2024 15:19:50 -0400 Subject: [PATCH 220/606] bcachefs: Check for journal entries overruning end of sb clean section Fix a missing bounds check in superblock validation. Note that we don't yet have repair code for this case - repair code for individual items is generally low priority, since the whole superblock is checksummed, validated prior to write, and we have backups. Reported-by: lei lu Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-clean.c | 8 ++++++++ fs/bcachefs/sb-errors_types.h | 3 ++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/sb-clean.c b/fs/bcachefs/sb-clean.c index 5980ba2563fe9..35ca3f138de6f 100644 --- a/fs/bcachefs/sb-clean.c +++ b/fs/bcachefs/sb-clean.c @@ -29,6 +29,14 @@ int bch2_sb_clean_validate_late(struct bch_fs *c, struct bch_sb_field_clean *cle for (entry = clean->start; entry < (struct jset_entry *) vstruct_end(&clean->field); entry = vstruct_next(entry)) { + if (vstruct_end(entry) > vstruct_end(&clean->field)) { + bch_err(c, "journal entry (u64s %u) overran end of superblock clean section (u64s %u) by %zu", + le16_to_cpu(entry->u64s), le32_to_cpu(clean->field.u64s), + (u64 *) vstruct_end(entry) - (u64 *) vstruct_end(&clean->field)); + bch2_sb_error_count(c, BCH_FSCK_ERR_sb_clean_entry_overrun); + return -BCH_ERR_fsck_repair_unimplemented; + } + ret = bch2_journal_entry_validate(c, NULL, entry, le16_to_cpu(c->disk_sb.sb->version), BCH_SB_BIG_ENDIAN(c->disk_sb.sb), diff --git a/fs/bcachefs/sb-errors_types.h b/fs/bcachefs/sb-errors_types.h index 4ca6e7b0d8aae..06c7a644f4a44 100644 --- a/fs/bcachefs/sb-errors_types.h +++ b/fs/bcachefs/sb-errors_types.h @@ -271,7 +271,8 @@ x(btree_root_unreadable_and_scan_found_nothing, 263) \ x(snapshot_node_missing, 264) \ x(dup_backpointer_to_bad_csum_extent, 265) \ - x(btree_bitmap_not_marked, 266) + x(btree_bitmap_not_marked, 266) \ + x(sb_clean_entry_overrun, 267) enum bch_sb_error_id { #define x(t, n) BCH_FSCK_ERR_##t = n, From ec438ac59d7a8bd7e76d3e1201d55071be484626 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 20 Apr 2024 00:31:32 -0400 Subject: [PATCH 221/606] bcachefs: Fix missing call to bch2_fs_allocator_background_exit() Signed-off-by: Kent Overstreet --- fs/bcachefs/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 8daf80a38d60c..88e214c609bb2 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -544,6 +544,7 @@ static void __bch2_fs_free(struct bch_fs *c) bch2_find_btree_nodes_exit(&c->found_btree_nodes); bch2_free_pending_node_rewrites(c); + bch2_fs_allocator_background_exit(c); bch2_fs_sb_errors_exit(c); bch2_fs_counters_exit(c); bch2_fs_snapshots_exit(c); From 32cf5a4eda464d76d553ee3f1b06c4d33d796c52 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 19 Apr 2024 11:51:12 -0400 Subject: [PATCH 222/606] Revert "svcrdma: Add Write chunk WRs to the RPC's Send WR chain" Performance regression reported with NFS/RDMA using Omnipath, bisected to commit e084ee673c77 ("svcrdma: Add Write chunk WRs to the RPC's Send WR chain"). Tracing on the server reports: nfsd-7771 [060] 1758.891809: svcrdma_sq_post_err: cq.id=205 cid=226 sc_sq_avail=13643/851 status=-12 sq_post_err reports ENOMEM, and the rdma->sc_sq_avail (13643) is larger than rdma->sc_sq_depth (851). The number of available Send Queue entries is always supposed to be smaller than the Send Queue depth. That seems like a Send Queue accounting bug in svcrdma. As it's getting to be late in the 6.9-rc cycle, revert this commit. It can be revisited in a subsequent kernel release. Link: https://bugzilla.kernel.org/show_bug.cgi?id=218743 Fixes: e084ee673c77 ("svcrdma: Add Write chunk WRs to the RPC's Send WR chain") Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 13 +--- net/sunrpc/xprtrdma/svc_rdma_rw.c | 86 +++++++-------------------- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 5 +- 3 files changed, 26 insertions(+), 78 deletions(-) diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 24cd199dd6f3a..d33bab33099ab 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -210,7 +210,6 @@ struct svc_rdma_recv_ctxt { */ struct svc_rdma_write_info { struct svcxprt_rdma *wi_rdma; - struct list_head wi_list; const struct svc_rdma_chunk *wi_chunk; @@ -239,10 +238,7 @@ struct svc_rdma_send_ctxt { struct ib_cqe sc_cqe; struct xdr_buf sc_hdrbuf; struct xdr_stream sc_stream; - - struct list_head sc_write_info_list; struct svc_rdma_write_info sc_reply_info; - void *sc_xprt_buf; int sc_page_count; int sc_cur_sge_no; @@ -274,14 +270,11 @@ extern void svc_rdma_cc_init(struct svcxprt_rdma *rdma, extern void svc_rdma_cc_release(struct svcxprt_rdma *rdma, struct svc_rdma_chunk_ctxt *cc, enum dma_data_direction dir); -extern void svc_rdma_write_chunk_release(struct svcxprt_rdma *rdma, - struct svc_rdma_send_ctxt *ctxt); extern void svc_rdma_reply_chunk_release(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt); -extern int svc_rdma_prepare_write_list(struct svcxprt_rdma *rdma, - const struct svc_rdma_pcl *write_pcl, - struct svc_rdma_send_ctxt *sctxt, - const struct xdr_buf *xdr); +extern int svc_rdma_send_write_list(struct svcxprt_rdma *rdma, + const struct svc_rdma_recv_ctxt *rctxt, + const struct xdr_buf *xdr); extern int svc_rdma_prepare_reply_chunk(struct svcxprt_rdma *rdma, const struct svc_rdma_pcl *write_pcl, const struct svc_rdma_pcl *reply_pcl, diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index f2a100c4c81f1..40797114d50a4 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -230,28 +230,6 @@ static void svc_rdma_write_info_free(struct svc_rdma_write_info *info) queue_work(svcrdma_wq, &info->wi_work); } -/** - * svc_rdma_write_chunk_release - Release Write chunk I/O resources - * @rdma: controlling transport - * @ctxt: Send context that is being released - */ -void svc_rdma_write_chunk_release(struct svcxprt_rdma *rdma, - struct svc_rdma_send_ctxt *ctxt) -{ - struct svc_rdma_write_info *info; - struct svc_rdma_chunk_ctxt *cc; - - while (!list_empty(&ctxt->sc_write_info_list)) { - info = list_first_entry(&ctxt->sc_write_info_list, - struct svc_rdma_write_info, wi_list); - list_del(&info->wi_list); - - cc = &info->wi_cc; - svc_rdma_wake_send_waiters(rdma, cc->cc_sqecount); - svc_rdma_write_info_free(info); - } -} - /** * svc_rdma_reply_chunk_release - Release Reply chunk I/O resources * @rdma: controlling transport @@ -308,11 +286,13 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc) struct ib_cqe *cqe = wc->wr_cqe; struct svc_rdma_chunk_ctxt *cc = container_of(cqe, struct svc_rdma_chunk_ctxt, cc_cqe); + struct svc_rdma_write_info *info = + container_of(cc, struct svc_rdma_write_info, wi_cc); switch (wc->status) { case IB_WC_SUCCESS: trace_svcrdma_wc_write(&cc->cc_cid); - return; + break; case IB_WC_WR_FLUSH_ERR: trace_svcrdma_wc_write_flush(wc, &cc->cc_cid); break; @@ -320,11 +300,12 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc) trace_svcrdma_wc_write_err(wc, &cc->cc_cid); } - /* The RDMA Write has flushed, so the client won't get - * some of the outgoing RPC message. Signal the loss - * to the client by closing the connection. - */ - svc_xprt_deferred_close(&rdma->sc_xprt); + svc_rdma_wake_send_waiters(rdma, cc->cc_sqecount); + + if (unlikely(wc->status != IB_WC_SUCCESS)) + svc_xprt_deferred_close(&rdma->sc_xprt); + + svc_rdma_write_info_free(info); } /** @@ -620,19 +601,13 @@ static int svc_rdma_xb_write(const struct xdr_buf *xdr, void *data) return xdr->len; } -/* Link Write WRs for @chunk onto @sctxt's WR chain. - */ -static int svc_rdma_prepare_write_chunk(struct svcxprt_rdma *rdma, - struct svc_rdma_send_ctxt *sctxt, - const struct svc_rdma_chunk *chunk, - const struct xdr_buf *xdr) +static int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, + const struct svc_rdma_chunk *chunk, + const struct xdr_buf *xdr) { struct svc_rdma_write_info *info; struct svc_rdma_chunk_ctxt *cc; - struct ib_send_wr *first_wr; struct xdr_buf payload; - struct list_head *pos; - struct ib_cqe *cqe; int ret; if (xdr_buf_subsegment(xdr, &payload, chunk->ch_position, @@ -648,25 +623,10 @@ static int svc_rdma_prepare_write_chunk(struct svcxprt_rdma *rdma, if (ret != payload.len) goto out_err; - ret = -EINVAL; - if (unlikely(cc->cc_sqecount > rdma->sc_sq_depth)) - goto out_err; - - first_wr = sctxt->sc_wr_chain; - cqe = &cc->cc_cqe; - list_for_each(pos, &cc->cc_rwctxts) { - struct svc_rdma_rw_ctxt *rwc; - - rwc = list_entry(pos, struct svc_rdma_rw_ctxt, rw_list); - first_wr = rdma_rw_ctx_wrs(&rwc->rw_ctx, rdma->sc_qp, - rdma->sc_port_num, cqe, first_wr); - cqe = NULL; - } - sctxt->sc_wr_chain = first_wr; - sctxt->sc_sqecount += cc->cc_sqecount; - list_add(&info->wi_list, &sctxt->sc_write_info_list); - trace_svcrdma_post_write_chunk(&cc->cc_cid, cc->cc_sqecount); + ret = svc_rdma_post_chunk_ctxt(rdma, cc); + if (ret < 0) + goto out_err; return 0; out_err: @@ -675,27 +635,25 @@ static int svc_rdma_prepare_write_chunk(struct svcxprt_rdma *rdma, } /** - * svc_rdma_prepare_write_list - Construct WR chain for sending Write list + * svc_rdma_send_write_list - Send all chunks on the Write list * @rdma: controlling RDMA transport - * @write_pcl: Write list provisioned by the client - * @sctxt: Send WR resources + * @rctxt: Write list provisioned by the client * @xdr: xdr_buf containing an RPC Reply message * * Returns zero on success, or a negative errno if one or more * Write chunks could not be sent. */ -int svc_rdma_prepare_write_list(struct svcxprt_rdma *rdma, - const struct svc_rdma_pcl *write_pcl, - struct svc_rdma_send_ctxt *sctxt, - const struct xdr_buf *xdr) +int svc_rdma_send_write_list(struct svcxprt_rdma *rdma, + const struct svc_rdma_recv_ctxt *rctxt, + const struct xdr_buf *xdr) { struct svc_rdma_chunk *chunk; int ret; - pcl_for_each_chunk(chunk, write_pcl) { + pcl_for_each_chunk(chunk, &rctxt->rc_write_pcl) { if (!chunk->ch_payload_length) break; - ret = svc_rdma_prepare_write_chunk(rdma, sctxt, chunk, xdr); + ret = svc_rdma_send_write_chunk(rdma, chunk, xdr); if (ret < 0) return ret; } diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index dfca39abd16c8..bb5436b719e05 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -142,7 +142,6 @@ svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma) ctxt->sc_send_wr.sg_list = ctxt->sc_sges; ctxt->sc_send_wr.send_flags = IB_SEND_SIGNALED; ctxt->sc_cqe.done = svc_rdma_wc_send; - INIT_LIST_HEAD(&ctxt->sc_write_info_list); ctxt->sc_xprt_buf = buffer; xdr_buf_init(&ctxt->sc_hdrbuf, ctxt->sc_xprt_buf, rdma->sc_max_req_size); @@ -228,7 +227,6 @@ static void svc_rdma_send_ctxt_release(struct svcxprt_rdma *rdma, struct ib_device *device = rdma->sc_cm_id->device; unsigned int i; - svc_rdma_write_chunk_release(rdma, ctxt); svc_rdma_reply_chunk_release(rdma, ctxt); if (ctxt->sc_page_count) @@ -1015,8 +1013,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) if (!p) goto put_ctxt; - ret = svc_rdma_prepare_write_list(rdma, &rctxt->rc_write_pcl, sctxt, - &rqstp->rq_res); + ret = svc_rdma_send_write_list(rdma, rctxt, &rqstp->rq_res); if (ret < 0) goto put_ctxt; From 6e4d9bd110e293513c3c2a3ff2dfa0a0735699e0 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 20 Apr 2024 15:13:20 -0400 Subject: [PATCH 223/606] bcachefs: bkey_cached.btree_trans_barrier_seq needs to be a ulong this stores the SRCU sequence number, which we use to check if an SRCU barrier has elapsed; this is a partial fix for the key cache shrinker not actually freeing. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index e0c982a4195c7..c69b233c41bb3 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -321,9 +321,9 @@ struct bkey_cached { struct btree_bkey_cached_common c; unsigned long flags; + unsigned long btree_trans_barrier_seq; u16 u64s; bool valid; - u32 btree_trans_barrier_seq; struct bkey_cached_key key; struct rhash_head hash; From adfe9357c39e251ffe22ceaa1edb4b7662ed76e6 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 20 Apr 2024 15:35:40 -0400 Subject: [PATCH 224/606] bcachefs: Tweak btree key cache shrinker so it actually frees Freeing key cache items is a multi stage process; we need to wait for an SRCU grace period to elapse, and we handle this ourselves - partially to avoid callback overhead, but primarily so that when allocating we can first allocate from the freed items waiting for an SRCU grace period. Previously, the shrinker was counting the items on the 'waiting for SRCU grace period' lists as items being scanned, but this meant that too many items waiting for an SRCU grace period could prevent it from doing any work at all. After this, we're seeing that items skipped due to the accessed bit are the main cause of the shrinker not making any progress, and we actually want the key cache shrinker to run quite aggressively because reclaimed items will still generally be found (more compactly) in the btree node cache - so we also tweak the shrinker to not count those against nr_to_scan. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_key_cache.c | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index 88a3582a32757..e8c1c530cd95f 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -842,8 +842,6 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink, * Newest freed entries are at the end of the list - once we hit one * that's too new to be freed, we can bail out: */ - scanned += bc->nr_freed_nonpcpu; - list_for_each_entry_safe(ck, t, &bc->freed_nonpcpu, list) { if (!poll_state_synchronize_srcu(&c->btree_trans_barrier, ck->btree_trans_barrier_seq)) @@ -857,11 +855,6 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink, bc->nr_freed_nonpcpu--; } - if (scanned >= nr) - goto out; - - scanned += bc->nr_freed_pcpu; - list_for_each_entry_safe(ck, t, &bc->freed_pcpu, list) { if (!poll_state_synchronize_srcu(&c->btree_trans_barrier, ck->btree_trans_barrier_seq)) @@ -875,9 +868,6 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink, bc->nr_freed_pcpu--; } - if (scanned >= nr) - goto out; - rcu_read_lock(); tbl = rht_dereference_rcu(bc->table.tbl, &bc->table); if (bc->shrink_iter >= tbl->size) @@ -893,12 +883,12 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink, next = rht_dereference_bucket_rcu(pos->next, tbl, bc->shrink_iter); ck = container_of(pos, struct bkey_cached, hash); - if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) + if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { goto next; - - if (test_bit(BKEY_CACHED_ACCESSED, &ck->flags)) + } else if (test_bit(BKEY_CACHED_ACCESSED, &ck->flags)) { clear_bit(BKEY_CACHED_ACCESSED, &ck->flags); - else if (bkey_cached_lock_for_evict(ck)) { + goto next; + } else if (bkey_cached_lock_for_evict(ck)) { bkey_cached_evict(bc, ck); bkey_cached_free(bc, ck); } @@ -916,7 +906,6 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink, } while (scanned < nr && bc->shrink_iter != start); rcu_read_unlock(); -out: memalloc_nofs_restore(flags); srcu_read_unlock(&c->btree_trans_barrier, srcu_idx); mutex_unlock(&bc->lock); From 85ab365f7cdf2b2a713823a93e7e5e94f0529627 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 19 Apr 2024 21:54:32 -0400 Subject: [PATCH 225/606] bcachefs: Fix deadlock in journal write path bch2_journal_write() was incorrectly waiting on earlier journal writes synchronously; this usually worked because most of the time we'd be running in the context of a thread that did a journal_buf_put(), but sometimes we'd be running out of the same workqueue that completes those prior journal writes. Additionally, this makes sure to punt to a workqueue before submitting preflushes - we really don't want to be calling submit_bio() in the main transaction commit path. Signed-off-by: Kent Overstreet --- fs/bcachefs/journal_io.c | 60 ++++++++++++++++++++++++++++------------ 1 file changed, 42 insertions(+), 18 deletions(-) diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 9aa28b52ab926..eb1f9d6f5a196 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -1723,7 +1723,7 @@ static void journal_write_endio(struct bio *bio) percpu_ref_put(&ca->io_ref); } -static CLOSURE_CALLBACK(do_journal_write) +static CLOSURE_CALLBACK(journal_write_submit) { closure_type(w, struct journal_buf, io); struct journal *j = container_of(w, struct journal, buf[w->idx]); @@ -1768,6 +1768,44 @@ static CLOSURE_CALLBACK(do_journal_write) continue_at(cl, journal_write_done, j->wq); } +static CLOSURE_CALLBACK(journal_write_preflush) +{ + closure_type(w, struct journal_buf, io); + struct journal *j = container_of(w, struct journal, buf[w->idx]); + struct bch_fs *c = container_of(j, struct bch_fs, journal); + + if (j->seq_ondisk + 1 != le64_to_cpu(w->data->seq)) { + spin_lock(&j->lock); + closure_wait(&j->async_wait, cl); + spin_unlock(&j->lock); + + continue_at(cl, journal_write_preflush, j->wq); + return; + } + + if (w->separate_flush) { + for_each_rw_member(c, ca) { + percpu_ref_get(&ca->io_ref); + + struct journal_device *ja = &ca->journal; + struct bio *bio = &ja->bio[w->idx]->bio; + bio_reset(bio, ca->disk_sb.bdev, + REQ_OP_WRITE|REQ_SYNC|REQ_META|REQ_PREFLUSH); + bio->bi_end_io = journal_write_endio; + bio->bi_private = ca; + closure_bio_submit(bio, cl); + } + + continue_at(cl, journal_write_submit, j->wq); + } else { + /* + * no need to punt to another work item if we're not waiting on + * preflushes + */ + journal_write_submit(&cl->work); + } +} + static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w) { struct bch_fs *c = container_of(j, struct bch_fs, journal); @@ -2033,23 +2071,9 @@ CLOSURE_CALLBACK(bch2_journal_write) goto err; if (!JSET_NO_FLUSH(w->data)) - closure_wait_event(&j->async_wait, j->seq_ondisk + 1 == le64_to_cpu(w->data->seq)); - - if (!JSET_NO_FLUSH(w->data) && w->separate_flush) { - for_each_rw_member(c, ca) { - percpu_ref_get(&ca->io_ref); - - struct journal_device *ja = &ca->journal; - struct bio *bio = &ja->bio[w->idx]->bio; - bio_reset(bio, ca->disk_sb.bdev, - REQ_OP_WRITE|REQ_SYNC|REQ_META|REQ_PREFLUSH); - bio->bi_end_io = journal_write_endio; - bio->bi_private = ca; - closure_bio_submit(bio, cl); - } - } - - continue_at(cl, do_journal_write, j->wq); + continue_at(cl, journal_write_preflush, j->wq); + else + continue_at(cl, journal_write_submit, j->wq); return; no_io: continue_at(cl, journal_write_done, j->wq); From 0e42f381193d7f9b47922f1c4308e7729a45ba13 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 20 Apr 2024 22:26:47 -0400 Subject: [PATCH 226/606] bcachefs: Fix inode early destruction path discard_new_inode() is the wrong interface to use when we need to free an inode that was never inserted into the inode hash table; we can bypass the whole iput() -> evict() path and replace it with __destroy_inode(); kmem_cache_free() - this fixes a WARN_ON() about I_NEW. Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index b5ea9fa1259d1..fce690007edfc 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -188,7 +188,8 @@ static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_ino BUG_ON(!old); if (unlikely(old != inode)) { - discard_new_inode(&inode->v); + __destroy_inode(&inode->v); + kmem_cache_free(bch2_inode_cache, inode); inode = old; } else { mutex_lock(&c->vfs_inodes_lock); @@ -225,8 +226,10 @@ static struct bch_inode_info *bch2_new_inode(struct btree_trans *trans) if (unlikely(!inode)) { int ret = drop_locks_do(trans, (inode = to_bch_ei(new_inode(c->vfs_sb))) ? 0 : -ENOMEM); - if (ret && inode) - discard_new_inode(&inode->v); + if (ret && inode) { + __destroy_inode(&inode->v); + kmem_cache_free(bch2_inode_cache, inode); + } if (ret) return ERR_PTR(ret); } From e027b71762e84ee9d4ba9ad5401b956b9e83ed2a Mon Sep 17 00:00:00 2001 From: Andrei Simion Date: Thu, 4 Apr 2024 15:38:23 +0300 Subject: [PATCH 227/606] ARM: dts: microchip: at91-sama7g5ek: Replace regulator-suspend-voltage with the valid property By checking the pmic node with microchip,mcp16502.yaml# 'regulator-suspend-voltage' does not match any of the regexes 'pinctrl-[0-9]+' from schema microchip,mcp16502.yaml# which inherits regulator.yaml#. So replace regulator-suspend-voltage with regulator-suspend-microvolt to avoid the inconsitency. Fixes: 85b1304b9daa ("ARM: dts: at91: sama7g5ek: set regulator voltages for standby state") Signed-off-by: Andrei Simion Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20240404123824.19182-2-andrei.simion@microchip.com [claudiu.beznea: added a dot before starting the last sentence in commit description] Signed-off-by: Claudiu Beznea --- arch/arm/boot/dts/microchip/at91-sama7g5ek.dts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/microchip/at91-sama7g5ek.dts b/arch/arm/boot/dts/microchip/at91-sama7g5ek.dts index 217e9b96c61e5..20b2497657ae4 100644 --- a/arch/arm/boot/dts/microchip/at91-sama7g5ek.dts +++ b/arch/arm/boot/dts/microchip/at91-sama7g5ek.dts @@ -293,7 +293,7 @@ regulator-state-standby { regulator-on-in-suspend; - regulator-suspend-voltage = <1150000>; + regulator-suspend-microvolt = <1150000>; regulator-mode = <4>; }; @@ -314,7 +314,7 @@ regulator-state-standby { regulator-on-in-suspend; - regulator-suspend-voltage = <1050000>; + regulator-suspend-microvolt = <1050000>; regulator-mode = <4>; }; @@ -331,7 +331,7 @@ regulator-always-on; regulator-state-standby { - regulator-suspend-voltage = <1800000>; + regulator-suspend-microvolt = <1800000>; regulator-on-in-suspend; }; @@ -346,7 +346,7 @@ regulator-max-microvolt = <3700000>; regulator-state-standby { - regulator-suspend-voltage = <1800000>; + regulator-suspend-microvolt = <1800000>; regulator-on-in-suspend; }; From 1fe5e0a31e6202025a100fc08cd5902f6abbaaba Mon Sep 17 00:00:00 2001 From: Andrei Simion Date: Thu, 4 Apr 2024 15:38:24 +0300 Subject: [PATCH 228/606] ARM: dts: microchip: at91-sama7g54_curiosity: Replace regulator-suspend-voltage with the valid property By checking the pmic node with microchip,mcp16502.yaml# 'regulator-suspend-voltage' does not match any of the regexes 'pinctrl-[0-9]+' from schema microchip,mcp16502.yaml# which inherits regulator.yaml#. So replace regulator-suspend-voltage with regulator-suspend-microvolt to avoid the inconsitency. Fixes: ebd6591f8ddb ("ARM: dts: microchip: sama7g54_curiosity: Add initial device tree of the board") Signed-off-by: Andrei Simion Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20240404123824.19182-3-andrei.simion@microchip.com Signed-off-by: Claudiu Beznea --- arch/arm/boot/dts/microchip/at91-sama7g54_curiosity.dts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/microchip/at91-sama7g54_curiosity.dts b/arch/arm/boot/dts/microchip/at91-sama7g54_curiosity.dts index 4f609e9e510ef..009d2c8324210 100644 --- a/arch/arm/boot/dts/microchip/at91-sama7g54_curiosity.dts +++ b/arch/arm/boot/dts/microchip/at91-sama7g54_curiosity.dts @@ -242,7 +242,7 @@ regulator-state-standby { regulator-on-in-suspend; - regulator-suspend-voltage = <1150000>; + regulator-suspend-microvolt = <1150000>; regulator-mode = <4>; }; @@ -263,7 +263,7 @@ regulator-state-standby { regulator-on-in-suspend; - regulator-suspend-voltage = <1050000>; + regulator-suspend-microvolt = <1050000>; regulator-mode = <4>; }; @@ -280,7 +280,7 @@ regulator-always-on; regulator-state-standby { - regulator-suspend-voltage = <1800000>; + regulator-suspend-microvolt = <1800000>; regulator-on-in-suspend; }; @@ -296,7 +296,7 @@ regulator-always-on; regulator-state-standby { - regulator-suspend-voltage = <3300000>; + regulator-suspend-microvolt = <3300000>; regulator-on-in-suspend; }; From 10947b276b90df38e60aa3efd6b4b7a4b3c92fab Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Fri, 5 Apr 2024 22:21:53 +0200 Subject: [PATCH 229/606] arm64: dts: imx8mp: Fix assigned-clocks for second CSI2 The first CSI2 pixel clock are supplied from IMX8MP_CLK_MEDIA_CAM1_PIX_ROOT, the second CSI2 pixel clock are supplied from IMX8MP_CLK_MEDIA_CAM2_PIX_ROOT, both clock are supplied from SYS_PLL2 and configured using assigned-clock DT properties. Each CSI2 DT node configures its IMX8MP_CLK_MEDIA_CAMn_PIX_ROOT clock. This used to be the case until likely a copy-paste error in commit f78835d1e616 ("arm64: dts: imx8mp: reparent MEDIA_MIPI_PHY1_REF to CLK_24M") which changed the second CSI2 node to configure IMX8MP_CLK_MEDIA_CAM1_PIX_ROOT using its assigned-clocks property. Fix the second CSI2 assigned-clock property back to the original correct IMX8MP_CLK_MEDIA_CAM2_PIX_ROOT . Fixes: f78835d1e616 ("arm64: dts: imx8mp: reparent MEDIA_MIPI_PHY1_REF to CLK_24M") Signed-off-by: Marek Vasut Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mp.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi index bfc5c81a5bd4e..8141926e4ef14 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi @@ -1672,7 +1672,7 @@ <&clk IMX8MP_CLK_MEDIA_MIPI_PHY1_REF_ROOT>, <&clk IMX8MP_CLK_MEDIA_AXI_ROOT>; clock-names = "pclk", "wrap", "phy", "axi"; - assigned-clocks = <&clk IMX8MP_CLK_MEDIA_CAM1_PIX>, + assigned-clocks = <&clk IMX8MP_CLK_MEDIA_CAM2_PIX>, <&clk IMX8MP_CLK_MEDIA_MIPI_PHY1_REF>; assigned-clock-parents = <&clk IMX8MP_SYS_PLL2_1000M>, <&clk IMX8MP_CLK_24M>; From e858beeddfa3a400844c0e22d2118b3b52f1ea5e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 21 Apr 2024 23:32:18 -0400 Subject: [PATCH 230/606] bcachefs: If we run merges at a lower watermark, they must be nonblocking Fix another deadlock related to the merge path; previously, we switched to always running merges at a lower watermark (because they are noncritical); but when we run at a lower watermark we also need to run nonblocking or we've introduced a new deadlock. Signed-off-by: Kent Overstreet Reported-and-tested-by: s@m-h.ug --- fs/bcachefs/btree_update_interior.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 6030c396754f6..b4efd8cc4d1a2 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -1960,7 +1960,11 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, if ((flags & BCH_WATERMARK_MASK) == BCH_WATERMARK_interior_updates) return 0; - flags &= ~BCH_WATERMARK_MASK; + if ((flags & BCH_WATERMARK_MASK) <= BCH_WATERMARK_reclaim) { + flags &= ~BCH_WATERMARK_MASK; + flags |= BCH_WATERMARK_btree; + flags |= BCH_TRANS_COMMIT_journal_reclaim; + } b = trans->paths[path].l[level].b; From 91112fc6212a9be6f3be636d885df9c17395e1a4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 20 Apr 2024 15:44:36 +0200 Subject: [PATCH 231/606] wifi: iwlwifi: mvm: fix link ID management On older (pre-MLD API) devices, we started also calling iwl_mvm_set_link_mapping()/iwl_mvm_unset_link_mapping(), but of course not also iwl_mvm_remove_link(). Since the link ID was only released in iwl_mvm_remove_link() this causes us to run out of FW link IDs very quickly. Fix it by releasing the link ID correctly. Fixes: a8b5d4809b50 ("wifi: iwlwifi: mvm: Configure the link mapping for non-MLD FW") Link: https://msgid.link/20240420154435.dce72db5d5e3.Ic40b454b24f1c7b380a1eedf67455d9cf2f58541@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/link.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/link.c b/drivers/net/wireless/intel/iwlwifi/mvm/link.c index 9f69e04594e49..fe5bba8561d0c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/link.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/link.c @@ -279,6 +279,7 @@ int iwl_mvm_unset_link_mapping(struct iwl_mvm *mvm, struct ieee80211_vif *vif, RCU_INIT_POINTER(mvm->link_id_to_link_conf[link_info->fw_link_id], NULL); + iwl_mvm_release_fw_link_id(mvm, link_info->fw_link_id); return 0; } @@ -296,7 +297,6 @@ int iwl_mvm_remove_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif, return 0; cmd.link_id = cpu_to_le32(link_info->fw_link_id); - iwl_mvm_release_fw_link_id(mvm, link_info->fw_link_id); link_info->fw_link_id = IWL_MVM_FW_LINK_ID_INVALID; cmd.spec_link_id = link_conf->link_id; cmd.phy_id = cpu_to_le32(FW_CTXT_INVALID); From 02cd2d3be1c31a3fd328ee83e576340d34bc57d9 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Fri, 19 Apr 2024 09:38:22 +0300 Subject: [PATCH 232/606] pinctrl: renesas: rzg2l: Configure the interrupt type on resume Commit dce0919c83c3 ("irqchip/renesas-rzg2l: Do not set TIEN and TINT source at the same time") removed the setup of TINT from rzg2l_irqc_irq_enable(). To address the spurious interrupt issue the setup of TINT has been moved in rzg2l_tint_set_edge() through rzg2l_disable_tint_and_set_tint_source(). With this, the interrupts are not properly re-configured after a suspend-to-RAM cycle. To address this issue and avoid spurious interrupts while resumming set the interrupt type before enabling it. Fixes: dce0919c83c3 ("irqchip/renesas-rzg2l: Do not set TIEN and TINT source at the same time") Signed-off-by: Claudiu Beznea Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20240419063822.3467424-1-claudiu.beznea.uj@bp.renesas.com Signed-off-by: Geert Uytterhoeven --- drivers/pinctrl/renesas/pinctrl-rzg2l.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/pinctrl/renesas/pinctrl-rzg2l.c b/drivers/pinctrl/renesas/pinctrl-rzg2l.c index 93916553bcc72..20425afc6b331 100644 --- a/drivers/pinctrl/renesas/pinctrl-rzg2l.c +++ b/drivers/pinctrl/renesas/pinctrl-rzg2l.c @@ -2045,7 +2045,9 @@ static void rzg2l_gpio_irq_restore(struct rzg2l_pinctrl *pctrl) for (unsigned int i = 0; i < RZG2L_TINT_MAX_INTERRUPT; i++) { struct irq_data *data; + unsigned long flags; unsigned int virq; + int ret; if (!pctrl->hwirq[i]) continue; @@ -2063,17 +2065,18 @@ static void rzg2l_gpio_irq_restore(struct rzg2l_pinctrl *pctrl) continue; } - if (!irqd_irq_disabled(data)) { - unsigned long flags; - - /* - * This has to be atomically executed to protect against a concurrent - * interrupt. - */ - raw_spin_lock_irqsave(&pctrl->lock.rlock, flags); + /* + * This has to be atomically executed to protect against a concurrent + * interrupt. + */ + raw_spin_lock_irqsave(&pctrl->lock.rlock, flags); + ret = rzg2l_gpio_irq_set_type(data, irqd_get_trigger_type(data)); + if (!ret && !irqd_irq_disabled(data)) rzg2l_gpio_irq_enable(data); - raw_spin_unlock_irqrestore(&pctrl->lock.rlock, flags); - } + raw_spin_unlock_irqrestore(&pctrl->lock.rlock, flags); + + if (ret) + dev_crit(pctrl->dev, "Failed to set IRQ type for virq=%u\n", virq); } } From 0b8fe5bd73249dc20be2e88a12041f8920797b59 Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Thu, 18 Apr 2024 13:12:07 +0200 Subject: [PATCH 233/606] net: usb: qmi_wwan: add Telit FN920C04 compositions Add the following Telit FN920C04 compositions: 0x10a0: rmnet + tty (AT/NMEA) + tty (AT) + tty (diag) T: Bus=03 Lev=01 Prnt=03 Port=06 Cnt=01 Dev#= 5 Spd=480 MxCh= 0 D: Ver= 2.01 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=10a0 Rev=05.15 S: Manufacturer=Telit Cinterion S: Product=FN920 S: SerialNumber=92c4c4d8 C: #Ifs= 4 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=50 Driver=qmi_wwan E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=03(Int.) MxPS= 8 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=60 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms 0x10a4: rmnet + tty (AT) + tty (AT) + tty (diag) T: Bus=03 Lev=01 Prnt=03 Port=06 Cnt=01 Dev#= 8 Spd=480 MxCh= 0 D: Ver= 2.01 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=10a4 Rev=05.15 S: Manufacturer=Telit Cinterion S: Product=FN920 S: SerialNumber=92c4c4d8 C: #Ifs= 4 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=50 Driver=qmi_wwan E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=03(Int.) MxPS= 8 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms 0x10a9: rmnet + tty (AT) + tty (diag) + DPL (data packet logging) + adb T: Bus=03 Lev=01 Prnt=03 Port=06 Cnt=01 Dev#= 9 Spd=480 MxCh= 0 D: Ver= 2.01 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=10a9 Rev=05.15 S: Manufacturer=Telit Cinterion S: Product=FN920 S: SerialNumber=92c4c4d8 C: #Ifs= 5 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=50 Driver=qmi_wwan E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=03(Int.) MxPS= 8 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 3 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=80 Driver=(none) E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none) E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Daniele Palmas Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index edc34402e787f..a5469cf5cf670 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1368,6 +1368,9 @@ static const struct usb_device_id products[] = { {QMI_QUIRK_SET_DTR(0x1bc7, 0x1060, 2)}, /* Telit LN920 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1070, 2)}, /* Telit FN990 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1080, 2)}, /* Telit FE990 */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x10a0, 0)}, /* Telit FN920C04 */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x10a4, 0)}, /* Telit FN920C04 */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x10a9, 0)}, /* Telit FN920C04 */ {QMI_FIXED_INTF(0x1bc7, 0x1100, 3)}, /* Telit ME910 */ {QMI_FIXED_INTF(0x1bc7, 0x1101, 3)}, /* Telit ME910 dual modem */ {QMI_FIXED_INTF(0x1bc7, 0x1200, 5)}, /* Telit LE920 */ From c58e88d49097bd12dfcfef4f075b43f5d5830941 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 20 Apr 2024 07:01:16 +0000 Subject: [PATCH 234/606] icmp: prevent possible NULL dereferences from icmp_build_probe() First problem is a double call to __in_dev_get_rcu(), because the second one could return NULL. if (__in_dev_get_rcu(dev) && __in_dev_get_rcu(dev)->ifa_list) Second problem is a read from dev->ip6_ptr with no NULL check: if (!list_empty(&rcu_dereference(dev->ip6_ptr)->addr_list)) Use the correct RCU API to fix these. v2: add missing include Fixes: d329ea5bd884 ("icmp: add response to RFC 8335 PROBE messages") Signed-off-by: Eric Dumazet Cc: Andreas Roeseler Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/icmp.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index e63a3bf996176..437e782b9663b 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -92,6 +92,7 @@ #include #include #include +#include /* * Build xmit assembly blocks @@ -1032,6 +1033,8 @@ bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr) struct icmp_ext_hdr *ext_hdr, _ext_hdr; struct icmp_ext_echo_iio *iio, _iio; struct net *net = dev_net(skb->dev); + struct inet6_dev *in6_dev; + struct in_device *in_dev; struct net_device *dev; char buff[IFNAMSIZ]; u16 ident_len; @@ -1115,10 +1118,15 @@ bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr) /* Fill bits in reply message */ if (dev->flags & IFF_UP) status |= ICMP_EXT_ECHOREPLY_ACTIVE; - if (__in_dev_get_rcu(dev) && __in_dev_get_rcu(dev)->ifa_list) + + in_dev = __in_dev_get_rcu(dev); + if (in_dev && rcu_access_pointer(in_dev->ifa_list)) status |= ICMP_EXT_ECHOREPLY_IPV4; - if (!list_empty(&rcu_dereference(dev->ip6_ptr)->addr_list)) + + in6_dev = __in6_dev_get(dev); + if (in6_dev && !list_empty(&in6_dev->addr_list)) status |= ICMP_EXT_ECHOREPLY_IPV6; + dev_put(dev); icmphdr->un.echo.sequence |= htons(status); return true; From 70dcdf5f8c41ce2379d48d497db10af4a09ea075 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Sun, 21 Apr 2024 14:44:58 -0500 Subject: [PATCH 235/606] mailmap: add entries for Alex Elder Define my kernel.org address to be the canonical one, and add mailmap entries for the various addresses (including typos) that have been used over the years. Signed-off-by: Alex Elder Signed-off-by: David S. Miller --- .mailmap | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.mailmap b/.mailmap index 8284692f96107..f932ce611898e 100644 --- a/.mailmap +++ b/.mailmap @@ -38,6 +38,16 @@ Alexei Starovoitov Alexei Starovoitov Alexei Starovoitov Alexey Makhalov +Alex Elder +Alex Elder +Alex Elder +Alex Elder +Alex Elder +Alex Elder +Alex Elder +Alex Elder +Alex Elder +Alex Elder Alex Hung Alex Shi Alex Shi From 843c3280686fc1a83d89ee1e0b5599c9f6b09d0c Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Tue, 16 Apr 2024 13:42:19 +0200 Subject: [PATCH 236/606] s390/mm: Fix storage key clearing for guest huge pages The function __storage_key_init_range() expects the end address to be the first byte outside the range to be initialized. I.e. end - start should be the size of the area to be initialized. The current code works because __storage_key_init_range() will still loop over every page in the range, but it is slower than using sske_frame(). Fixes: 964c2c05c9f3 ("s390/mm: Clear huge page storage keys on enable_skey") Reviewed-by: Heiko Carstens Signed-off-by: Claudio Imbrenda Link: https://lore.kernel.org/r/20240416114220.28489-2-imbrenda@linux.ibm.com Signed-off-by: Alexander Gordeev --- arch/s390/mm/gmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 094b43b121cd5..12d22a7fa32fd 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -2661,7 +2661,7 @@ static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr, return 0; start = pmd_val(*pmd) & HPAGE_MASK; - end = start + HPAGE_SIZE - 1; + end = start + HPAGE_SIZE; __storage_key_init_range(start, end); set_bit(PG_arch_1, &page->flags); cond_resched(); From 412050af2ea39407fe43324b0be4ab641530ce88 Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Tue, 16 Apr 2024 13:42:20 +0200 Subject: [PATCH 237/606] s390/mm: Fix clearing storage keys for huge pages The function __storage_key_init_range() expects the end address to be the first byte outside the range to be initialized. I.e. end - start should be the size of the area to be initialized. The current code works because __storage_key_init_range() will still loop over every page in the range, but it is slower than using sske_frame(). Fixes: 3afdfca69870 ("s390/mm: Clear skeys for newly mapped huge guest pmds") Reviewed-by: Heiko Carstens Signed-off-by: Claudio Imbrenda Link: https://lore.kernel.org/r/20240416114220.28489-3-imbrenda@linux.ibm.com Signed-off-by: Alexander Gordeev --- arch/s390/mm/hugetlbpage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index c2e8242bd15dd..dc3db86e13ffb 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -139,7 +139,7 @@ static void clear_huge_pte_skeys(struct mm_struct *mm, unsigned long rste) } if (!test_and_set_bit(PG_arch_1, &page->flags)) - __storage_key_init_range(paddr, paddr + size - 1); + __storage_key_init_range(paddr, paddr + size); } void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr, From 4fd1edcdf13c0d234543ecf502092be65c5177db Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 19 Apr 2024 16:02:00 +0800 Subject: [PATCH 238/606] bridge/br_netlink.c: no need to return void function br_info_notify is a void function. There is no need to return. Fixes: b6d0425b816e ("bridge: cfm: Netlink Notifications.") Signed-off-by: Hangbin Liu Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 2cf4fc7562639..f17dbac7d8284 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -667,7 +667,7 @@ void br_ifinfo_notify(int event, const struct net_bridge *br, { u32 filter = RTEXT_FILTER_BRVLAN_COMPRESSED; - return br_info_notify(event, br, port, filter); + br_info_notify(event, br, port, filter); } /* From 11b1b8bc2b98e21ddf47e08b56c21502c685b2c3 Mon Sep 17 00:00:00 2001 From: Tianchen Ding Date: Wed, 6 Mar 2024 10:21:32 +0800 Subject: [PATCH 239/606] sched/eevdf: Always update V if se->on_rq when reweighting reweight_eevdf() needs the latest V to do accurate calculation for new ve and vd. So update V unconditionally when se is runnable. Fixes: eab03c23c2a1 ("sched/eevdf: Fix vruntime adjustment on reweight") Suggested-by: Abel Wu Signed-off-by: Tianchen Ding Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Abel Wu Tested-by: K Prateek Nayak Tested-by: Chen Yu Link: https://lore.kernel.org/r/20240306022133.81008-2-dtcccc@linux.alibaba.com --- kernel/sched/fair.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 03be0d1330a6b..5551ce2af73e7 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3790,9 +3790,8 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, if (se->on_rq) { /* commit outstanding execution time */ - if (curr) - update_curr(cfs_rq); - else + update_curr(cfs_rq); + if (!curr) __dequeue_entity(cfs_rq, se); update_load_sub(&cfs_rq->load, se->load.weight); } From afae8002b4fd3560c8f5f1567f3c3202c30a70fa Mon Sep 17 00:00:00 2001 From: Tianchen Ding Date: Wed, 6 Mar 2024 10:21:33 +0800 Subject: [PATCH 240/606] sched/eevdf: Fix miscalculation in reweight_entity() when se is not curr reweight_eevdf() only keeps V unchanged inside itself. When se != cfs_rq->curr, it would be dequeued from rb tree first. So that V is changed and the result is wrong. Pass the original V to reweight_eevdf() to fix this issue. Fixes: eab03c23c2a1 ("sched/eevdf: Fix vruntime adjustment on reweight") Signed-off-by: Tianchen Ding [peterz: flip if() condition for clarity] Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Abel Wu Link: https://lkml.kernel.org/r/20240306022133.81008-3-dtcccc@linux.alibaba.com --- kernel/sched/fair.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 5551ce2af73e7..6d266917d38d7 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3676,11 +3676,10 @@ static inline void dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { } #endif -static void reweight_eevdf(struct cfs_rq *cfs_rq, struct sched_entity *se, +static void reweight_eevdf(struct sched_entity *se, u64 avruntime, unsigned long weight) { unsigned long old_weight = se->load.weight; - u64 avruntime = avg_vruntime(cfs_rq); s64 vlag, vslice; /* @@ -3787,24 +3786,26 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, unsigned long weight) { bool curr = cfs_rq->curr == se; + u64 avruntime; if (se->on_rq) { /* commit outstanding execution time */ update_curr(cfs_rq); + avruntime = avg_vruntime(cfs_rq); if (!curr) __dequeue_entity(cfs_rq, se); update_load_sub(&cfs_rq->load, se->load.weight); } dequeue_load_avg(cfs_rq, se); - if (!se->on_rq) { + if (se->on_rq) { + reweight_eevdf(se, avruntime, weight); + } else { /* * Because we keep se->vlag = V - v_i, while: lag_i = w_i*(V - v_i), * we need to scale se->vlag when w_i changes. */ se->vlag = div_s64(se->vlag * se->load.weight, weight); - } else { - reweight_eevdf(cfs_rq, se, weight); } update_load_set(&se->load, weight); From 1560d1f6eb6b398bddd80c16676776c0325fe5fe Mon Sep 17 00:00:00 2001 From: Xuewen Yan Date: Mon, 22 Apr 2024 16:22:38 +0800 Subject: [PATCH 241/606] sched/eevdf: Prevent vlag from going out of bounds in reweight_eevdf() It was possible to have pick_eevdf() return NULL, which then causes a NULL-deref. This turned out to be due to entity_eligible() returning falsely negative because of a s64 multiplcation overflow. Specifically, reweight_eevdf() computes the vlag without considering the limit placed upon vlag as update_entity_lag() does, and then the scaling multiplication (remember that weight is 20bit fixed point) can overflow. This then leads to the new vruntime being weird which then causes the above entity_eligible() to go side-ways and claim nothing is eligible. Thus limit the range of vlag accordingly. All this was quite rare, but fatal when it does happen. Closes: https://lore.kernel.org/all/ZhuYyrh3mweP_Kd8@nz.home/ Closes: https://lore.kernel.org/all/CA+9S74ih+45M_2TPUY_mPPVDhNvyYfy1J1ftSix+KjiTVxg8nw@mail.gmail.com/ Closes: https://lore.kernel.org/lkml/202401301012.2ed95df0-oliver.sang@intel.com/ Fixes: eab03c23c2a1 ("sched/eevdf: Fix vruntime adjustment on reweight") Reported-by: Sergei Trofimovich Reported-by: Igor Raits Reported-by: Breno Leitao Reported-by: kernel test robot Reported-by: Yujie Liu Signed-off-by: Xuewen Yan Reviewed-and-tested-by: Chen Yu Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20240422082238.5784-1-xuewen.yan@unisoc.com --- kernel/sched/fair.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 6d266917d38d7..c62805dbd6088 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -696,15 +696,21 @@ u64 avg_vruntime(struct cfs_rq *cfs_rq) * * XXX could add max_slice to the augmented data to track this. */ -static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se) +static s64 entity_lag(u64 avruntime, struct sched_entity *se) { - s64 lag, limit; + s64 vlag, limit; + + vlag = avruntime - se->vruntime; + limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se); + + return clamp(vlag, -limit, limit); +} +static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se) +{ SCHED_WARN_ON(!se->on_rq); - lag = avg_vruntime(cfs_rq) - se->vruntime; - limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se); - se->vlag = clamp(lag, -limit, limit); + se->vlag = entity_lag(avg_vruntime(cfs_rq), se); } /* @@ -3760,7 +3766,7 @@ static void reweight_eevdf(struct sched_entity *se, u64 avruntime, * = V - vl' */ if (avruntime != se->vruntime) { - vlag = (s64)(avruntime - se->vruntime); + vlag = entity_lag(avruntime, se); vlag = div_s64(vlag * old_weight, weight); se->vruntime = avruntime - vlag; } From 7474b1c82be3780692d537d331f9aa7fc1e5a368 Mon Sep 17 00:00:00 2001 From: Vikas Gupta Date: Fri, 19 Apr 2024 11:34:47 -0700 Subject: [PATCH 242/606] bnxt_en: refactor reset close code Introduce bnxt_fw_fatal_close() API which can be used to stop data path and disable device when firmware is in fatal state. Signed-off-by: Vikas Gupta Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 57e61f9631678..c852f87c842f3 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -13037,6 +13037,16 @@ static void bnxt_rx_ring_reset(struct bnxt *bp) bnxt_rtnl_unlock_sp(bp); } +static void bnxt_fw_fatal_close(struct bnxt *bp) +{ + bnxt_tx_disable(bp); + bnxt_disable_napi(bp); + bnxt_disable_int_sync(bp); + bnxt_free_irq(bp); + bnxt_clear_int_mode(bp); + pci_disable_device(bp->pdev); +} + static void bnxt_fw_reset_close(struct bnxt *bp) { bnxt_ulp_stop(bp); @@ -13050,12 +13060,7 @@ static void bnxt_fw_reset_close(struct bnxt *bp) pci_read_config_word(bp->pdev, PCI_SUBSYSTEM_ID, &val); if (val == 0xffff) bp->fw_reset_min_dsecs = 0; - bnxt_tx_disable(bp); - bnxt_disable_napi(bp); - bnxt_disable_int_sync(bp); - bnxt_free_irq(bp); - bnxt_clear_int_mode(bp); - pci_disable_device(bp->pdev); + bnxt_fw_fatal_close(bp); } __bnxt_close_nic(bp, true, false); bnxt_vf_reps_free(bp); From a1acdc226baec331512f815d6ac9dd6f8435cc7f Mon Sep 17 00:00:00 2001 From: Vikas Gupta Date: Fri, 19 Apr 2024 11:34:48 -0700 Subject: [PATCH 243/606] bnxt_en: Fix the PCI-AER routines We do not support two simultaneous recoveries so check for reset flag, BNXT_STATE_IN_FW_RESET, and do not proceed with AER further. When the pci channel state is pci_channel_io_frozen, the PCIe link can not be trusted so we disable the traffic immediately and stop BAR access by calling bnxt_fw_fatal_close(). BAR access after AER fatal error can cause an NMI. Fixes: f75d9a0aa967 ("bnxt_en: Re-write PCI BARs after PCI fatal error.") Signed-off-by: Vikas Gupta Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index c852f87c842f3..86c1c30c70d5f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -15378,6 +15378,7 @@ static pci_ers_result_t bnxt_io_error_detected(struct pci_dev *pdev, { struct net_device *netdev = pci_get_drvdata(pdev); struct bnxt *bp = netdev_priv(netdev); + bool abort = false; netdev_info(netdev, "PCI I/O error detected\n"); @@ -15386,16 +15387,27 @@ static pci_ers_result_t bnxt_io_error_detected(struct pci_dev *pdev, bnxt_ulp_stop(bp); - if (state == pci_channel_io_perm_failure) { + if (test_and_set_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) { + netdev_err(bp->dev, "Firmware reset already in progress\n"); + abort = true; + } + + if (abort || state == pci_channel_io_perm_failure) { rtnl_unlock(); return PCI_ERS_RESULT_DISCONNECT; } - if (state == pci_channel_io_frozen) + /* Link is not reliable anymore if state is pci_channel_io_frozen + * so we disable bus master to prevent any potential bad DMAs before + * freeing kernel memory. + */ + if (state == pci_channel_io_frozen) { set_bit(BNXT_STATE_PCI_CHANNEL_IO_FROZEN, &bp->state); + bnxt_fw_fatal_close(bp); + } if (netif_running(netdev)) - bnxt_close(netdev); + __bnxt_close_nic(bp, true, true); if (pci_is_enabled(pdev)) pci_disable_device(pdev); @@ -15479,6 +15491,7 @@ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev) } reset_exit: + clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); bnxt_clear_reservations(bp, true); rtnl_unlock(); From 41e54045b741daf61e03c82d442227af3d12111f Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Fri, 19 Apr 2024 11:34:49 -0700 Subject: [PATCH 244/606] bnxt_en: Fix error recovery for 5760X (P7) chips During error recovery, such as AER fatal error slot reset, we call bnxt_try_map_fw_health_reg() to try to get access to the health register to determine the firmware state. Fix bnxt_try_map_fw_health_reg() to recognize the P7 chip correctly and set up the health register. This fixes this type of AER slot reset failure: bnxt_en 0000:04:00.0: AER: PCIe Bus Error: severity=Uncorrectable (Fatal), type=Inaccessible, (Unregistered Agent ID) bnxt_en 0000:04:00.0 enp4s0f0np0: PCI I/O error detected bnxt_en 0000:04:00.0 bnxt_re0: Handle device suspend call bnxt_en 0000:04:00.1 enp4s0f1np1: PCI I/O error detected bnxt_en 0000:04:00.1 bnxt_re1: Handle device suspend call pcieport 0000:00:02.0: AER: Root Port link has been reset (0) bnxt_en 0000:04:00.0 enp4s0f0np0: PCI Slot Reset bnxt_en 0000:04:00.0: enabling device (0000 -> 0002) bnxt_en 0000:04:00.0: Firmware not ready bnxt_en 0000:04:00.1 enp4s0f1np1: PCI Slot Reset bnxt_en 0000:04:00.1: enabling device (0000 -> 0002) bnxt_en 0000:04:00.1: Firmware not ready pcieport 0000:00:02.0: AER: device recovery failed Fixes: a432a45bdba4 ("bnxt_en: Define basic P7 macros") Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 86c1c30c70d5f..ed04a90a4fdde 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -9089,7 +9089,7 @@ static void bnxt_try_map_fw_health_reg(struct bnxt *bp) BNXT_FW_HEALTH_WIN_BASE + BNXT_GRC_REG_CHIP_NUM); } - if (!BNXT_CHIP_P5(bp)) + if (!BNXT_CHIP_P5_PLUS(bp)) return; status_loc = BNXT_GRC_REG_STATUS_P5 | From 784354349d2c988590c63a5a001ca37b2a6d4da1 Mon Sep 17 00:00:00 2001 From: Nayna Jain Date: Wed, 17 Apr 2024 23:12:30 -0400 Subject: [PATCH 245/606] powerpc/pseries: make max polling consistent for longer H_CALLs Currently, plpks_confirm_object_flushed() function polls for 5msec in total instead of 5sec. Keep max polling time consistent for all the H_CALLs, which take longer than expected, to be 5sec. Also, make use of fsleep() everywhere to insert delay. Reported-by: Nageswara R Sastry Fixes: 2454a7af0f2a ("powerpc/pseries: define driver for Platform KeyStore") Signed-off-by: Nayna Jain Tested-by: Nageswara R Sastry Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://msgid.link/20240418031230.170954-1-nayna@linux.ibm.com --- arch/powerpc/include/asm/plpks.h | 5 ++--- arch/powerpc/platforms/pseries/plpks.c | 10 +++++----- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/plpks.h b/arch/powerpc/include/asm/plpks.h index 23b77027c9163..7a84069759b03 100644 --- a/arch/powerpc/include/asm/plpks.h +++ b/arch/powerpc/include/asm/plpks.h @@ -44,9 +44,8 @@ #define PLPKS_MAX_DATA_SIZE 4000 // Timeouts for PLPKS operations -#define PLPKS_MAX_TIMEOUT 5000 // msec -#define PLPKS_FLUSH_SLEEP 10 // msec -#define PLPKS_FLUSH_SLEEP_RANGE 400 +#define PLPKS_MAX_TIMEOUT (5 * USEC_PER_SEC) +#define PLPKS_FLUSH_SLEEP 10000 // usec struct plpks_var { char *component; diff --git a/arch/powerpc/platforms/pseries/plpks.c b/arch/powerpc/platforms/pseries/plpks.c index febe18f251d0c..4a595493d28ae 100644 --- a/arch/powerpc/platforms/pseries/plpks.c +++ b/arch/powerpc/platforms/pseries/plpks.c @@ -415,8 +415,7 @@ static int plpks_confirm_object_flushed(struct label *label, break; } - usleep_range(PLPKS_FLUSH_SLEEP, - PLPKS_FLUSH_SLEEP + PLPKS_FLUSH_SLEEP_RANGE); + fsleep(PLPKS_FLUSH_SLEEP); timeout = timeout + PLPKS_FLUSH_SLEEP; } while (timeout < PLPKS_MAX_TIMEOUT); @@ -464,9 +463,10 @@ int plpks_signed_update_var(struct plpks_var *var, u64 flags) continuetoken = retbuf[0]; if (pseries_status_to_err(rc) == -EBUSY) { - int delay_ms = get_longbusy_msecs(rc); - mdelay(delay_ms); - timeout += delay_ms; + int delay_us = get_longbusy_msecs(rc) * 1000; + + fsleep(delay_us); + timeout += delay_us; } rc = pseries_status_to_err(rc); } while (rc == -EBUSY && timeout < PLPKS_MAX_TIMEOUT); From c6f48506ba30c722dd9d89aa6a40eb1926277dff Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Fri, 19 Apr 2024 18:28:32 +0000 Subject: [PATCH 246/606] arm32, bpf: Reimplement sign-extension mov instruction The current implementation of the mov instruction with sign extension has the following problems: 1. It clobbers the source register if it is not stacked because it sign extends the source and then moves it to the destination. 2. If the dst_reg is stacked, the current code doesn't write the value back in case of 64-bit mov. 3. There is room for improvement by emitting fewer instructions. The steps for fixing this and the instructions emitted by the JIT are explained below with examples in all combinations: Case A: offset == 32: ===================== Case A.1: src and dst are stacked registers: -------------------------------------------- 1. Load src_lo into tmp_lo 2. Store tmp_lo into dst_lo 3. Sign extend tmp_lo into tmp_hi 4. Store tmp_hi to dst_hi Example: r3 = (s32)r3 r3 is a stacked register ldr r6, [r11, #-16] // Load r3_lo into tmp_lo // str to dst_lo is not emitted because src_lo == dst_lo asr r7, r6, #31 // Sign extend tmp_lo into tmp_hi str r7, [r11, #-12] // Store tmp_hi into r3_hi Case A.2: src is stacked but dst is not: ---------------------------------------- 1. Load src_lo into dst_lo 2. Sign extend dst_lo into dst_hi Example: r6 = (s32)r3 r6 maps to {ARM_R5, ARM_R4} and r3 is stacked ldr r4, [r11, #-16] // Load r3_lo into r6_lo asr r5, r4, #31 // Sign extend r6_lo into r6_hi Case A.3: src is not stacked but dst is stacked: ------------------------------------------------ 1. Store src_lo into dst_lo 2. Sign extend src_lo into tmp_hi 3. Store tmp_hi to dst_hi Example: r3 = (s32)r6 r3 is stacked and r6 maps to {ARM_R5, ARM_R4} str r4, [r11, #-16] // Store r6_lo to r3_lo asr r7, r4, #31 // Sign extend r6_lo into tmp_hi str r7, [r11, #-12] // Store tmp_hi to dest_hi Case A.4: Both src and dst are not stacked: ------------------------------------------- 1. Mov src_lo into dst_lo 2. Sign extend src_lo into dst_hi Example: (bf) r6 = (s32)r6 r6 maps to {ARM_R5, ARM_R4} // Mov not emitted because dst == src asr r5, r4, #31 // Sign extend r6_lo into r6_hi Case B: offset != 32: ===================== Case B.1: src and dst are stacked registers: -------------------------------------------- 1. Load src_lo into tmp_lo 2. Sign extend tmp_lo according to offset. 3. Store tmp_lo into dst_lo 4. Sign extend tmp_lo into tmp_hi 5. Store tmp_hi to dst_hi Example: r9 = (s8)r3 r9 and r3 are both stacked registers ldr r6, [r11, #-16] // Load r3_lo into tmp_lo lsl r6, r6, #24 // Sign extend tmp_lo asr r6, r6, #24 // .. str r6, [r11, #-56] // Store tmp_lo to r9_lo asr r7, r6, #31 // Sign extend tmp_lo to tmp_hi str r7, [r11, #-52] // Store tmp_hi to r9_hi Case B.2: src is stacked but dst is not: ---------------------------------------- 1. Load src_lo into dst_lo 2. Sign extend dst_lo according to offset. 3. Sign extend tmp_lo into dst_hi Example: r6 = (s8)r3 r6 maps to {ARM_R5, ARM_R4} and r3 is stacked ldr r4, [r11, #-16] // Load r3_lo to r6_lo lsl r4, r4, #24 // Sign extend r6_lo asr r4, r4, #24 // .. asr r5, r4, #31 // Sign extend r6_lo into r6_hi Case B.3: src is not stacked but dst is stacked: ------------------------------------------------ 1. Sign extend src_lo into tmp_lo according to offset. 2. Store tmp_lo into dst_lo. 3. Sign extend src_lo into tmp_hi. 4. Store tmp_hi to dst_hi. Example: r3 = (s8)r1 r3 is stacked and r1 maps to {ARM_R3, ARM_R2} lsl r6, r2, #24 // Sign extend r1_lo to tmp_lo asr r6, r6, #24 // .. str r6, [r11, #-16] // Store tmp_lo to r3_lo asr r7, r6, #31 // Sign extend tmp_lo to tmp_hi str r7, [r11, #-12] // Store tmp_hi to r3_hi Case B.4: Both src and dst are not stacked: ------------------------------------------- 1. Sign extend src_lo into dst_lo according to offset. 2. Sign extend dst_lo into dst_hi. Example: r6 = (s8)r1 r6 maps to {ARM_R5, ARM_R4} and r1 maps to {ARM_R3, ARM_R2} lsl r4, r2, #24 // Sign extend r1_lo to r6_lo asr r4, r4, #24 // .. asr r5, r4, #31 // Sign extend r6_lo to r6_hi Fixes: fc832653fa0d ("arm32, bpf: add support for sign-extension mov instruction") Reported-by: syzbot+186522670e6722692d86@syzkaller.appspotmail.com Signed-off-by: Puranjay Mohan Signed-off-by: Daniel Borkmann Reviewed-by: Russell King (Oracle) Closes: https://lore.kernel.org/all/000000000000e9a8d80615163f2a@google.com Link: https://lore.kernel.org/bpf/20240419182832.27707-1-puranjay@kernel.org --- arch/arm/net/bpf_jit_32.c | 56 ++++++++++++++++++++++++++++++--------- 1 file changed, 43 insertions(+), 13 deletions(-) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 1d672457d02ff..72b5cd697f5d9 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -871,16 +871,11 @@ static inline void emit_a32_alu_r64(const bool is64, const s8 dst[], } /* dst = src (4 bytes)*/ -static inline void emit_a32_mov_r(const s8 dst, const s8 src, const u8 off, - struct jit_ctx *ctx) { +static inline void emit_a32_mov_r(const s8 dst, const s8 src, struct jit_ctx *ctx) { const s8 *tmp = bpf2a32[TMP_REG_1]; s8 rt; rt = arm_bpf_get_reg32(src, tmp[0], ctx); - if (off && off != 32) { - emit(ARM_LSL_I(rt, rt, 32 - off), ctx); - emit(ARM_ASR_I(rt, rt, 32 - off), ctx); - } arm_bpf_put_reg32(dst, rt, ctx); } @@ -889,15 +884,15 @@ static inline void emit_a32_mov_r64(const bool is64, const s8 dst[], const s8 src[], struct jit_ctx *ctx) { if (!is64) { - emit_a32_mov_r(dst_lo, src_lo, 0, ctx); + emit_a32_mov_r(dst_lo, src_lo, ctx); if (!ctx->prog->aux->verifier_zext) /* Zero out high 4 bytes */ emit_a32_mov_i(dst_hi, 0, ctx); } else if (__LINUX_ARM_ARCH__ < 6 && ctx->cpu_architecture < CPU_ARCH_ARMv5TE) { /* complete 8 byte move */ - emit_a32_mov_r(dst_lo, src_lo, 0, ctx); - emit_a32_mov_r(dst_hi, src_hi, 0, ctx); + emit_a32_mov_r(dst_lo, src_lo, ctx); + emit_a32_mov_r(dst_hi, src_hi, ctx); } else if (is_stacked(src_lo) && is_stacked(dst_lo)) { const u8 *tmp = bpf2a32[TMP_REG_1]; @@ -917,17 +912,52 @@ static inline void emit_a32_mov_r64(const bool is64, const s8 dst[], static inline void emit_a32_movsx_r64(const bool is64, const u8 off, const s8 dst[], const s8 src[], struct jit_ctx *ctx) { const s8 *tmp = bpf2a32[TMP_REG_1]; - const s8 *rt; + s8 rs; + s8 rd; - rt = arm_bpf_get_reg64(dst, tmp, ctx); + if (is_stacked(dst_lo)) + rd = tmp[1]; + else + rd = dst_lo; + rs = arm_bpf_get_reg32(src_lo, rd, ctx); + /* rs may be one of src[1], dst[1], or tmp[1] */ + + /* Sign extend rs if needed. If off == 32, lower 32-bits of src are moved to dst and sign + * extension only happens in the upper 64 bits. + */ + if (off != 32) { + /* Sign extend rs into rd */ + emit(ARM_LSL_I(rd, rs, 32 - off), ctx); + emit(ARM_ASR_I(rd, rd, 32 - off), ctx); + } else { + rd = rs; + } + + /* Write rd to dst_lo + * + * Optimization: + * Assume: + * 1. dst == src and stacked. + * 2. off == 32 + * + * In this case src_lo was loaded into rd(tmp[1]) but rd was not sign extended as off==32. + * So, we don't need to write rd back to dst_lo as they have the same value. + * This saves us one str instruction. + */ + if (dst_lo != src_lo || off != 32) + arm_bpf_put_reg32(dst_lo, rd, ctx); - emit_a32_mov_r(dst_lo, src_lo, off, ctx); if (!is64) { if (!ctx->prog->aux->verifier_zext) /* Zero out high 4 bytes */ emit_a32_mov_i(dst_hi, 0, ctx); } else { - emit(ARM_ASR_I(rt[0], rt[1], 31), ctx); + if (is_stacked(dst_hi)) { + emit(ARM_ASR_I(tmp[0], rd, 31), ctx); + arm_bpf_put_reg32(dst_hi, tmp[0], ctx); + } else { + emit(ARM_ASR_I(dst_hi, rd, 31), ctx); + } } } From d05dcfdf5e1659b2949d13060284eff3888b644e Mon Sep 17 00:00:00 2001 From: Eric Van Hensbergen Date: Mon, 15 Apr 2024 20:24:37 +0000 Subject: [PATCH 247/606] fs/9p: mitigate inode collisions Detect and mitigate inode collsions that now occur since we fixed 9p generating duplicate inode structures. Underlying cause of these appears to be a race condition between reuse of inode numbers in underlying file system and cleanup of inode numbers in the client. Enabling caching makes this much more likely to happen as it increases cleanup latency due to writebacks. Reported-by: Kent Overstreet Signed-off-by: Eric Van Hensbergen --- fs/9p/v9fs.h | 11 ++++++----- fs/9p/vfs_inode.c | 37 +++++++++++++++++++++++++++++-------- fs/9p/vfs_inode_dotl.c | 28 ++++++++++++++++++++-------- fs/9p/vfs_super.c | 2 +- 4 files changed, 56 insertions(+), 22 deletions(-) diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h index 9defa12208f98..1775fcc7f0e8e 100644 --- a/fs/9p/v9fs.h +++ b/fs/9p/v9fs.h @@ -179,13 +179,14 @@ extern int v9fs_vfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags); -extern struct inode *v9fs_fid_iget(struct super_block *sb, struct p9_fid *fid); +extern struct inode *v9fs_fid_iget(struct super_block *sb, struct p9_fid *fid, + bool new); extern const struct inode_operations v9fs_dir_inode_operations_dotl; extern const struct inode_operations v9fs_file_inode_operations_dotl; extern const struct inode_operations v9fs_symlink_inode_operations_dotl; extern const struct netfs_request_ops v9fs_req_ops; extern struct inode *v9fs_fid_iget_dotl(struct super_block *sb, - struct p9_fid *fid); + struct p9_fid *fid, bool new); /* other default globals */ #define V9FS_PORT 564 @@ -224,12 +225,12 @@ static inline int v9fs_proto_dotl(struct v9fs_session_info *v9ses) */ static inline struct inode * v9fs_get_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, - struct super_block *sb) + struct super_block *sb, bool new) { if (v9fs_proto_dotl(v9ses)) - return v9fs_fid_iget_dotl(sb, fid); + return v9fs_fid_iget_dotl(sb, fid, new); else - return v9fs_fid_iget(sb, fid); + return v9fs_fid_iget(sb, fid, new); } #endif diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 47bd77199e20c..7a3308d776060 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -364,7 +364,8 @@ void v9fs_evict_inode(struct inode *inode) clear_inode(inode); } -struct inode *v9fs_fid_iget(struct super_block *sb, struct p9_fid *fid) +struct inode * +v9fs_fid_iget(struct super_block *sb, struct p9_fid *fid, bool new) { dev_t rdev; int retval; @@ -376,8 +377,18 @@ struct inode *v9fs_fid_iget(struct super_block *sb, struct p9_fid *fid) inode = iget_locked(sb, QID2INO(&fid->qid)); if (unlikely(!inode)) return ERR_PTR(-ENOMEM); - if (!(inode->i_state & I_NEW)) - return inode; + if (!(inode->i_state & I_NEW)) { + if (!new) { + goto done; + } else { + p9_debug(P9_DEBUG_VFS, "WARNING: Inode collision %ld\n", + inode->i_ino); + iput(inode); + remove_inode_hash(inode); + inode = iget_locked(sb, QID2INO(&fid->qid)); + WARN_ON(!(inode->i_state & I_NEW)); + } + } /* * initialize the inode with the stat info @@ -401,11 +412,11 @@ struct inode *v9fs_fid_iget(struct super_block *sb, struct p9_fid *fid) v9fs_set_netfs_context(inode); v9fs_cache_inode_get_cookie(inode); unlock_new_inode(inode); +done: return inode; error: iget_failed(inode); return ERR_PTR(retval); - } /** @@ -437,8 +448,15 @@ static int v9fs_at_to_dotl_flags(int flags) */ static void v9fs_dec_count(struct inode *inode) { - if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2) - drop_nlink(inode); + if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2) { + if (inode->i_nlink) { + drop_nlink(inode); + } else { + p9_debug(P9_DEBUG_VFS, + "WARNING: unexpected i_nlink zero %d inode %ld\n", + inode->i_nlink, inode->i_ino); + } + } } /** @@ -489,6 +507,9 @@ static int v9fs_remove(struct inode *dir, struct dentry *dentry, int flags) } else v9fs_dec_count(inode); + if (inode->i_nlink <= 0) /* no more refs unhash it */ + remove_inode_hash(inode); + v9fs_invalidate_inode_attr(inode); v9fs_invalidate_inode_attr(dir); @@ -554,7 +575,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir, /* * instantiate inode and assign the unopened fid to the dentry */ - inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb); + inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb, true); if (IS_ERR(inode)) { err = PTR_ERR(inode); p9_debug(P9_DEBUG_VFS, @@ -683,7 +704,7 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, else if (IS_ERR(fid)) inode = ERR_CAST(fid); else - inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb); + inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb, false); /* * If we had a rename on the server and a parallel lookup * for the new name, then make sure we instantiate with diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 55dde186041a3..c61b97bd13b9a 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -52,7 +52,10 @@ static kgid_t v9fs_get_fsgid_for_create(struct inode *dir_inode) return current_fsgid(); } -struct inode *v9fs_fid_iget_dotl(struct super_block *sb, struct p9_fid *fid) + + +struct inode * +v9fs_fid_iget_dotl(struct super_block *sb, struct p9_fid *fid, bool new) { int retval; struct inode *inode; @@ -62,8 +65,18 @@ struct inode *v9fs_fid_iget_dotl(struct super_block *sb, struct p9_fid *fid) inode = iget_locked(sb, QID2INO(&fid->qid)); if (unlikely(!inode)) return ERR_PTR(-ENOMEM); - if (!(inode->i_state & I_NEW)) - return inode; + if (!(inode->i_state & I_NEW)) { + if (!new) { + goto done; + } else { /* deal with race condition in inode number reuse */ + p9_debug(P9_DEBUG_ERROR, "WARNING: Inode collision %lx\n", + inode->i_ino); + iput(inode); + remove_inode_hash(inode); + inode = iget_locked(sb, QID2INO(&fid->qid)); + WARN_ON(!(inode->i_state & I_NEW)); + } + } /* * initialize the inode with the stat info @@ -90,12 +103,11 @@ struct inode *v9fs_fid_iget_dotl(struct super_block *sb, struct p9_fid *fid) goto error; unlock_new_inode(inode); - +done: return inode; error: iget_failed(inode); return ERR_PTR(retval); - } struct dotl_openflag_map { @@ -247,7 +259,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); goto out; } - inode = v9fs_fid_iget_dotl(dir->i_sb, fid); + inode = v9fs_fid_iget_dotl(dir->i_sb, fid, true); if (IS_ERR(inode)) { err = PTR_ERR(inode); p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n", err); @@ -340,7 +352,7 @@ static int v9fs_vfs_mkdir_dotl(struct mnt_idmap *idmap, } /* instantiate inode and assign the unopened fid to the dentry */ - inode = v9fs_fid_iget_dotl(dir->i_sb, fid); + inode = v9fs_fid_iget_dotl(dir->i_sb, fid, true); if (IS_ERR(inode)) { err = PTR_ERR(inode); p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n", @@ -776,7 +788,7 @@ v9fs_vfs_mknod_dotl(struct mnt_idmap *idmap, struct inode *dir, err); goto error; } - inode = v9fs_fid_iget_dotl(dir->i_sb, fid); + inode = v9fs_fid_iget_dotl(dir->i_sb, fid, true); if (IS_ERR(inode)) { err = PTR_ERR(inode); p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n", diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 55e67e36ae682..f52fdf42945cf 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -139,7 +139,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags, else sb->s_d_op = &v9fs_dentry_operations; - inode = v9fs_get_inode_from_fid(v9ses, fid, sb); + inode = v9fs_get_inode_from_fid(v9ses, fid, sb, true); if (IS_ERR(inode)) { retval = PTR_ERR(inode); goto release_sb; From 77d8aa79ecfb209308e0644c02f655122b31def7 Mon Sep 17 00:00:00 2001 From: Takayuki Nagata Date: Mon, 15 Apr 2024 16:47:49 +0900 Subject: [PATCH 248/606] cifs: reinstate original behavior again for forceuid/forcegid forceuid/forcegid should be enabled by default when uid=/gid= options are specified, but commit 24e0a1eff9e2 ("cifs: switch to new mount api") changed the behavior. Due to the change, a mounted share does not show intentional uid/gid for files and directories even though uid=/gid= options are specified since forceuid/forcegid are not enabled. This patch reinstates original behavior that overrides uid/gid with specified uid/gid by the options. Fixes: 24e0a1eff9e2 ("cifs: switch to new mount api") Signed-off-by: Takayuki Nagata Acked-by: Paulo Alcantara (Red Hat) Acked-by: Ronnie Sahlberg Acked-by: Tom Talpey Signed-off-by: Steve French --- fs/smb/client/fs_context.c | 12 ++++++++++++ fs/smb/client/fs_context.h | 2 ++ 2 files changed, 14 insertions(+) diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index 6c727d8c31e87..3bbac925d0766 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -748,6 +748,16 @@ static int smb3_fs_context_validate(struct fs_context *fc) /* set the port that we got earlier */ cifs_set_port((struct sockaddr *)&ctx->dstaddr, ctx->port); + if (ctx->uid_specified && !ctx->forceuid_specified) { + ctx->override_uid = 1; + pr_notice("enabling forceuid mount option implicitly because uid= option is specified\n"); + } + + if (ctx->gid_specified && !ctx->forcegid_specified) { + ctx->override_gid = 1; + pr_notice("enabling forcegid mount option implicitly because gid= option is specified\n"); + } + if (ctx->override_uid && !ctx->uid_specified) { ctx->override_uid = 0; pr_notice("ignoring forceuid mount option specified with no uid= option\n"); @@ -1019,12 +1029,14 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, ctx->override_uid = 0; else ctx->override_uid = 1; + ctx->forceuid_specified = true; break; case Opt_forcegid: if (result.negated) ctx->override_gid = 0; else ctx->override_gid = 1; + ctx->forcegid_specified = true; break; case Opt_perm: if (result.negated) diff --git a/fs/smb/client/fs_context.h b/fs/smb/client/fs_context.h index a947bddeba273..cf577ec0dd0ac 100644 --- a/fs/smb/client/fs_context.h +++ b/fs/smb/client/fs_context.h @@ -165,6 +165,8 @@ enum cifs_param { }; struct smb3_fs_context { + bool forceuid_specified; + bool forcegid_specified; bool uid_specified; bool cruid_specified; bool gid_specified; From 4b759dd5765503bd466defac7d93aca14c23a15d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 5 Apr 2024 15:00:16 -0700 Subject: [PATCH 249/606] cxl/core: Fix potential payload size confusion in cxl_mem_get_poison() A recent change to cxl_mem_get_records_log() [1] highlighted a subtle nuance of looping calls to cxl_internal_send_cmd(), i.e. that cxl_internal_send_cmd() modifies the 'size_out' member of the @mbox_cmd argument. That mechanism is useful for communicating underflow, but it is unwanted when reusing @mbox_cmd for a subsequent submission. It turns out that cxl_xfer_log() avoids this scenario by always redefining @mbox_cmd each iteration. Update cxl_mem_get_records_log() and cxl_mem_get_poison() to follow the same style as cxl_xfer_log(), i.e. re-define @mbox_cmd each iteration. The cxl_mem_get_records_log() change is just a style fixup, but the cxl_mem_get_poison() change is a potential fix, per Alison [2]: Poison list retrieval can hit this case if the MORE flag is set and a follow on read of the list delivers more records than the previous read. ie. device gives one record, sets the _MORE flag, then gives 5. Not an urgent fix since this behavior has not been seen in the wild, but worth tracking as a fix. Cc: Kwangjin Ko Cc: Alison Schofield Fixes: ed83f7ca398b ("cxl/mbox: Add GET_POISON_LIST mailbox command") Link: http://lore.kernel.org/r/20240402081404.1106-2-kwangjin.ko@sk.com [1] Link: http://lore.kernel.org/r/ZhAhAL/GOaWFrauw@aschofie-mobl2 [2] Signed-off-by: Dan Williams Reviewed-by: Ira Weiny Reviewed-by: Alison Schofield Link: https://lore.kernel.org/r/171235441633.2716581.12330082428680958635.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/mbox.c | 38 +++++++++++++++++--------------------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index f0f54aeccc872..65185c9fa0013 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -946,25 +946,22 @@ static void cxl_mem_get_records_log(struct cxl_memdev_state *mds, struct cxl_memdev *cxlmd = mds->cxlds.cxlmd; struct device *dev = mds->cxlds.dev; struct cxl_get_event_payload *payload; - struct cxl_mbox_cmd mbox_cmd; u8 log_type = type; u16 nr_rec; mutex_lock(&mds->event.log_lock); payload = mds->event.buf; - mbox_cmd = (struct cxl_mbox_cmd) { - .opcode = CXL_MBOX_OP_GET_EVENT_RECORD, - .payload_in = &log_type, - .size_in = sizeof(log_type), - .payload_out = payload, - .min_out = struct_size(payload, records, 0), - }; - do { int rc, i; - - mbox_cmd.size_out = mds->payload_size; + struct cxl_mbox_cmd mbox_cmd = (struct cxl_mbox_cmd) { + .opcode = CXL_MBOX_OP_GET_EVENT_RECORD, + .payload_in = &log_type, + .size_in = sizeof(log_type), + .payload_out = payload, + .size_out = mds->payload_size, + .min_out = struct_size(payload, records, 0), + }; rc = cxl_internal_send_cmd(mds, &mbox_cmd); if (rc) { @@ -1297,7 +1294,6 @@ int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len, struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); struct cxl_mbox_poison_out *po; struct cxl_mbox_poison_in pi; - struct cxl_mbox_cmd mbox_cmd; int nr_records = 0; int rc; @@ -1309,16 +1305,16 @@ int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len, pi.offset = cpu_to_le64(offset); pi.length = cpu_to_le64(len / CXL_POISON_LEN_MULT); - mbox_cmd = (struct cxl_mbox_cmd) { - .opcode = CXL_MBOX_OP_GET_POISON, - .size_in = sizeof(pi), - .payload_in = &pi, - .size_out = mds->payload_size, - .payload_out = po, - .min_out = struct_size(po, record, 0), - }; - do { + struct cxl_mbox_cmd mbox_cmd = (struct cxl_mbox_cmd){ + .opcode = CXL_MBOX_OP_GET_POISON, + .size_in = sizeof(pi), + .payload_in = &pi, + .size_out = mds->payload_size, + .payload_out = po, + .min_out = struct_size(po, record, 0), + }; + rc = cxl_internal_send_cmd(mds, &mbox_cmd); if (rc) break; From e70316d17f6ab49a6038ffd115397fd68f8c7be8 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 22 Apr 2024 08:39:21 -0500 Subject: [PATCH 250/606] x86/sev: Check for MWAITX and MONITORX opcodes in the #VC handler The MWAITX and MONITORX instructions generate the same #VC error code as the MWAIT and MONITOR instructions, respectively. Update the #VC handler opcode checking to also support the MWAITX and MONITORX opcodes. Fixes: e3ef461af35a ("x86/sev: Harden #VC instruction emulation somewhat") Signed-off-by: Tom Lendacky Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/453d5a7cfb4b9fe818b6fb67f93ae25468bc9e23.1713793161.git.thomas.lendacky@amd.com --- arch/x86/kernel/sev-shared.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c index 8b04958da5e7d..b4f8fa0f722cd 100644 --- a/arch/x86/kernel/sev-shared.c +++ b/arch/x86/kernel/sev-shared.c @@ -1203,12 +1203,14 @@ static enum es_result vc_check_opcode_bytes(struct es_em_ctxt *ctxt, break; case SVM_EXIT_MONITOR: - if (opcode == 0x010f && modrm == 0xc8) + /* MONITOR and MONITORX instructions generate the same error code */ + if (opcode == 0x010f && (modrm == 0xc8 || modrm == 0xfa)) return ES_OK; break; case SVM_EXIT_MWAIT: - if (opcode == 0x010f && modrm == 0xc9) + /* MWAIT and MWAITX instructions generate the same error code */ + if (opcode == 0x010f && (modrm == 0xc9 || modrm == 0xfb)) return ES_OK; break; From 05d92ee782eeb7b939bdd0189e6efcab9195bf95 Mon Sep 17 00:00:00 2001 From: Jarred White Date: Mon, 8 Apr 2024 22:23:09 -0700 Subject: [PATCH 251/606] ACPI: CPPC: Fix bit_offset shift in MASK_VAL() macro Commit 2f4a4d63a193 ("ACPI: CPPC: Use access_width over bit_width for system memory accesses") neglected to properly wrap the bit_offset shift when it comes to applying the mask. This may cause incorrect values to be read and may cause the cpufreq module not be loaded. [ 11.059751] cpu_capacity: CPU0 missing/invalid highest performance. [ 11.066005] cpu_capacity: partial information: fallback to 1024 for all CPUs Also, corrected the bitmask generation in GENMASK (extra bit being added). Fixes: 2f4a4d63a193 ("ACPI: CPPC: Use access_width over bit_width for system memory accesses") Signed-off-by: Jarred White Cc: 5.15+ # 5.15+ Reviewed-by: Vanshidhar Konda Signed-off-by: Rafael J. Wysocki --- drivers/acpi/cppc_acpi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 4bfbe55553f41..00a30ca35e78d 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -170,8 +170,8 @@ show_cppc_data(cppc_get_perf_ctrs, cppc_perf_fb_ctrs, wraparound_time); #define GET_BIT_WIDTH(reg) ((reg)->access_width ? (8 << ((reg)->access_width - 1)) : (reg)->bit_width) /* Shift and apply the mask for CPC reads/writes */ -#define MASK_VAL(reg, val) ((val) >> ((reg)->bit_offset & \ - GENMASK(((reg)->bit_width), 0))) +#define MASK_VAL(reg, val) (((val) >> (reg)->bit_offset) & \ + GENMASK(((reg)->bit_width) - 1, 0)) static ssize_t show_feedback_ctrs(struct kobject *kobj, struct kobj_attribute *attr, char *buf) From f489c948028b69cea235d9c0de1cc10eeb26a172 Mon Sep 17 00:00:00 2001 From: Vanshidhar Konda Date: Thu, 11 Apr 2024 16:18:44 -0700 Subject: [PATCH 252/606] ACPI: CPPC: Fix access width used for PCC registers commit 2f4a4d63a193 ("ACPI: CPPC: Use access_width over bit_width for system memory accesses") modified cpc_read()/cpc_write() to use access_width to read CPC registers. However, for PCC registers the access width field in the ACPI register macro specifies the PCC subspace ID. For non-zero PCC subspace ID it is incorrectly treated as access width. This causes errors when reading from PCC registers in the CPPC driver. For PCC registers, base the size of read/write on the bit width field. The debug message in cpc_read()/cpc_write() is updated to print relevant information for the address space type used to read the register. Fixes: 2f4a4d63a193 ("ACPI: CPPC: Use access_width over bit_width for system memory accesses") Signed-off-by: Vanshidhar Konda Tested-by: Jarred White Reviewed-by: Jarred White Reviewed-by: Easwar Hariharan Cc: 5.15+ # 5.15+ Signed-off-by: Rafael J. Wysocki --- drivers/acpi/cppc_acpi.c | 53 ++++++++++++++++++++++++++++------------ 1 file changed, 37 insertions(+), 16 deletions(-) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 00a30ca35e78d..a40b6f3946efe 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -1002,14 +1002,14 @@ static int cpc_read(int cpu, struct cpc_register_resource *reg_res, u64 *val) } *val = 0; + size = GET_BIT_WIDTH(reg); if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_IO) { - u32 width = GET_BIT_WIDTH(reg); u32 val_u32; acpi_status status; status = acpi_os_read_port((acpi_io_address)reg->address, - &val_u32, width); + &val_u32, size); if (ACPI_FAILURE(status)) { pr_debug("Error: Failed to read SystemIO port %llx\n", reg->address); @@ -1018,17 +1018,22 @@ static int cpc_read(int cpu, struct cpc_register_resource *reg_res, u64 *val) *val = val_u32; return 0; - } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0) + } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0) { + /* + * For registers in PCC space, the register size is determined + * by the bit width field; the access size is used to indicate + * the PCC subspace id. + */ + size = reg->bit_width; vaddr = GET_PCC_VADDR(reg->address, pcc_ss_id); + } else if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) vaddr = reg_res->sys_mem_vaddr; else if (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) return cpc_read_ffh(cpu, reg, val); else return acpi_os_read_memory((acpi_physical_address)reg->address, - val, reg->bit_width); - - size = GET_BIT_WIDTH(reg); + val, size); switch (size) { case 8: @@ -1044,8 +1049,13 @@ static int cpc_read(int cpu, struct cpc_register_resource *reg_res, u64 *val) *val = readq_relaxed(vaddr); break; default: - pr_debug("Error: Cannot read %u bit width from PCC for ss: %d\n", - reg->bit_width, pcc_ss_id); + if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) { + pr_debug("Error: Cannot read %u bit width from system memory: 0x%llx\n", + size, reg->address); + } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM) { + pr_debug("Error: Cannot read %u bit width from PCC for ss: %d\n", + size, pcc_ss_id); + } return -EFAULT; } @@ -1063,12 +1073,13 @@ static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val) int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu); struct cpc_reg *reg = ®_res->cpc_entry.reg; + size = GET_BIT_WIDTH(reg); + if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_IO) { - u32 width = GET_BIT_WIDTH(reg); acpi_status status; status = acpi_os_write_port((acpi_io_address)reg->address, - (u32)val, width); + (u32)val, size); if (ACPI_FAILURE(status)) { pr_debug("Error: Failed to write SystemIO port %llx\n", reg->address); @@ -1076,17 +1087,22 @@ static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val) } return 0; - } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0) + } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0) { + /* + * For registers in PCC space, the register size is determined + * by the bit width field; the access size is used to indicate + * the PCC subspace id. + */ + size = reg->bit_width; vaddr = GET_PCC_VADDR(reg->address, pcc_ss_id); + } else if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) vaddr = reg_res->sys_mem_vaddr; else if (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) return cpc_write_ffh(cpu, reg, val); else return acpi_os_write_memory((acpi_physical_address)reg->address, - val, reg->bit_width); - - size = GET_BIT_WIDTH(reg); + val, size); if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) val = MASK_VAL(reg, val); @@ -1105,8 +1121,13 @@ static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val) writeq_relaxed(val, vaddr); break; default: - pr_debug("Error: Cannot write %u bit width to PCC for ss: %d\n", - reg->bit_width, pcc_ss_id); + if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) { + pr_debug("Error: Cannot write %u bit width to system memory: 0x%llx\n", + size, reg->address); + } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM) { + pr_debug("Error: Cannot write %u bit width to PCC for ss: %d\n", + size, pcc_ss_id); + } ret_val = -EFAULT; break; } From 5bcf0dcbf9066348058b88a510c57f70f384c92c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Thu, 18 Apr 2024 09:18:39 +0200 Subject: [PATCH 253/606] xdp: use flags field to disambiguate broadcast redirect MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When redirecting a packet using XDP, the bpf_redirect_map() helper will set up the redirect destination information in struct bpf_redirect_info (using the __bpf_xdp_redirect_map() helper function), and the xdp_do_redirect() function will read this information after the XDP program returns and pass the frame on to the right redirect destination. When using the BPF_F_BROADCAST flag to do multicast redirect to a whole map, __bpf_xdp_redirect_map() sets the 'map' pointer in struct bpf_redirect_info to point to the destination map to be broadcast. And xdp_do_redirect() reacts to the value of this map pointer to decide whether it's dealing with a broadcast or a single-value redirect. However, if the destination map is being destroyed before xdp_do_redirect() is called, the map pointer will be cleared out (by bpf_clear_redirect_map()) without waiting for any XDP programs to stop running. This causes xdp_do_redirect() to think that the redirect was to a single target, but the target pointer is also NULL (since broadcast redirects don't have a single target), so this causes a crash when a NULL pointer is passed to dev_map_enqueue(). To fix this, change xdp_do_redirect() to react directly to the presence of the BPF_F_BROADCAST flag in the 'flags' value in struct bpf_redirect_info to disambiguate between a single-target and a broadcast redirect. And only read the 'map' pointer if the broadcast flag is set, aborting if that has been cleared out in the meantime. This prevents the crash, while keeping the atomic (cmpxchg-based) clearing of the map pointer itself, and without adding any more checks in the non-broadcast fast path. Fixes: e624d4ed4aa8 ("xdp: Extend xdp_redirect_map with broadcast support") Reported-and-tested-by: syzbot+af9492708df9797198d6@syzkaller.appspotmail.com Signed-off-by: Toke Høiland-Jørgensen Acked-by: Stanislav Fomichev Reviewed-by: Hangbin Liu Acked-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/r/20240418071840.156411-1-toke@redhat.com Signed-off-by: Martin KaFai Lau --- net/core/filter.c | 42 ++++++++++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index 8adf95765cdd9..ae5254f712c94 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4360,10 +4360,12 @@ static __always_inline int __xdp_do_redirect_frame(struct bpf_redirect_info *ri, enum bpf_map_type map_type = ri->map_type; void *fwd = ri->tgt_value; u32 map_id = ri->map_id; + u32 flags = ri->flags; struct bpf_map *map; int err; ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */ + ri->flags = 0; ri->map_type = BPF_MAP_TYPE_UNSPEC; if (unlikely(!xdpf)) { @@ -4375,11 +4377,20 @@ static __always_inline int __xdp_do_redirect_frame(struct bpf_redirect_info *ri, case BPF_MAP_TYPE_DEVMAP: fallthrough; case BPF_MAP_TYPE_DEVMAP_HASH: - map = READ_ONCE(ri->map); - if (unlikely(map)) { + if (unlikely(flags & BPF_F_BROADCAST)) { + map = READ_ONCE(ri->map); + + /* The map pointer is cleared when the map is being torn + * down by bpf_clear_redirect_map() + */ + if (unlikely(!map)) { + err = -ENOENT; + break; + } + WRITE_ONCE(ri->map, NULL); err = dev_map_enqueue_multi(xdpf, dev, map, - ri->flags & BPF_F_EXCLUDE_INGRESS); + flags & BPF_F_EXCLUDE_INGRESS); } else { err = dev_map_enqueue(fwd, xdpf, dev); } @@ -4442,9 +4453,9 @@ EXPORT_SYMBOL_GPL(xdp_do_redirect_frame); static int xdp_do_generic_redirect_map(struct net_device *dev, struct sk_buff *skb, struct xdp_buff *xdp, - struct bpf_prog *xdp_prog, - void *fwd, - enum bpf_map_type map_type, u32 map_id) + struct bpf_prog *xdp_prog, void *fwd, + enum bpf_map_type map_type, u32 map_id, + u32 flags) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); struct bpf_map *map; @@ -4454,11 +4465,20 @@ static int xdp_do_generic_redirect_map(struct net_device *dev, case BPF_MAP_TYPE_DEVMAP: fallthrough; case BPF_MAP_TYPE_DEVMAP_HASH: - map = READ_ONCE(ri->map); - if (unlikely(map)) { + if (unlikely(flags & BPF_F_BROADCAST)) { + map = READ_ONCE(ri->map); + + /* The map pointer is cleared when the map is being torn + * down by bpf_clear_redirect_map() + */ + if (unlikely(!map)) { + err = -ENOENT; + break; + } + WRITE_ONCE(ri->map, NULL); err = dev_map_redirect_multi(dev, skb, xdp_prog, map, - ri->flags & BPF_F_EXCLUDE_INGRESS); + flags & BPF_F_EXCLUDE_INGRESS); } else { err = dev_map_generic_redirect(fwd, skb, xdp_prog); } @@ -4495,9 +4515,11 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, enum bpf_map_type map_type = ri->map_type; void *fwd = ri->tgt_value; u32 map_id = ri->map_id; + u32 flags = ri->flags; int err; ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */ + ri->flags = 0; ri->map_type = BPF_MAP_TYPE_UNSPEC; if (map_type == BPF_MAP_TYPE_UNSPEC && map_id == INT_MAX) { @@ -4517,7 +4539,7 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, return 0; } - return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, fwd, map_type, map_id); + return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, fwd, map_type, map_id, flags); err: _trace_xdp_redirect_err(dev, xdp_prog, ri->tgt_index, err); return err; From a4e3899065ffa87d49dc20e8c17501edbc189692 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Wed, 17 Apr 2024 12:37:37 +0200 Subject: [PATCH 254/606] net: dsa: mv88e6xx: fix supported_interfaces setup in mv88e6250_phylink_get_caps() With the recent PHYLINK changes requiring supported_interfaces to be set, MV88E6250 family switches like the 88E6020 fail to probe - cmode is never initialized on these devices, so mv88e6250_phylink_get_caps() does not set any supported_interfaces flags. Instead of a cmode, on 88E6250 we have a read-only port mode value that encodes similar information. There is no reason to bother mapping port mode to the cmodes of other switch models; instead we introduce a mv88e6250_setup_supported_interfaces() that is called directly from mv88e6250_phylink_get_caps(). Fixes: de5c9bf40c45 ("net: phylink: require supported_interfaces to be filled") Signed-off-by: Matthias Schiffer Link: https://lore.kernel.org/r/20240417103737.166651-1-matthias.schiffer@ew.tq-group.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/chip.c | 56 +++++++++++++++++++++++++++++--- drivers/net/dsa/mv88e6xxx/port.h | 23 ++++++++++--- 2 files changed, 71 insertions(+), 8 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index c95787cb90867..59b5dd0e2f41d 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -566,13 +566,61 @@ static void mv88e6xxx_translate_cmode(u8 cmode, unsigned long *supported) phy_interface_set_rgmii(supported); } -static void mv88e6250_phylink_get_caps(struct mv88e6xxx_chip *chip, int port, - struct phylink_config *config) +static void +mv88e6250_setup_supported_interfaces(struct mv88e6xxx_chip *chip, int port, + struct phylink_config *config) { unsigned long *supported = config->supported_interfaces; + int err; + u16 reg; - /* Translate the default cmode */ - mv88e6xxx_translate_cmode(chip->ports[port].cmode, supported); + err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_STS, ®); + if (err) { + dev_err(chip->dev, "p%d: failed to read port status\n", port); + return; + } + + switch (reg & MV88E6250_PORT_STS_PORTMODE_MASK) { + case MV88E6250_PORT_STS_PORTMODE_MII_10_HALF_PHY: + case MV88E6250_PORT_STS_PORTMODE_MII_100_HALF_PHY: + case MV88E6250_PORT_STS_PORTMODE_MII_10_FULL_PHY: + case MV88E6250_PORT_STS_PORTMODE_MII_100_FULL_PHY: + __set_bit(PHY_INTERFACE_MODE_REVMII, supported); + break; + + case MV88E6250_PORT_STS_PORTMODE_MII_HALF: + case MV88E6250_PORT_STS_PORTMODE_MII_FULL: + __set_bit(PHY_INTERFACE_MODE_MII, supported); + break; + + case MV88E6250_PORT_STS_PORTMODE_MII_DUAL_100_RMII_FULL_PHY: + case MV88E6250_PORT_STS_PORTMODE_MII_200_RMII_FULL_PHY: + case MV88E6250_PORT_STS_PORTMODE_MII_10_100_RMII_HALF_PHY: + case MV88E6250_PORT_STS_PORTMODE_MII_10_100_RMII_FULL_PHY: + __set_bit(PHY_INTERFACE_MODE_REVRMII, supported); + break; + + case MV88E6250_PORT_STS_PORTMODE_MII_DUAL_100_RMII_FULL: + case MV88E6250_PORT_STS_PORTMODE_MII_10_100_RMII_FULL: + __set_bit(PHY_INTERFACE_MODE_RMII, supported); + break; + + case MV88E6250_PORT_STS_PORTMODE_MII_100_RGMII: + __set_bit(PHY_INTERFACE_MODE_RGMII, supported); + break; + + default: + dev_err(chip->dev, + "p%d: invalid port mode in status register: %04x\n", + port, reg); + } +} + +static void mv88e6250_phylink_get_caps(struct mv88e6xxx_chip *chip, int port, + struct phylink_config *config) +{ + if (!mv88e6xxx_phy_is_internal(chip, port)) + mv88e6250_setup_supported_interfaces(chip, port, config); config->mac_capabilities = MAC_SYM_PAUSE | MAC_10 | MAC_100; } diff --git a/drivers/net/dsa/mv88e6xxx/port.h b/drivers/net/dsa/mv88e6xxx/port.h index 86deeb347cbc1..ddadeb9bfdaee 100644 --- a/drivers/net/dsa/mv88e6xxx/port.h +++ b/drivers/net/dsa/mv88e6xxx/port.h @@ -25,10 +25,25 @@ #define MV88E6250_PORT_STS_PORTMODE_PHY_100_HALF 0x0900 #define MV88E6250_PORT_STS_PORTMODE_PHY_10_FULL 0x0a00 #define MV88E6250_PORT_STS_PORTMODE_PHY_100_FULL 0x0b00 -#define MV88E6250_PORT_STS_PORTMODE_MII_10_HALF 0x0c00 -#define MV88E6250_PORT_STS_PORTMODE_MII_100_HALF 0x0d00 -#define MV88E6250_PORT_STS_PORTMODE_MII_10_FULL 0x0e00 -#define MV88E6250_PORT_STS_PORTMODE_MII_100_FULL 0x0f00 +/* - Modes with PHY suffix use output instead of input clock + * - Modes without RMII or RGMII use MII + * - Modes without speed do not have a fixed speed specified in the manual + * ("DC to x MHz" - variable clock support?) + */ +#define MV88E6250_PORT_STS_PORTMODE_MII_DISABLED 0x0000 +#define MV88E6250_PORT_STS_PORTMODE_MII_100_RGMII 0x0100 +#define MV88E6250_PORT_STS_PORTMODE_MII_DUAL_100_RMII_FULL_PHY 0x0200 +#define MV88E6250_PORT_STS_PORTMODE_MII_200_RMII_FULL_PHY 0x0400 +#define MV88E6250_PORT_STS_PORTMODE_MII_DUAL_100_RMII_FULL 0x0600 +#define MV88E6250_PORT_STS_PORTMODE_MII_10_100_RMII_FULL 0x0700 +#define MV88E6250_PORT_STS_PORTMODE_MII_HALF 0x0800 +#define MV88E6250_PORT_STS_PORTMODE_MII_10_100_RMII_HALF_PHY 0x0900 +#define MV88E6250_PORT_STS_PORTMODE_MII_FULL 0x0a00 +#define MV88E6250_PORT_STS_PORTMODE_MII_10_100_RMII_FULL_PHY 0x0b00 +#define MV88E6250_PORT_STS_PORTMODE_MII_10_HALF_PHY 0x0c00 +#define MV88E6250_PORT_STS_PORTMODE_MII_100_HALF_PHY 0x0d00 +#define MV88E6250_PORT_STS_PORTMODE_MII_10_FULL_PHY 0x0e00 +#define MV88E6250_PORT_STS_PORTMODE_MII_100_FULL_PHY 0x0f00 #define MV88E6XXX_PORT_STS_LINK 0x0800 #define MV88E6XXX_PORT_STS_DUPLEX 0x0400 #define MV88E6XXX_PORT_STS_SPEED_MASK 0x0300 From 98a821546b3919a10a58faa12ebe5e9a55cd638e Mon Sep 17 00:00:00 2001 From: Zhu Lingshan Date: Mon, 15 Apr 2024 19:10:47 +0800 Subject: [PATCH 255/606] vDPA: code clean for vhost_vdpa uapi This commit cleans up the uapi for vhost_vdpa by better naming some of the enums which report blk information to user space, and they are not in any official releases yet. Fixes: 1ac61ddfee93 ("vDPA: report virtio-blk flush info to user space") Fixes: ae1374b7f72c ("vDPA: report virtio-block read-only info to user space") Fixes: 330b8aea6924 ("vDPA: report virtio-block max segment size to user space") Signed-off-by: Zhu Lingshan Message-Id: <20240415111047.1047774-1-lingshan.zhu@intel.com> Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa.c | 6 +++--- include/uapi/linux/vdpa.h | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index b246067e074bc..6cb96a1e8b7df 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -967,7 +967,7 @@ vdpa_dev_blk_seg_size_config_fill(struct sk_buff *msg, u64 features, val_u32 = __virtio32_to_cpu(true, config->size_max); - return nla_put_u32(msg, VDPA_ATTR_DEV_BLK_CFG_SEG_SIZE, val_u32); + return nla_put_u32(msg, VDPA_ATTR_DEV_BLK_CFG_SIZE_MAX, val_u32); } /* fill the block size*/ @@ -1089,7 +1089,7 @@ static int vdpa_dev_blk_ro_config_fill(struct sk_buff *msg, u64 features) u8 ro; ro = ((features & BIT_ULL(VIRTIO_BLK_F_RO)) == 0) ? 0 : 1; - if (nla_put_u8(msg, VDPA_ATTR_DEV_BLK_CFG_READ_ONLY, ro)) + if (nla_put_u8(msg, VDPA_ATTR_DEV_BLK_READ_ONLY, ro)) return -EMSGSIZE; return 0; @@ -1100,7 +1100,7 @@ static int vdpa_dev_blk_flush_config_fill(struct sk_buff *msg, u64 features) u8 flush; flush = ((features & BIT_ULL(VIRTIO_BLK_F_FLUSH)) == 0) ? 0 : 1; - if (nla_put_u8(msg, VDPA_ATTR_DEV_BLK_CFG_FLUSH, flush)) + if (nla_put_u8(msg, VDPA_ATTR_DEV_BLK_FLUSH, flush)) return -EMSGSIZE; return 0; diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h index 43c51698195ce..842bf1201ac41 100644 --- a/include/uapi/linux/vdpa.h +++ b/include/uapi/linux/vdpa.h @@ -57,7 +57,7 @@ enum vdpa_attr { VDPA_ATTR_DEV_FEATURES, /* u64 */ VDPA_ATTR_DEV_BLK_CFG_CAPACITY, /* u64 */ - VDPA_ATTR_DEV_BLK_CFG_SEG_SIZE, /* u32 */ + VDPA_ATTR_DEV_BLK_CFG_SIZE_MAX, /* u32 */ VDPA_ATTR_DEV_BLK_CFG_BLK_SIZE, /* u32 */ VDPA_ATTR_DEV_BLK_CFG_SEG_MAX, /* u32 */ VDPA_ATTR_DEV_BLK_CFG_NUM_QUEUES, /* u16 */ @@ -70,8 +70,8 @@ enum vdpa_attr { VDPA_ATTR_DEV_BLK_CFG_DISCARD_SEC_ALIGN,/* u32 */ VDPA_ATTR_DEV_BLK_CFG_MAX_WRITE_ZEROES_SEC, /* u32 */ VDPA_ATTR_DEV_BLK_CFG_MAX_WRITE_ZEROES_SEG, /* u32 */ - VDPA_ATTR_DEV_BLK_CFG_READ_ONLY, /* u8 */ - VDPA_ATTR_DEV_BLK_CFG_FLUSH, /* u8 */ + VDPA_ATTR_DEV_BLK_READ_ONLY, /* u8 */ + VDPA_ATTR_DEV_BLK_FLUSH, /* u8 */ /* new attributes must be added above here */ VDPA_ATTR_MAX, From 97ec32b583bb08f72146eee2c1a1918e05760f8c Mon Sep 17 00:00:00 2001 From: David Christensen Date: Thu, 18 Apr 2024 15:55:17 -0400 Subject: [PATCH 256/606] MAINTAINERS: eth: mark IBM eHEA as an Orphan Current maintainer Douglas Miller has left IBM and no replacement has been assigned for the driver. The eHEA hardware was last used on IBM POWER7 systems, the last of which reached end-of-support at the end of 2020. Signed-off-by: David Christensen Reviewed-by: Pradeep Satyanarayana Acked-by: Michael Ellerman (powerpc) Link: https://lore.kernel.org/r/20240418195517.528577-1-drc@linux.ibm.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index c23fda1aa1f09..2d5acd6d98c4b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7829,9 +7829,8 @@ W: http://aeschi.ch.eu.org/efs/ F: fs/efs/ EHEA (IBM pSeries eHEA 10Gb ethernet adapter) DRIVER -M: Douglas Miller L: netdev@vger.kernel.org -S: Maintained +S: Orphan F: drivers/net/ethernet/ibm/ehea/ ELM327 CAN NETWORK DRIVER From 6bea4f03c6a4e973ef369e15aac88f37981db49e Mon Sep 17 00:00:00 2001 From: Paul Geurts Date: Thu, 18 Apr 2024 21:25:38 +0200 Subject: [PATCH 257/606] NFC: trf7970a: disable all regulators on removal During module probe, regulator 'vin' and 'vdd-io' are used and enabled, but the vdd-io regulator overwrites the 'vin' regulator pointer. During remove, only the vdd-io is disabled, as the vin regulator pointer is not available anymore. When regulator_put() is called during resource cleanup a kernel warning is given, as the regulator is still enabled. Store the two regulators in separate pointers and disable both the regulators on module remove. Fixes: 49d22c70aaf0 ("NFC: trf7970a: Add device tree option of 1.8 Volt IO voltage") Signed-off-by: Paul Geurts Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/DB7PR09MB26847A4EBF88D9EDFEB1DA0F950E2@DB7PR09MB2684.eurprd09.prod.outlook.com Signed-off-by: Jakub Kicinski --- drivers/nfc/trf7970a.c | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/drivers/nfc/trf7970a.c b/drivers/nfc/trf7970a.c index 7eb17f46a8153..9e1a34e23af26 100644 --- a/drivers/nfc/trf7970a.c +++ b/drivers/nfc/trf7970a.c @@ -424,7 +424,8 @@ struct trf7970a { enum trf7970a_state state; struct device *dev; struct spi_device *spi; - struct regulator *regulator; + struct regulator *vin_regulator; + struct regulator *vddio_regulator; struct nfc_digital_dev *ddev; u32 quirks; bool is_initiator; @@ -1883,7 +1884,7 @@ static int trf7970a_power_up(struct trf7970a *trf) if (trf->state != TRF7970A_ST_PWR_OFF) return 0; - ret = regulator_enable(trf->regulator); + ret = regulator_enable(trf->vin_regulator); if (ret) { dev_err(trf->dev, "%s - Can't enable VIN: %d\n", __func__, ret); return ret; @@ -1926,7 +1927,7 @@ static int trf7970a_power_down(struct trf7970a *trf) if (trf->en2_gpiod && !(trf->quirks & TRF7970A_QUIRK_EN2_MUST_STAY_LOW)) gpiod_set_value_cansleep(trf->en2_gpiod, 0); - ret = regulator_disable(trf->regulator); + ret = regulator_disable(trf->vin_regulator); if (ret) dev_err(trf->dev, "%s - Can't disable VIN: %d\n", __func__, ret); @@ -2065,37 +2066,37 @@ static int trf7970a_probe(struct spi_device *spi) mutex_init(&trf->lock); INIT_DELAYED_WORK(&trf->timeout_work, trf7970a_timeout_work_handler); - trf->regulator = devm_regulator_get(&spi->dev, "vin"); - if (IS_ERR(trf->regulator)) { - ret = PTR_ERR(trf->regulator); + trf->vin_regulator = devm_regulator_get(&spi->dev, "vin"); + if (IS_ERR(trf->vin_regulator)) { + ret = PTR_ERR(trf->vin_regulator); dev_err(trf->dev, "Can't get VIN regulator: %d\n", ret); goto err_destroy_lock; } - ret = regulator_enable(trf->regulator); + ret = regulator_enable(trf->vin_regulator); if (ret) { dev_err(trf->dev, "Can't enable VIN: %d\n", ret); goto err_destroy_lock; } - uvolts = regulator_get_voltage(trf->regulator); + uvolts = regulator_get_voltage(trf->vin_regulator); if (uvolts > 4000000) trf->chip_status_ctrl = TRF7970A_CHIP_STATUS_VRS5_3; - trf->regulator = devm_regulator_get(&spi->dev, "vdd-io"); - if (IS_ERR(trf->regulator)) { - ret = PTR_ERR(trf->regulator); + trf->vddio_regulator = devm_regulator_get(&spi->dev, "vdd-io"); + if (IS_ERR(trf->vddio_regulator)) { + ret = PTR_ERR(trf->vddio_regulator); dev_err(trf->dev, "Can't get VDD_IO regulator: %d\n", ret); - goto err_destroy_lock; + goto err_disable_vin_regulator; } - ret = regulator_enable(trf->regulator); + ret = regulator_enable(trf->vddio_regulator); if (ret) { dev_err(trf->dev, "Can't enable VDD_IO: %d\n", ret); - goto err_destroy_lock; + goto err_disable_vin_regulator; } - if (regulator_get_voltage(trf->regulator) == 1800000) { + if (regulator_get_voltage(trf->vddio_regulator) == 1800000) { trf->io_ctrl = TRF7970A_REG_IO_CTRL_IO_LOW; dev_dbg(trf->dev, "trf7970a config vdd_io to 1.8V\n"); } @@ -2108,7 +2109,7 @@ static int trf7970a_probe(struct spi_device *spi) if (!trf->ddev) { dev_err(trf->dev, "Can't allocate NFC digital device\n"); ret = -ENOMEM; - goto err_disable_regulator; + goto err_disable_vddio_regulator; } nfc_digital_set_parent_dev(trf->ddev, trf->dev); @@ -2137,8 +2138,10 @@ static int trf7970a_probe(struct spi_device *spi) trf7970a_shutdown(trf); err_free_ddev: nfc_digital_free_device(trf->ddev); -err_disable_regulator: - regulator_disable(trf->regulator); +err_disable_vddio_regulator: + regulator_disable(trf->vddio_regulator); +err_disable_vin_regulator: + regulator_disable(trf->vin_regulator); err_destroy_lock: mutex_destroy(&trf->lock); return ret; @@ -2157,7 +2160,8 @@ static void trf7970a_remove(struct spi_device *spi) nfc_digital_unregister_device(trf->ddev); nfc_digital_free_device(trf->ddev); - regulator_disable(trf->regulator); + regulator_disable(trf->vddio_regulator); + regulator_disable(trf->vin_regulator); mutex_destroy(&trf->lock); } From df70d04d56975f527b9c965322cf56e245909071 Mon Sep 17 00:00:00 2001 From: Wedson Almeida Filho Date: Thu, 28 Mar 2024 16:54:53 -0300 Subject: [PATCH 258/606] rust: phy: implement `Send` for `Registration` In preparation for requiring `Send` for `Module` implementations in the next patch. Cc: FUJITA Tomonori Cc: Trevor Gross Cc: netdev@vger.kernel.org Signed-off-by: Wedson Almeida Filho Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20240328195457.225001-2-wedsonaf@gmail.com Signed-off-by: Miguel Ojeda --- rust/kernel/net/phy.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/rust/kernel/net/phy.rs b/rust/kernel/net/phy.rs index 96e09c6e8530b..265d0e1c13710 100644 --- a/rust/kernel/net/phy.rs +++ b/rust/kernel/net/phy.rs @@ -640,6 +640,10 @@ pub struct Registration { drivers: Pin<&'static mut [DriverVTable]>, } +// SAFETY: The only action allowed in a `Registration` instance is dropping it, which is safe to do +// from any thread because `phy_drivers_unregister` can be called from any thread context. +unsafe impl Send for Registration {} + impl Registration { /// Registers a PHY driver. pub fn register( From 323617f649c0966ad5e741e47e27e06d3a680d8f Mon Sep 17 00:00:00 2001 From: Wedson Almeida Filho Date: Thu, 28 Mar 2024 16:54:54 -0300 Subject: [PATCH 259/606] rust: kernel: require `Send` for `Module` implementations The thread that calls the module initialisation code when a module is loaded is not guaranteed [in fact, it is unlikely] to be the same one that calls the module cleanup code on module unload, therefore, `Module` implementations must be `Send` to account for them moving from one thread to another implicitly. Signed-off-by: Wedson Almeida Filho Reviewed-by: Alice Ryhl Reviewed-by: Benno Lossin Cc: stable@vger.kernel.org # 6.8.x: df70d04d5697: rust: phy: implement `Send` for `Registration` Cc: stable@vger.kernel.org Fixes: 247b365dc8dc ("rust: add `kernel` crate") Link: https://lore.kernel.org/r/20240328195457.225001-3-wedsonaf@gmail.com Signed-off-by: Miguel Ojeda --- rust/kernel/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index be68d5e567b1a..6858e2f8a3ed9 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -65,7 +65,7 @@ const __LOG_PREFIX: &[u8] = b"rust_kernel\0"; /// The top level entrypoint to implementing a kernel module. /// /// For any teardown or cleanup operations, your type may implement [`Drop`]. -pub trait Module: Sized + Sync { +pub trait Module: Sized + Sync + Send { /// Called at module initialization time. /// /// Use this method to perform whatever setup or registration your module From 50cfe93b01475ba36878b65d35d812e1bb48ac71 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 22 Apr 2024 11:12:15 +0200 Subject: [PATCH 260/606] kbuild: rust: remove unneeded `@rustc_cfg` to avoid ICE When KUnit tests are enabled, under very big kernel configurations (e.g. `allyesconfig`), we can trigger a `rustdoc` ICE [1]: RUSTDOC TK rust/kernel/lib.rs error: the compiler unexpectedly panicked. this is a bug. The reason is that this build step has a duplicated `@rustc_cfg` argument, which contains the kernel configuration, and thus a lot of arguments. The factor 2 happens to be enough to reach the ICE. Thus remove the unneeded `@rustc_cfg`. By doing so, we clean up the command and workaround the ICE. The ICE has been fixed in the upcoming Rust 1.79 [2]. Cc: stable@vger.kernel.org Fixes: a66d733da801 ("rust: support running Rust documentation tests as KUnit ones") Link: https://github.com/rust-lang/rust/issues/122722 [1] Link: https://github.com/rust-lang/rust/pull/122840 [2] Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20240422091215.526688-1-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- rust/Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/rust/Makefile b/rust/Makefile index 846e6ab9d5a9b..86a125c4243cb 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -175,7 +175,6 @@ quiet_cmd_rustdoc_test_kernel = RUSTDOC TK $< mkdir -p $(objtree)/$(obj)/test/doctests/kernel; \ OBJTREE=$(abspath $(objtree)) \ $(RUSTDOC) --test $(rust_flags) \ - @$(objtree)/include/generated/rustc_cfg \ -L$(objtree)/$(obj) --extern alloc --extern kernel \ --extern build_error --extern macros \ --extern bindings --extern uapi \ From f2602fba4723e408380eb9a56e921d36a1ae21f8 Mon Sep 17 00:00:00 2001 From: Simon Trimmer Date: Mon, 22 Apr 2024 11:32:11 +0100 Subject: [PATCH 261/606] ASoC: cs35l56: Avoid static analysis warning of uninitialised variable Static checkers complain that the silicon_uid variable passed by pointer to cs35l56_read_silicon_uid() could later be used uninitialised when calling cs_amp_get_efi_calibration_data(). cs35l56_read_silicon_uid() must have succeeded to call cs_amp_get_efi_calibration_data() and that would have populated the variable. However, initialise the value so we are not haunted by it forevermore. Signed-off-by: Simon Trimmer Fixes: e1830f66f6c6 ("ASoC: cs35l56: Add helper functions for amp calibration") Signed-off-by: Richard Fitzgerald Link: https://lore.kernel.org/r/20240422103211.236063-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l56-shared.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/cs35l56-shared.c b/sound/soc/codecs/cs35l56-shared.c index ec1d95e570615..fd02b621da52c 100644 --- a/sound/soc/codecs/cs35l56-shared.c +++ b/sound/soc/codecs/cs35l56-shared.c @@ -706,7 +706,7 @@ EXPORT_SYMBOL_NS_GPL(cs35l56_calibration_controls, SND_SOC_CS35L56_SHARED); int cs35l56_get_calibration(struct cs35l56_base *cs35l56_base) { - u64 silicon_uid; + u64 silicon_uid = 0; int ret; /* Driver can't apply calibration to a secured part, so skip */ From 96e20adc43c4f81e9163a5188cee75a6dd393e09 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Mon, 22 Apr 2024 09:38:33 +0300 Subject: [PATCH 262/606] regulator: change stubbed devm_regulator_get_enable to return Ok The devm_regulator_get_enable() should be a 'call and forget' API, meaning, when it is used to enable the regulators, the API does not provide a handle to do any further control of the regulators. It gives no real benefit to return an error from the stub if CONFIG_REGULATOR is not set. On the contrary, returning and error is causing problems to drivers when hardware is such it works out just fine with no regulator control. Returning an error forces drivers to specifically handle the case where CONFIG_REGULATOR is not set, making the mere existence of the stub questionalble. Furthermore, the stub of the regulator_enable() seems to be returning Ok. Change the stub implementation for the devm_regulator_get_enable() to return Ok so drivers do not separately handle the case where the CONFIG_REGULATOR is not set. Signed-off-by: Matti Vaittinen Reported-by: Aleksander Mazur Suggested-by: Guenter Roeck Fixes: da279e6965b3 ("regulator: Add devm helpers for get and enable") Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/ZiYF6d1V1vSPcsJS@drtxq0yyyyyyyyyyyyyby-3.rev.dnainternet.fi Signed-off-by: Mark Brown --- include/linux/regulator/consumer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index 4660582a33022..71232fb7dda31 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -320,7 +320,7 @@ devm_regulator_get_exclusive(struct device *dev, const char *id) static inline int devm_regulator_get_enable(struct device *dev, const char *id) { - return -ENODEV; + return 0; } static inline int devm_regulator_get_enable_optional(struct device *dev, From 49a940dbdc3107fecd5e6d3063dc07128177e058 Mon Sep 17 00:00:00 2001 From: Gaurav Batra Date: Mon, 22 Apr 2024 15:51:41 -0500 Subject: [PATCH 263/606] powerpc/pseries/iommu: LPAR panics during boot up with a frozen PE At the time of LPAR boot up, partition firmware provides Open Firmware property ibm,dma-window for the PE. This property is provided on the PCI bus the PE is attached to. There are execptions where the partition firmware might not provide this property for the PE at the time of LPAR boot up. One of the scenario is where the firmware has frozen the PE due to some error condition. This PE is frozen for 24 hours or unless the whole system is reinitialized. Within this time frame, if the LPAR is booted, the frozen PE will be presented to the LPAR but ibm,dma-window property could be missing. Today, under these circumstances, the LPAR oopses with NULL pointer dereference, when configuring the PCI bus the PE is attached to. BUG: Kernel NULL pointer dereference on read at 0x000000c8 Faulting instruction address: 0xc0000000001024c0 Oops: Kernel access of bad area, sig: 7 [#1] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries Modules linked in: Supported: Yes CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.4.0-150600.9-default #1 Hardware name: IBM,9043-MRX POWER10 (raw) 0x800200 0xf000006 of:IBM,FW1060.00 (NM1060_023) hv:phyp pSeries NIP: c0000000001024c0 LR: c0000000001024b0 CTR: c000000000102450 REGS: c0000000037db5c0 TRAP: 0300 Not tainted (6.4.0-150600.9-default) MSR: 8000000002009033 CR: 28000822 XER: 00000000 CFAR: c00000000010254c DAR: 00000000000000c8 DSISR: 00080000 IRQMASK: 0 ... NIP [c0000000001024c0] pci_dma_bus_setup_pSeriesLP+0x70/0x2a0 LR [c0000000001024b0] pci_dma_bus_setup_pSeriesLP+0x60/0x2a0 Call Trace: pci_dma_bus_setup_pSeriesLP+0x60/0x2a0 (unreliable) pcibios_setup_bus_self+0x1c0/0x370 __of_scan_bus+0x2f8/0x330 pcibios_scan_phb+0x280/0x3d0 pcibios_init+0x88/0x12c do_one_initcall+0x60/0x320 kernel_init_freeable+0x344/0x3e4 kernel_init+0x34/0x1d0 ret_from_kernel_user_thread+0x14/0x1c Fixes: b1fc44eaa9ba ("pseries/iommu/ddw: Fix kdump to work in absence of ibm,dma-window") Signed-off-by: Gaurav Batra Signed-off-by: Michael Ellerman Link: https://msgid.link/20240422205141.10662-1-gbatra@linux.ibm.com --- arch/powerpc/platforms/pseries/iommu.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index e8c4129697b14..b1e6d275cda9e 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -786,8 +786,16 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) * parent bus. During reboot, there will be ibm,dma-window property to * define DMA window. For kdump, there will at least be default window or DDW * or both. + * There is an exception to the above. In case the PE goes into frozen + * state, firmware may not provide ibm,dma-window property at the time + * of LPAR boot up. */ + if (!pdn) { + pr_debug(" no ibm,dma-window property !\n"); + return; + } + ppci = PCI_DN(pdn); pr_debug(" parent is %pOF, iommu_table: 0x%p\n", From d55f90e9b243faa5bcd5c8a323a8f43040500106 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 16 Apr 2024 12:08:51 +0200 Subject: [PATCH 264/606] ntfs3: enforce read-only when used as legacy ntfs driver Ensure that ntfs3 is mounted read-only when it is used to provide the legacy ntfs driver. Signed-off-by: Christian Brauner --- fs/ntfs3/ntfs_fs.h | 2 ++ fs/ntfs3/super.c | 36 ++++++++++++++++++++++++++++++++---- 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index 79356fd29a141..184c8bc76b927 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -1154,4 +1154,6 @@ static inline void le64_sub_cpu(__le64 *var, u64 val) *var = cpu_to_le64(le64_to_cpu(*var) - val); } +bool is_legacy_ntfs(struct super_block *sb); + #endif /* _LINUX_NTFS3_NTFS_FS_H */ diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index 8d2e51bae2cbc..b26d95a8d3274 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -408,6 +408,12 @@ static int ntfs_fs_reconfigure(struct fs_context *fc) struct ntfs_mount_options *new_opts = fc->fs_private; int ro_rw; + /* If ntfs3 is used as legacy ntfs enforce read-only mode. */ + if (is_legacy_ntfs(sb)) { + fc->sb_flags |= SB_RDONLY; + goto out; + } + ro_rw = sb_rdonly(sb) && !(fc->sb_flags & SB_RDONLY); if (ro_rw && (sbi->flags & NTFS_FLAGS_NEED_REPLAY)) { errorf(fc, @@ -427,8 +433,6 @@ static int ntfs_fs_reconfigure(struct fs_context *fc) fc, "ntfs3: Cannot use different iocharset when remounting!"); - sync_filesystem(sb); - if (ro_rw && (sbi->volume.flags & VOLUME_FLAG_DIRTY) && !new_opts->force) { errorf(fc, @@ -436,6 +440,8 @@ static int ntfs_fs_reconfigure(struct fs_context *fc) return -EINVAL; } +out: + sync_filesystem(sb); swap(sbi->options, fc->fs_private); return 0; @@ -1613,6 +1619,8 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc) } #endif + if (is_legacy_ntfs(sb)) + sb->s_flags |= SB_RDONLY; return 0; put_inode_out: @@ -1730,7 +1738,7 @@ static const struct fs_context_operations ntfs_context_ops = { * This will called when mount/remount. We will first initialize * options so that if remount we can use just that. */ -static int ntfs_init_fs_context(struct fs_context *fc) +static int __ntfs_init_fs_context(struct fs_context *fc) { struct ntfs_mount_options *opts; struct ntfs_sb_info *sbi; @@ -1778,6 +1786,11 @@ static int ntfs_init_fs_context(struct fs_context *fc) return -ENOMEM; } +static int ntfs_init_fs_context(struct fs_context *fc) +{ + return __ntfs_init_fs_context(fc); +} + static void ntfs3_kill_sb(struct super_block *sb) { struct ntfs_sb_info *sbi = sb->s_fs_info; @@ -1800,10 +1813,20 @@ static struct file_system_type ntfs_fs_type = { }; #if IS_ENABLED(CONFIG_NTFS_FS) +static int ntfs_legacy_init_fs_context(struct fs_context *fc) +{ + int ret; + + ret = __ntfs_init_fs_context(fc); + /* If ntfs3 is used as legacy ntfs enforce read-only mode. */ + fc->sb_flags |= SB_RDONLY; + return ret; +} + static struct file_system_type ntfs_legacy_fs_type = { .owner = THIS_MODULE, .name = "ntfs", - .init_fs_context = ntfs_init_fs_context, + .init_fs_context = ntfs_legacy_init_fs_context, .parameters = ntfs_fs_parameters, .kill_sb = ntfs3_kill_sb, .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP, @@ -1821,9 +1844,14 @@ static inline void unregister_as_ntfs_legacy(void) { unregister_filesystem(&ntfs_legacy_fs_type); } +bool is_legacy_ntfs(struct super_block *sb) +{ + return sb->s_type == &ntfs_legacy_fs_type; +} #else static inline void register_as_ntfs_legacy(void) {} static inline void unregister_as_ntfs_legacy(void) {} +bool is_legacy_ntfs(struct super_block *sb) { return false; } #endif From 9b872cc50daa7d1cb07d5bfd27ee9fa3f4e7eda9 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 16 Apr 2024 12:20:50 +0200 Subject: [PATCH 265/606] ntfs3: add legacy ntfs file operations To ensure that ioctl()s can't be used to circumvent write restrictions. Signed-off-by: Christian Brauner --- fs/ntfs3/dir.c | 7 +++++++ fs/ntfs3/file.c | 8 ++++++++ fs/ntfs3/inode.c | 20 ++++++++++++++++---- fs/ntfs3/ntfs_fs.h | 2 ++ 4 files changed, 33 insertions(+), 4 deletions(-) diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c index 5cf3d9decf646..263635199b60d 100644 --- a/fs/ntfs3/dir.c +++ b/fs/ntfs3/dir.c @@ -616,4 +616,11 @@ const struct file_operations ntfs_dir_operations = { .compat_ioctl = ntfs_compat_ioctl, #endif }; + +const struct file_operations ntfs_legacy_dir_operations = { + .llseek = generic_file_llseek, + .read = generic_read_dir, + .iterate_shared = ntfs_readdir, + .open = ntfs_file_open, +}; // clang-format on diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index 5418662c80d88..b73969e05052a 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -1236,4 +1236,12 @@ const struct file_operations ntfs_file_operations = { .fallocate = ntfs_fallocate, .release = ntfs_file_release, }; + +const struct file_operations ntfs_legacy_file_operations = { + .llseek = generic_file_llseek, + .read_iter = ntfs_file_read_iter, + .splice_read = ntfs_file_splice_read, + .open = ntfs_file_open, + .release = ntfs_file_release, +}; // clang-format on diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index eb7a8c9fba018..d273eda1cf45d 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -440,7 +440,10 @@ static struct inode *ntfs_read_mft(struct inode *inode, * Usually a hard links to directories are disabled. */ inode->i_op = &ntfs_dir_inode_operations; - inode->i_fop = &ntfs_dir_operations; + if (is_legacy_ntfs(inode->i_sb)) + inode->i_fop = &ntfs_legacy_dir_operations; + else + inode->i_fop = &ntfs_dir_operations; ni->i_valid = 0; } else if (S_ISLNK(mode)) { ni->std_fa &= ~FILE_ATTRIBUTE_DIRECTORY; @@ -450,7 +453,10 @@ static struct inode *ntfs_read_mft(struct inode *inode, } else if (S_ISREG(mode)) { ni->std_fa &= ~FILE_ATTRIBUTE_DIRECTORY; inode->i_op = &ntfs_file_inode_operations; - inode->i_fop = &ntfs_file_operations; + if (is_legacy_ntfs(inode->i_sb)) + inode->i_fop = &ntfs_legacy_file_operations; + else + inode->i_fop = &ntfs_file_operations; inode->i_mapping->a_ops = is_compressed(ni) ? &ntfs_aops_cmpr : &ntfs_aops; if (ino != MFT_REC_MFT) @@ -1614,7 +1620,10 @@ struct inode *ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir, if (S_ISDIR(mode)) { inode->i_op = &ntfs_dir_inode_operations; - inode->i_fop = &ntfs_dir_operations; + if (is_legacy_ntfs(inode->i_sb)) + inode->i_fop = &ntfs_legacy_dir_operations; + else + inode->i_fop = &ntfs_dir_operations; } else if (S_ISLNK(mode)) { inode->i_op = &ntfs_link_inode_operations; inode->i_fop = NULL; @@ -1623,7 +1632,10 @@ struct inode *ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir, inode_nohighmem(inode); } else if (S_ISREG(mode)) { inode->i_op = &ntfs_file_inode_operations; - inode->i_fop = &ntfs_file_operations; + if (is_legacy_ntfs(inode->i_sb)) + inode->i_fop = &ntfs_legacy_file_operations; + else + inode->i_fop = &ntfs_file_operations; inode->i_mapping->a_ops = is_compressed(ni) ? &ntfs_aops_cmpr : &ntfs_aops; init_rwsem(&ni->file.run_lock); diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index 184c8bc76b927..5f4d288c6adfb 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -493,6 +493,7 @@ struct inode *dir_search_u(struct inode *dir, const struct cpu_str *uni, struct ntfs_fnd *fnd); bool dir_is_empty(struct inode *dir); extern const struct file_operations ntfs_dir_operations; +extern const struct file_operations ntfs_legacy_dir_operations; /* Globals from file.c */ int ntfs_getattr(struct mnt_idmap *idmap, const struct path *path, @@ -507,6 +508,7 @@ long ntfs_compat_ioctl(struct file *filp, u32 cmd, unsigned long arg); extern const struct inode_operations ntfs_special_inode_operations; extern const struct inode_operations ntfs_file_inode_operations; extern const struct file_operations ntfs_file_operations; +extern const struct file_operations ntfs_legacy_file_operations; /* Globals from frecord.c */ void ni_remove_mi(struct ntfs_inode *ni, struct mft_inode *mi); From 619606a7b8d5e54b71578ecc988d3f8e1896bbc6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 17 Apr 2024 09:47:19 +0100 Subject: [PATCH 266/606] netfs: Fix writethrough-mode error handling Fix the error return in netfs_perform_write() acting in writethrough-mode to return any cached error in the case that netfs_end_writethrough() returns 0. This can affect the use of O_SYNC/O_DSYNC/RWF_SYNC/RWF_DSYNC in 9p and afs. Fixes: 41d8e7673a77 ("netfs: Implement a write-through caching option") Signed-off-by: David Howells Link: https://lore.kernel.org/r/6736.1713343639@warthog.procyon.org.uk Reviewed-by: Jeff Layton cc: Eric Van Hensbergen cc: Latchesar Ionkov cc: Dominique Martinet cc: Christian Schoenebeck cc: Marc Dionne cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org cc: v9fs@lists.linux.dev cc: linux-afs@lists.infradead.org Signed-off-by: Christian Brauner --- fs/netfs/buffered_write.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c index 9a0d32e4b422a..8f13ca8fbc74d 100644 --- a/fs/netfs/buffered_write.c +++ b/fs/netfs/buffered_write.c @@ -164,7 +164,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, enum netfs_how_to_modify howto; enum netfs_folio_trace trace; unsigned int bdp_flags = (iocb->ki_flags & IOCB_SYNC) ? 0: BDP_ASYNC; - ssize_t written = 0, ret; + ssize_t written = 0, ret, ret2; loff_t i_size, pos = iocb->ki_pos, from, to; size_t max_chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER; bool maybe_trouble = false; @@ -395,10 +395,12 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, out: if (unlikely(wreq)) { - ret = netfs_end_writethrough(wreq, iocb); + ret2 = netfs_end_writethrough(wreq, iocb); wbc_detach_inode(&wbc); - if (ret == -EIOCBQUEUED) - return ret; + if (ret2 == -EIOCBQUEUED) + return ret2; + if (ret == 0) + ret = ret2; } iocb->ki_pos += written; From f42c97027fb75776e2e9358d16bf4a99aeb04cf2 Mon Sep 17 00:00:00 2001 From: Daniel Okazaki Date: Mon, 22 Apr 2024 17:43:36 +0000 Subject: [PATCH 267/606] eeprom: at24: fix memory corruption race condition If the eeprom is not accessible, an nvmem device will be registered, the read will fail, and the device will be torn down. If another driver accesses the nvmem device after the teardown, it will reference invalid memory. Move the failure point before registering the nvmem device. Signed-off-by: Daniel Okazaki Fixes: b20eb4c1f026 ("eeprom: at24: drop unnecessary label") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240422174337.2487142-1-dtokazaki@google.com Signed-off-by: Bartosz Golaszewski --- drivers/misc/eeprom/at24.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index 572333ead5fb8..4bd4f32bcdabb 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -758,15 +758,6 @@ static int at24_probe(struct i2c_client *client) } pm_runtime_enable(dev); - at24->nvmem = devm_nvmem_register(dev, &nvmem_config); - if (IS_ERR(at24->nvmem)) { - pm_runtime_disable(dev); - if (!pm_runtime_status_suspended(dev)) - regulator_disable(at24->vcc_reg); - return dev_err_probe(dev, PTR_ERR(at24->nvmem), - "failed to register nvmem\n"); - } - /* * Perform a one-byte test read to verify that the chip is functional, * unless powering on the device is to be avoided during probe (i.e. @@ -782,6 +773,15 @@ static int at24_probe(struct i2c_client *client) } } + at24->nvmem = devm_nvmem_register(dev, &nvmem_config); + if (IS_ERR(at24->nvmem)) { + pm_runtime_disable(dev); + if (!pm_runtime_status_suspended(dev)) + regulator_disable(at24->vcc_reg); + return dev_err_probe(dev, PTR_ERR(at24->nvmem), + "failed to register nvmem\n"); + } + /* If this a SPD EEPROM, probe for DDR3 thermal sensor */ if (cdata == &at24_data_spd) at24_probe_temp_sensor(client); From bda16500dd0b05e2e047093b36cbe0873c95aeae Mon Sep 17 00:00:00 2001 From: Jack Yu Date: Tue, 23 Apr 2024 06:59:35 +0000 Subject: [PATCH 268/606] ASoC: rt715-sdca: volume step modification Volume step (dB/step) modification to fix format error which shown in amixer control. Signed-off-by: Jack Yu Link: https://lore.kernel.org/r/b1f546ad16dc4c7abb7daa7396e8345c@realtek.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt715-sdca.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/soc/codecs/rt715-sdca.c b/sound/soc/codecs/rt715-sdca.c index 3fb7b9adb61de..bc3579203c7a4 100644 --- a/sound/soc/codecs/rt715-sdca.c +++ b/sound/soc/codecs/rt715-sdca.c @@ -316,7 +316,7 @@ static int rt715_sdca_set_amp_gain_8ch_get(struct snd_kcontrol *kcontrol, return 0; } -static const DECLARE_TLV_DB_SCALE(in_vol_tlv, -17625, 375, 0); +static const DECLARE_TLV_DB_SCALE(in_vol_tlv, -1725, 75, 0); static const DECLARE_TLV_DB_SCALE(mic_vol_tlv, 0, 1000, 0); static int rt715_sdca_get_volsw(struct snd_kcontrol *kcontrol, @@ -477,7 +477,7 @@ static const struct snd_kcontrol_new rt715_sdca_snd_controls[] = { RT715_SDCA_FU_VOL_CTRL, CH_01), SDW_SDCA_CTL(FUN_MIC_ARRAY, RT715_SDCA_FU_ADC7_27_VOL, RT715_SDCA_FU_VOL_CTRL, CH_02), - 0x2f, 0x7f, 0, + 0x2f, 0x3f, 0, rt715_sdca_set_amp_gain_get, rt715_sdca_set_amp_gain_put, in_vol_tlv), RT715_SDCA_EXT_TLV("FU02 Capture Volume", @@ -485,13 +485,13 @@ static const struct snd_kcontrol_new rt715_sdca_snd_controls[] = { RT715_SDCA_FU_VOL_CTRL, CH_01), rt715_sdca_set_amp_gain_4ch_get, rt715_sdca_set_amp_gain_4ch_put, - in_vol_tlv, 4, 0x7f), + in_vol_tlv, 4, 0x3f), RT715_SDCA_EXT_TLV("FU06 Capture Volume", SDW_SDCA_CTL(FUN_MIC_ARRAY, RT715_SDCA_FU_ADC10_11_VOL, RT715_SDCA_FU_VOL_CTRL, CH_01), rt715_sdca_set_amp_gain_4ch_get, rt715_sdca_set_amp_gain_4ch_put, - in_vol_tlv, 4, 0x7f), + in_vol_tlv, 4, 0x3f), /* MIC Boost Control */ RT715_SDCA_BOOST_EXT_TLV("FU0E Boost", SDW_SDCA_CTL(FUN_MIC_ARRAY, RT715_SDCA_FU_DMIC_GAIN_EN, From 467324bcfe1a31ec65d0cf4aa59421d6b7a7d52b Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Fri, 19 Apr 2024 10:04:56 +0800 Subject: [PATCH 269/606] ax25: Fix netdev refcount issue The dev_tracker is added to ax25_cb in ax25_bind(). When the ax25 device is detaching, the dev_tracker of ax25_cb should be deallocated in ax25_kill_by_device() instead of the dev_tracker of ax25_dev. The log reported by ref_tracker is shown below: [ 80.884935] ref_tracker: reference already released. [ 80.885150] ref_tracker: allocated in: [ 80.885349] ax25_dev_device_up+0x105/0x540 [ 80.885730] ax25_device_event+0xa4/0x420 [ 80.885730] notifier_call_chain+0xc9/0x1e0 [ 80.885730] __dev_notify_flags+0x138/0x280 [ 80.885730] dev_change_flags+0xd7/0x180 [ 80.885730] dev_ifsioc+0x6a9/0xa30 [ 80.885730] dev_ioctl+0x4d8/0xd90 [ 80.885730] sock_do_ioctl+0x1c2/0x2d0 [ 80.885730] sock_ioctl+0x38b/0x4f0 [ 80.885730] __se_sys_ioctl+0xad/0xf0 [ 80.885730] do_syscall_64+0xc4/0x1b0 [ 80.885730] entry_SYSCALL_64_after_hwframe+0x67/0x6f [ 80.885730] ref_tracker: freed in: [ 80.885730] ax25_device_event+0x272/0x420 [ 80.885730] notifier_call_chain+0xc9/0x1e0 [ 80.885730] dev_close_many+0x272/0x370 [ 80.885730] unregister_netdevice_many_notify+0x3b5/0x1180 [ 80.885730] unregister_netdev+0xcf/0x120 [ 80.885730] sixpack_close+0x11f/0x1b0 [ 80.885730] tty_ldisc_kill+0xcb/0x190 [ 80.885730] tty_ldisc_hangup+0x338/0x3d0 [ 80.885730] __tty_hangup+0x504/0x740 [ 80.885730] tty_release+0x46e/0xd80 [ 80.885730] __fput+0x37f/0x770 [ 80.885730] __x64_sys_close+0x7b/0xb0 [ 80.885730] do_syscall_64+0xc4/0x1b0 [ 80.885730] entry_SYSCALL_64_after_hwframe+0x67/0x6f [ 80.893739] ------------[ cut here ]------------ [ 80.894030] WARNING: CPU: 2 PID: 140 at lib/ref_tracker.c:255 ref_tracker_free+0x47b/0x6b0 [ 80.894297] Modules linked in: [ 80.894929] CPU: 2 PID: 140 Comm: ax25_conn_rel_6 Not tainted 6.9.0-rc4-g8cd26fd90c1a #11 [ 80.895190] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qem4 [ 80.895514] RIP: 0010:ref_tracker_free+0x47b/0x6b0 [ 80.895808] Code: 83 c5 18 4c 89 eb 48 c1 eb 03 8a 04 13 84 c0 0f 85 df 01 00 00 41 83 7d 00 00 75 4b 4c 89 ff 9 [ 80.896171] RSP: 0018:ffff888009edf8c0 EFLAGS: 00000286 [ 80.896339] RAX: 1ffff1100141ac00 RBX: 1ffff1100149463b RCX: dffffc0000000000 [ 80.896502] RDX: 0000000000000001 RSI: 0000000000000246 RDI: ffff88800a0d6518 [ 80.896925] RBP: ffff888009edf9b0 R08: ffff88806d3288d3 R09: 1ffff1100da6511a [ 80.897212] R10: dffffc0000000000 R11: ffffed100da6511b R12: ffff88800a4a31d4 [ 80.897859] R13: ffff88800a4a31d8 R14: dffffc0000000000 R15: ffff88800a0d6518 [ 80.898279] FS: 00007fd88b7fe700(0000) GS:ffff88806d300000(0000) knlGS:0000000000000000 [ 80.899436] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 80.900181] CR2: 00007fd88c001d48 CR3: 000000000993e000 CR4: 00000000000006f0 ... [ 80.935774] ref_tracker: sp%d@000000000bb9df3d has 1/1 users at [ 80.935774] ax25_bind+0x424/0x4e0 [ 80.935774] __sys_bind+0x1d9/0x270 [ 80.935774] __x64_sys_bind+0x75/0x80 [ 80.935774] do_syscall_64+0xc4/0x1b0 [ 80.935774] entry_SYSCALL_64_after_hwframe+0x67/0x6f Change ax25_dev->dev_tracker to the dev_tracker of ax25_cb in order to mitigate the bug. Fixes: feef318c855a ("ax25: fix UAF bugs of net_device caused by rebinding operation") Signed-off-by: Duoming Zhou Link: https://lore.kernel.org/r/20240419020456.29826-1-duoming@zju.edu.cn Signed-off-by: Paolo Abeni --- net/ax25/af_ax25.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 558e158c98d01..9169efb2f43aa 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -103,7 +103,7 @@ static void ax25_kill_by_device(struct net_device *dev) s->ax25_dev = NULL; if (sk->sk_socket) { netdev_put(ax25_dev->dev, - &ax25_dev->dev_tracker); + &s->dev_tracker); ax25_dev_put(ax25_dev); } ax25_cb_del(s); From 7ca803b489455b9242c81b4befe546ea3a692e5c Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Mon, 18 Mar 2024 12:32:37 +0100 Subject: [PATCH 270/606] soc: mediatek: mtk-svs: Append "-thermal" to thermal zone names The thermal framework registers thermal zones as specified in DT and including the "-thermal" suffix: append that to the driver specified tzone_name to actually match the thermal zone name as registered by the thermal API. Fixes: 2bfbf82956e2 ("soc: mediatek: mtk-svs: Constify runtime-immutable members of svs_bank") Link: https://lore.kernel.org/r/20240318113237.125802-1-angelogioacchino.delregno@collabora.com Signed-off-by: AngeloGioacchino Del Regno --- drivers/soc/mediatek/mtk-svs.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/soc/mediatek/mtk-svs.c b/drivers/soc/mediatek/mtk-svs.c index c832f5c670bcf..9a91298c12539 100644 --- a/drivers/soc/mediatek/mtk-svs.c +++ b/drivers/soc/mediatek/mtk-svs.c @@ -1768,6 +1768,7 @@ static int svs_bank_resource_setup(struct svs_platform *svsp) const struct svs_bank_pdata *bdata; struct svs_bank *svsb; struct dev_pm_opp *opp; + char tz_name_buf[20]; unsigned long freq; int count, ret; u32 idx, i; @@ -1819,10 +1820,12 @@ static int svs_bank_resource_setup(struct svs_platform *svsp) } if (!IS_ERR_OR_NULL(bdata->tzone_name)) { - svsb->tzd = thermal_zone_get_zone_by_name(bdata->tzone_name); + snprintf(tz_name_buf, ARRAY_SIZE(tz_name_buf), + "%s-thermal", bdata->tzone_name); + svsb->tzd = thermal_zone_get_zone_by_name(tz_name_buf); if (IS_ERR(svsb->tzd)) { dev_err(svsb->dev, "cannot get \"%s\" thermal zone\n", - bdata->tzone_name); + tz_name_buf); return PTR_ERR(svsb->tzd); } } From ab6cd6bb33cc0bbb8dbf8cc264a1013b2019561e Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Wed, 7 Feb 2024 17:42:41 +0000 Subject: [PATCH 271/606] soc: mediatek: mtk-socinfo: depends on CONFIG_SOC_BUS The mtk-socinfo driver uses symbols 'soc_device_register' and 'soc_device_unregister' which are part of the bus driver for System-on-Chip devices. Select SOC_BUS to make sure that driver is built and the symbols are available. Fixes: 423a54da3c7e ("soc: mediatek: mtk-socinfo: Add driver for getting chip information") Signed-off-by: Daniel Golle Reviewed-by: Chen-Yu Tsai Reviewed-by: Matthias Brugger Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/cc8f7f7da5bdccce514a320e0ae7468659cf7346.1707327680.git.daniel@makrotopia.org Signed-off-by: AngeloGioacchino Del Regno --- drivers/soc/mediatek/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/soc/mediatek/Kconfig b/drivers/soc/mediatek/Kconfig index 50c664b65f4d4..1b7afb19ccd63 100644 --- a/drivers/soc/mediatek/Kconfig +++ b/drivers/soc/mediatek/Kconfig @@ -72,6 +72,7 @@ config MTK_SOCINFO tristate "MediaTek SoC Information" default y depends on NVMEM_MTK_EFUSE + select SOC_BUS help The MediaTek SoC Information (mtk-socinfo) driver provides information about the SoC to the userspace including the From a44f2eb106a46f2275a79de54ce0ea63e4f3d8c8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 19 Apr 2024 19:08:26 -0700 Subject: [PATCH 272/606] tools: ynl: don't ignore errors in NLMSG_DONE messages NLMSG_DONE contains an error code, it has to be extracted. Prior to this change all dumps will end in success, and in case of failure the result is silently truncated. Fixes: e4b48ed460d3 ("tools: ynl: add a completely generic client") Signed-off-by: Jakub Kicinski Reviewed-by: Donald Hunter Link: https://lore.kernel.org/r/20240420020827.3288615-1-kuba@kernel.org Signed-off-by: Paolo Abeni --- tools/net/ynl/lib/ynl.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index 5fa7957f6e0f5..25810e18b0a73 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -182,6 +182,7 @@ def __init__(self, msg, offset, attr_space=None): self.done = 1 extack_off = 20 elif self.nl_type == Netlink.NLMSG_DONE: + self.error = struct.unpack("i", self.raw[0:4])[0] self.done = 1 extack_off = 4 From 5ea6764d9095e234b024054f75ebbccc4f0eb146 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Tue, 9 Apr 2024 16:38:37 +0200 Subject: [PATCH 273/606] riscv: hwprobe: fix invalid sign extension for RISCV_HWPROBE_EXT_ZVFHMIN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current definition yields a negative 32bits signed value which result in a mask with is obviously incorrect. Replace it by using a 1ULL bit shift value to obtain a single set bit mask. Fixes: 5dadda5e6a59 ("riscv: hwprobe: export Zvfh[min] ISA extensions") Signed-off-by: Clément Léger Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20240409143839.558784-1-cleger@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/uapi/asm/hwprobe.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h index 9f2a8e3ff2048..2902f68dc913a 100644 --- a/arch/riscv/include/uapi/asm/hwprobe.h +++ b/arch/riscv/include/uapi/asm/hwprobe.h @@ -54,7 +54,7 @@ struct riscv_hwprobe { #define RISCV_HWPROBE_EXT_ZFHMIN (1 << 28) #define RISCV_HWPROBE_EXT_ZIHINTNTL (1 << 29) #define RISCV_HWPROBE_EXT_ZVFH (1 << 30) -#define RISCV_HWPROBE_EXT_ZVFHMIN (1 << 31) +#define RISCV_HWPROBE_EXT_ZVFHMIN (1ULL << 31) #define RISCV_HWPROBE_EXT_ZFA (1ULL << 32) #define RISCV_HWPROBE_EXT_ZTSO (1ULL << 33) #define RISCV_HWPROBE_EXT_ZACAS (1ULL << 34) From 57a01eafdcf78f6da34fad9ff075ed5dfdd9f420 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Tue, 23 Apr 2024 08:19:05 +0200 Subject: [PATCH 274/606] workqueue: Fix selection of wake_cpu in kick_pool() With cpu_possible_mask=0-63 and cpu_online_mask=0-7 the following kernel oops was observed: smp: Bringing up secondary CPUs ... smp: Brought up 1 node, 8 CPUs Unable to handle kernel pointer dereference in virtual kernel address space Failing address: 0000000000000000 TEID: 0000000000000803 [..] Call Trace: arch_vcpu_is_preempted+0x12/0x80 select_idle_sibling+0x42/0x560 select_task_rq_fair+0x29a/0x3b0 try_to_wake_up+0x38e/0x6e0 kick_pool+0xa4/0x198 __queue_work.part.0+0x2bc/0x3a8 call_timer_fn+0x36/0x160 __run_timers+0x1e2/0x328 __run_timer_base+0x5a/0x88 run_timer_softirq+0x40/0x78 __do_softirq+0x118/0x388 irq_exit_rcu+0xc0/0xd8 do_ext_irq+0xae/0x168 ext_int_handler+0xbe/0xf0 psw_idle_exit+0x0/0xc default_idle_call+0x3c/0x110 do_idle+0xd4/0x158 cpu_startup_entry+0x40/0x48 rest_init+0xc6/0xc8 start_kernel+0x3c4/0x5e0 startup_continue+0x3c/0x50 The crash is caused by calling arch_vcpu_is_preempted() for an offline CPU. To avoid this, select the cpu with cpumask_any_and_distribute() to mask __pod_cpumask with cpu_online_mask. In case no cpu is left in the pool, skip the assignment. tj: This doesn't fully fix the bug as CPUs can still go down between picking the target CPU and the wake call. Fixing that likely requires adding cpu_online() test to either the sched or s390 arch code. However, regardless of how that is fixed, workqueue shouldn't be picking a CPU which isn't online as that would result in unpredictable and worse behavior. Signed-off-by: Sven Schnelle Fixes: 8639ecebc9b1 ("workqueue: Implement non-strict affinity scope for unbound workqueues") Cc: stable@vger.kernel.org # v6.6+ Signed-off-by: Tejun Heo --- kernel/workqueue.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 0066c8f6c1544..a2af0aaf026ba 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1277,8 +1277,12 @@ static bool kick_pool(struct worker_pool *pool) !cpumask_test_cpu(p->wake_cpu, pool->attrs->__pod_cpumask)) { struct work_struct *work = list_first_entry(&pool->worklist, struct work_struct, entry); - p->wake_cpu = cpumask_any_distribute(pool->attrs->__pod_cpumask); - get_work_pwq(work)->stats[PWQ_STAT_REPATRIATED]++; + int wake_cpu = cpumask_any_and_distribute(pool->attrs->__pod_cpumask, + cpu_online_mask); + if (wake_cpu < nr_cpu_ids) { + p->wake_cpu = wake_cpu; + get_work_pwq(work)->stats[PWQ_STAT_REPATRIATED]++; + } } #endif wake_up_process(p); From 68301ef471b63f25d6e6144a0820fea52257a34a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 23 Apr 2024 21:19:36 +0200 Subject: [PATCH 275/606] ACPI: PM: s2idle: Evaluate all Low-Power S0 Idle _DSM functions Commit 073237281a50 ("ACPI: PM: s2idle: Enable Low-Power S0 Idle MSFT UUID for non-AMD systems") attempted to avoid evaluating the same Low- Power S0 Idle _DSM functions for different UUIDs, but that turns out to be a mistake, because some systems in the field are adversely affected by it. Address this by allowing all Low-Power S0 Idle _DSM functions to be evaluated, but still print the message regarding duplication of Low- Power S0 Idle _DSM function sets for different UUIDs. Fixes: 073237281a50 ("ACPI: PM: s2idle: Enable Low-Power S0 Idle MSFT UUID for non-AMD systems") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218750 Reported-and-tested-by: Mark Pearson Suggested-by: Mario Limonciello Signed-off-by: Rafael J. Wysocki Reviewed-by: Mario Limonciello --- drivers/acpi/x86/s2idle.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/x86/s2idle.c b/drivers/acpi/x86/s2idle.c index cd84af23f7eac..dd0b40b9bbe8b 100644 --- a/drivers/acpi/x86/s2idle.c +++ b/drivers/acpi/x86/s2idle.c @@ -492,16 +492,14 @@ static int lps0_device_attach(struct acpi_device *adev, unsigned int func_mask; /* - * Avoid evaluating the same _DSM function for two - * different UUIDs and prioritize the MSFT one. + * Log a message if the _DSM function sets for two + * different UUIDs overlap. */ func_mask = lps0_dsm_func_mask & lps0_dsm_func_mask_microsoft; - if (func_mask) { + if (func_mask) acpi_handle_info(adev->handle, "Duplicate LPS0 _DSM functions (mask: 0x%x)\n", func_mask); - lps0_dsm_func_mask &= ~func_mask; - } } } From 24729b307eefcd7c476065cd7351c1a018082c19 Mon Sep 17 00:00:00 2001 From: Wesley Cheng Date: Mon, 8 Apr 2024 18:40:59 -0700 Subject: [PATCH 276/606] usb: gadget: f_fs: Fix race between aio_cancel() and AIO request complete FFS based applications can utilize the aio_cancel() callback to dequeue pending USB requests submitted to the UDC. There is a scenario where the FFS application issues an AIO cancel call, while the UDC is handling a soft disconnect. For a DWC3 based implementation, the callstack looks like the following: DWC3 Gadget FFS Application dwc3_gadget_soft_disconnect() ... --> dwc3_stop_active_transfers() --> dwc3_gadget_giveback(-ESHUTDOWN) --> ffs_epfile_async_io_complete() ffs_aio_cancel() --> usb_ep_free_request() --> usb_ep_dequeue() There is currently no locking implemented between the AIO completion handler and AIO cancel, so the issue occurs if the completion routine is running in parallel to an AIO cancel call coming from the FFS application. As the completion call frees the USB request (io_data->req) the FFS application is also referencing it for the usb_ep_dequeue() call. This can lead to accessing a stale/hanging pointer. commit b566d38857fc ("usb: gadget: f_fs: use io_data->status consistently") relocated the usb_ep_free_request() into ffs_epfile_async_io_complete(). However, in order to properly implement locking to mitigate this issue, the spinlock can't be added to ffs_epfile_async_io_complete(), as usb_ep_dequeue() (if successfully dequeuing a USB request) will call the function driver's completion handler in the same context. Hence, leading into a deadlock. Fix this issue by moving the usb_ep_free_request() back to ffs_user_copy_worker(), and ensuring that it explicitly sets io_data->req to NULL after freeing it within the ffs->eps_lock. This resolves the race condition above, as the ffs_aio_cancel() routine will not continue attempting to dequeue a request that has already been freed, or the ffs_user_copy_work() not freeing the USB request until the AIO cancel is done referencing it. This fix depends on commit b566d38857fc ("usb: gadget: f_fs: use io_data->status consistently") Fixes: 2e4c7553cd6f ("usb: gadget: f_fs: add aio support") Cc: stable # b566d38857fc ("usb: gadget: f_fs: use io_data->status consistently") Signed-off-by: Wesley Cheng Link: https://lore.kernel.org/r/20240409014059.6740-1-quic_wcheng@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_fs.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index f855f1fc8e5e1..aa80c2a6b8e07 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -852,6 +852,7 @@ static void ffs_user_copy_worker(struct work_struct *work) work); int ret = io_data->status; bool kiocb_has_eventfd = io_data->kiocb->ki_flags & IOCB_EVENTFD; + unsigned long flags; if (io_data->read && ret > 0) { kthread_use_mm(io_data->mm); @@ -864,6 +865,11 @@ static void ffs_user_copy_worker(struct work_struct *work) if (io_data->ffs->ffs_eventfd && !kiocb_has_eventfd) eventfd_signal(io_data->ffs->ffs_eventfd); + spin_lock_irqsave(&io_data->ffs->eps_lock, flags); + usb_ep_free_request(io_data->ep, io_data->req); + io_data->req = NULL; + spin_unlock_irqrestore(&io_data->ffs->eps_lock, flags); + if (io_data->read) kfree(io_data->to_free); ffs_free_buffer(io_data); @@ -877,7 +883,6 @@ static void ffs_epfile_async_io_complete(struct usb_ep *_ep, struct ffs_data *ffs = io_data->ffs; io_data->status = req->status ? req->status : req->actual; - usb_ep_free_request(_ep, req); INIT_WORK(&io_data->work, ffs_user_copy_worker); queue_work(ffs->io_completion_wq, &io_data->work); From ec6ce7075ef879b91a8710829016005dc8170f17 Mon Sep 17 00:00:00 2001 From: Peter Korsgaard Date: Thu, 4 Apr 2024 12:06:35 +0200 Subject: [PATCH 277/606] usb: gadget: composite: fix OS descriptors w_value logic The OS descriptors logic had the high/low byte of w_value inverted, causing the extended properties to not be accessible for interface != 0. >From the Microsoft documentation: https://learn.microsoft.com/en-us/windows-hardware/drivers/usbcon/microsoft-os-1-0-descriptors-specification OS_Desc_CompatID.doc (w_index = 0x4): - wValue: High Byte = InterfaceNumber. InterfaceNumber is set to the number of the interface or function that is associated with the descriptor, typically 0x00. Because a device can have only one extended compat ID descriptor, it should ignore InterfaceNumber, regardless of the value, and simply return the descriptor. Low Byte = 0. PageNumber is used to retrieve descriptors that are larger than 64 KB. The header section is 16 bytes, so PageNumber is set to 0 for this request. We currently do not support >64KB compat ID descriptors, so verify that the low byte is 0. OS_Desc_Ext_Prop.doc (w_index = 0x5): - wValue: High byte = InterfaceNumber. The high byte of wValue is set to the number of the interface or function that is associated with the descriptor. Low byte = PageNumber. The low byte of wValue is used to retrieve descriptors that are larger than 64 KB. The header section is 10 bytes, so PageNumber is set to 0 for this request. We also don't support >64KB extended properties, so verify that the low byte is 0 and use the high byte for the interface number. Fixes: 37a3a533429e ("usb: gadget: OS Feature Descriptors support") Cc: stable Signed-off-by: Peter Korsgaard Link: https://lore.kernel.org/r/20240404100635.3215340-1-peter@korsgaard.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/composite.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 0ace45b66a31c..0e151b54aae82 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -2112,7 +2112,7 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) buf[5] = 0x01; switch (ctrl->bRequestType & USB_RECIP_MASK) { case USB_RECIP_DEVICE: - if (w_index != 0x4 || (w_value >> 8)) + if (w_index != 0x4 || (w_value & 0xff)) break; buf[6] = w_index; /* Number of ext compat interfaces */ @@ -2128,9 +2128,9 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) } break; case USB_RECIP_INTERFACE: - if (w_index != 0x5 || (w_value >> 8)) + if (w_index != 0x5 || (w_value & 0xff)) break; - interface = w_value & 0xFF; + interface = w_value >> 8; if (interface >= MAX_CONFIG_INTERFACES || !os_desc_cfg->interface[interface]) break; From 650ae71c80749fc7cb8858c8049f532eaec64410 Mon Sep 17 00:00:00 2001 From: Ivan Avdeev Date: Sat, 13 Apr 2024 11:01:24 -0400 Subject: [PATCH 278/606] usb: gadget: uvc: use correct buffer size when parsing configfs lists This commit fixes uvc gadget support on 32-bit platforms. Commit 0df28607c5cb ("usb: gadget: uvc: Generalise helper functions for reuse") introduced a helper function __uvcg_iter_item_entries() to aid with parsing lists of items on configfs attributes stores. This function is a generalization of another very similar function, which used a stack-allocated temporary buffer of fixed size for each item in the list and used the sizeof() operator to check for potential buffer overruns. The new function was changed to allocate the now variably sized temp buffer on heap, but wasn't properly updated to also check for max buffer size using the computed size instead of sizeof() operator. As a result, the maximum item size was 7 (plus null terminator) on 64-bit platforms, and 3 on 32-bit ones. While 7 is accidentally just barely enough, 3 is definitely too small for some of UVC configfs attributes. For example, dwFrameInteval, specified in 100ns units, usually has 6-digit item values, e.g. 166666 for 60fps. Cc: stable@vger.kernel.org Fixes: 0df28607c5cb ("usb: gadget: uvc: Generalise helper functions for reuse") Signed-off-by: Ivan Avdeev Link: https://lore.kernel.org/r/20240413150124.1062026-1-me@provod.works Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/uvc_configfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/function/uvc_configfs.c b/drivers/usb/gadget/function/uvc_configfs.c index 7e704b2bcfd1c..a4377df612f51 100644 --- a/drivers/usb/gadget/function/uvc_configfs.c +++ b/drivers/usb/gadget/function/uvc_configfs.c @@ -92,10 +92,10 @@ static int __uvcg_iter_item_entries(const char *page, size_t len, while (pg - page < len) { i = 0; - while (i < sizeof(buf) && (pg - page < len) && + while (i < bufsize && (pg - page < len) && *pg != '\0' && *pg != '\n') buf[i++] = *pg++; - if (i == sizeof(buf)) { + if (i == bufsize) { ret = -EINVAL; goto out_free_buf; } From 4a237d55446ff67655dc3eed2d4a41997536fc4c Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Wed, 17 Apr 2024 23:14:30 +0000 Subject: [PATCH 279/606] usb: xhci-plat: Don't include xhci.h The xhci_plat.h should not need to include the entire xhci.h header. This can cause redefinition in dwc3 if it selectively includes some xHCI definitions. This is a prerequisite change for a fix to disable suspend during initialization for dwc3. Cc: stable@vger.kernel.org Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/310acfa01c957a10d9feaca3f7206269866ba2eb.1713394973.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-plat.h | 4 +++- drivers/usb/host/xhci-rzv2m.c | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-plat.h b/drivers/usb/host/xhci-plat.h index 2d15386f2c504..6475130eac4b3 100644 --- a/drivers/usb/host/xhci-plat.h +++ b/drivers/usb/host/xhci-plat.h @@ -8,7 +8,9 @@ #ifndef _XHCI_PLAT_H #define _XHCI_PLAT_H -#include "xhci.h" /* for hcd_to_xhci() */ +struct device; +struct platform_device; +struct usb_hcd; struct xhci_plat_priv { const char *firmware_name; diff --git a/drivers/usb/host/xhci-rzv2m.c b/drivers/usb/host/xhci-rzv2m.c index ec65b24eafa86..4f59867d7117c 100644 --- a/drivers/usb/host/xhci-rzv2m.c +++ b/drivers/usb/host/xhci-rzv2m.c @@ -6,6 +6,7 @@ */ #include +#include "xhci.h" #include "xhci-plat.h" #include "xhci-rzv2m.h" From 6d735722063a945de56472bdc6bfcb170fd43b86 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Wed, 17 Apr 2024 23:14:36 +0000 Subject: [PATCH 280/606] usb: dwc3: core: Prevent phy suspend during init GUSB3PIPECTL.SUSPENDENABLE and GUSB2PHYCFG.SUSPHY should be cleared during initialization. Suspend during initialization can result in undefined behavior due to clock synchronization failure, which often seen as core soft reset timeout. The programming guide recommended these bits to be cleared during initialization for DWC_usb3.0 version 1.94 and above (along with DWC_usb31 and DWC_usb32). The current check in the driver does not account if it's set by default setting from coreConsultant. This is especially the case for DRD when switching mode to ensure the phy clocks are available to change mode. Depending on the platforms/design, some may be affected more than others. This is noted in the DWC_usb3x programming guide under the above registers. Let's just disable them during driver load and mode switching. Restore them when the controller initialization completes. Note that some platforms workaround this issue by disabling phy suspend through "snps,dis_u3_susphy_quirk" and "snps,dis_u2_susphy_quirk" when they should not need to. Cc: stable@vger.kernel.org Fixes: 9ba3aca8fe82 ("usb: dwc3: Disable phy suspend after power-on reset") Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/20da4e5a0c4678c9587d3da23f83bdd6d77353e9.1713394973.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.c | 90 +++++++++++++++++---------------------- drivers/usb/dwc3/core.h | 1 + drivers/usb/dwc3/gadget.c | 2 + drivers/usb/dwc3/host.c | 27 ++++++++++++ 4 files changed, 68 insertions(+), 52 deletions(-) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 31684cdaaae30..100041320e8dd 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -104,6 +104,27 @@ static int dwc3_get_dr_mode(struct dwc3 *dwc) return 0; } +void dwc3_enable_susphy(struct dwc3 *dwc, bool enable) +{ + u32 reg; + + reg = dwc3_readl(dwc->regs, DWC3_GUSB3PIPECTL(0)); + if (enable && !dwc->dis_u3_susphy_quirk) + reg |= DWC3_GUSB3PIPECTL_SUSPHY; + else + reg &= ~DWC3_GUSB3PIPECTL_SUSPHY; + + dwc3_writel(dwc->regs, DWC3_GUSB3PIPECTL(0), reg); + + reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)); + if (enable && !dwc->dis_u2_susphy_quirk) + reg |= DWC3_GUSB2PHYCFG_SUSPHY; + else + reg &= ~DWC3_GUSB2PHYCFG_SUSPHY; + + dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), reg); +} + void dwc3_set_prtcap(struct dwc3 *dwc, u32 mode) { u32 reg; @@ -585,11 +606,8 @@ static int dwc3_core_ulpi_init(struct dwc3 *dwc) */ static int dwc3_phy_setup(struct dwc3 *dwc) { - unsigned int hw_mode; u32 reg; - hw_mode = DWC3_GHWPARAMS0_MODE(dwc->hwparams.hwparams0); - reg = dwc3_readl(dwc->regs, DWC3_GUSB3PIPECTL(0)); /* @@ -599,21 +617,16 @@ static int dwc3_phy_setup(struct dwc3 *dwc) reg &= ~DWC3_GUSB3PIPECTL_UX_EXIT_PX; /* - * Above 1.94a, it is recommended to set DWC3_GUSB3PIPECTL_SUSPHY - * to '0' during coreConsultant configuration. So default value - * will be '0' when the core is reset. Application needs to set it - * to '1' after the core initialization is completed. - */ - if (!DWC3_VER_IS_WITHIN(DWC3, ANY, 194A)) - reg |= DWC3_GUSB3PIPECTL_SUSPHY; - - /* - * For DRD controllers, GUSB3PIPECTL.SUSPENDENABLE must be cleared after - * power-on reset, and it can be set after core initialization, which is - * after device soft-reset during initialization. + * Above DWC_usb3.0 1.94a, it is recommended to set + * DWC3_GUSB3PIPECTL_SUSPHY to '0' during coreConsultant configuration. + * So default value will be '0' when the core is reset. Application + * needs to set it to '1' after the core initialization is completed. + * + * Similarly for DRD controllers, GUSB3PIPECTL.SUSPENDENABLE must be + * cleared after power-on reset, and it can be set after core + * initialization. */ - if (hw_mode == DWC3_GHWPARAMS0_MODE_DRD) - reg &= ~DWC3_GUSB3PIPECTL_SUSPHY; + reg &= ~DWC3_GUSB3PIPECTL_SUSPHY; if (dwc->u2ss_inp3_quirk) reg |= DWC3_GUSB3PIPECTL_U2SSINP3OK; @@ -639,9 +652,6 @@ static int dwc3_phy_setup(struct dwc3 *dwc) if (dwc->tx_de_emphasis_quirk) reg |= DWC3_GUSB3PIPECTL_TX_DEEPH(dwc->tx_de_emphasis); - if (dwc->dis_u3_susphy_quirk) - reg &= ~DWC3_GUSB3PIPECTL_SUSPHY; - if (dwc->dis_del_phy_power_chg_quirk) reg &= ~DWC3_GUSB3PIPECTL_DEPOCHANGE; @@ -689,24 +699,15 @@ static int dwc3_phy_setup(struct dwc3 *dwc) } /* - * Above 1.94a, it is recommended to set DWC3_GUSB2PHYCFG_SUSPHY to - * '0' during coreConsultant configuration. So default value will - * be '0' when the core is reset. Application needs to set it to - * '1' after the core initialization is completed. - */ - if (!DWC3_VER_IS_WITHIN(DWC3, ANY, 194A)) - reg |= DWC3_GUSB2PHYCFG_SUSPHY; - - /* - * For DRD controllers, GUSB2PHYCFG.SUSPHY must be cleared after - * power-on reset, and it can be set after core initialization, which is - * after device soft-reset during initialization. + * Above DWC_usb3.0 1.94a, it is recommended to set + * DWC3_GUSB2PHYCFG_SUSPHY to '0' during coreConsultant configuration. + * So default value will be '0' when the core is reset. Application + * needs to set it to '1' after the core initialization is completed. + * + * Similarly for DRD controllers, GUSB2PHYCFG.SUSPHY must be cleared + * after power-on reset, and it can be set after core initialization. */ - if (hw_mode == DWC3_GHWPARAMS0_MODE_DRD) - reg &= ~DWC3_GUSB2PHYCFG_SUSPHY; - - if (dwc->dis_u2_susphy_quirk) - reg &= ~DWC3_GUSB2PHYCFG_SUSPHY; + reg &= ~DWC3_GUSB2PHYCFG_SUSPHY; if (dwc->dis_enblslpm_quirk) reg &= ~DWC3_GUSB2PHYCFG_ENBLSLPM; @@ -1227,21 +1228,6 @@ static int dwc3_core_init(struct dwc3 *dwc) if (ret) goto err_exit_phy; - if (hw_mode == DWC3_GHWPARAMS0_MODE_DRD && - !DWC3_VER_IS_WITHIN(DWC3, ANY, 194A)) { - if (!dwc->dis_u3_susphy_quirk) { - reg = dwc3_readl(dwc->regs, DWC3_GUSB3PIPECTL(0)); - reg |= DWC3_GUSB3PIPECTL_SUSPHY; - dwc3_writel(dwc->regs, DWC3_GUSB3PIPECTL(0), reg); - } - - if (!dwc->dis_u2_susphy_quirk) { - reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)); - reg |= DWC3_GUSB2PHYCFG_SUSPHY; - dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), reg); - } - } - dwc3_core_setup_global_control(dwc); dwc3_core_num_eps(dwc); diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index 7e80dd3d466b8..180dd8d29287c 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -1580,6 +1580,7 @@ int dwc3_event_buffers_setup(struct dwc3 *dwc); void dwc3_event_buffers_cleanup(struct dwc3 *dwc); int dwc3_core_soft_reset(struct dwc3 *dwc); +void dwc3_enable_susphy(struct dwc3 *dwc, bool enable); #if IS_ENABLED(CONFIG_USB_DWC3_HOST) || IS_ENABLED(CONFIG_USB_DWC3_DUAL_ROLE) int dwc3_host_init(struct dwc3 *dwc); diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 4df2661f66751..f94f68f1e7d2b 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2924,6 +2924,7 @@ static int __dwc3_gadget_start(struct dwc3 *dwc) dwc3_ep0_out_start(dwc); dwc3_gadget_enable_irq(dwc); + dwc3_enable_susphy(dwc, true); return 0; @@ -4690,6 +4691,7 @@ void dwc3_gadget_exit(struct dwc3 *dwc) if (!dwc->gadget) return; + dwc3_enable_susphy(dwc, false); usb_del_gadget(dwc->gadget); dwc3_gadget_free_endpoints(dwc); usb_put_gadget(dwc->gadget); diff --git a/drivers/usb/dwc3/host.c b/drivers/usb/dwc3/host.c index 0204787df81d5..a171b27a7845a 100644 --- a/drivers/usb/dwc3/host.c +++ b/drivers/usb/dwc3/host.c @@ -10,10 +10,13 @@ #include #include #include +#include +#include #include "../host/xhci-port.h" #include "../host/xhci-ext-caps.h" #include "../host/xhci-caps.h" +#include "../host/xhci-plat.h" #include "core.h" #define XHCI_HCSPARAMS1 0x4 @@ -57,6 +60,24 @@ static void dwc3_power_off_all_roothub_ports(struct dwc3 *dwc) } } +static void dwc3_xhci_plat_start(struct usb_hcd *hcd) +{ + struct platform_device *pdev; + struct dwc3 *dwc; + + if (!usb_hcd_is_primary_hcd(hcd)) + return; + + pdev = to_platform_device(hcd->self.controller); + dwc = dev_get_drvdata(pdev->dev.parent); + + dwc3_enable_susphy(dwc, true); +} + +static const struct xhci_plat_priv dwc3_xhci_plat_quirk = { + .plat_start = dwc3_xhci_plat_start, +}; + static void dwc3_host_fill_xhci_irq_res(struct dwc3 *dwc, int irq, char *name) { @@ -167,6 +188,11 @@ int dwc3_host_init(struct dwc3 *dwc) } } + ret = platform_device_add_data(xhci, &dwc3_xhci_plat_quirk, + sizeof(struct xhci_plat_priv)); + if (ret) + goto err; + ret = platform_device_add(xhci); if (ret) { dev_err(dwc->dev, "failed to register xHCI device\n"); @@ -192,6 +218,7 @@ void dwc3_host_exit(struct dwc3 *dwc) if (dwc->sys_wakeup) device_init_wakeup(&dwc->xhci->dev, false); + dwc3_enable_susphy(dwc, false); platform_device_unregister(dwc->xhci); dwc->xhci = NULL; } From a4b46d450c49f32e9d4247b421e58083fde304ce Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 18 Apr 2024 11:13:13 -0400 Subject: [PATCH 281/606] USB: core: Fix access violation during port device removal Testing with KASAN and syzkaller revealed a bug in port.c:disable_store(): usb_hub_to_struct_hub() can return NULL if the hub that the port belongs to is concurrently removed, but the function does not check for this possibility before dereferencing the returned value. It turns out that the first dereference is unnecessary, since hub->intfdev is the parent of the port device, so it can be changed easily. Adding a check for hub == NULL prevents further problems. The same bug exists in the disable_show() routine, and it can be fixed the same way. Signed-off-by: Alan Stern Reported-and-tested-by: Yue Sun Reported-by: xingwei lee Link: https://lore.kernel.org/linux-usb/CAEkJfYON+ry7xPx=AiLR9jzUNT+i_Va68ACajOC3HoacOfL1ig@mail.gmail.com/ Fixes: f061f43d7418 ("usb: hub: port: add sysfs entry to switch port power") CC: Michael Grzeschik CC: stable@vger.kernel.org Link: https://lore.kernel.org/r/393aa580-15a5-44ca-ad3b-6462461cd313@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/port.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/usb/core/port.c b/drivers/usb/core/port.c index 0e1262a077aea..e7da2fca11a48 100644 --- a/drivers/usb/core/port.c +++ b/drivers/usb/core/port.c @@ -51,13 +51,15 @@ static ssize_t disable_show(struct device *dev, struct usb_port *port_dev = to_usb_port(dev); struct usb_device *hdev = to_usb_device(dev->parent->parent); struct usb_hub *hub = usb_hub_to_struct_hub(hdev); - struct usb_interface *intf = to_usb_interface(hub->intfdev); + struct usb_interface *intf = to_usb_interface(dev->parent); int port1 = port_dev->portnum; u16 portstatus, unused; bool disabled; int rc; struct kernfs_node *kn; + if (!hub) + return -ENODEV; hub_get(hub); rc = usb_autopm_get_interface(intf); if (rc < 0) @@ -101,12 +103,14 @@ static ssize_t disable_store(struct device *dev, struct device_attribute *attr, struct usb_port *port_dev = to_usb_port(dev); struct usb_device *hdev = to_usb_device(dev->parent->parent); struct usb_hub *hub = usb_hub_to_struct_hub(hdev); - struct usb_interface *intf = to_usb_interface(hub->intfdev); + struct usb_interface *intf = to_usb_interface(dev->parent); int port1 = port_dev->portnum; bool disabled; int rc; struct kernfs_node *kn; + if (!hub) + return -ENODEV; rc = kstrtobool(buf, &disabled); if (rc) return rc; From 0aea736ddb877b93f6d2dd8cf439840d6b4970a9 Mon Sep 17 00:00:00 2001 From: Chris Wulff Date: Tue, 23 Apr 2024 18:02:15 +0000 Subject: [PATCH 282/606] usb: gadget: f_fs: Fix a race condition when processing setup packets. If the USB driver passes a pointer into the TRB buffer for creq, this buffer can be overwritten with the status response as soon as the event is queued. This can make the final check return USB_GADGET_DELAYED_STATUS when it shouldn't. Instead use the stored wLength. Fixes: 4d644abf2569 ("usb: gadget: f_fs: Only return delayed status when len is 0") Cc: stable Signed-off-by: Chris Wulff Link: https://lore.kernel.org/r/CO1PR17MB5419BD664264A558B2395E28E1112@CO1PR17MB5419.namprd17.prod.outlook.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index aa80c2a6b8e07..a057cbedf3c9b 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -3811,7 +3811,7 @@ static int ffs_func_setup(struct usb_function *f, __ffs_event_add(ffs, FUNCTIONFS_SETUP); spin_unlock_irqrestore(&ffs->ev.waitq.lock, flags); - return creq->wLength == 0 ? USB_GADGET_DELAYED_STATUS : 0; + return ffs->ev.setup.wLength == 0 ? USB_GADGET_DELAYED_STATUS : 0; } static bool ffs_func_req_match(struct usb_function *f, From d80eee97cb4e90768a81c856ac71d721996d86b7 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 18 Apr 2024 16:57:29 +0200 Subject: [PATCH 283/606] usb: typec: qcom-pmic: fix use-after-free on late probe errors Make sure to stop and deregister the port in case of late probe errors to avoid use-after-free issues when the underlying memory is released by devres. Fixes: a4422ff22142 ("usb: typec: qcom: Add Qualcomm PMIC Type-C driver") Cc: stable@vger.kernel.org # 6.5 Cc: Bryan O'Donoghue Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Reviewed-by: Bryan O'Donoghue Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240418145730.4605-2-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/qcom/qcom_pmic_typec.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/usb/typec/tcpm/qcom/qcom_pmic_typec.c b/drivers/usb/typec/tcpm/qcom/qcom_pmic_typec.c index e48412cdcb0fb..d3958c061a972 100644 --- a/drivers/usb/typec/tcpm/qcom/qcom_pmic_typec.c +++ b/drivers/usb/typec/tcpm/qcom/qcom_pmic_typec.c @@ -104,14 +104,18 @@ static int qcom_pmic_typec_probe(struct platform_device *pdev) ret = tcpm->port_start(tcpm, tcpm->tcpm_port); if (ret) - goto fwnode_remove; + goto port_unregister; ret = tcpm->pdphy_start(tcpm, tcpm->tcpm_port); if (ret) - goto fwnode_remove; + goto port_stop; return 0; +port_stop: + tcpm->port_stop(tcpm); +port_unregister: + tcpm_unregister_port(tcpm->tcpm_port); fwnode_remove: fwnode_remove_software_node(tcpm->tcpc.fwnode); From f2004e82abb679735e1dff99f9c94eb4bfe735b4 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 18 Apr 2024 16:57:30 +0200 Subject: [PATCH 284/606] usb: typec: qcom-pmic: fix pdphy start() error handling Move disabling of the vdd-pdphy supply to the start() function which enabled it for symmetry and to make sure that it is disabled as intended in all error paths of pmic_typec_pdphy_reset() (i.e. not just when qcom_pmic_typec_pdphy_enable() fails). Cc: stable+noautosel@kernel.org # Not needed in any stable release, just a minor bugfix Fixes: a4422ff22142 ("usb: typec: qcom: Add Qualcomm PMIC Type-C driver") Signed-off-by: Johan Hovold Reviewed-by: Bryan O'Donoghue Reviewed-by: Heikki Krogerus Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20240418145730.4605-3-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/qcom/qcom_pmic_typec_pdphy.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/usb/typec/tcpm/qcom/qcom_pmic_typec_pdphy.c b/drivers/usb/typec/tcpm/qcom/qcom_pmic_typec_pdphy.c index 6560f4fc98d5a..5b7f52b74a40a 100644 --- a/drivers/usb/typec/tcpm/qcom/qcom_pmic_typec_pdphy.c +++ b/drivers/usb/typec/tcpm/qcom/qcom_pmic_typec_pdphy.c @@ -475,10 +475,8 @@ static int qcom_pmic_typec_pdphy_enable(struct pmic_typec_pdphy *pmic_typec_pdph qcom_pmic_typec_pdphy_reset_off(pmic_typec_pdphy); done: - if (ret) { - regulator_disable(pmic_typec_pdphy->vdd_pdphy); + if (ret) dev_err(dev, "pdphy_enable fail %d\n", ret); - } return ret; } @@ -524,12 +522,17 @@ static int qcom_pmic_typec_pdphy_start(struct pmic_typec *tcpm, ret = pmic_typec_pdphy_reset(pmic_typec_pdphy); if (ret) - return ret; + goto err_disable_vdd_pdhy; for (i = 0; i < pmic_typec_pdphy->nr_irqs; i++) enable_irq(pmic_typec_pdphy->irq_data[i].irq); return 0; + +err_disable_vdd_pdhy: + regulator_disable(pmic_typec_pdphy->vdd_pdphy); + + return ret; } static void qcom_pmic_typec_pdphy_stop(struct pmic_typec *tcpm) From 9c8ecb9308d8013ff9ac9d36fdd8ae746033b93c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 23 Apr 2024 17:48:43 -0400 Subject: [PATCH 285/606] Revert "NFSD: Reschedule CB operations when backchannel rpc_clnt is shut down" The reverted commit attempted to enable NFSD to retransmit pending callback operations if an NFS client disconnects, but unintentionally introduces a hazardous behavior regression if the client becomes permanently unreachable while callback operations are still pending. A disconnect can occur due to network partition or if the NFS server needs to force the NFS client to retransmit (for example, if a GSS window under-run occurs). Reverting the commit will make NFSD behave the same as it did in v6.8 and before. Pending callback operations are permanently lost if the client connection is terminated before the client receives them. For some callback operations, this loss is not harmful. However, for CB_RECALL, the loss means a delegation might be revoked unnecessarily. For CB_OFFLOAD, pending COPY operations will never complete unless the NFS client subsequently sends an OFFLOAD_STATUS operation, which the Linux NFS client does not currently implement. These issues still need to be addressed somehow. Reported-by: Dai Ngo Link: https://bugzilla.kernel.org/show_bug.cgi?id=218735 Signed-off-by: Chuck Lever --- fs/nfsd/nfs4callback.c | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index e440f72b9d4ea..d153af81f4060 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -986,14 +986,6 @@ static bool nfsd4_queue_cb(struct nfsd4_callback *cb) return queue_delayed_work(callback_wq, &cb->cb_work, 0); } -static void nfsd4_queue_cb_delayed(struct nfsd4_callback *cb, - unsigned long msecs) -{ - trace_nfsd_cb_queue(cb->cb_clp, cb); - queue_delayed_work(callback_wq, &cb->cb_work, - msecs_to_jiffies(msecs)); -} - static void nfsd41_cb_inflight_begin(struct nfs4_client *clp) { atomic_inc(&clp->cl_cb_inflight); @@ -1502,16 +1494,8 @@ nfsd4_run_cb_work(struct work_struct *work) clnt = clp->cl_cb_client; if (!clnt) { - if (test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags)) - nfsd41_destroy_cb(cb); - else { - /* - * XXX: Ideally, we could wait for the client to - * reconnect, but I haven't figured out how - * to do that yet. - */ - nfsd4_queue_cb_delayed(cb, 25); - } + /* Callback channel broken, or client killed; give up: */ + nfsd41_destroy_cb(cb); return; } From 8ddb7142c8ab37371c6fd167a8aded97922c6268 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 23 Apr 2024 17:52:24 -0400 Subject: [PATCH 286/606] Revert "NFSD: Convert the callback workqueue to use delayed_work" This commit was a pre-requisite for commit c1ccfcf1a9bf ("NFSD: Reschedule CB operations when backchannel rpc_clnt is shut down"), which has already been reverted. Signed-off-by: Chuck Lever --- fs/nfsd/nfs4callback.c | 6 +++--- fs/nfsd/state.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index d153af81f4060..6aff46701aa1a 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -983,7 +983,7 @@ static struct workqueue_struct *callback_wq; static bool nfsd4_queue_cb(struct nfsd4_callback *cb) { trace_nfsd_cb_queue(cb->cb_clp, cb); - return queue_delayed_work(callback_wq, &cb->cb_work, 0); + return queue_work(callback_wq, &cb->cb_work); } static void nfsd41_cb_inflight_begin(struct nfs4_client *clp) @@ -1482,7 +1482,7 @@ static void nfsd4_run_cb_work(struct work_struct *work) { struct nfsd4_callback *cb = - container_of(work, struct nfsd4_callback, cb_work.work); + container_of(work, struct nfsd4_callback, cb_work); struct nfs4_client *clp = cb->cb_clp; struct rpc_clnt *clnt; int flags; @@ -1528,7 +1528,7 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, cb->cb_msg.rpc_argp = cb; cb->cb_msg.rpc_resp = cb; cb->cb_ops = ops; - INIT_DELAYED_WORK(&cb->cb_work, nfsd4_run_cb_work); + INIT_WORK(&cb->cb_work, nfsd4_run_cb_work); cb->cb_status = 0; cb->cb_need_restart = false; cb->cb_holds_slot = false; diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 01c6f34456469..2ed0fcf879fd1 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -68,7 +68,7 @@ struct nfsd4_callback { struct nfs4_client *cb_clp; struct rpc_message cb_msg; const struct nfsd4_callback_ops *cb_ops; - struct delayed_work cb_work; + struct work_struct cb_work; int cb_seq_status; int cb_status; bool cb_need_restart; From ff33132605c1a0acea59e4c523cb7c6fabe856b2 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Tue, 23 Apr 2024 14:38:28 +0300 Subject: [PATCH 287/606] regulator: change devm_regulator_get_enable_optional() stub to return Ok The devm_regulator_get_enable_optional() should be a 'call and forget' API, meaning, when it is used to enable the regulators, the API does not provide a handle to do any further control of the regulators. It gives no real benefit to return an error from the stub if CONFIG_REGULATOR is not set. On the contrary, returning an error is causing problems to drivers when hardware is such it works out just fine with no regulator control. Returning an error forces drivers to specifically handle the case where CONFIG_REGULATOR is not set, making the mere existence of the stub questionalble. Change the stub implementation for the devm_regulator_get_enable_optional() to return Ok so drivers do not separately handle the case where the CONFIG_REGULATOR is not set. Signed-off-by: Matti Vaittinen Fixes: da279e6965b3 ("regulator: Add devm helpers for get and enable") Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/ZiedtOE00Zozd3XO@fedora Signed-off-by: Mark Brown --- include/linux/regulator/consumer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index 71232fb7dda31..ed180ca419dad 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -326,7 +326,7 @@ static inline int devm_regulator_get_enable(struct device *dev, const char *id) static inline int devm_regulator_get_enable_optional(struct device *dev, const char *id) { - return -ENODEV; + return 0; } static inline struct regulator *__must_check From 3584718cf2ec7e79b6814f2596dcf398c5fb2eca Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 21 Apr 2024 17:52:48 +0000 Subject: [PATCH 288/606] net: fix sk_memory_allocated_{add|sub} vs softirqs Jonathan Heathcote reported a regression caused by blamed commit on aarch64 architecture. x86 happens to have irq-safe __this_cpu_add_return() and __this_cpu_sub(), but this is not generic. I think my confusion came from "struct sock" argument, because these helpers are called with a locked socket. But the memory accounting is per-proto (and per-cpu after the blamed commit). We might cleanup these helpers later to directly accept a "struct proto *proto" argument. Switch to this_cpu_add_return() and this_cpu_xchg() operations, and get rid of preempt_disable()/preempt_enable() pairs. Fast path becomes a bit faster as a result :) Many thanks to Jonathan Heathcote for his awesome report and investigations. Fixes: 3cd3399dd7a8 ("net: implement per-cpu reserves for memory_allocated") Reported-by: Jonathan Heathcote Closes: https://lore.kernel.org/netdev/VI1PR01MB42407D7947B2EA448F1E04EFD10D2@VI1PR01MB4240.eurprd01.prod.exchangelabs.com/ Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Reviewed-by: Shakeel Butt Link: https://lore.kernel.org/r/20240421175248.1692552-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index f57bfd8a2ad2d..b4b553df7870c 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1410,32 +1410,34 @@ sk_memory_allocated(const struct sock *sk) #define SK_MEMORY_PCPU_RESERVE (1 << (20 - PAGE_SHIFT)) extern int sysctl_mem_pcpu_rsv; +static inline void proto_memory_pcpu_drain(struct proto *proto) +{ + int val = this_cpu_xchg(*proto->per_cpu_fw_alloc, 0); + + if (val) + atomic_long_add(val, proto->memory_allocated); +} + static inline void -sk_memory_allocated_add(struct sock *sk, int amt) +sk_memory_allocated_add(const struct sock *sk, int val) { - int local_reserve; + struct proto *proto = sk->sk_prot; - preempt_disable(); - local_reserve = __this_cpu_add_return(*sk->sk_prot->per_cpu_fw_alloc, amt); - if (local_reserve >= READ_ONCE(sysctl_mem_pcpu_rsv)) { - __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve); - atomic_long_add(local_reserve, sk->sk_prot->memory_allocated); - } - preempt_enable(); + val = this_cpu_add_return(*proto->per_cpu_fw_alloc, val); + + if (unlikely(val >= READ_ONCE(sysctl_mem_pcpu_rsv))) + proto_memory_pcpu_drain(proto); } static inline void -sk_memory_allocated_sub(struct sock *sk, int amt) +sk_memory_allocated_sub(const struct sock *sk, int val) { - int local_reserve; + struct proto *proto = sk->sk_prot; - preempt_disable(); - local_reserve = __this_cpu_sub_return(*sk->sk_prot->per_cpu_fw_alloc, amt); - if (local_reserve <= -READ_ONCE(sysctl_mem_pcpu_rsv)) { - __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve); - atomic_long_add(local_reserve, sk->sk_prot->memory_allocated); - } - preempt_enable(); + val = this_cpu_sub_return(*proto->per_cpu_fw_alloc, val); + + if (unlikely(val <= -READ_ONCE(sysctl_mem_pcpu_rsv))) + proto_memory_pcpu_drain(proto); } #define SK_ALLOC_PERCPU_COUNTER_BATCH 16 From 58a4c9b1e5a3e53c9148e80b90e1e43897ce77d1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 21 Apr 2024 18:43:26 +0000 Subject: [PATCH 289/606] ipv4: check for NULL idev in ip_route_use_hint() syzbot was able to trigger a NULL deref in fib_validate_source() in an old tree [1]. It appears the bug exists in latest trees. All calls to __in_dev_get_rcu() must be checked for a NULL result. [1] general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] CPU: 2 PID: 3257 Comm: syz-executor.3 Not tainted 5.10.0-syzkaller #0 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 RIP: 0010:fib_validate_source+0xbf/0x15a0 net/ipv4/fib_frontend.c:425 Code: 18 f2 f2 f2 f2 42 c7 44 20 23 f3 f3 f3 f3 48 89 44 24 78 42 c6 44 20 27 f3 e8 5d 88 48 fc 4c 89 e8 48 c1 e8 03 48 89 44 24 18 <42> 80 3c 20 00 74 08 4c 89 ef e8 d2 15 98 fc 48 89 5c 24 10 41 bf RSP: 0018:ffffc900015fee40 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff88800f7a4000 RCX: ffff88800f4f90c0 RDX: 0000000000000000 RSI: 0000000004001eac RDI: ffff8880160c64c0 RBP: ffffc900015ff060 R08: 0000000000000000 R09: ffff88800f7a4000 R10: 0000000000000002 R11: ffff88800f4f90c0 R12: dffffc0000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff88800f7a4000 FS: 00007f938acfe6c0(0000) GS:ffff888058c00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f938acddd58 CR3: 000000001248e000 CR4: 0000000000352ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ip_route_use_hint+0x410/0x9b0 net/ipv4/route.c:2231 ip_rcv_finish_core+0x2c4/0x1a30 net/ipv4/ip_input.c:327 ip_list_rcv_finish net/ipv4/ip_input.c:612 [inline] ip_sublist_rcv+0x3ed/0xe50 net/ipv4/ip_input.c:638 ip_list_rcv+0x422/0x470 net/ipv4/ip_input.c:673 __netif_receive_skb_list_ptype net/core/dev.c:5572 [inline] __netif_receive_skb_list_core+0x6b1/0x890 net/core/dev.c:5620 __netif_receive_skb_list net/core/dev.c:5672 [inline] netif_receive_skb_list_internal+0x9f9/0xdc0 net/core/dev.c:5764 netif_receive_skb_list+0x55/0x3e0 net/core/dev.c:5816 xdp_recv_frames net/bpf/test_run.c:257 [inline] xdp_test_run_batch net/bpf/test_run.c:335 [inline] bpf_test_run_xdp_live+0x1818/0x1d00 net/bpf/test_run.c:363 bpf_prog_test_run_xdp+0x81f/0x1170 net/bpf/test_run.c:1376 bpf_prog_test_run+0x349/0x3c0 kernel/bpf/syscall.c:3736 __sys_bpf+0x45c/0x710 kernel/bpf/syscall.c:5115 __do_sys_bpf kernel/bpf/syscall.c:5201 [inline] __se_sys_bpf kernel/bpf/syscall.c:5199 [inline] __x64_sys_bpf+0x7c/0x90 kernel/bpf/syscall.c:5199 Fixes: 02b24941619f ("ipv4: use dst hint for ipv4 list receive") Reported-by: syzbot Signed-off-by: Eric Dumazet Acked-by: Paolo Abeni Link: https://lore.kernel.org/r/20240421184326.1704930-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/route.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d36ace160d426..b814fdab19f71 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2166,6 +2166,9 @@ int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr, int err = -EINVAL; u32 tag = 0; + if (!in_dev) + return -EINVAL; + if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr)) goto martian_source; From 4ce62d5b2f7aecd4900e7d6115588ad7f9acccca Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 21 Apr 2024 19:38:28 +0000 Subject: [PATCH 290/606] net: usb: ax88179_178a: stop lying about skb->truesize Some usb drivers try to set small skb->truesize and break core networking stacks. In this patch, I removed one of the skb->truesize overide. I also replaced one skb_clone() by an allocation of a fresh and small skb, to get minimally sized skbs, like we did in commit 1e2c61172342 ("net: cdc_ncm: reduce skb truesize in rx path") Fixes: f8ebb3ac881b ("net: usb: ax88179_178a: Fix packet receiving") Reported-by: shironeko Closes: https://lore.kernel.org/netdev/c110f41a0d2776b525930f213ca9715c@tesaguri.club/ Signed-off-by: Eric Dumazet Cc: Jose Alonso Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240421193828.1966195-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/ax88179_178a.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index 752f821a19901..df9d767cb5242 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1456,21 +1456,16 @@ static int ax88179_rx_fixup(struct usbnet *dev, struct sk_buff *skb) /* Skip IP alignment pseudo header */ skb_pull(skb, 2); - skb->truesize = SKB_TRUESIZE(pkt_len_plus_padd); ax88179_rx_checksum(skb, pkt_hdr); return 1; } - ax_skb = skb_clone(skb, GFP_ATOMIC); + ax_skb = netdev_alloc_skb_ip_align(dev->net, pkt_len); if (!ax_skb) return 0; - skb_trim(ax_skb, pkt_len); + skb_put(ax_skb, pkt_len); + memcpy(ax_skb->data, skb->data + 2, pkt_len); - /* Skip IP alignment pseudo header */ - skb_pull(ax_skb, 2); - - skb->truesize = pkt_len_plus_padd + - SKB_DATA_ALIGN(sizeof(struct sk_buff)); ax88179_rx_checksum(ax_skb, pkt_hdr); usbnet_skb_return(dev, ax_skb); From 80e679b352c3ce5158f3f778cfb77eb767e586fb Mon Sep 17 00:00:00 2001 From: Hyunwoo Kim Date: Mon, 22 Apr 2024 05:33:40 -0400 Subject: [PATCH 291/606] tcp: Fix Use-After-Free in tcp_ao_connect_init Since call_rcu, which is called in the hlist_for_each_entry_rcu traversal of tcp_ao_connect_init, is not part of the RCU read critical section, it is possible that the RCU grace period will pass during the traversal and the key will be free. To prevent this, it should be changed to hlist_for_each_entry_safe. Fixes: 7c2ffaf21bd6 ("net/tcp: Calculate TCP-AO traffic keys") Signed-off-by: Hyunwoo Kim Reviewed-by: Eric Dumazet Acked-by: Dmitry Safonov <0x7f454c46@gmail.com> Link: https://lore.kernel.org/r/ZiYu9NJ/ClR8uSkH@v4bel-B760M-AORUS-ELITE-AX Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_ao.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index 3afeeb68e8a7e..781b67a525719 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -1068,6 +1068,7 @@ void tcp_ao_connect_init(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct tcp_ao_info *ao_info; + struct hlist_node *next; union tcp_ao_addr *addr; struct tcp_ao_key *key; int family, l3index; @@ -1090,7 +1091,7 @@ void tcp_ao_connect_init(struct sock *sk) l3index = l3mdev_master_ifindex_by_index(sock_net(sk), sk->sk_bound_dev_if); - hlist_for_each_entry_rcu(key, &ao_info->head, node) { + hlist_for_each_entry_safe(key, next, &ao_info->head, node) { if (!tcp_ao_key_cmp(key, l3index, addr, key->prefixlen, family, -1, -1)) continue; From 2eb9dd497a698dc384c0dd3e0311d541eb2e13dd Mon Sep 17 00:00:00 2001 From: Joshua Ashton Date: Thu, 2 Nov 2023 04:21:55 +0000 Subject: [PATCH 292/606] drm/amd/display: Set color_mgmt_changed to true on unsuspend Otherwise we can end up with a frame on unsuspend where color management is not applied when userspace has not committed themselves. Fixes re-applying color management on Steam Deck/Gamescope on S3 resume. Signed-off-by: Joshua Ashton Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 6d2f60c61decc..f3f94d109726d 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -3029,6 +3029,7 @@ static int dm_resume(void *handle) dc_stream_release(dm_new_crtc_state->stream); dm_new_crtc_state->stream = NULL; } + dm_new_crtc_state->base.color_mgmt_changed = true; } for_each_new_plane_in_state(dm->cached_state, plane, new_plane_state, i) { From 37865e02e6ccecdda240f33b4332105a5c734984 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 17 Apr 2024 21:13:59 -0400 Subject: [PATCH 293/606] drm/amdkfd: Fix eviction fence handling Handle case that dma_fence_get_rcu_safe returns NULL. If restore work is already scheduled, only update its timer. The same work item cannot be queued twice, so undo the extra queue eviction. Fixes: 9a1c1339abf9 ("drm/amdkfd: Run restore_workers on freezable WQs") Signed-off-by: Felix Kuehling Reviewed-by: Philip Yang Tested-by: Gang BA Reviewed-by: Gang BA Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index b79986412cd83..aafdf064651fa 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1922,6 +1922,8 @@ static int signal_eviction_fence(struct kfd_process *p) rcu_read_lock(); ef = dma_fence_get_rcu_safe(&p->ef); rcu_read_unlock(); + if (!ef) + return -EINVAL; ret = dma_fence_signal(ef); dma_fence_put(ef); @@ -1949,10 +1951,9 @@ static void evict_process_worker(struct work_struct *work) * they are responsible stopping the queues and scheduling * the restore work. */ - if (!signal_eviction_fence(p)) - queue_delayed_work(kfd_restore_wq, &p->restore_work, - msecs_to_jiffies(PROCESS_RESTORE_TIME_MS)); - else + if (signal_eviction_fence(p) || + mod_delayed_work(kfd_restore_wq, &p->restore_work, + msecs_to_jiffies(PROCESS_RESTORE_TIME_MS))) kfd_process_restore_queues(p); pr_debug("Finished evicting pasid 0x%x\n", p->pasid); From 25e9227c6afd200bed6774c866980b8e36d033af Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Thu, 18 Apr 2024 11:32:34 -0400 Subject: [PATCH 294/606] drm/amdgpu: Fix leak when GPU memory allocation fails Free the sync object if the memory allocation fails for any reason. Signed-off-by: Mukul Joshi Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index df58a6a1a67ec..7c23ba19af33a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1854,6 +1854,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( err_bo_create: amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags, xcp_id); err_reserve_limit: + amdgpu_sync_free(&(*mem)->sync); mutex_destroy(&(*mem)->lock); if (gobj) drm_gem_object_put(gobj); From 9c783a11214553a54f0915a7260a3ce624d36bf2 Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Sun, 7 Apr 2024 12:36:00 +0800 Subject: [PATCH 295/606] drm/amdkfd: make sure VM is ready for updating operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When page table BOs were evicted but not validated before updating page tables, VM is still in evicting state, amdgpu_vm_update_range returns -EBUSY and restore_process_worker runs into a dead loop. v2: Split the BO validation and page table update into two separate loops in amdgpu_amdkfd_restore_process_bos. (Felix) 1.Validate BOs 2.Validate VM (and DMABuf attachments) 3.Update page tables for the BOs validated above Fixes: 50661eb1a2c8 ("drm/amdgpu: Auto-validate DMABuf imports in compute VMs") Signed-off-by: Lang Yu Acked-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 34 +++++++++++-------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 7c23ba19af33a..2131de36e3dac 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -2901,13 +2901,12 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu * amdgpu_sync_create(&sync_obj); - /* Validate BOs and map them to GPUVM (update VM page tables). */ + /* Validate BOs managed by KFD */ list_for_each_entry(mem, &process_info->kfd_bo_list, validate_list) { struct amdgpu_bo *bo = mem->bo; uint32_t domain = mem->domain; - struct kfd_mem_attachment *attachment; struct dma_resv_iter cursor; struct dma_fence *fence; @@ -2932,6 +2931,25 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu * goto validate_map_fail; } } + } + + if (failed_size) + pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size); + + /* Validate PDs, PTs and evicted DMABuf imports last. Otherwise BO + * validations above would invalidate DMABuf imports again. + */ + ret = process_validate_vms(process_info, &exec.ticket); + if (ret) { + pr_debug("Validating VMs failed, ret: %d\n", ret); + goto validate_map_fail; + } + + /* Update mappings managed by KFD. */ + list_for_each_entry(mem, &process_info->kfd_bo_list, + validate_list) { + struct kfd_mem_attachment *attachment; + list_for_each_entry(attachment, &mem->attachments, list) { if (!attachment->is_mapped) continue; @@ -2948,18 +2966,6 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu * } } - if (failed_size) - pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size); - - /* Validate PDs, PTs and evicted DMABuf imports last. Otherwise BO - * validations above would invalidate DMABuf imports again. - */ - ret = process_validate_vms(process_info, &exec.ticket); - if (ret) { - pr_debug("Validating VMs failed, ret: %d\n", ret); - goto validate_map_fail; - } - /* Update mappings not managed by KFD */ list_for_each_entry(peer_vm, &process_info->vm_list_head, vm_list_node) { From 0e95ed6452cb079cf9587c774a475a7d83c7e040 Mon Sep 17 00:00:00 2001 From: Ma Jun Date: Tue, 16 Apr 2024 17:30:12 +0800 Subject: [PATCH 296/606] drm/amdgpu/pm: Remove gpu_od if it's an empty directory gpu_od should be removed if it's an empty directory Signed-off-by: Ma Jun Reported-by: Yang Wang Reviewed-by: Yang Wang Suggested-by: Lijo Lazar Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/amdgpu_pm.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index f09b9d49297e8..bbd0169010c2d 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -4261,6 +4261,13 @@ static int amdgpu_od_set_init(struct amdgpu_device *adev) } } + /* + * If gpu_od is the only member in the list, that means gpu_od is an + * empty directory, so remove it. + */ + if (list_is_singular(&adev->pm.od_kobj_list)) + goto err_out; + return 0; err_out: From aebd3eb9d3ae017e6260043f6bcace2f5ef60694 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Wed, 10 Apr 2024 19:30:46 +0530 Subject: [PATCH 297/606] drm/amdgpu: Assign correct bits for SDMA HDP flush HDP Flush request bit can be kept unique per AID, and doesn't need to be unique SOC-wide. Assign only bits 10-13 for SDMA v4.4.2. Signed-off-by: Lijo Lazar Acked-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index 82eab49be82bb..e708468ac54dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -368,7 +368,8 @@ static void sdma_v4_4_2_ring_emit_hdp_flush(struct amdgpu_ring *ring) u32 ref_and_mask = 0; const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; - ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me; + ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 + << (ring->me % adev->sdma.num_inst_per_aid); sdma_v4_4_2_wait_reg_mem(ring, 0, 1, adev->nbio.funcs->get_hdp_flush_done_offset(adev), From d59198d2d0c5cb2a360819b000b0f173c472c9ef Mon Sep 17 00:00:00 2001 From: Peyton Lee Date: Fri, 19 Apr 2024 14:07:39 +0800 Subject: [PATCH 298/606] drm/amdgpu/vpe: fix vpe dpm setup failed The vpe dpm settings should be done before firmware is loaded. Otherwise, the frequency cannot be successfully raised. Signed-off-by: Peyton Lee Reviewed-by: Lang Yu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c | 2 +- drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c index 6695481f870f8..c23d97d34b7ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c @@ -205,7 +205,7 @@ int amdgpu_vpe_configure_dpm(struct amdgpu_vpe *vpe) dpm_ctl &= 0xfffffffe; /* Disable DPM */ WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable), dpm_ctl); dev_dbg(adev->dev, "%s: disable vpe dpm\n", __func__); - return 0; + return -EINVAL; } int amdgpu_vpe_psp_update_sram(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c b/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c index 769eb8f7bb3c5..09315dd5a1ec9 100644 --- a/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c @@ -144,6 +144,12 @@ static int vpe_v6_1_load_microcode(struct amdgpu_vpe *vpe) WREG32(vpe_get_reg_offset(vpe, j, regVPEC_CNTL), ret); } + /* setup collaborate mode */ + vpe_v6_1_set_collaborate_mode(vpe, true); + /* setup DPM */ + if (amdgpu_vpe_configure_dpm(vpe)) + dev_warn(adev->dev, "VPE failed to enable DPM\n"); + /* * For VPE 6.1.1, still only need to add master's offset, and psp will apply it to slave as well. * Here use instance 0 as master. @@ -159,11 +165,7 @@ static int vpe_v6_1_load_microcode(struct amdgpu_vpe *vpe) adev->vpe.cmdbuf_cpu_addr[0] = f32_offset; adev->vpe.cmdbuf_cpu_addr[1] = f32_cntl; - amdgpu_vpe_psp_update_sram(adev); - vpe_v6_1_set_collaborate_mode(vpe, true); - amdgpu_vpe_configure_dpm(vpe); - - return 0; + return amdgpu_vpe_psp_update_sram(adev); } vpe_hdr = (const struct vpe_firmware_header_v1_0 *)adev->vpe.fw->data; @@ -196,8 +198,6 @@ static int vpe_v6_1_load_microcode(struct amdgpu_vpe *vpe) } vpe_v6_1_halt(vpe, false); - vpe_v6_1_set_collaborate_mode(vpe, true); - amdgpu_vpe_configure_dpm(vpe); return 0; } From b0b13d532105e0e682d95214933bb8483a063184 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Thu, 18 Apr 2024 13:56:42 -0400 Subject: [PATCH 299/606] drm/amdgpu: Update BO eviction priorities MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make SVM BOs more likely to get evicted than other BOs. These BOs opportunistically use available VRAM, but can fall back relatively seamlessly to system memory. It also avoids SVM migrations evicting other, more important BOs as they will evict other SVM allocations first. Signed-off-by: Felix Kuehling Acked-by: Mukul Joshi Tested-by: Mukul Joshi Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 2099159a693fa..ce733e3cb35d0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -605,6 +605,8 @@ int amdgpu_bo_create(struct amdgpu_device *adev, else amdgpu_bo_placement_from_domain(bo, bp->domain); if (bp->type == ttm_bo_type_kernel) + bo->tbo.priority = 2; + else if (!(bp->flags & AMDGPU_GEM_CREATE_DISCARDABLE)) bo->tbo.priority = 1; if (!bp->destroy) From e26305f369ed0e087a043c2cdc76f3d9a6efb3bd Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Fri, 19 Apr 2024 13:25:58 -0400 Subject: [PATCH 300/606] drm/amdkfd: Fix rescheduling of restore worker Handle the case that the restore worker was already scheduled by another eviction while the restore was in progress. Fixes: 9a1c1339abf9 ("drm/amdkfd: Run restore_workers on freezable WQs") Signed-off-by: Felix Kuehling Reviewed-by: Philip Yang Tested-by: Yunxiang Li Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index aafdf064651fa..58c1fe5421934 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -2012,9 +2012,9 @@ static void restore_process_worker(struct work_struct *work) if (ret) { pr_debug("Failed to restore BOs of pasid 0x%x, retry after %d ms\n", p->pasid, PROCESS_BACK_OFF_TIME_MS); - ret = queue_delayed_work(kfd_restore_wq, &p->restore_work, - msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS)); - WARN(!ret, "reschedule restore work failed\n"); + if (mod_delayed_work(kfd_restore_wq, &p->restore_work, + msecs_to_jiffies(PROCESS_RESTORE_TIME_MS))) + kfd_process_restore_queues(p); } } From 661d71ee5a010bdc0663e0db701931aff920e8e1 Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Fri, 19 Apr 2024 15:40:08 +0800 Subject: [PATCH 301/606] drm/amdgpu/umsch: don't execute umsch test when GPU is in reset/suspend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit umsch test needs full GPU functionality(e.g., VM update, TLB flush, possibly buffer moving under memory pressure) which may be not ready under these states. Just skip it to avoid potential issues. Signed-off-by: Lang Yu Reviewed-by: Christian König Reviewed-by: Veerabadhran Gopalakrishnan Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c index 0df97c3e3a700..f7c73533e336f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c @@ -774,6 +774,9 @@ static int umsch_mm_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (amdgpu_in_reset(adev) || adev->in_s0ix || adev->in_suspend) + return 0; + return umsch_mm_test(adev); } From 30d1cda8ce31ab49051ff7159280c542a738b23d Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Fri, 12 Apr 2024 13:11:14 +0530 Subject: [PATCH 302/606] drm/amd/pm: Restore config space after reset During mode-2 reset, pci config space registers are affected at device side. However, certain platforms have switches which assign virtual BAR addresses and returns the same even after device is reset. This affects pci_restore_state() as it doesn't issue another config write, if the value read is same as the saved value. Add a workaround to write saved config space values from driver side. Presently, these switches are in platforms with SMU v13.0.6 SOCs, hence restrict the workaround only to those. Signed-off-by: Lijo Lazar Reviewed-by: Asad Kamal Signed-off-by: Alex Deucher --- .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 3957af057d54f..c977ebe88001d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -2294,6 +2294,17 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table return sizeof(*gpu_metrics); } +static void smu_v13_0_6_restore_pci_config(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + int i; + + for (i = 0; i < 16; i++) + pci_write_config_dword(adev->pdev, i * 4, + adev->pdev->saved_config_space[i]); + pci_restore_msi_state(adev->pdev); +} + static int smu_v13_0_6_mode2_reset(struct smu_context *smu) { int ret = 0, index; @@ -2315,6 +2326,20 @@ static int smu_v13_0_6_mode2_reset(struct smu_context *smu) /* Restore the config space saved during init */ amdgpu_device_load_pci_state(adev->pdev); + /* Certain platforms have switches which assign virtual BAR values to + * devices. OS uses the virtual BAR values and device behind the switch + * is assgined another BAR value. When device's config space registers + * are queried, switch returns the virtual BAR values. When mode-2 reset + * is performed, switch is unaware of it, and will continue to return + * the same virtual values to the OS.This affects + * pci_restore_config_space() API as it doesn't write the value saved if + * the current value read from config space is the same as what is + * saved. As a workaround, make sure the config space is restored + * always. + */ + if (!(adev->flags & AMD_IS_APU)) + smu_v13_0_6_restore_pci_config(smu); + dev_dbg(smu->adev->dev, "wait for reset ack\n"); do { ret = smu_cmn_wait_for_response(smu); From 1e214f7faaf5d842754cd5cfcd76308bfedab3b5 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Thu, 18 Apr 2024 15:13:58 -0400 Subject: [PATCH 303/606] drm/amdkfd: Add VRAM accounting for SVM migration Do VRAM accounting when doing migrations to vram to make sure there is enough available VRAM and migrating to VRAM doesn't evict other possible non-unified memory BOs. If migrating to VRAM fails, driver can fall back to using system memory seamlessly. Signed-off-by: Mukul Joshi Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 16 +++++++++++++++- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 2 +- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index bdc01ca9609a7..5c8d81bfce7ab 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -509,10 +509,19 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, start = start_mgr << PAGE_SHIFT; end = (last_mgr + 1) << PAGE_SHIFT; + r = amdgpu_amdkfd_reserve_mem_limit(node->adev, + prange->npages * PAGE_SIZE, + KFD_IOC_ALLOC_MEM_FLAGS_VRAM, + node->xcp ? node->xcp->id : 0); + if (r) { + dev_dbg(node->adev->dev, "failed to reserve VRAM, r: %ld\n", r); + return -ENOSPC; + } + r = svm_range_vram_node_new(node, prange, true); if (r) { dev_dbg(node->adev->dev, "fail %ld to alloc vram\n", r); - return r; + goto out; } ttm_res_offset = (start_mgr - prange->start + prange->offset) << PAGE_SHIFT; @@ -545,6 +554,11 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, svm_range_vram_node_free(prange); } +out: + amdgpu_amdkfd_unreserve_mem_limit(node->adev, + prange->npages * PAGE_SIZE, + KFD_IOC_ALLOC_MEM_FLAGS_VRAM, + node->xcp ? node->xcp->id : 0); return r < 0 ? r : 0; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index f0f7f48af4137..386875e6eb96b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -3426,7 +3426,7 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange, mm, KFD_MIGRATE_TRIGGER_PREFETCH); *migrated = !r; - return r; + return 0; } int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence) From fe93b0927bc58cb1d64230f45744e527d9d8482c Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Mon, 25 Mar 2024 15:33:34 +0800 Subject: [PATCH 304/606] drm/amdgpu: Fix the ring buffer size for queue VM flush MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Here are the corrections needed for the queue ring buffer size calculation for the following cases: - Remove the KIQ VM flush ring usage. - Add the invalidate TLBs packet for gfx10 and gfx11 queue. - There's no VM flush and PFP sync, so remove the gfx9 real ring and compute ring buffer usage. Signed-off-by: Prike Liang Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 3 +-- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 3 +-- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 -- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index f90905ef32c76..701146d649c35 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -9186,7 +9186,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { 7 + /* PIPELINE_SYNC */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + - 2 + /* VM_FLUSH */ + 4 + /* VM_FLUSH */ 8 + /* FENCE for VM_FLUSH */ 20 + /* GDS switch */ 4 + /* double SWITCH_BUFFER, @@ -9276,7 +9276,6 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = { 7 + /* gfx_v10_0_ring_emit_pipeline_sync */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + - 2 + /* gfx_v10_0_ring_emit_vm_flush */ 8 + 8 + 8, /* gfx_v10_0_ring_emit_fence_kiq x3 for user fence, vm fence */ .emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */ .emit_ib = gfx_v10_0_ring_emit_ib_compute, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index f7325b02a191f..f00e05aba46a4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -6192,7 +6192,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { 7 + /* PIPELINE_SYNC */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + - 2 + /* VM_FLUSH */ + 4 + /* VM_FLUSH */ 8 + /* FENCE for VM_FLUSH */ 20 + /* GDS switch */ 5 + /* COND_EXEC */ @@ -6278,7 +6278,6 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = { 7 + /* gfx_v11_0_ring_emit_pipeline_sync */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + - 2 + /* gfx_v11_0_ring_emit_vm_flush */ 8 + 8 + 8, /* gfx_v11_0_ring_emit_fence_kiq x3 for user fence, vm fence */ .emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */ .emit_ib = gfx_v11_0_ring_emit_ib_compute, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 6f97a6d0e6d05..99dbd2341120d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -6981,7 +6981,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + - 2 + /* gfx_v9_0_ring_emit_vm_flush */ 8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ 7 + /* gfx_v9_0_emit_mem_sync */ 5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */ @@ -7019,7 +7018,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + - 2 + /* gfx_v9_0_ring_emit_vm_flush */ 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */ .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ .emit_fence = gfx_v9_0_ring_emit_fence_kiq, From 9792b7cc18aaa0c2acae6af5d0acf249bcb1ab0d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Sun, 14 Apr 2024 21:20:56 -0400 Subject: [PATCH 305/606] drm/amdgpu/sdma5.2: use legacy HDP flush for SDMA2/3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This avoids a potential conflict with firmwares with the newer HDP flush mechanism. Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 42f4bd250def6..da01b524b9f2a 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -280,17 +280,21 @@ static void sdma_v5_2_ring_emit_hdp_flush(struct amdgpu_ring *ring) u32 ref_and_mask = 0; const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; - ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me; - - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | - SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) | - SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */ - amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2); - amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2); - amdgpu_ring_write(ring, ref_and_mask); /* reference */ - amdgpu_ring_write(ring, ref_and_mask); /* mask */ - amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | - SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */ + if (ring->me > 1) { + amdgpu_asic_flush_hdp(adev, ring); + } else { + ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me; + + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | + SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) | + SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */ + amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2); + amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2); + amdgpu_ring_write(ring, ref_and_mask); /* reference */ + amdgpu_ring_write(ring, ref_and_mask); /* mask */ + amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | + SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */ + } } /** From 948255282074d9367e01908b3f5dcf8c10fc9c3d Mon Sep 17 00:00:00 2001 From: Jack Xiao Date: Mon, 22 Apr 2024 16:22:54 +0800 Subject: [PATCH 306/606] drm/amdgpu/mes: fix use-after-free issue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Delete fence fallback timer to fix the ramdom use-after-free issue. v2: move to amdgpu_mes.c Signed-off-by: Jack Xiao Acked-by: Lijo Lazar Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index a00cf4756ad0e..1569bef030eac 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -1132,6 +1132,7 @@ void amdgpu_mes_remove_ring(struct amdgpu_device *adev, return; amdgpu_mes_remove_hw_queue(adev, ring->hw_queue_id); + del_timer_sync(&ring->fence_drv.fallback_timer); amdgpu_ring_fini(ring); kfree(ring); } From d40f92020c7a225b77e68599e4b099a4a0823408 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 22 Apr 2024 14:43:48 -1000 Subject: [PATCH 307/606] workqueue: The default node_nr_active should have its max set to max_active The default nna (node_nr_active) is used when the pool isn't tied to a specific NUMA node. This can happen in the following cases: 1. On NUMA, if per-node pwq init failure and the fallback pwq is used. 2. On NUMA, if a pool is configured to span multiple nodes. 3. On single node setups. 5797b1c18919 ("workqueue: Implement system-wide nr_active enforcement for unbound workqueues") set the default nna->max to min_active because only #1 was being considered. For #2 and #3, using min_active means that the max concurrency in normal operation is pushed down to min_active which is currently 8, which can obviously lead to performance issues. exact value nna->max is set to doesn't really matter. #2 can only happen if the workqueue is intentionally configured to ignore NUMA boundaries and there's no good way to distribute max_active in this case. #3 is the default behavior on single node machines. Let's set it the default nna->max to max_active. This fixes the artificially lowered concurrency problem on single node machines and shouldn't hurt anything for other cases. Signed-off-by: Tejun Heo Reported-by: Shinichiro Kawasaki Fixes: 5797b1c18919 ("workqueue: Implement system-wide nr_active enforcement for unbound workqueues") Link: https://lore.kernel.org/dm-devel/20240410084531.2134621-1-shinichiro.kawasaki@wdc.com/ Signed-off-by: Tejun Heo --- kernel/workqueue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index a2af0aaf026ba..5f536c63a48d7 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1610,7 +1610,7 @@ static void wq_update_node_max_active(struct workqueue_struct *wq, int off_cpu) min_active, max_active); } - wq_node_nr_active(wq, NUMA_NO_NODE)->max = min_active; + wq_node_nr_active(wq, NUMA_NO_NODE)->max = max_active; } /** From 697f3342477170bdf8759157bdc19c0b7b3e9d14 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 24 Apr 2024 12:36:07 +0800 Subject: [PATCH 308/606] LoongArch: Fix Kconfig item and left code related to CRASH_CORE In commit 85fcde402db191b5 ("kexec: split crashkernel reservation code out from crash_core.c"), crashkernel reservation code is split out from crash_core.c, and add CRASH_RESERVE to control it. And also rename each ARCH's to accordingly. But the relevant part in LoongArch is missed. Do it now. Fixes: 85fcde402db1 ("kexec: split crashkernel reservation code out from crash_core.c") Signed-off-by: Baoquan He Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 2 +- arch/loongarch/include/asm/{crash_core.h => crash_reserve.h} | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) rename arch/loongarch/include/asm/{crash_core.h => crash_reserve.h} (75%) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index a5f300ec6f280..54ad04dacdee9 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -595,7 +595,7 @@ config ARCH_SELECTS_CRASH_DUMP select RELOCATABLE config ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION - def_bool CRASH_CORE + def_bool CRASH_RESERVE config RELOCATABLE bool "Relocatable kernel" diff --git a/arch/loongarch/include/asm/crash_core.h b/arch/loongarch/include/asm/crash_reserve.h similarity index 75% rename from arch/loongarch/include/asm/crash_core.h rename to arch/loongarch/include/asm/crash_reserve.h index 218bdbfa527ba..a1d9b84b1c7d5 100644 --- a/arch/loongarch/include/asm/crash_core.h +++ b/arch/loongarch/include/asm/crash_reserve.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ -#ifndef _LOONGARCH_CRASH_CORE_H -#define _LOONGARCH_CRASH_CORE_H +#ifndef _LOONGARCH_CRASH_RESERVE_H +#define _LOONGARCH_CRASH_RESERVE_H #define CRASH_ALIGN SZ_2M From 7ab22b5c2af54e233f3d05d7d601025947e4ff05 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 24 Apr 2024 12:36:07 +0800 Subject: [PATCH 309/606] LoongArch: Fix a build error due to __tlb_remove_tlb_entry() With LLVM=1 and W=1 we get: ./include/asm-generic/tlb.h:629:10: error: parameter 'ptep' set but not used [-Werror,-Wunused-but-set-parameter] We fixed a similar issue via Arnd in the introducing commit, missed the LoongArch variant. Turns out, there is no need for LoongArch to have a custom variant, so let's just drop it and rely on the asm-generic one. Fixes: 4d5bf0b6183f ("mm/mmu_gather: add tlb_remove_tlb_entries()") Closes: https://lkml.kernel.org/r/CANiq72mQh3O9S4umbvrKBgMMorty48UMwS01U22FR0mRyd3cyQ@mail.gmail.com Reported-by: Miguel Ojeda Reviewed-by: Miguel Ojeda Tested-by: Miguel Ojeda Tested-by: Arnd Bergmann Signed-off-by: David Hildenbrand Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/tlb.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/loongarch/include/asm/tlb.h b/arch/loongarch/include/asm/tlb.h index da7a3b5b9374a..e071f5e9e8580 100644 --- a/arch/loongarch/include/asm/tlb.h +++ b/arch/loongarch/include/asm/tlb.h @@ -132,8 +132,6 @@ static __always_inline void invtlb_all(u32 op, u32 info, u64 addr) ); } -#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) - static void tlb_flush(struct mmu_gather *tlb); #define tlb_flush tlb_flush From efb44ff64c95340b06331fc48634b99efc9dd77c Mon Sep 17 00:00:00 2001 From: Jiantao Shan Date: Wed, 24 Apr 2024 12:36:07 +0800 Subject: [PATCH 310/606] LoongArch: Fix access error when read fault on a write-only VMA As with most architectures, allow handling of read faults in VMAs that have VM_WRITE but without VM_READ (WRITE implies READ). Otherwise, reading before writing a write-only memory will error while reading after writing everything is fine. BTW, move the VM_EXEC judgement before VM_READ/VM_WRITE to make logic a little clearer. Cc: stable@vger.kernel.org Fixes: 09cfefb7fa70c3af01 ("LoongArch: Add memory management") Signed-off-by: Jiantao Shan Signed-off-by: Huacai Chen --- arch/loongarch/mm/fault.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/mm/fault.c b/arch/loongarch/mm/fault.c index 1fc2f6813ea02..97b40defde060 100644 --- a/arch/loongarch/mm/fault.c +++ b/arch/loongarch/mm/fault.c @@ -202,10 +202,10 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, if (!(vma->vm_flags & VM_WRITE)) goto bad_area; } else { - if (!(vma->vm_flags & VM_READ) && address != exception_era(regs)) - goto bad_area; if (!(vma->vm_flags & VM_EXEC) && address == exception_era(regs)) goto bad_area; + if (!(vma->vm_flags & (VM_READ | VM_WRITE)) && address != exception_era(regs)) + goto bad_area; } /* From d3119bc985fb645ad3b2a9cf9952c1d56d9daaa3 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 24 Apr 2024 12:36:07 +0800 Subject: [PATCH 311/606] LoongArch: Fix callchain parse error with kernel tracepoint events In order to fix perf's callchain parse error for LoongArch, we implement perf_arch_fetch_caller_regs() which fills several necessary registers used for callchain unwinding, including sp, fp, and era. This is similar to the following commits. commit b3eac0265bf6: ("arm: perf: Fix callchain parse error with kernel tracepoint events") commit 5b09a094f2fb: ("arm64: perf: Fix callchain parse error with kernel tracepoint events") commit 9a7e8ec0d4cc: ("riscv: perf: Fix callchain parse error with kernel tracepoint events") Test with commands: perf record -e sched:sched_switch -g --call-graph dwarf perf report Without this patch: Children Self Command Shared Object Symbol ........ ........ ............. ................. .................... 43.41% 43.41% swapper [unknown] [k] 0000000000000000 10.94% 10.94% loong-container [unknown] [k] 0000000000000000 | |--5.98%--0x12006ba38 | |--2.56%--0x12006bb84 | --2.40%--0x12006b6b8 With this patch, callchain can be parsed correctly: Children Self Command Shared Object Symbol ........ ........ ............. ................. .................... 47.57% 47.57% swapper [kernel.vmlinux] [k] __schedule | ---__schedule 26.76% 26.76% loong-container [kernel.vmlinux] [k] __schedule | |--13.78%--0x12006ba38 | | | |--9.19%--__schedule | | | --4.59%--handle_syscall | do_syscall | sys_futex | do_futex | futex_wait | futex_wait_queue_me | hrtimer_start_range_ns | __schedule | |--8.38%--0x12006bb84 | handle_syscall | do_syscall | sys_epoll_pwait | do_epoll_wait | schedule_hrtimeout_range_clock | hrtimer_start_range_ns | __schedule | --4.59%--0x12006b6b8 handle_syscall do_syscall sys_nanosleep hrtimer_nanosleep do_nanosleep hrtimer_start_range_ns __schedule Cc: stable@vger.kernel.org Fixes: b37042b2bb7cd751f0 ("LoongArch: Add perf events support") Reported-by: Youling Tang Suggested-by: Youling Tang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/perf_event.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/loongarch/include/asm/perf_event.h b/arch/loongarch/include/asm/perf_event.h index 2a35a0bc2aaab..52b638059e40b 100644 --- a/arch/loongarch/include/asm/perf_event.h +++ b/arch/loongarch/include/asm/perf_event.h @@ -7,6 +7,14 @@ #ifndef __LOONGARCH_PERF_EVENT_H__ #define __LOONGARCH_PERF_EVENT_H__ +#include + #define perf_arch_bpf_user_pt_regs(regs) (struct user_pt_regs *)regs +#define perf_arch_fetch_caller_regs(regs, __ip) { \ + (regs)->csr_era = (__ip); \ + (regs)->regs[3] = current_stack_pointer; \ + (regs)->regs[22] = (unsigned long) __builtin_frame_address(0); \ +} + #endif /* __LOONGARCH_PERF_EVENT_H__ */ From a1383ac7284afc2f0ca39edee57dea4db70e66f3 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Sun, 21 Apr 2024 12:07:01 +0300 Subject: [PATCH 312/606] mei: pxp: match against PCI_CLASS_DISPLAY_OTHER The ATS-M class is PCI_CLASS_DISPLAY_OTHER instead of PCI_CLASS_DISPLAY_VGA, so we need to match against that class as well. The matching is still restricted to Intel devices only. Fixes: ceeedd951f8a ("mei: pxp: match without driver name") Signed-off-by: Daniele Ceraolo Spurio Signed-off-by: Tomas Winkler Link: https://lore.kernel.org/r/20240421090701.216028-1-tomas.winkler@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/pxp/mei_pxp.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/misc/mei/pxp/mei_pxp.c b/drivers/misc/mei/pxp/mei_pxp.c index b1e4c23b31a32..49abc95677cda 100644 --- a/drivers/misc/mei/pxp/mei_pxp.c +++ b/drivers/misc/mei/pxp/mei_pxp.c @@ -236,8 +236,11 @@ static int mei_pxp_component_match(struct device *dev, int subcomponent, pdev = to_pci_dev(dev); - if (pdev->class != (PCI_CLASS_DISPLAY_VGA << 8) || - pdev->vendor != PCI_VENDOR_ID_INTEL) + if (pdev->vendor != PCI_VENDOR_ID_INTEL) + return 0; + + if (pdev->class != (PCI_CLASS_DISPLAY_VGA << 8) && + pdev->class != (PCI_CLASS_DISPLAY_OTHER << 8)) return 0; if (subcomponent != I915_COMPONENT_PXP) From 4108a30f1097eead0f6bd5d885e6bf093b4d460f Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Sun, 21 Apr 2024 16:56:31 +0300 Subject: [PATCH 313/606] mei: me: add lunar lake point M DID Add Lunar (Point) Lake M device id. Cc: stable@vger.kernel.org Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Link: https://lore.kernel.org/r/20240421135631.223362-1-tomas.winkler@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/hw-me-regs.h | 2 ++ drivers/misc/mei/pci-me.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index aac36750d2c54..c3a6657dcd4a2 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -115,6 +115,8 @@ #define MEI_DEV_ID_ARL_S 0x7F68 /* Arrow Lake Point S */ #define MEI_DEV_ID_ARL_H 0x7770 /* Arrow Lake Point H */ +#define MEI_DEV_ID_LNL_M 0xA870 /* Lunar Lake Point M */ + /* * MEI HW Section */ diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index c39718042e2e0..7f59dd38c32f5 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -122,6 +122,8 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_ARL_S, MEI_ME_PCH15_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_ARL_H, MEI_ME_PCH15_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_LNL_M, MEI_ME_PCH15_CFG)}, + /* required last entry */ {0, } }; From 9a1f1d04f63c59550a5364858b46eeffdf03e8d6 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 23 Apr 2024 20:41:22 -0600 Subject: [PATCH 314/606] smb: client: Fix struct_group() usage in __packed structs Use struct_group_attr() in __packed structs, instead of struct_group(). Below you can see the pahole output before/after changes: pahole -C smb2_file_network_open_info fs/smb/client/smb2ops.o struct smb2_file_network_open_info { union { struct { __le64 CreationTime; /* 0 8 */ __le64 LastAccessTime; /* 8 8 */ __le64 LastWriteTime; /* 16 8 */ __le64 ChangeTime; /* 24 8 */ __le64 AllocationSize; /* 32 8 */ __le64 EndOfFile; /* 40 8 */ __le32 Attributes; /* 48 4 */ }; /* 0 56 */ struct { __le64 CreationTime; /* 0 8 */ __le64 LastAccessTime; /* 8 8 */ __le64 LastWriteTime; /* 16 8 */ __le64 ChangeTime; /* 24 8 */ __le64 AllocationSize; /* 32 8 */ __le64 EndOfFile; /* 40 8 */ __le32 Attributes; /* 48 4 */ } network_open_info; /* 0 56 */ }; /* 0 56 */ __le32 Reserved; /* 56 4 */ /* size: 60, cachelines: 1, members: 2 */ /* last cacheline: 60 bytes */ } __attribute__((__packed__)); pahole -C smb2_file_network_open_info fs/smb/client/smb2ops.o struct smb2_file_network_open_info { union { struct { __le64 CreationTime; /* 0 8 */ __le64 LastAccessTime; /* 8 8 */ __le64 LastWriteTime; /* 16 8 */ __le64 ChangeTime; /* 24 8 */ __le64 AllocationSize; /* 32 8 */ __le64 EndOfFile; /* 40 8 */ __le32 Attributes; /* 48 4 */ } __attribute__((__packed__)); /* 0 52 */ struct { __le64 CreationTime; /* 0 8 */ __le64 LastAccessTime; /* 8 8 */ __le64 LastWriteTime; /* 16 8 */ __le64 ChangeTime; /* 24 8 */ __le64 AllocationSize; /* 32 8 */ __le64 EndOfFile; /* 40 8 */ __le32 Attributes; /* 48 4 */ } __attribute__((__packed__)) network_open_info; /* 0 52 */ }; /* 0 52 */ __le32 Reserved; /* 52 4 */ /* size: 56, cachelines: 1, members: 2 */ /* last cacheline: 56 bytes */ }; pahole -C smb_com_open_rsp fs/smb/client/cifssmb.o struct smb_com_open_rsp { ... union { struct { __le64 CreationTime; /* 48 8 */ __le64 LastAccessTime; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ __le64 LastWriteTime; /* 64 8 */ __le64 ChangeTime; /* 72 8 */ __le32 FileAttributes; /* 80 4 */ }; /* 48 40 */ struct { __le64 CreationTime; /* 48 8 */ __le64 LastAccessTime; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ __le64 LastWriteTime; /* 64 8 */ __le64 ChangeTime; /* 72 8 */ __le32 FileAttributes; /* 80 4 */ } common_attributes; /* 48 40 */ }; /* 48 40 */ ... /* size: 111, cachelines: 2, members: 14 */ /* last cacheline: 47 bytes */ } __attribute__((__packed__)); pahole -C smb_com_open_rsp fs/smb/client/cifssmb.o struct smb_com_open_rsp { ... union { struct { __le64 CreationTime; /* 48 8 */ __le64 LastAccessTime; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ __le64 LastWriteTime; /* 64 8 */ __le64 ChangeTime; /* 72 8 */ __le32 FileAttributes; /* 80 4 */ } __attribute__((__packed__)); /* 48 36 */ struct { __le64 CreationTime; /* 48 8 */ __le64 LastAccessTime; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ __le64 LastWriteTime; /* 64 8 */ __le64 ChangeTime; /* 72 8 */ __le32 FileAttributes; /* 80 4 */ } __attribute__((__packed__)) common_attributes; /* 48 36 */ }; /* 48 36 */ ... /* size: 107, cachelines: 2, members: 14 */ /* last cacheline: 43 bytes */ } __attribute__((__packed__)); pahole -C FILE_ALL_INFO fs/smb/client/cifssmb.o typedef struct { union { struct { __le64 CreationTime; /* 0 8 */ __le64 LastAccessTime; /* 8 8 */ __le64 LastWriteTime; /* 16 8 */ __le64 ChangeTime; /* 24 8 */ __le32 Attributes; /* 32 4 */ }; /* 0 40 */ struct { __le64 CreationTime; /* 0 8 */ __le64 LastAccessTime; /* 8 8 */ __le64 LastWriteTime; /* 16 8 */ __le64 ChangeTime; /* 24 8 */ __le32 Attributes; /* 32 4 */ } common_attributes; /* 0 40 */ }; /* 0 40 */ ... /* size: 113, cachelines: 2, members: 17 */ /* last cacheline: 49 bytes */ } __attribute__((__packed__)) FILE_ALL_INFO; pahole -C FILE_ALL_INFO fs/smb/client/cifssmb.o typedef struct { union { struct { __le64 CreationTime; /* 0 8 */ __le64 LastAccessTime; /* 8 8 */ __le64 LastWriteTime; /* 16 8 */ __le64 ChangeTime; /* 24 8 */ __le32 Attributes; /* 32 4 */ } __attribute__((__packed__)); /* 0 36 */ struct { __le64 CreationTime; /* 0 8 */ __le64 LastAccessTime; /* 8 8 */ __le64 LastWriteTime; /* 16 8 */ __le64 ChangeTime; /* 24 8 */ __le32 Attributes; /* 32 4 */ } __attribute__((__packed__)) common_attributes; /* 0 36 */ }; /* 0 36 */ ... /* size: 109, cachelines: 2, members: 17 */ /* last cacheline: 45 bytes */ } __attribute__((__packed__)) FILE_ALL_INFO; Fixes: 0015eb6e1238 ("smb: client, common: fix fortify warnings") Cc: stable@vger.kernel.org Reviewed-by: Namjae Jeon Signed-off-by: Gustavo A. R. Silva Signed-off-by: Steve French --- fs/smb/client/cifspdu.h | 4 ++-- fs/smb/client/smb2pdu.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/smb/client/cifspdu.h b/fs/smb/client/cifspdu.h index c0513fbb8a59d..c46d418c1c0c3 100644 --- a/fs/smb/client/cifspdu.h +++ b/fs/smb/client/cifspdu.h @@ -882,7 +882,7 @@ typedef struct smb_com_open_rsp { __u8 OplockLevel; __u16 Fid; __le32 CreateAction; - struct_group(common_attributes, + struct_group_attr(common_attributes, __packed, __le64 CreationTime; __le64 LastAccessTime; __le64 LastWriteTime; @@ -2266,7 +2266,7 @@ typedef struct { /* QueryFileInfo/QueryPathinfo (also for SetPath/SetFile) data buffer formats */ /******************************************************************************/ typedef struct { /* data block encoding of response to level 263 QPathInfo */ - struct_group(common_attributes, + struct_group_attr(common_attributes, __packed, __le64 CreationTime; __le64 LastAccessTime; __le64 LastWriteTime; diff --git a/fs/smb/client/smb2pdu.h b/fs/smb/client/smb2pdu.h index c72a3b2886b7f..2fccf0d4f53d2 100644 --- a/fs/smb/client/smb2pdu.h +++ b/fs/smb/client/smb2pdu.h @@ -320,7 +320,7 @@ struct smb2_file_reparse_point_info { } __packed; struct smb2_file_network_open_info { - struct_group(network_open_info, + struct_group_attr(network_open_info, __packed, __le64 CreationTime; __le64 LastAccessTime; __le64 LastWriteTime; From f2a904107ee2b647bb7794a1a82b67740d7c8a64 Mon Sep 17 00:00:00 2001 From: Hyunwoo Kim Date: Mon, 22 Apr 2024 05:39:30 -0400 Subject: [PATCH 315/606] net: gtp: Fix Use-After-Free in gtp_dellink Since call_rcu, which is called in the hlist_for_each_entry_rcu traversal of gtp_dellink, is not part of the RCU read critical section, it is possible that the RCU grace period will pass during the traversal and the key will be free. To prevent this, it should be changed to hlist_for_each_entry_safe. Fixes: 94dc550a5062 ("gtp: fix an use-after-free in ipv4_pdp_find()") Signed-off-by: Hyunwoo Kim Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/gtp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index ba4704c2c640b..e62d6cbdf9bc6 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -1098,11 +1098,12 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev, static void gtp_dellink(struct net_device *dev, struct list_head *head) { struct gtp_dev *gtp = netdev_priv(dev); + struct hlist_node *next; struct pdp_ctx *pctx; int i; for (i = 0; i < gtp->hash_size; i++) - hlist_for_each_entry_rcu(pctx, >p->tid_hash[i], hlist_tid) + hlist_for_each_entry_safe(pctx, next, >p->tid_hash[i], hlist_tid) pdp_context_delete(pctx); list_del_rcu(>p->list); From 5b5f724b05c550e10693a53a81cadca901aefd16 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Sun, 21 Apr 2024 01:08:31 +0100 Subject: [PATCH 316/606] net: phy: mediatek-ge-soc: follow netdev LED trigger semantics Only blink if the link is up on a LED which is programmed to also indicate link-status. Otherwise, if both LEDs are in use to indicate different speeds, the resulting blinking being inverted on LEDs which aren't switched on at a specific speed is quite counter-intuitive. Also make sure that state left behind by reset or the bootloader is recognized correctly including the half-duplex and full-duplex bits as well as the (unsupported by Linux netdev trigger semantics) link-down bit. Fixes: c66937b0f8db ("net: phy: mediatek-ge-soc: support PHY LEDs") Signed-off-by: Daniel Golle Signed-off-by: David S. Miller --- drivers/net/phy/mediatek-ge-soc.c | 43 +++++++++++++++++++------------ 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/drivers/net/phy/mediatek-ge-soc.c b/drivers/net/phy/mediatek-ge-soc.c index 0f3a1538a8b8e..f4f9412d0cd7e 100644 --- a/drivers/net/phy/mediatek-ge-soc.c +++ b/drivers/net/phy/mediatek-ge-soc.c @@ -216,6 +216,9 @@ #define MTK_PHY_LED_ON_LINK1000 BIT(0) #define MTK_PHY_LED_ON_LINK100 BIT(1) #define MTK_PHY_LED_ON_LINK10 BIT(2) +#define MTK_PHY_LED_ON_LINK (MTK_PHY_LED_ON_LINK10 |\ + MTK_PHY_LED_ON_LINK100 |\ + MTK_PHY_LED_ON_LINK1000) #define MTK_PHY_LED_ON_LINKDOWN BIT(3) #define MTK_PHY_LED_ON_FDX BIT(4) /* Full duplex */ #define MTK_PHY_LED_ON_HDX BIT(5) /* Half duplex */ @@ -231,6 +234,12 @@ #define MTK_PHY_LED_BLINK_100RX BIT(3) #define MTK_PHY_LED_BLINK_10TX BIT(4) #define MTK_PHY_LED_BLINK_10RX BIT(5) +#define MTK_PHY_LED_BLINK_RX (MTK_PHY_LED_BLINK_10RX |\ + MTK_PHY_LED_BLINK_100RX |\ + MTK_PHY_LED_BLINK_1000RX) +#define MTK_PHY_LED_BLINK_TX (MTK_PHY_LED_BLINK_10TX |\ + MTK_PHY_LED_BLINK_100TX |\ + MTK_PHY_LED_BLINK_1000TX) #define MTK_PHY_LED_BLINK_COLLISION BIT(6) #define MTK_PHY_LED_BLINK_RX_CRC_ERR BIT(7) #define MTK_PHY_LED_BLINK_RX_IDLE_ERR BIT(8) @@ -1247,11 +1256,9 @@ static int mt798x_phy_led_hw_control_get(struct phy_device *phydev, u8 index, if (blink < 0) return -EIO; - if ((on & (MTK_PHY_LED_ON_LINK1000 | MTK_PHY_LED_ON_LINK100 | - MTK_PHY_LED_ON_LINK10)) || - (blink & (MTK_PHY_LED_BLINK_1000RX | MTK_PHY_LED_BLINK_100RX | - MTK_PHY_LED_BLINK_10RX | MTK_PHY_LED_BLINK_1000TX | - MTK_PHY_LED_BLINK_100TX | MTK_PHY_LED_BLINK_10TX))) + if ((on & (MTK_PHY_LED_ON_LINK | MTK_PHY_LED_ON_FDX | MTK_PHY_LED_ON_HDX | + MTK_PHY_LED_ON_LINKDOWN)) || + (blink & (MTK_PHY_LED_BLINK_RX | MTK_PHY_LED_BLINK_TX))) set_bit(bit_netdev, &priv->led_state); else clear_bit(bit_netdev, &priv->led_state); @@ -1269,7 +1276,7 @@ static int mt798x_phy_led_hw_control_get(struct phy_device *phydev, u8 index, if (!rules) return 0; - if (on & (MTK_PHY_LED_ON_LINK1000 | MTK_PHY_LED_ON_LINK100 | MTK_PHY_LED_ON_LINK10)) + if (on & MTK_PHY_LED_ON_LINK) *rules |= BIT(TRIGGER_NETDEV_LINK); if (on & MTK_PHY_LED_ON_LINK10) @@ -1287,10 +1294,10 @@ static int mt798x_phy_led_hw_control_get(struct phy_device *phydev, u8 index, if (on & MTK_PHY_LED_ON_HDX) *rules |= BIT(TRIGGER_NETDEV_HALF_DUPLEX); - if (blink & (MTK_PHY_LED_BLINK_1000RX | MTK_PHY_LED_BLINK_100RX | MTK_PHY_LED_BLINK_10RX)) + if (blink & MTK_PHY_LED_BLINK_RX) *rules |= BIT(TRIGGER_NETDEV_RX); - if (blink & (MTK_PHY_LED_BLINK_1000TX | MTK_PHY_LED_BLINK_100TX | MTK_PHY_LED_BLINK_10TX)) + if (blink & MTK_PHY_LED_BLINK_TX) *rules |= BIT(TRIGGER_NETDEV_TX); return 0; @@ -1323,15 +1330,19 @@ static int mt798x_phy_led_hw_control_set(struct phy_device *phydev, u8 index, on |= MTK_PHY_LED_ON_LINK1000; if (rules & BIT(TRIGGER_NETDEV_RX)) { - blink |= MTK_PHY_LED_BLINK_10RX | - MTK_PHY_LED_BLINK_100RX | - MTK_PHY_LED_BLINK_1000RX; + blink |= (on & MTK_PHY_LED_ON_LINK) ? + (((on & MTK_PHY_LED_ON_LINK10) ? MTK_PHY_LED_BLINK_10RX : 0) | + ((on & MTK_PHY_LED_ON_LINK100) ? MTK_PHY_LED_BLINK_100RX : 0) | + ((on & MTK_PHY_LED_ON_LINK1000) ? MTK_PHY_LED_BLINK_1000RX : 0)) : + MTK_PHY_LED_BLINK_RX; } if (rules & BIT(TRIGGER_NETDEV_TX)) { - blink |= MTK_PHY_LED_BLINK_10TX | - MTK_PHY_LED_BLINK_100TX | - MTK_PHY_LED_BLINK_1000TX; + blink |= (on & MTK_PHY_LED_ON_LINK) ? + (((on & MTK_PHY_LED_ON_LINK10) ? MTK_PHY_LED_BLINK_10TX : 0) | + ((on & MTK_PHY_LED_ON_LINK100) ? MTK_PHY_LED_BLINK_100TX : 0) | + ((on & MTK_PHY_LED_ON_LINK1000) ? MTK_PHY_LED_BLINK_1000TX : 0)) : + MTK_PHY_LED_BLINK_TX; } if (blink || on) @@ -1344,9 +1355,7 @@ static int mt798x_phy_led_hw_control_set(struct phy_device *phydev, u8 index, MTK_PHY_LED0_ON_CTRL, MTK_PHY_LED_ON_FDX | MTK_PHY_LED_ON_HDX | - MTK_PHY_LED_ON_LINK10 | - MTK_PHY_LED_ON_LINK100 | - MTK_PHY_LED_ON_LINK1000, + MTK_PHY_LED_ON_LINK, on); if (ret) From 2718a7fdf292b2dcb49c856fa8a6a955ebbbc45f Mon Sep 17 00:00:00 2001 From: Wenkuan Wang Date: Wed, 10 Apr 2024 11:53:08 +0800 Subject: [PATCH 317/606] x86/CPU/AMD: Add models 0x10-0x1f to the Zen5 range Add some more Zen5 models. Fixes: 3e4147f33f8b ("x86/CPU/AMD: Add X86_FEATURE_ZEN5") Signed-off-by: Wenkuan Wang Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240423144111.1362-1-bp@kernel.org --- arch/x86/kernel/cpu/amd.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index cb9eece55904d..307302af0aeee 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -459,8 +459,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) case 0x1a: switch (c->x86_model) { - case 0x00 ... 0x0f: - case 0x20 ... 0x2f: + case 0x00 ... 0x2f: case 0x40 ... 0x4f: case 0x70 ... 0x7f: setup_force_cpu_cap(X86_FEATURE_ZEN5); From b53c6bd5d271d023857174b8fd3e32f98ae51372 Mon Sep 17 00:00:00 2001 From: David Kaplan Date: Sun, 21 Apr 2024 21:17:28 +0200 Subject: [PATCH 318/606] x86/cpu: Fix check for RDPKRU in __show_regs() cpu_feature_enabled(X86_FEATURE_OSPKE) does not necessarily reflect whether CR4.PKE is set on the CPU. In particular, they may differ on non-BSP CPUs before setup_pku() is executed. In this scenario, RDPKRU will #UD causing the system to hang. Fix by checking CR4 for PKE enablement which is always correct for the current CPU. The scenario happens by inserting a WARN* before setup_pku() in identiy_cpu() or some other diagnostic which would lead to calling __show_regs(). [ bp: Massage commit message. ] Signed-off-by: David Kaplan Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240421191728.32239-1-bp@kernel.org --- arch/x86/kernel/process_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 7062b84dd467d..6d3d20e3e43a9 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -139,7 +139,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode, log_lvl, d3, d6, d7); } - if (cpu_feature_enabled(X86_FEATURE_OSPKE)) + if (cr4 & X86_CR4_PKE) printk("%sPKRU: %08x\n", log_lvl, read_pkru()); } From 78d9161d2bcd442d93d917339297ffa057dbee8c Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Tue, 23 Apr 2024 13:50:53 +0200 Subject: [PATCH 319/606] fbdev: fix incorrect address computation in deferred IO With deferred IO enabled, a page fault happens when data is written to the framebuffer device. Then driver determines which page is being updated by calculating the offset of the written virtual address within the virtual memory area, and uses this offset to get the updated page within the internal buffer. This page is later copied to hardware (thus the name "deferred IO"). This offset calculation is only correct if the virtual memory area is mapped to the beginning of the internal buffer. Otherwise this is wrong. For example, if users do: mmap(ptr, 4096, PROT_WRITE, MAP_FIXED | MAP_SHARED, fd, 0xff000); Then the virtual memory area will mapped at offset 0xff000 within the internal buffer. This offset 0xff000 is not accounted for, and wrong page is updated. Correct the calculation by using vmf->pgoff instead. With this change, the variable "offset" will no longer hold the exact offset value, but it is rounded down to multiples of PAGE_SIZE. But this is still correct, because this variable is only used to calculate the page offset. Reported-by: Harshit Mogalapalli Closes: https://lore.kernel.org/linux-fbdev/271372d6-e665-4e7f-b088-dee5f4ab341a@oracle.com Fixes: 56c134f7f1b5 ("fbdev: Track deferred-I/O pages in pageref struct") Cc: Signed-off-by: Nam Cao Reviewed-by: Thomas Zimmermann Tested-by: Harshit Mogalapalli Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20240423115053.4490-1-namcao@linutronix.de --- drivers/video/fbdev/core/fb_defio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/core/fb_defio.c b/drivers/video/fbdev/core/fb_defio.c index dae96c9f61cf8..806ecd32219b6 100644 --- a/drivers/video/fbdev/core/fb_defio.c +++ b/drivers/video/fbdev/core/fb_defio.c @@ -196,7 +196,7 @@ static vm_fault_t fb_deferred_io_track_page(struct fb_info *info, unsigned long */ static vm_fault_t fb_deferred_io_page_mkwrite(struct fb_info *info, struct vm_fault *vmf) { - unsigned long offset = vmf->address - vmf->vma->vm_start; + unsigned long offset = vmf->pgoff << PAGE_SHIFT; struct page *page = vmf->page; file_update_time(vmf->vma->vm_file); From d806f474a9a7993648a2c70642ee129316d8deff Mon Sep 17 00:00:00 2001 From: Prathamesh Shete Date: Wed, 24 Apr 2024 15:25:14 +0530 Subject: [PATCH 320/606] gpio: tegra186: Fix tegra186_gpio_is_accessible() check The controller has several register bits describing access control information for a given GPIO pin. When SCR_SEC_[R|W]EN is unset, it means we have full read/write access to all the registers for given GPIO pin. When SCR_SEC[R|W]EN is set, it means we need to further check the accompanying SCR_SEC_G1[R|W] bit to determine read/write access to all the registers for given GPIO pin. This check was previously declaring that a GPIO pin was accessible only if either of the following conditions were met: - SCR_SEC_REN + SCR_SEC_WEN both set or - SCR_SEC_REN + SCR_SEC_WEN both set and SCR_SEC_G1R + SCR_SEC_G1W both set Update the check to properly handle cases where only one of SCR_SEC_REN or SCR_SEC_WEN is set. Fixes: b2b56a163230 ("gpio: tegra186: Check GPIO pin permission before access.") Signed-off-by: Prathamesh Shete Acked-by: Thierry Reding Link: https://lore.kernel.org/r/20240424095514.24397-1-pshete@nvidia.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-tegra186.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/gpio/gpio-tegra186.c b/drivers/gpio/gpio-tegra186.c index d87dd06db40d0..9130c691a2dd3 100644 --- a/drivers/gpio/gpio-tegra186.c +++ b/drivers/gpio/gpio-tegra186.c @@ -36,12 +36,6 @@ #define TEGRA186_GPIO_SCR_SEC_REN BIT(27) #define TEGRA186_GPIO_SCR_SEC_G1W BIT(9) #define TEGRA186_GPIO_SCR_SEC_G1R BIT(1) -#define TEGRA186_GPIO_FULL_ACCESS (TEGRA186_GPIO_SCR_SEC_WEN | \ - TEGRA186_GPIO_SCR_SEC_REN | \ - TEGRA186_GPIO_SCR_SEC_G1R | \ - TEGRA186_GPIO_SCR_SEC_G1W) -#define TEGRA186_GPIO_SCR_SEC_ENABLE (TEGRA186_GPIO_SCR_SEC_WEN | \ - TEGRA186_GPIO_SCR_SEC_REN) /* control registers */ #define TEGRA186_GPIO_ENABLE_CONFIG 0x00 @@ -177,10 +171,18 @@ static inline bool tegra186_gpio_is_accessible(struct tegra_gpio *gpio, unsigned value = __raw_readl(secure + TEGRA186_GPIO_SCR); - if ((value & TEGRA186_GPIO_SCR_SEC_ENABLE) == 0) - return true; + /* + * When SCR_SEC_[R|W]EN is unset, then we have full read/write access to all the + * registers for given GPIO pin. + * When SCR_SEC[R|W]EN is set, then there is need to further check the accompanying + * SCR_SEC_G1[R|W] bit to determine read/write access to all the registers for given + * GPIO pin. + */ - if ((value & TEGRA186_GPIO_FULL_ACCESS) == TEGRA186_GPIO_FULL_ACCESS) + if (((value & TEGRA186_GPIO_SCR_SEC_REN) == 0 || + ((value & TEGRA186_GPIO_SCR_SEC_REN) && (value & TEGRA186_GPIO_SCR_SEC_G1R))) && + ((value & TEGRA186_GPIO_SCR_SEC_WEN) == 0 || + ((value & TEGRA186_GPIO_SCR_SEC_WEN) && (value & TEGRA186_GPIO_SCR_SEC_G1W)))) return true; return false; From a0a8d15a798be4b8f20aca2ba91bf6b688c6a640 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 24 Apr 2024 11:20:35 +0300 Subject: [PATCH 321/606] x86/tdx: Preserve shared bit on mprotect() The TDX guest platform takes one bit from the physical address to indicate if the page is shared (accessible by VMM). This bit is not part of the physical_mask and is not preserved during mprotect(). As a result, the 'shared' bit is lost during mprotect() on shared mappings. _COMMON_PAGE_CHG_MASK specifies which PTE bits need to be preserved during modification. AMD includes 'sme_me_mask' in the define to preserve the 'encrypt' bit. To cover both Intel and AMD cases, include 'cc_mask' in _COMMON_PAGE_CHG_MASK instead of 'sme_me_mask'. Reported-and-tested-by: Chris Oo Fixes: 41394e33f3a0 ("x86/tdx: Extend the confidential computing API to support TDX guests") Signed-off-by: Kirill A. Shutemov Signed-off-by: Dave Hansen Reviewed-by: Rick Edgecombe Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Tom Lendacky Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20240424082035.4092071-1-kirill.shutemov%40linux.intel.com --- arch/x86/include/asm/coco.h | 1 + arch/x86/include/asm/pgtable_types.h | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h index c086699b0d0c5..aa6c8f8ca9588 100644 --- a/arch/x86/include/asm/coco.h +++ b/arch/x86/include/asm/coco.h @@ -25,6 +25,7 @@ u64 cc_mkdec(u64 val); void cc_random_init(void); #else #define cc_vendor (CC_VENDOR_NONE) +static const u64 cc_mask = 0; static inline u64 cc_mkenc(u64 val) { diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 0b748ee16b3d9..9abb8cc4cd474 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -148,7 +148,7 @@ #define _COMMON_PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ _PAGE_SPECIAL | _PAGE_ACCESSED | \ _PAGE_DIRTY_BITS | _PAGE_SOFT_DIRTY | \ - _PAGE_DEVMAP | _PAGE_ENC | _PAGE_UFFD_WP) + _PAGE_DEVMAP | _PAGE_CC | _PAGE_UFFD_WP) #define _PAGE_CHG_MASK (_COMMON_PAGE_CHG_MASK | _PAGE_PAT) #define _HPAGE_CHG_MASK (_COMMON_PAGE_CHG_MASK | _PAGE_PSE | _PAGE_PAT_LARGE) @@ -173,6 +173,7 @@ enum page_cache_mode { }; #endif +#define _PAGE_CC (_AT(pteval_t, cc_mask)) #define _PAGE_ENC (_AT(pteval_t, sme_me_mask)) #define _PAGE_CACHE_MASK (_PAGE_PWT | _PAGE_PCD | _PAGE_PAT) From d6dab9017b7cf155e73ba5c7f498de1beb5f8e24 Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Fri, 12 Apr 2024 23:42:06 +0530 Subject: [PATCH 322/606] drm/xe: Remove sysfs only once on action add failure The drmm_add_action_or_reset function automatically invokes the action (sysfs removal) in the event of a failure; therefore, there's no necessity to call it within the return check. Modify the return type of xe_gt_ccs_mode_sysfs_init to int, allowing the caller to pass errors up the call chain. Should sysfs creation or drmm_add_action_or_reset fail, error propagation will prompt a driver load abort. -v2 Edit commit message (Nikula/Lucas) use err_force_wake label instead of new. (Lucas) Avoid unnecessary warn/error messages. (Lucas) Fixes: f3bc5bb4d53d ("drm/xe: Allow userspace to configure CCS mode") Cc: Lucas De Marchi Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Niranjana Vishwanathapura Reviewed-by: Lucas De Marchi Signed-off-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240412181211.1155732-3-himal.prasad.ghimiray@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit a99641e38704202ae2a97202b3d249208c9cda7f) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt.c | 4 +++- drivers/gpu/drm/xe/xe_gt_ccs_mode.c | 19 +++++++------------ drivers/gpu/drm/xe/xe_gt_ccs_mode.h | 2 +- 3 files changed, 11 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index a0afe1ba6dd5c..f9705430ada93 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -378,7 +378,9 @@ static int gt_fw_domain_init(struct xe_gt *gt) err); /* Initialize CCS mode sysfs after early initialization of HW engines */ - xe_gt_ccs_mode_sysfs_init(gt); + err = xe_gt_ccs_mode_sysfs_init(gt); + if (err) + goto err_force_wake; /* * Stash hardware-reported version. Since this register does not exist diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c index 529fc286cd06c..396aeb5b99242 100644 --- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c +++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c @@ -167,25 +167,20 @@ static void xe_gt_ccs_mode_sysfs_fini(struct drm_device *drm, void *arg) * and it is expected that there are no open drm clients while doing so. * The number of available compute slices is exposed to user through a per-gt * 'num_cslices' sysfs interface. + * + * Returns: Returns error value for failure and 0 for success. */ -void xe_gt_ccs_mode_sysfs_init(struct xe_gt *gt) +int xe_gt_ccs_mode_sysfs_init(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); int err; if (!xe_gt_ccs_mode_enabled(gt)) - return; + return 0; err = sysfs_create_files(gt->sysfs, gt_ccs_mode_attrs); - if (err) { - drm_warn(&xe->drm, "Sysfs creation for ccs_mode failed err: %d\n", err); - return; - } + if (err) + return err; - err = drmm_add_action_or_reset(&xe->drm, xe_gt_ccs_mode_sysfs_fini, gt); - if (err) { - sysfs_remove_files(gt->sysfs, gt_ccs_mode_attrs); - drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n", - __func__, err); - } + return drmm_add_action_or_reset(&xe->drm, xe_gt_ccs_mode_sysfs_fini, gt); } diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.h b/drivers/gpu/drm/xe/xe_gt_ccs_mode.h index f39975aaaab0d..f8779852cf0d2 100644 --- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.h +++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.h @@ -12,7 +12,7 @@ #include "xe_platform_types.h" void xe_gt_apply_ccs_mode(struct xe_gt *gt); -void xe_gt_ccs_mode_sysfs_init(struct xe_gt *gt); +int xe_gt_ccs_mode_sysfs_init(struct xe_gt *gt); static inline bool xe_gt_ccs_mode_enabled(const struct xe_gt *gt) { From f38c4d224aa37fce1e3fe05db4377ef888f0737f Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Fri, 12 Apr 2024 23:42:07 +0530 Subject: [PATCH 323/606] drm/xe: call free_gsc_pkt only once on action add failure The drmm_add_action_or_reset function automatically invokes the action (free_gsc_pkt) in the event of a failure; therefore, there's no necessity to call it within the return check. -v2 Fix commit message. (Lucas) Fixes: d8b1571312b7 ("drm/xe/huc: HuC authentication via GSC") Cc: Rodrigo Vivi Cc: Daniele Ceraolo Spurio Reviewed-by: Lucas De Marchi Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240412181211.1155732-4-himal.prasad.ghimiray@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 22bf0bc04d273ca002a47de55693797b13076602) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_huc.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c index b545f850087cd..6b9b1cbedd379 100644 --- a/drivers/gpu/drm/xe/xe_huc.c +++ b/drivers/gpu/drm/xe/xe_huc.c @@ -53,7 +53,6 @@ static int huc_alloc_gsc_pkt(struct xe_huc *huc) struct xe_gt *gt = huc_to_gt(huc); struct xe_device *xe = gt_to_xe(gt); struct xe_bo *bo; - int err; /* we use a single object for both input and output */ bo = xe_bo_create_pin_map(xe, gt_to_tile(gt), NULL, @@ -66,13 +65,7 @@ static int huc_alloc_gsc_pkt(struct xe_huc *huc) huc->gsc_pkt = bo; - err = drmm_add_action_or_reset(&xe->drm, free_gsc_pkt, huc); - if (err) { - free_gsc_pkt(&xe->drm, huc); - return err; - } - - return 0; + return drmm_add_action_or_reset(&xe->drm, free_gsc_pkt, huc); } int xe_huc_init(struct xe_huc *huc) From e3e989522ac9a6b7960c75b762e1e9568717b31e Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 19 Apr 2024 17:03:51 +0200 Subject: [PATCH 324/606] drm/xe/guc: Fix arguments passed to relay G2H handlers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By default CT code was passing just payload of the G2H event message, while Relay code expects full G2H message including HXG header which contains DATA0 field. Fix that. Fixes: 26d4481ac23f ("drm/xe/guc: Start handling GuC Relay event messages") Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://patchwork.freedesktop.org/patch/msgid/20240419150351.358-1-michal.wajdeczko@intel.com (cherry picked from commit 48c64d495fbef343c59598a793d583dfd199d389) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_guc_ct.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 355edd4d758af..7f32547f94b26 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -1054,10 +1054,10 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) adj_len); break; case XE_GUC_ACTION_GUC2PF_RELAY_FROM_VF: - ret = xe_guc_relay_process_guc2pf(&guc->relay, payload, adj_len); + ret = xe_guc_relay_process_guc2pf(&guc->relay, hxg, hxg_len); break; case XE_GUC_ACTION_GUC2VF_RELAY_FROM_PF: - ret = xe_guc_relay_process_guc2vf(&guc->relay, payload, adj_len); + ret = xe_guc_relay_process_guc2vf(&guc->relay, hxg, hxg_len); break; default: drm_err(&xe->drm, "unexpected action 0x%04x\n", action); From 1c5a1627f48105cbab81d25ec2f72232bfaa8185 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Wed, 10 Apr 2024 18:23:01 +0800 Subject: [PATCH 325/606] efi/unaccepted: touch soft lockup during memory accept Commit 50e782a86c98 ("efi/unaccepted: Fix soft lockups caused by parallel memory acceptance") has released the spinlock so other CPUs can do memory acceptance in parallel and not triggers softlockup on other CPUs. However the softlock up was intermittent shown up if the memory of the TD guest is large, and the timeout of softlockup is set to 1 second: RIP: 0010:_raw_spin_unlock_irqrestore Call Trace: ? __hrtimer_run_queues ? hrtimer_interrupt ? watchdog_timer_fn ? __sysvec_apic_timer_interrupt ? __pfx_watchdog_timer_fn ? sysvec_apic_timer_interrupt ? __hrtimer_run_queues ? hrtimer_interrupt ? asm_sysvec_apic_timer_interrupt ? _raw_spin_unlock_irqrestore ? __sysvec_apic_timer_interrupt ? sysvec_apic_timer_interrupt accept_memory try_to_accept_memory do_huge_pmd_anonymous_page get_page_from_freelist __handle_mm_fault __alloc_pages __folio_alloc ? __tdx_hypercall handle_mm_fault vma_alloc_folio do_user_addr_fault do_huge_pmd_anonymous_page exc_page_fault ? __do_huge_pmd_anonymous_page asm_exc_page_fault __handle_mm_fault When the local irq is enabled at the end of accept_memory(), the softlockup detects that the watchdog on single CPU has not been fed for a while. That is to say, even other CPUs will not be blocked by spinlock, the current CPU might be stunk with local irq disabled for a while, which hurts not only nmi watchdog but also softlockup. Chao Gao pointed out that the memory accept could be time costly and there was similar report before. Thus to avoid any softlocup detection during this stage, give the softlockup a flag to skip the timeout check at the end of accept_memory(), by invoking touch_softlockup_watchdog(). Reported-by: Hossain, Md Iqbal Signed-off-by: Chen Yu Reviewed-by: Kirill A. Shutemov Fixes: 50e782a86c98 ("efi/unaccepted: Fix soft lockups caused by parallel memory acceptance") Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/unaccepted_memory.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/firmware/efi/unaccepted_memory.c b/drivers/firmware/efi/unaccepted_memory.c index 5b439d04079c8..50f6503fe49f5 100644 --- a/drivers/firmware/efi/unaccepted_memory.c +++ b/drivers/firmware/efi/unaccepted_memory.c @@ -4,6 +4,7 @@ #include #include #include +#include #include /* Protects unaccepted memory bitmap and accepting_list */ @@ -149,6 +150,9 @@ void accept_memory(phys_addr_t start, phys_addr_t end) } list_del(&range.list); + + touch_softlockup_watchdog(); + spin_unlock_irqrestore(&unaccepted_memory_lock, flags); } From 91f098704c25106d88706fc9f8bcfce01fdb97df Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Wed, 24 Apr 2024 21:51:54 +0800 Subject: [PATCH 326/606] workqueue: Fix divide error in wq_update_node_max_active() Yue Sun and xingwei lee reported a divide error bug in wq_update_node_max_active(): divide error: 0000 [#1] PREEMPT SMP KASAN PTI CPU: 1 PID: 21 Comm: cpuhp/1 Not tainted 6.9.0-rc5 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 RIP: 0010:wq_update_node_max_active+0x369/0x6b0 kernel/workqueue.c:1605 Code: 24 bf 00 00 00 80 44 89 fe e8 83 27 33 00 41 83 fc ff 75 0d 41 81 ff 00 00 00 80 0f 84 68 01 00 00 e8 fb 22 33 00 44 89 f8 99 <41> f7 fc 89 c5 89 c7 44 89 ee e8 a8 24 33 00 89 ef 8b 5c 24 04 89 RSP: 0018:ffffc9000018fbb0 EFLAGS: 00010293 RAX: 00000000000000ff RBX: 0000000000000001 RCX: ffff888100ada500 RDX: 0000000000000000 RSI: 00000000000000ff RDI: 0000000080000000 RBP: 0000000000000001 R08: ffffffff815b1fcd R09: 1ffff1100364ad72 R10: dffffc0000000000 R11: ffffed100364ad73 R12: 0000000000000000 R13: 0000000000000100 R14: 0000000000000000 R15: 00000000000000ff FS: 0000000000000000(0000) GS:ffff888135c00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fb8c06ca6f8 CR3: 000000010d6c6000 CR4: 0000000000750ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: workqueue_offline_cpu+0x56f/0x600 kernel/workqueue.c:6525 cpuhp_invoke_callback+0x4e1/0x870 kernel/cpu.c:194 cpuhp_thread_fun+0x411/0x7d0 kernel/cpu.c:1092 smpboot_thread_fn+0x544/0xa10 kernel/smpboot.c:164 kthread+0x2ed/0x390 kernel/kthread.c:388 ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:244 Modules linked in: ---[ end trace 0000000000000000 ]--- After analysis, it happens when all of the CPUs in a workqueue's affinity get offine. The problem can be easily reproduced by: # echo 8 > /sys/devices/virtual/workqueue//cpumask # echo 0 > /sys/devices/system/cpu/cpu3/online Use the default max_actives for nodes when all of the CPUs in the workqueue's affinity get offline to fix the problem. Reported-by: Yue Sun Reported-by: xingwei lee Link: https://lore.kernel.org/lkml/CAEkJfYPGS1_4JqvpSo0=FM0S1ytB8CEbyreLTtWpR900dUZymw@mail.gmail.com/ Fixes: 5797b1c18919 ("workqueue: Implement system-wide nr_active enforcement for unbound workqueues") Cc: stable@vger.kernel.org Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 5f536c63a48d7..d2dbe099286b9 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1598,6 +1598,15 @@ static void wq_update_node_max_active(struct workqueue_struct *wq, int off_cpu) if (off_cpu >= 0) total_cpus--; + /* If all CPUs of the wq get offline, use the default values */ + if (unlikely(!total_cpus)) { + for_each_node(node) + wq_node_nr_active(wq, node)->max = min_active; + + wq_node_nr_active(wq, NUMA_NO_NODE)->max = max_active; + return; + } + for_each_node(node) { int node_cpus; From 6ddb4f372fc63210034b903d96ebbeb3c7195adb Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 24 Apr 2024 17:39:58 +0000 Subject: [PATCH 327/606] KVM: arm64: vgic-v2: Check for non-NULL vCPU in vgic_v2_parse_attr() vgic_v2_parse_attr() is responsible for finding the vCPU that matches the user-provided CPUID, which (of course) may not be valid. If the ID is invalid, kvm_get_vcpu_by_id() returns NULL, which isn't handled gracefully. Similar to the GICv3 uaccess flow, check that kvm_get_vcpu_by_id() actually returns something and fail the ioctl if not. Cc: stable@vger.kernel.org Fixes: 7d450e282171 ("KVM: arm/arm64: vgic-new: Add userland access to VGIC dist registers") Reported-by: Alexander Potapenko Tested-by: Alexander Potapenko Reviewed-by: Alexander Potapenko Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20240424173959.3776798-2-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- arch/arm64/kvm/vgic/vgic-kvm-device.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c index f48b8dab8b3d2..1d26bb5b02f4b 100644 --- a/arch/arm64/kvm/vgic/vgic-kvm-device.c +++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c @@ -338,12 +338,12 @@ int kvm_register_vgic_device(unsigned long type) int vgic_v2_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr, struct vgic_reg_attr *reg_attr) { - int cpuid; + int cpuid = FIELD_GET(KVM_DEV_ARM_VGIC_CPUID_MASK, attr->attr); - cpuid = FIELD_GET(KVM_DEV_ARM_VGIC_CPUID_MASK, attr->attr); - - reg_attr->vcpu = kvm_get_vcpu_by_id(dev->kvm, cpuid); reg_attr->addr = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; + reg_attr->vcpu = kvm_get_vcpu_by_id(dev->kvm, cpuid); + if (!reg_attr->vcpu) + return -EINVAL; return 0; } From 160933e330f4c5a13931d725a4d952a4b9aefa71 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 24 Apr 2024 17:39:59 +0000 Subject: [PATCH 328/606] KVM: selftests: Add test for uaccesses to non-existent vgic-v2 CPUIF Assert that accesses to a non-existent vgic-v2 CPU interface consistently fail across the various KVM device attr ioctls. This also serves as a regression test for a bug wherein KVM hits a NULL dereference when the CPUID specified in the ioctl is invalid. Note that there is no need to print the observed errno, as TEST_ASSERT() will take care of it. Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20240424173959.3776798-3-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- .../testing/selftests/kvm/aarch64/vgic_init.c | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/tools/testing/selftests/kvm/aarch64/vgic_init.c b/tools/testing/selftests/kvm/aarch64/vgic_init.c index eef816b80993f..ca917c71ff602 100644 --- a/tools/testing/selftests/kvm/aarch64/vgic_init.c +++ b/tools/testing/selftests/kvm/aarch64/vgic_init.c @@ -84,6 +84,18 @@ static struct vm_gic vm_gic_create_with_vcpus(uint32_t gic_dev_type, return v; } +static struct vm_gic vm_gic_create_barebones(uint32_t gic_dev_type) +{ + struct vm_gic v; + + v.gic_dev_type = gic_dev_type; + v.vm = vm_create_barebones(); + v.gic_fd = kvm_create_device(v.vm, gic_dev_type); + + return v; +} + + static void vm_gic_destroy(struct vm_gic *v) { close(v->gic_fd); @@ -357,6 +369,40 @@ static void test_vcpus_then_vgic(uint32_t gic_dev_type) vm_gic_destroy(&v); } +#define KVM_VGIC_V2_ATTR(offset, cpu) \ + (FIELD_PREP(KVM_DEV_ARM_VGIC_OFFSET_MASK, offset) | \ + FIELD_PREP(KVM_DEV_ARM_VGIC_CPUID_MASK, cpu)) + +#define GIC_CPU_CTRL 0x00 + +static void test_v2_uaccess_cpuif_no_vcpus(void) +{ + struct vm_gic v; + u64 val = 0; + int ret; + + v = vm_gic_create_barebones(KVM_DEV_TYPE_ARM_VGIC_V2); + subtest_dist_rdist(&v); + + ret = __kvm_has_device_attr(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CPU_REGS, + KVM_VGIC_V2_ATTR(GIC_CPU_CTRL, 0)); + TEST_ASSERT(ret && errno == EINVAL, + "accessed non-existent CPU interface, want errno: %i", + EINVAL); + ret = __kvm_device_attr_get(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CPU_REGS, + KVM_VGIC_V2_ATTR(GIC_CPU_CTRL, 0), &val); + TEST_ASSERT(ret && errno == EINVAL, + "accessed non-existent CPU interface, want errno: %i", + EINVAL); + ret = __kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CPU_REGS, + KVM_VGIC_V2_ATTR(GIC_CPU_CTRL, 0), &val); + TEST_ASSERT(ret && errno == EINVAL, + "accessed non-existent CPU interface, want errno: %i", + EINVAL); + + vm_gic_destroy(&v); +} + static void test_v3_new_redist_regions(void) { struct kvm_vcpu *vcpus[NR_VCPUS]; @@ -675,6 +721,9 @@ void run_tests(uint32_t gic_dev_type) test_vcpus_then_vgic(gic_dev_type); test_vgic_then_vcpus(gic_dev_type); + if (VGIC_DEV_IS_V2(gic_dev_type)) + test_v2_uaccess_cpuif_no_vcpus(); + if (VGIC_DEV_IS_V3(gic_dev_type)) { test_v3_new_redist_regions(); test_v3_typer_accesses(); From 9bf4e919ccad613b3596eebf1ff37b05b6405307 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 1 Apr 2024 11:24:17 -0700 Subject: [PATCH 329/606] Bluetooth: Fix type of len in {l2cap,sco}_sock_getsockopt_old() After an innocuous optimization change in LLVM main (19.0.0), x86_64 allmodconfig (which enables CONFIG_KCSAN / -fsanitize=thread) fails to build due to the checks in check_copy_size(): In file included from net/bluetooth/sco.c:27: In file included from include/linux/module.h:13: In file included from include/linux/stat.h:19: In file included from include/linux/time.h:60: In file included from include/linux/time32.h:13: In file included from include/linux/timex.h:67: In file included from arch/x86/include/asm/timex.h:6: In file included from arch/x86/include/asm/tsc.h:10: In file included from arch/x86/include/asm/msr.h:15: In file included from include/linux/percpu.h:7: In file included from include/linux/smp.h:118: include/linux/thread_info.h:244:4: error: call to '__bad_copy_from' declared with 'error' attribute: copy source size is too small 244 | __bad_copy_from(); | ^ The same exact error occurs in l2cap_sock.c. The copy_to_user() statements that are failing come from l2cap_sock_getsockopt_old() and sco_sock_getsockopt_old(). This does not occur with GCC with or without KCSAN or Clang without KCSAN enabled. len is defined as an 'int' because it is assigned from '__user int *optlen'. However, it is clamped against the result of sizeof(), which has a type of 'size_t' ('unsigned long' for 64-bit platforms). This is done with min_t() because min() requires compatible types, which results in both len and the result of sizeof() being casted to 'unsigned int', meaning len changes signs and the result of sizeof() is truncated. From there, len is passed to copy_to_user(), which has a third parameter type of 'unsigned long', so it is widened and changes signs again. This excessive casting in combination with the KCSAN instrumentation causes LLVM to fail to eliminate the __bad_copy_from() call, failing the build. The official recommendation from LLVM developers is to consistently use long types for all size variables to avoid the unnecessary casting in the first place. Change the type of len to size_t in both l2cap_sock_getsockopt_old() and sco_sock_getsockopt_old(). This clears up the error while allowing min_t() to be replaced with min(), resulting in simpler code with no casts and fewer implicit conversions. While len is a different type than optlen now, it should result in no functional change because the result of sizeof() will clamp all values of optlen in the same manner as before. Cc: stable@vger.kernel.org Closes: https://github.com/ClangBuiltLinux/linux/issues/2007 Link: https://github.com/llvm/llvm-project/issues/85647 Signed-off-by: Nathan Chancellor Reviewed-by: Justin Stitt Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/l2cap_sock.c | 7 ++++--- net/bluetooth/sco.c | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index e7d810b23082f..5cc83f906c123 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -439,7 +439,8 @@ static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, struct l2cap_chan *chan = l2cap_pi(sk)->chan; struct l2cap_options opts; struct l2cap_conninfo cinfo; - int len, err = 0; + int err = 0; + size_t len; u32 opt; BT_DBG("sk %p", sk); @@ -486,7 +487,7 @@ static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, BT_DBG("mode 0x%2.2x", chan->mode); - len = min_t(unsigned int, len, sizeof(opts)); + len = min(len, sizeof(opts)); if (copy_to_user(optval, (char *) &opts, len)) err = -EFAULT; @@ -536,7 +537,7 @@ static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, cinfo.hci_handle = chan->conn->hcon->handle; memcpy(cinfo.dev_class, chan->conn->hcon->dev_class, 3); - len = min_t(unsigned int, len, sizeof(cinfo)); + len = min(len, sizeof(cinfo)); if (copy_to_user(optval, (char *) &cinfo, len)) err = -EFAULT; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 368e026f4d15c..5d03c5440b06f 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -964,7 +964,8 @@ static int sco_sock_getsockopt_old(struct socket *sock, int optname, struct sock *sk = sock->sk; struct sco_options opts; struct sco_conninfo cinfo; - int len, err = 0; + int err = 0; + size_t len; BT_DBG("sk %p", sk); @@ -986,7 +987,7 @@ static int sco_sock_getsockopt_old(struct socket *sock, int optname, BT_DBG("mtu %u", opts.mtu); - len = min_t(unsigned int, len, sizeof(opts)); + len = min(len, sizeof(opts)); if (copy_to_user(optval, (char *)&opts, len)) err = -EFAULT; @@ -1004,7 +1005,7 @@ static int sco_sock_getsockopt_old(struct socket *sock, int optname, cinfo.hci_handle = sco_pi(sk)->conn->hcon->handle; memcpy(cinfo.dev_class, sco_pi(sk)->conn->hcon->dev_class, 3); - len = min_t(unsigned int, len, sizeof(cinfo)); + len = min(len, sizeof(cinfo)); if (copy_to_user(optval, (char *)&cinfo, len)) err = -EFAULT; From 2e7ed5f5e69b6fe93dd3c6b651d041e0a7a456d1 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 5 Apr 2024 16:40:33 -0400 Subject: [PATCH 330/606] Bluetooth: hci_sync: Use advertised PHYs on hci_le_ext_create_conn_sync The extended advertising reports do report the PHYs so this store then in hci_conn so it can be later used in hci_le_ext_create_conn_sync to narrow the PHYs to be scanned since the controller will also perform a scan having a smaller set of PHYs shall reduce the time it takes to find and connect peers. Fixes: 288c90224eec ("Bluetooth: Enable all supported LE PHY by default") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 4 +++- net/bluetooth/hci_conn.c | 6 ++++-- net/bluetooth/hci_event.c | 20 ++++++++++++-------- net/bluetooth/hci_sync.c | 9 ++++++--- net/bluetooth/l2cap_core.c | 2 +- 5 files changed, 26 insertions(+), 15 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 56fb42df44a33..02af7d7013dae 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -738,6 +738,8 @@ struct hci_conn { __u8 le_per_adv_data[HCI_MAX_PER_AD_TOT_LEN]; __u16 le_per_adv_data_len; __u16 le_per_adv_data_offset; + __u8 le_adv_phy; + __u8 le_adv_sec_phy; __u8 le_tx_phy; __u8 le_rx_phy; __s8 rssi; @@ -1512,7 +1514,7 @@ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst, enum conn_reasons conn_reason); struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, u8 dst_type, bool dst_resolved, u8 sec_level, - u16 conn_timeout, u8 role); + u16 conn_timeout, u8 role, u8 phy, u8 sec_phy); void hci_connect_le_scan_cleanup(struct hci_conn *conn, u8 status); struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst, u8 sec_level, u8 auth_type, diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 3ad74f76983b2..05346250f7195 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -1263,7 +1263,7 @@ u8 hci_conn_set_handle(struct hci_conn *conn, u16 handle) struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, u8 dst_type, bool dst_resolved, u8 sec_level, - u16 conn_timeout, u8 role) + u16 conn_timeout, u8 role, u8 phy, u8 sec_phy) { struct hci_conn *conn; struct smp_irk *irk; @@ -1326,6 +1326,8 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, conn->dst_type = dst_type; conn->sec_level = BT_SECURITY_LOW; conn->conn_timeout = conn_timeout; + conn->le_adv_phy = phy; + conn->le_adv_sec_phy = sec_phy; err = hci_connect_le_sync(hdev, conn); if (err) { @@ -2273,7 +2275,7 @@ struct hci_conn *hci_connect_cis(struct hci_dev *hdev, bdaddr_t *dst, le = hci_connect_le(hdev, dst, dst_type, false, BT_SECURITY_LOW, HCI_LE_CONN_TIMEOUT, - HCI_ROLE_SLAVE); + HCI_ROLE_SLAVE, 0, 0); else le = hci_connect_le_scan(hdev, dst, dst_type, BT_SECURITY_LOW, diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index a8b8cfebe0180..4d70402e295f2 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -6038,7 +6038,7 @@ static void hci_le_conn_update_complete_evt(struct hci_dev *hdev, void *data, static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type, bool addr_resolved, - u8 adv_type) + u8 adv_type, u8 phy, u8 sec_phy) { struct hci_conn *conn; struct hci_conn_params *params; @@ -6093,7 +6093,7 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev, conn = hci_connect_le(hdev, addr, addr_type, addr_resolved, BT_SECURITY_LOW, hdev->def_le_autoconnect_timeout, - HCI_ROLE_MASTER); + HCI_ROLE_MASTER, phy, sec_phy); if (!IS_ERR(conn)) { /* If HCI_AUTO_CONN_EXPLICIT is set, conn is already owned * by higher layer that tried to connect, if no then @@ -6128,8 +6128,9 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev, static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, u8 bdaddr_type, bdaddr_t *direct_addr, - u8 direct_addr_type, s8 rssi, u8 *data, u8 len, - bool ext_adv, bool ctl_time, u64 instant) + u8 direct_addr_type, u8 phy, u8 sec_phy, s8 rssi, + u8 *data, u8 len, bool ext_adv, bool ctl_time, + u64 instant) { struct discovery_state *d = &hdev->discovery; struct smp_irk *irk; @@ -6217,7 +6218,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, * for advertising reports) and is already verified to be RPA above. */ conn = check_pending_le_conn(hdev, bdaddr, bdaddr_type, bdaddr_resolved, - type); + type, phy, sec_phy); if (!ext_adv && conn && type == LE_ADV_IND && len <= max_adv_len(hdev)) { /* Store report for later inclusion by @@ -6363,7 +6364,8 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, void *data, if (info->length <= max_adv_len(hdev)) { rssi = info->data[info->length]; process_adv_report(hdev, info->type, &info->bdaddr, - info->bdaddr_type, NULL, 0, rssi, + info->bdaddr_type, NULL, 0, + HCI_ADV_PHY_1M, 0, rssi, info->data, info->length, false, false, instant); } else { @@ -6448,6 +6450,8 @@ static void hci_le_ext_adv_report_evt(struct hci_dev *hdev, void *data, if (legacy_evt_type != LE_ADV_INVALID) { process_adv_report(hdev, legacy_evt_type, &info->bdaddr, info->bdaddr_type, NULL, 0, + info->primary_phy, + info->secondary_phy, info->rssi, info->data, info->length, !(evt_type & LE_EXT_ADV_LEGACY_PDU), false, instant); @@ -6730,8 +6734,8 @@ static void hci_le_direct_adv_report_evt(struct hci_dev *hdev, void *data, process_adv_report(hdev, info->type, &info->bdaddr, info->bdaddr_type, &info->direct_addr, - info->direct_addr_type, info->rssi, NULL, 0, - false, false, instant); + info->direct_addr_type, HCI_ADV_PHY_1M, 0, + info->rssi, NULL, 0, false, false, instant); } hci_dev_unlock(hdev); diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index c5d8799046ccf..4c707eb64e6f6 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -6346,7 +6346,8 @@ static int hci_le_ext_create_conn_sync(struct hci_dev *hdev, plen = sizeof(*cp); - if (scan_1m(hdev)) { + if (scan_1m(hdev) && (conn->le_adv_phy == HCI_ADV_PHY_1M || + conn->le_adv_sec_phy == HCI_ADV_PHY_1M)) { cp->phys |= LE_SCAN_PHY_1M; set_ext_conn_params(conn, p); @@ -6354,7 +6355,8 @@ static int hci_le_ext_create_conn_sync(struct hci_dev *hdev, plen += sizeof(*p); } - if (scan_2m(hdev)) { + if (scan_2m(hdev) && (conn->le_adv_phy == HCI_ADV_PHY_2M || + conn->le_adv_sec_phy == HCI_ADV_PHY_2M)) { cp->phys |= LE_SCAN_PHY_2M; set_ext_conn_params(conn, p); @@ -6362,7 +6364,8 @@ static int hci_le_ext_create_conn_sync(struct hci_dev *hdev, plen += sizeof(*p); } - if (scan_coded(hdev)) { + if (scan_coded(hdev) && (conn->le_adv_phy == HCI_ADV_PHY_CODED || + conn->le_adv_sec_phy == HCI_ADV_PHY_CODED)) { cp->phys |= LE_SCAN_PHY_CODED; set_ext_conn_params(conn, p); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index dc08974087936..84fc70862d78a 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -7018,7 +7018,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) hcon = hci_connect_le(hdev, dst, dst_type, false, chan->sec_level, timeout, - HCI_ROLE_SLAVE); + HCI_ROLE_SLAVE, 0, 0); else hcon = hci_connect_le_scan(hdev, dst, dst_type, chan->sec_level, timeout, From d1a5a7eede2977da3d2002d5ea3b519019cc1a98 Mon Sep 17 00:00:00 2001 From: WangYuli Date: Fri, 29 Mar 2024 10:34:39 +0800 Subject: [PATCH 331/606] Bluetooth: btusb: Add Realtek RTL8852BE support ID 0x0bda:0x4853 Add the support ID(0x0bda, 0x4853) to usb_device_id table for Realtek RTL8852BE. Without this change the device utilizes an obsolete version of the firmware that is encoded in it rather than the updated Realtek firmware and config files from the firmware directory. The latter files implement many new features. The device table is as follows: T: Bus=03 Lev=01 Prnt=01 Port=09 Cnt=03 Dev#= 4 Spd=12 MxCh= 0 D: Ver= 1.00 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=0bda ProdID=4853 Rev= 0.00 S: Manufacturer=Realtek S: Product=Bluetooth Radio S: SerialNumber=00e04c000001 C:* #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=1ms E: Ad=02(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 9 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 9 Ivl=1ms I: If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 17 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 17 Ivl=1ms I: If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 25 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 25 Ivl=1ms I: If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 33 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 33 Ivl=1ms I: If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 49 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 49 Ivl=1ms Cc: stable@vger.kernel.org Signed-off-by: Larry Finger Signed-off-by: WangYuli Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btusb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 06e915b57283f..d9c621d15fee0 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -542,6 +542,8 @@ static const struct usb_device_id quirks_table[] = { /* Realtek 8852BE Bluetooth devices */ { USB_DEVICE(0x0cb8, 0xc559), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, + { USB_DEVICE(0x0bda, 0x4853), .driver_info = BTUSB_REALTEK | + BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0bda, 0x887b), .driver_info = BTUSB_REALTEK | BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x0bda, 0xb85b), .driver_info = BTUSB_REALTEK | From b23d98d46d2858dcc0fd016caff165cbdc24e70a Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Mon, 25 Mar 2024 16:11:49 +0800 Subject: [PATCH 332/606] Bluetooth: btusb: Fix triggering coredump implementation for QCA btusb_coredump_qca() uses __hci_cmd_sync() to send a vendor-specific command to trigger firmware coredump, but the command does not have any event as its sync response, so it is not suitable to use __hci_cmd_sync(), fixed by using __hci_cmd_send(). Fixes: 20981ce2d5a5 ("Bluetooth: btusb: Add WCN6855 devcoredump support") Signed-off-by: Zijun Hu Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btusb.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index d9c621d15fee0..e3946f7b736e3 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -3482,13 +3482,12 @@ static void btusb_dump_hdr_qca(struct hci_dev *hdev, struct sk_buff *skb) static void btusb_coredump_qca(struct hci_dev *hdev) { + int err; static const u8 param[] = { 0x26 }; - struct sk_buff *skb; - skb = __hci_cmd_sync(hdev, 0xfc0c, 1, param, HCI_CMD_TIMEOUT); - if (IS_ERR(skb)) - bt_dev_err(hdev, "%s: triggle crash failed (%ld)", __func__, PTR_ERR(skb)); - kfree_skb(skb); + err = __hci_cmd_send(hdev, 0xfc0c, 1, param); + if (err < 0) + bt_dev_err(hdev, "%s: triggle crash failed (%d)", __func__, err); } /* From a9a830a676a9a93c5020f5c61236166931fa4266 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 15 Apr 2024 13:41:01 -0400 Subject: [PATCH 333/606] Bluetooth: hci_event: Fix sending HCI_OP_READ_ENC_KEY_SIZE The code shall always check if HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE has been set before attempting to use HCI_OP_READ_ENC_KEY_SIZE. Fixes: c569242cd492 ("Bluetooth: hci_event: set the conn encrypted before conn establishes") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 4 ++++ net/bluetooth/hci_event.c | 5 ++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 02af7d7013dae..e8f581f3f3ce6 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1907,6 +1907,10 @@ void hci_conn_del_sysfs(struct hci_conn *conn); #define privacy_mode_capable(dev) (use_ll_privacy(dev) && \ (hdev->commands[39] & 0x04)) +#define read_key_size_capable(dev) \ + ((dev)->commands[20] & 0x10 && \ + !test_bit(HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE, &hdev->quirks)) + /* Use enhanced synchronous connection if command is supported and its quirk * has not been set. */ diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 4d70402e295f2..4a27e4a17a674 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3218,7 +3218,7 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data, if (key) { set_bit(HCI_CONN_ENCRYPT, &conn->flags); - if (!(hdev->commands[20] & 0x10)) { + if (!read_key_size_capable(hdev)) { conn->enc_key_size = HCI_LINK_KEY_SIZE; } else { cp.handle = cpu_to_le16(conn->handle); @@ -3666,8 +3666,7 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, void *data, * controller really supports it. If it doesn't, assume * the default size (16). */ - if (!(hdev->commands[20] & 0x10) || - test_bit(HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE, &hdev->quirks)) { + if (!read_key_size_capable(hdev)) { conn->enc_key_size = HCI_LINK_KEY_SIZE; goto notify; } From 32868e126c78876a8a5ddfcb6ac8cb2fffcf4d27 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 16 Apr 2024 11:15:09 +0200 Subject: [PATCH 334/606] Bluetooth: qca: fix invalid device address check Qualcomm Bluetooth controllers may not have been provisioned with a valid device address and instead end up using the default address 00:00:00:00:5a:ad. This was previously believed to be due to lack of persistent storage for the address but it may also be due to integrators opting to not use the on-chip OTP memory and instead store the address elsewhere (e.g. in storage managed by secure world firmware). According to Qualcomm, at least WCN6750, WCN6855 and WCN7850 have on-chip OTP storage for the address. As the device type alone cannot be used to determine when the address is valid, instead read back the address during setup() and only set the HCI_QUIRK_USE_BDADDR_PROPERTY flag when needed. This specifically makes sure that controllers that have been provisioned with an address do not start as unconfigured. Reported-by: Janaki Ramaiah Thota Link: https://lore.kernel.org/r/124a7d54-5a18-4be7-9a76-a12017f6cce5@quicinc.com/ Fixes: 5971752de44c ("Bluetooth: hci_qca: Set HCI_QUIRK_USE_BDADDR_PROPERTY for wcn3990") Fixes: e668eb1e1578 ("Bluetooth: hci_core: Don't stop BT if the BD address missing in dts") Fixes: 6945795bc81a ("Bluetooth: fix use-bdaddr-property quirk") Cc: stable@vger.kernel.org # 6.5 Cc: Matthias Kaehlcke Signed-off-by: Johan Hovold Reported-by: Janaki Ramaiah Thota Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btqca.c | 38 +++++++++++++++++++++++++++++++++++++ drivers/bluetooth/hci_qca.c | 2 -- 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index 19cfc342fc7bb..216826c31ee34 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -15,6 +15,8 @@ #define VERSION "0.1" +#define QCA_BDADDR_DEFAULT (&(bdaddr_t) {{ 0xad, 0x5a, 0x00, 0x00, 0x00, 0x00 }}) + int qca_read_soc_version(struct hci_dev *hdev, struct qca_btsoc_version *ver, enum qca_btsoc_type soc_type) { @@ -612,6 +614,38 @@ int qca_set_bdaddr_rome(struct hci_dev *hdev, const bdaddr_t *bdaddr) } EXPORT_SYMBOL_GPL(qca_set_bdaddr_rome); +static int qca_check_bdaddr(struct hci_dev *hdev) +{ + struct hci_rp_read_bd_addr *bda; + struct sk_buff *skb; + int err; + + if (bacmp(&hdev->public_addr, BDADDR_ANY)) + return 0; + + skb = __hci_cmd_sync(hdev, HCI_OP_READ_BD_ADDR, 0, NULL, + HCI_INIT_TIMEOUT); + if (IS_ERR(skb)) { + err = PTR_ERR(skb); + bt_dev_err(hdev, "Failed to read device address (%d)", err); + return err; + } + + if (skb->len != sizeof(*bda)) { + bt_dev_err(hdev, "Device address length mismatch"); + kfree_skb(skb); + return -EIO; + } + + bda = (struct hci_rp_read_bd_addr *)skb->data; + if (!bacmp(&bda->bdaddr, QCA_BDADDR_DEFAULT)) + set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); + + kfree_skb(skb); + + return 0; +} + static void qca_generate_hsp_nvm_name(char *fwname, size_t max_size, struct qca_btsoc_version ver, u8 rom_ver, u16 bid) { @@ -818,6 +852,10 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, break; } + err = qca_check_bdaddr(hdev); + if (err) + return err; + bt_dev_info(hdev, "QCA setup on UART is completed"); return 0; diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index ecbc52eaf1010..92fa20f5ac7d0 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -1905,8 +1905,6 @@ static int qca_setup(struct hci_uart *hu) case QCA_WCN6750: case QCA_WCN6855: case QCA_WCN7850: - set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks); - qcadev = serdev_device_get_drvdata(hu->serdev); if (qcadev->bdaddr_property_broken) set_bit(HCI_QUIRK_BDADDR_PROPERTY_BROKEN, &hdev->quirks); From 6eb5fcc416f127f220b9177a5c9ae751cac1cda8 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 16 Apr 2024 15:34:45 -0400 Subject: [PATCH 335/606] Bluetooth: MGMT: Fix failing to MGMT_OP_ADD_UUID/MGMT_OP_REMOVE_UUID These commands don't require the adapter to be up and running so don't use hci_cmd_sync_queue which would check that flag, instead use hci_cmd_sync_submit which would ensure mgmt_class_complete is set properly regardless if any command was actually run or not. Link: https://github.com/bluez/bluez/issues/809 Fixes: d883a4669a1d ("Bluetooth: hci_sync: Only allow hci_cmd_sync_queue if running") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/mgmt.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 32ed6e9245a30..657abd9bcc879 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2623,7 +2623,11 @@ static int add_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) goto failed; } - err = hci_cmd_sync_queue(hdev, add_uuid_sync, cmd, mgmt_class_complete); + /* MGMT_OP_ADD_UUID don't require adapter the UP/Running so use + * hci_cmd_sync_submit instead of hci_cmd_sync_queue. + */ + err = hci_cmd_sync_submit(hdev, add_uuid_sync, cmd, + mgmt_class_complete); if (err < 0) { mgmt_pending_free(cmd); goto failed; @@ -2717,8 +2721,11 @@ static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data, goto unlock; } - err = hci_cmd_sync_queue(hdev, remove_uuid_sync, cmd, - mgmt_class_complete); + /* MGMT_OP_REMOVE_UUID don't require adapter the UP/Running so use + * hci_cmd_sync_submit instead of hci_cmd_sync_queue. + */ + err = hci_cmd_sync_submit(hdev, remove_uuid_sync, cmd, + mgmt_class_complete); if (err < 0) mgmt_pending_free(cmd); @@ -2784,8 +2791,11 @@ static int set_dev_class(struct sock *sk, struct hci_dev *hdev, void *data, goto unlock; } - err = hci_cmd_sync_queue(hdev, set_class_sync, cmd, - mgmt_class_complete); + /* MGMT_OP_SET_DEV_CLASS don't require adapter the UP/Running so use + * hci_cmd_sync_submit instead of hci_cmd_sync_queue. + */ + err = hci_cmd_sync_submit(hdev, set_class_sync, cmd, + mgmt_class_complete); if (err < 0) mgmt_pending_free(cmd); From 18bdb386a1a30e7a3d7732a98e45e69cf6b5710d Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Wed, 17 Apr 2024 16:27:38 -0700 Subject: [PATCH 336/606] Bluetooth: btusb: mediatek: Fix double free of skb in coredump hci_devcd_append() would free the skb on error so the caller don't have to free it again otherwise it would cause the double free of skb. Fixes: 0b7015132878 ("Bluetooth: btusb: mediatek: add MediaTek devcoredump support") Reported-by : Dan Carpenter Signed-off-by: Sean Wang Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btmtk.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/bluetooth/btmtk.c b/drivers/bluetooth/btmtk.c index ac8ebccd35075..812fd2a8f853e 100644 --- a/drivers/bluetooth/btmtk.c +++ b/drivers/bluetooth/btmtk.c @@ -380,8 +380,10 @@ int btmtk_process_coredump(struct hci_dev *hdev, struct sk_buff *skb) switch (data->cd_info.state) { case HCI_DEVCOREDUMP_IDLE: err = hci_devcd_init(hdev, MTK_COREDUMP_SIZE); - if (err < 0) + if (err < 0) { + kfree_skb(skb); break; + } data->cd_info.cnt = 0; /* It is supposed coredump can be done within 5 seconds */ @@ -407,9 +409,6 @@ int btmtk_process_coredump(struct hci_dev *hdev, struct sk_buff *skb) break; } - if (err < 0) - kfree_skb(skb); - return err; } EXPORT_SYMBOL_GPL(btmtk_process_coredump); From 73e87c0a49fda31d7b589edccf4c72e924411371 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 22 Apr 2024 15:57:47 +0200 Subject: [PATCH 337/606] Bluetooth: qca: fix NULL-deref on non-serdev suspend Qualcomm ROME controllers can be registered from the Bluetooth line discipline and in this case the HCI UART serdev pointer is NULL. Add the missing sanity check to prevent a NULL-pointer dereference when wakeup() is called for a non-serdev controller during suspend. Just return true for now to restore the original behaviour and address the crash with pre-6.2 kernels, which do not have commit e9b3e5b8c657 ("Bluetooth: hci_qca: only assign wakeup with serial port support") that causes the crash to happen already at setup() time. Fixes: c1a74160eaf1 ("Bluetooth: hci_qca: Add device_may_wakeup support") Cc: stable@vger.kernel.org # 5.13 Signed-off-by: Johan Hovold Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/hci_qca.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 92fa20f5ac7d0..94c85f4fbf3bf 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -1672,6 +1672,9 @@ static bool qca_wakeup(struct hci_dev *hdev) struct hci_uart *hu = hci_get_drvdata(hdev); bool wakeup; + if (!hu->serdev) + return true; + /* BT SoC attached through the serial bus is handled by the serdev driver. * So we need to use the device handle of the serdev driver to get the * status of device may wakeup. From 7ddb9de6af0f1c71147785b12fd7c8ec3f06cc86 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 22 Apr 2024 15:57:48 +0200 Subject: [PATCH 338/606] Bluetooth: qca: fix NULL-deref on non-serdev setup Qualcomm ROME controllers can be registered from the Bluetooth line discipline and in this case the HCI UART serdev pointer is NULL. Add the missing sanity check to prevent a NULL-pointer dereference when setup() is called for a non-serdev controller. Fixes: e9b3e5b8c657 ("Bluetooth: hci_qca: only assign wakeup with serial port support") Cc: stable@vger.kernel.org # 6.2 Cc: Zhengping Jiang Signed-off-by: Johan Hovold Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/hci_qca.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 94c85f4fbf3bf..b621a0a40ea48 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -1958,8 +1958,10 @@ static int qca_setup(struct hci_uart *hu) qca_debugfs_init(hdev); hu->hdev->hw_error = qca_hw_error; hu->hdev->cmd_timeout = qca_cmd_timeout; - if (device_can_wakeup(hu->serdev->ctrl->dev.parent)) - hu->hdev->wakeup = qca_wakeup; + if (hu->serdev) { + if (device_can_wakeup(hu->serdev->ctrl->dev.parent)) + hu->hdev->wakeup = qca_wakeup; + } } else if (ret == -ENOENT) { /* No patch/nvm-config found, run with original fw/config */ set_bit(QCA_ROM_FW, &qca->flags); From 88cd6e6b2d327faa13e4505b07f1e380e51b21ff Mon Sep 17 00:00:00 2001 From: Chun-Yi Lee Date: Wed, 24 Apr 2024 21:59:03 +0800 Subject: [PATCH 339/606] Bluetooth: hci_sync: Using hci_cmd_sync_submit when removing Adv Monitor Since the d883a4669a1de be introduced in v6.4, bluetooth daemon got the following failed message of MGMT_OP_REMOVE_ADV_MONITOR command when controller is power-off: bluetoothd[20976]: src/adapter.c:reset_adv_monitors_complete() Failed to reset Adv Monitors: Failed> Normally this situation is happened when the bluetoothd deamon be started manually after system booting. Which means that bluetoothd received MGMT_EV_INDEX_ADDED event after kernel runs hci_power_off(). Base on doc/mgmt-api.txt, the MGMT_OP_REMOVE_ADV_MONITOR command can be used when the controller is not powered. This patch changes the code in remove_adv_monitor() to use hci_cmd_sync_submit() instead of hci_cmd_sync_queue(). Fixes: d883a4669a1de ("Bluetooth: hci_sync: Only allow hci_cmd_sync_queue if running") Cc: Luiz Augusto von Dentz Cc: Manish Mandlik Cc: Archie Pusaka Cc: Miao-chen Chou Signed-off-by: Chun-Yi Lee Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/mgmt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 657abd9bcc879..965f621ef865a 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -5485,8 +5485,8 @@ static int remove_adv_monitor(struct sock *sk, struct hci_dev *hdev, goto unlock; } - err = hci_cmd_sync_queue(hdev, mgmt_remove_adv_monitor_sync, cmd, - mgmt_remove_adv_monitor_complete); + err = hci_cmd_sync_submit(hdev, mgmt_remove_adv_monitor_sync, cmd, + mgmt_remove_adv_monitor_complete); if (err) { mgmt_pending_remove(cmd); From 3d05fc82237aa97162d0d7dc300b55bb34e91d02 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 24 Apr 2024 14:29:32 +0200 Subject: [PATCH 340/606] Bluetooth: qca: set power_ctrl_enabled on NULL returned by gpiod_get_optional() Any return value from gpiod_get_optional() other than a pointer to a GPIO descriptor or a NULL-pointer is an error and the driver should abort probing. That being said: commit 56d074d26c58 ("Bluetooth: hci_qca: don't use IS_ERR_OR_NULL() with gpiod_get_optional()") no longer sets power_ctrl_enabled on NULL-pointer returned by devm_gpiod_get_optional(). Restore this behavior but bail-out on errors. While at it: also bail-out on error returned when trying to get the "swctrl" GPIO. Reported-by: Wren Turkal Reported-by: Zijun Hu Closes: https://lore.kernel.org/linux-bluetooth/1713449192-25926-2-git-send-email-quic_zijuhu@quicinc.com/ Fixes: 56d074d26c58 ("Bluetooth: hci_qca: don't use IS_ERR_OR_NULL() with gpiod_get_optional()") Reviewed-by: Krzysztof Kozlowski Signed-off-by: Bartosz Golaszewski Tested-by: Wren Turkal" Reported-by: Wren Turkal Reported-by: Zijun Hu Reviewed-by: Krzysztof Kozlowski Reviewed-by: Krzysztof Kozlowski Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/hci_qca.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index b621a0a40ea48..0c9c9ee56592d 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -2332,16 +2332,21 @@ static int qca_serdev_probe(struct serdev_device *serdev) (data->soc_type == QCA_WCN6750 || data->soc_type == QCA_WCN6855)) { dev_err(&serdev->dev, "failed to acquire BT_EN gpio\n"); - power_ctrl_enabled = false; + return PTR_ERR(qcadev->bt_en); } + if (!qcadev->bt_en) + power_ctrl_enabled = false; + qcadev->sw_ctrl = devm_gpiod_get_optional(&serdev->dev, "swctrl", GPIOD_IN); if (IS_ERR(qcadev->sw_ctrl) && (data->soc_type == QCA_WCN6750 || data->soc_type == QCA_WCN6855 || - data->soc_type == QCA_WCN7850)) - dev_warn(&serdev->dev, "failed to acquire SW_CTRL gpio\n"); + data->soc_type == QCA_WCN7850)) { + dev_err(&serdev->dev, "failed to acquire SW_CTRL gpio\n"); + return PTR_ERR(qcadev->sw_ctrl); + } qcadev->susclk = devm_clk_get_optional(&serdev->dev, NULL); if (IS_ERR(qcadev->susclk)) { @@ -2360,10 +2365,13 @@ static int qca_serdev_probe(struct serdev_device *serdev) qcadev->bt_en = devm_gpiod_get_optional(&serdev->dev, "enable", GPIOD_OUT_LOW); if (IS_ERR(qcadev->bt_en)) { - dev_warn(&serdev->dev, "failed to acquire enable gpio\n"); - power_ctrl_enabled = false; + dev_err(&serdev->dev, "failed to acquire enable gpio\n"); + return PTR_ERR(qcadev->bt_en); } + if (!qcadev->bt_en) + power_ctrl_enabled = false; + qcadev->susclk = devm_clk_get_optional(&serdev->dev, NULL); if (IS_ERR(qcadev->susclk)) { dev_warn(&serdev->dev, "failed to acquire clk\n"); From be81415a32ef6d8a8a85529fcfac03d05b3e757d Mon Sep 17 00:00:00 2001 From: Vicki Pfau Date: Wed, 3 Apr 2024 20:53:45 -0700 Subject: [PATCH 341/606] Input: xpad - add support for ASUS ROG RAIKIRI Add the VID/PID for ASUS ROG RAIKIRI to xpad_device and the VID to xpad_table Signed-off-by: Vicki Pfau Link: https://lore.kernel.org/r/20240404035345.159643-1-vi@endrift.com Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index f50848ed5575d..6fadaddb2b908 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -208,6 +208,7 @@ static const struct xpad_device { { 0x0738, 0xcb29, "Saitek Aviator Stick AV8R02", 0, XTYPE_XBOX360 }, { 0x0738, 0xf738, "Super SFIV FightStick TE S", 0, XTYPE_XBOX360 }, { 0x07ff, 0xffff, "Mad Catz GamePad", 0, XTYPE_XBOX360 }, + { 0x0b05, 0x1a38, "ASUS ROG RAIKIRI", 0, XTYPE_XBOXONE }, { 0x0c12, 0x0005, "Intec wireless", 0, XTYPE_XBOX }, { 0x0c12, 0x8801, "Nyko Xbox Controller", 0, XTYPE_XBOX }, { 0x0c12, 0x8802, "Zeroplus Xbox Controller", 0, XTYPE_XBOX }, @@ -487,6 +488,7 @@ static const struct usb_device_id xpad_table[] = { { USB_DEVICE(0x0738, 0x4540) }, /* Mad Catz Beat Pad */ XPAD_XBOXONE_VENDOR(0x0738), /* Mad Catz FightStick TE 2 */ XPAD_XBOX360_VENDOR(0x07ff), /* Mad Catz Gamepad */ + XPAD_XBOXONE_VENDOR(0x0b05), /* ASUS controllers */ XPAD_XBOX360_VENDOR(0x0c12), /* Zeroplus X-Box 360 controllers */ XPAD_XBOX360_VENDOR(0x0e6f), /* 0x0e6f Xbox 360 controllers */ XPAD_XBOXONE_VENDOR(0x0e6f), /* 0x0e6f Xbox One controllers */ From e10d3ba4d434ed172914617ed8d74bd411421193 Mon Sep 17 00:00:00 2001 From: Ismael Luceno Date: Sun, 21 Apr 2024 16:22:32 +0200 Subject: [PATCH 342/606] ipvs: Fix checksumming on GSO of SCTP packets It was observed in the wild that pairs of consecutive packets would leave the IPVS with the same wrong checksum, and the issue only went away when disabling GSO. IPVS needs to avoid computing the SCTP checksum when using GSO. Fixes: 90017accff61 ("sctp: Add GSO support") Co-developed-by: Firo Yang Signed-off-by: Ismael Luceno Tested-by: Andreas Taschner Acked-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_proto_sctp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index a0921adc31a9f..1e689c7141271 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -126,7 +126,8 @@ sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, if (sctph->source != cp->vport || payload_csum || skb->ip_summed == CHECKSUM_PARTIAL) { sctph->source = cp->vport; - sctp_nat_csum(skb, sctph, sctphoff); + if (!skb_is_gso(skb) || !skb_is_gso_sctp(skb)) + sctp_nat_csum(skb, sctph, sctphoff); } else { skb->ip_summed = CHECKSUM_UNNECESSARY; } @@ -174,7 +175,8 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, (skb->ip_summed == CHECKSUM_PARTIAL && !(skb_dst(skb)->dev->features & NETIF_F_SCTP_CRC))) { sctph->dest = cp->dport; - sctp_nat_csum(skb, sctph, sctphoff); + if (!skb_is_gso(skb) || !skb_is_gso_sctp(skb)) + sctp_nat_csum(skb, sctph, sctphoff); } else if (skb->ip_summed != CHECKSUM_PARTIAL) { skb->ip_summed = CHECKSUM_UNNECESSARY; } From 5ea7b72d4fac2fdbc0425cd8f2ea33abe95235b2 Mon Sep 17 00:00:00 2001 From: Hyunwoo Kim Date: Mon, 22 Apr 2024 05:37:17 -0400 Subject: [PATCH 343/606] net: openvswitch: Fix Use-After-Free in ovs_ct_exit Since kfree_rcu, which is called in the hlist_for_each_entry_rcu traversal of ovs_ct_limit_exit, is not part of the RCU read critical section, it is possible that the RCU grace period will pass during the traversal and the key will be free. To prevent this, it should be changed to hlist_for_each_entry_safe. Fixes: 11efd5cb04a1 ("openvswitch: Support conntrack zone limit") Signed-off-by: Hyunwoo Kim Reviewed-by: Eric Dumazet Reviewed-by: Aaron Conole Link: https://lore.kernel.org/r/ZiYvzQN/Ry5oeFQW@v4bel-B760M-AORUS-ELITE-AX Signed-off-by: Jakub Kicinski --- net/openvswitch/conntrack.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 74b63cdb59923..2928c142a2ddb 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -1593,9 +1593,9 @@ static void ovs_ct_limit_exit(struct net *net, struct ovs_net *ovs_net) for (i = 0; i < CT_LIMIT_HASH_BUCKETS; ++i) { struct hlist_head *head = &info->limits[i]; struct ovs_ct_limit *ct_limit; + struct hlist_node *next; - hlist_for_each_entry_rcu(ct_limit, head, hlist_node, - lockdep_ovsl_is_held()) + hlist_for_each_entry_safe(ct_limit, next, head, hlist_node) kfree_rcu(ct_limit, rcu); } kfree(info->limits); From 961990efc608d559249f5637254fa0a9aa888b1c Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 12 Apr 2024 09:44:07 +0000 Subject: [PATCH 344/606] scsi: sd: Only print updates to permanent stream count Just rescanning a partition causes a print similar to the following to appear: [ 1.484964] sd 0:0:0:0: [sda] permanent stream count = 5 This is bothersome, so only print this message for an update. Fixes: 4f53138fffc2 ("scsi: sd: Translate data lifetime information") Signed-off-by: John Garry Link: https://lore.kernel.org/r/20240412094407.496251-1-john.g.garry@oracle.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 58fdf679341dc..65cdc8b77e358 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3120,6 +3120,7 @@ static void sd_read_io_hints(struct scsi_disk *sdkp, unsigned char *buffer) { struct scsi_device *sdp = sdkp->device; const struct scsi_io_group_descriptor *desc, *start, *end; + u16 permanent_stream_count_old; struct scsi_sense_hdr sshdr; struct scsi_mode_data data; int res; @@ -3140,12 +3141,13 @@ static void sd_read_io_hints(struct scsi_disk *sdkp, unsigned char *buffer) for (desc = start; desc < end; desc++) if (!desc->st_enble || !sd_is_perm_stream(sdkp, desc - start)) break; + permanent_stream_count_old = sdkp->permanent_stream_count; sdkp->permanent_stream_count = desc - start; if (sdkp->rscs && sdkp->permanent_stream_count < 2) sd_printk(KERN_INFO, sdkp, "Unexpected: RSCS has been set and the permanent stream count is %u\n", sdkp->permanent_stream_count); - else if (sdkp->permanent_stream_count) + else if (sdkp->permanent_stream_count != permanent_stream_count_old) sd_printk(KERN_INFO, sdkp, "permanent stream count = %d\n", sdkp->permanent_stream_count); } From 627f9c1bb882765a84aa78015abbacd783d429be Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 22 Apr 2024 17:25:54 +0200 Subject: [PATCH 345/606] mlxsw: spectrum_acl_tcam: Fix race in region ID allocation Region identifiers can be allocated both when user space tries to insert a new tc filter and when filters are migrated from one region to another as part of the rehash delayed work. There is no lock protecting the bitmap from which these identifiers are allocated from, which is racy and leads to bad parameter errors from the device's firmware. Fix by converting the bitmap to IDA which handles its own locking. For consistency, do the same for the group identifiers that are part of the same structure. Fixes: 2bffc5322fd8 ("mlxsw: spectrum_acl: Don't take mutex in mlxsw_sp_acl_tcam_vregion_rehash_work()") Reported-by: Amit Cohen Signed-off-by: Ido Schimmel Tested-by: Alexander Zubkov Reviewed-by: Petr Machata Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/ce494b7940cadfe84f3e18da7785b51ef5f776e3.1713797103.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- .../mellanox/mlxsw/spectrum_acl_tcam.c | 61 ++++++++----------- .../mellanox/mlxsw/spectrum_acl_tcam.h | 5 +- 2 files changed, 30 insertions(+), 36 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index f20052776b3f2..b6a4652a6475a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -58,41 +59,43 @@ int mlxsw_sp_acl_tcam_priority_get(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp_acl_tcam_region_id_get(struct mlxsw_sp_acl_tcam *tcam, u16 *p_id) { - u16 id; + int id; - id = find_first_zero_bit(tcam->used_regions, tcam->max_regions); - if (id < tcam->max_regions) { - __set_bit(id, tcam->used_regions); - *p_id = id; - return 0; - } - return -ENOBUFS; + id = ida_alloc_max(&tcam->used_regions, tcam->max_regions - 1, + GFP_KERNEL); + if (id < 0) + return id; + + *p_id = id; + + return 0; } static void mlxsw_sp_acl_tcam_region_id_put(struct mlxsw_sp_acl_tcam *tcam, u16 id) { - __clear_bit(id, tcam->used_regions); + ida_free(&tcam->used_regions, id); } static int mlxsw_sp_acl_tcam_group_id_get(struct mlxsw_sp_acl_tcam *tcam, u16 *p_id) { - u16 id; + int id; - id = find_first_zero_bit(tcam->used_groups, tcam->max_groups); - if (id < tcam->max_groups) { - __set_bit(id, tcam->used_groups); - *p_id = id; - return 0; - } - return -ENOBUFS; + id = ida_alloc_max(&tcam->used_groups, tcam->max_groups - 1, + GFP_KERNEL); + if (id < 0) + return id; + + *p_id = id; + + return 0; } static void mlxsw_sp_acl_tcam_group_id_put(struct mlxsw_sp_acl_tcam *tcam, u16 id) { - __clear_bit(id, tcam->used_groups); + ida_free(&tcam->used_groups, id); } struct mlxsw_sp_acl_tcam_pattern { @@ -1549,19 +1552,11 @@ int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, if (max_tcam_regions < max_regions) max_regions = max_tcam_regions; - tcam->used_regions = bitmap_zalloc(max_regions, GFP_KERNEL); - if (!tcam->used_regions) { - err = -ENOMEM; - goto err_alloc_used_regions; - } + ida_init(&tcam->used_regions); tcam->max_regions = max_regions; max_groups = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_GROUPS); - tcam->used_groups = bitmap_zalloc(max_groups, GFP_KERNEL); - if (!tcam->used_groups) { - err = -ENOMEM; - goto err_alloc_used_groups; - } + ida_init(&tcam->used_groups); tcam->max_groups = max_groups; tcam->max_group_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_GROUP_SIZE); @@ -1575,10 +1570,8 @@ int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, return 0; err_tcam_init: - bitmap_free(tcam->used_groups); -err_alloc_used_groups: - bitmap_free(tcam->used_regions); -err_alloc_used_regions: + ida_destroy(&tcam->used_groups); + ida_destroy(&tcam->used_regions); mlxsw_sp_acl_tcam_rehash_params_unregister(mlxsw_sp); err_rehash_params_register: mutex_destroy(&tcam->lock); @@ -1591,8 +1584,8 @@ void mlxsw_sp_acl_tcam_fini(struct mlxsw_sp *mlxsw_sp, const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; ops->fini(mlxsw_sp, tcam->priv); - bitmap_free(tcam->used_groups); - bitmap_free(tcam->used_regions); + ida_destroy(&tcam->used_groups); + ida_destroy(&tcam->used_regions); mlxsw_sp_acl_tcam_rehash_params_unregister(mlxsw_sp); mutex_destroy(&tcam->lock); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h index 462bf448497d3..79a1d86065125 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h @@ -6,15 +6,16 @@ #include #include +#include #include "reg.h" #include "spectrum.h" #include "core_acl_flex_keys.h" struct mlxsw_sp_acl_tcam { - unsigned long *used_regions; /* bit array */ + struct ida used_regions; unsigned int max_regions; - unsigned long *used_groups; /* bit array */ + struct ida used_groups; unsigned int max_groups; unsigned int max_group_size; struct mutex lock; /* guards vregion list */ From d90cfe20562407d9f080d24123078d666d730707 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 22 Apr 2024 17:25:55 +0200 Subject: [PATCH 346/606] mlxsw: spectrum_acl_tcam: Fix race during rehash delayed work The purpose of the rehash delayed work is to reduce the number of masks (eRPs) used by an ACL region as the eRP bank is a global and limited resource. This is done in three steps: 1. Creating a new set of masks and a new ACL region which will use the new masks and to which the existing filters will be migrated to. The new region is assigned to 'vregion->region' and the region from which the filters are migrated from is assigned to 'vregion->region2'. 2. Migrating all the filters from the old region to the new region. 3. Destroying the old region and setting 'vregion->region2' to NULL. Only the second steps is performed under the 'vregion->lock' mutex although its comments says that among other things it "Protects consistency of region, region2 pointers". This is problematic as the first step can race with filter insertion from user space that uses 'vregion->region', but under the mutex. Fix by holding the mutex across the entirety of the delayed work and not only during the second step. Fixes: 2bffc5322fd8 ("mlxsw: spectrum_acl: Don't take mutex in mlxsw_sp_acl_tcam_vregion_rehash_work()") Signed-off-by: Ido Schimmel Tested-by: Alexander Zubkov Reviewed-by: Petr Machata Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/1ec1d54edf2bad0a369e6b4fa030aba64e1f124b.1713797103.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index b6a4652a6475a..9c0c728bb42dc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -718,7 +718,9 @@ static void mlxsw_sp_acl_tcam_vregion_rehash_work(struct work_struct *work) rehash.dw.work); int credits = MLXSW_SP_ACL_TCAM_VREGION_REHASH_CREDITS; + mutex_lock(&vregion->lock); mlxsw_sp_acl_tcam_vregion_rehash(vregion->mlxsw_sp, vregion, &credits); + mutex_unlock(&vregion->lock); if (credits < 0) /* Rehash gone out of credits so it was interrupted. * Schedule the work as soon as possible to continue. @@ -1323,7 +1325,6 @@ mlxsw_sp_acl_tcam_vregion_migrate(struct mlxsw_sp *mlxsw_sp, int err, err2; trace_mlxsw_sp_acl_tcam_vregion_migrate(mlxsw_sp, vregion); - mutex_lock(&vregion->lock); err = mlxsw_sp_acl_tcam_vchunk_migrate_all(mlxsw_sp, vregion, ctx, credits); if (err) { @@ -1343,7 +1344,6 @@ mlxsw_sp_acl_tcam_vregion_migrate(struct mlxsw_sp *mlxsw_sp, /* Let the rollback to be continued later on. */ } } - mutex_unlock(&vregion->lock); trace_mlxsw_sp_acl_tcam_vregion_migrate_end(mlxsw_sp, vregion); return err; } From 79b5b4b18bc85b19d3a518483f9abbbe6d7b3ba4 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 22 Apr 2024 17:25:56 +0200 Subject: [PATCH 347/606] mlxsw: spectrum_acl_tcam: Fix possible use-after-free during activity update The rule activity update delayed work periodically traverses the list of configured rules and queries their activity from the device. As part of this task it accesses the entry pointed by 'ventry->entry', but this entry can be changed concurrently by the rehash delayed work, leading to a use-after-free [1]. Fix by closing the race and perform the activity query under the 'vregion->lock' mutex. [1] BUG: KASAN: slab-use-after-free in mlxsw_sp_acl_tcam_flower_rule_activity_get+0x121/0x140 Read of size 8 at addr ffff8881054ed808 by task kworker/0:18/181 CPU: 0 PID: 181 Comm: kworker/0:18 Not tainted 6.9.0-rc2-custom-00781-gd5ab772d32f7 #2 Hardware name: Mellanox Technologies Ltd. MSN3700/VMOD0005, BIOS 5.11 01/06/2019 Workqueue: mlxsw_core mlxsw_sp_acl_rule_activity_update_work Call Trace: dump_stack_lvl+0xc6/0x120 print_report+0xce/0x670 kasan_report+0xd7/0x110 mlxsw_sp_acl_tcam_flower_rule_activity_get+0x121/0x140 mlxsw_sp_acl_rule_activity_update_work+0x219/0x400 process_one_work+0x8eb/0x19b0 worker_thread+0x6c9/0xf70 kthread+0x2c9/0x3b0 ret_from_fork+0x4d/0x80 ret_from_fork_asm+0x1a/0x30 Allocated by task 1039: kasan_save_stack+0x33/0x60 kasan_save_track+0x14/0x30 __kasan_kmalloc+0x8f/0xa0 __kmalloc+0x19c/0x360 mlxsw_sp_acl_tcam_entry_create+0x7b/0x1f0 mlxsw_sp_acl_tcam_vchunk_migrate_all+0x30d/0xb50 mlxsw_sp_acl_tcam_vregion_rehash_work+0x157/0x1300 process_one_work+0x8eb/0x19b0 worker_thread+0x6c9/0xf70 kthread+0x2c9/0x3b0 ret_from_fork+0x4d/0x80 ret_from_fork_asm+0x1a/0x30 Freed by task 1039: kasan_save_stack+0x33/0x60 kasan_save_track+0x14/0x30 kasan_save_free_info+0x3b/0x60 poison_slab_object+0x102/0x170 __kasan_slab_free+0x14/0x30 kfree+0xc1/0x290 mlxsw_sp_acl_tcam_vchunk_migrate_all+0x3d7/0xb50 mlxsw_sp_acl_tcam_vregion_rehash_work+0x157/0x1300 process_one_work+0x8eb/0x19b0 worker_thread+0x6c9/0xf70 kthread+0x2c9/0x3b0 ret_from_fork+0x4d/0x80 ret_from_fork_asm+0x1a/0x30 Fixes: 2bffc5322fd8 ("mlxsw: spectrum_acl: Don't take mutex in mlxsw_sp_acl_tcam_vregion_rehash_work()") Signed-off-by: Ido Schimmel Tested-by: Alexander Zubkov Reviewed-by: Petr Machata Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/1fcce0a60b231ebeb2515d91022284ba7b4ffe7a.1713797103.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 9c0c728bb42dc..7e69225c057de 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -1159,8 +1159,14 @@ mlxsw_sp_acl_tcam_ventry_activity_get(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam_ventry *ventry, bool *activity) { - return mlxsw_sp_acl_tcam_entry_activity_get(mlxsw_sp, - ventry->entry, activity); + struct mlxsw_sp_acl_tcam_vregion *vregion = ventry->vchunk->vregion; + int err; + + mutex_lock(&vregion->lock); + err = mlxsw_sp_acl_tcam_entry_activity_get(mlxsw_sp, ventry->entry, + activity); + mutex_unlock(&vregion->lock); + return err; } static int From 54225988889931467a9b55fdbef534079b665519 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 22 Apr 2024 17:25:57 +0200 Subject: [PATCH 348/606] mlxsw: spectrum_acl_tcam: Fix possible use-after-free during rehash The rehash delayed work migrates filters from one region to another according to the number of available credits. The migrated from region is destroyed at the end of the work if the number of credits is non-negative as the assumption is that this is indicative of migration being complete. This assumption is incorrect as a non-negative number of credits can also be the result of a failed migration. The destruction of a region that still has filters referencing it can result in a use-after-free [1]. Fix by not destroying the region if migration failed. [1] BUG: KASAN: slab-use-after-free in mlxsw_sp_acl_ctcam_region_entry_remove+0x21d/0x230 Read of size 8 at addr ffff8881735319e8 by task kworker/0:31/3858 CPU: 0 PID: 3858 Comm: kworker/0:31 Tainted: G W 6.9.0-rc2-custom-00782-gf2275c2157d8 #5 Hardware name: Mellanox Technologies Ltd. MSN3700/VMOD0005, BIOS 5.11 01/06/2019 Workqueue: mlxsw_core mlxsw_sp_acl_tcam_vregion_rehash_work Call Trace: dump_stack_lvl+0xc6/0x120 print_report+0xce/0x670 kasan_report+0xd7/0x110 mlxsw_sp_acl_ctcam_region_entry_remove+0x21d/0x230 mlxsw_sp_acl_ctcam_entry_del+0x2e/0x70 mlxsw_sp_acl_atcam_entry_del+0x81/0x210 mlxsw_sp_acl_tcam_vchunk_migrate_all+0x3cd/0xb50 mlxsw_sp_acl_tcam_vregion_rehash_work+0x157/0x1300 process_one_work+0x8eb/0x19b0 worker_thread+0x6c9/0xf70 kthread+0x2c9/0x3b0 ret_from_fork+0x4d/0x80 ret_from_fork_asm+0x1a/0x30 Allocated by task 174: kasan_save_stack+0x33/0x60 kasan_save_track+0x14/0x30 __kasan_kmalloc+0x8f/0xa0 __kmalloc+0x19c/0x360 mlxsw_sp_acl_tcam_region_create+0xdf/0x9c0 mlxsw_sp_acl_tcam_vregion_rehash_work+0x954/0x1300 process_one_work+0x8eb/0x19b0 worker_thread+0x6c9/0xf70 kthread+0x2c9/0x3b0 ret_from_fork+0x4d/0x80 ret_from_fork_asm+0x1a/0x30 Freed by task 7: kasan_save_stack+0x33/0x60 kasan_save_track+0x14/0x30 kasan_save_free_info+0x3b/0x60 poison_slab_object+0x102/0x170 __kasan_slab_free+0x14/0x30 kfree+0xc1/0x290 mlxsw_sp_acl_tcam_region_destroy+0x272/0x310 mlxsw_sp_acl_tcam_vregion_rehash_work+0x731/0x1300 process_one_work+0x8eb/0x19b0 worker_thread+0x6c9/0xf70 kthread+0x2c9/0x3b0 ret_from_fork+0x4d/0x80 ret_from_fork_asm+0x1a/0x30 Fixes: c9c9af91f1d9 ("mlxsw: spectrum_acl: Allow to interrupt/continue rehash work") Signed-off-by: Ido Schimmel Tested-by: Alexander Zubkov Reviewed-by: Petr Machata Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/3e412b5659ec2310c5c615760dfe5eac18dd7ebd.1713797103.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 7e69225c057de..1ff0b2c7c11de 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -1451,6 +1451,7 @@ mlxsw_sp_acl_tcam_vregion_rehash(struct mlxsw_sp *mlxsw_sp, ctx, credits); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to migrate vregion\n"); + return; } if (*credits >= 0) From 5bcf925587e9b5d36420d572a0b4d131c90fb306 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 22 Apr 2024 17:25:58 +0200 Subject: [PATCH 349/606] mlxsw: spectrum_acl_tcam: Rate limit error message In the rare cases when the device resources are exhausted it is likely that the rehash delayed work will fail. An error message will be printed whenever this happens which can be overwhelming considering the fact that the work is per-region and that there can be hundreds of regions. Fix by rate limiting the error message. Fixes: e5e7962ee5c2 ("mlxsw: spectrum_acl: Implement region migration according to hints") Signed-off-by: Ido Schimmel Tested-by: Alexander Zubkov Reviewed-by: Petr Machata Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/c510763b2ebd25e7990d80183feff91cde593145.1713797103.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 1ff0b2c7c11de..568ae7092fe0e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -1450,7 +1450,7 @@ mlxsw_sp_acl_tcam_vregion_rehash(struct mlxsw_sp *mlxsw_sp, err = mlxsw_sp_acl_tcam_vregion_migrate(mlxsw_sp, vregion, ctx, credits); if (err) { - dev_err(mlxsw_sp->bus_info->dev, "Failed to migrate vregion\n"); + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to migrate vregion\n"); return; } From 8ca3f7a7b61393804c46f170743c3b839df13977 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 22 Apr 2024 17:25:59 +0200 Subject: [PATCH 350/606] mlxsw: spectrum_acl_tcam: Fix memory leak during rehash The rehash delayed work migrates filters from one region to another. This is done by iterating over all chunks (all the filters with the same priority) in the region and in each chunk iterating over all the filters. If the migration fails, the code tries to migrate the filters back to the old region. However, the rollback itself can also fail in which case another migration will be erroneously performed. Besides the fact that this ping pong is not a very good idea, it also creates a problem. Each virtual chunk references two chunks: The currently used one ('vchunk->chunk') and a backup ('vchunk->chunk2'). During migration the first holds the chunk we want to migrate filters to and the second holds the chunk we are migrating filters from. The code currently assumes - but does not verify - that the backup chunk does not exist (NULL) if the currently used chunk does not reference the target region. This assumption breaks when we are trying to rollback a rollback, resulting in the backup chunk being overwritten and leaked [1]. Fix by not rolling back a failed rollback and add a warning to avoid future cases. [1] WARNING: CPU: 5 PID: 1063 at lib/parman.c:291 parman_destroy+0x17/0x20 Modules linked in: CPU: 5 PID: 1063 Comm: kworker/5:11 Tainted: G W 6.9.0-rc2-custom-00784-gc6a05c468a0b #14 Hardware name: Mellanox Technologies Ltd. MSN3700/VMOD0005, BIOS 5.11 01/06/2019 Workqueue: mlxsw_core mlxsw_sp_acl_tcam_vregion_rehash_work RIP: 0010:parman_destroy+0x17/0x20 [...] Call Trace: mlxsw_sp_acl_atcam_region_fini+0x19/0x60 mlxsw_sp_acl_tcam_region_destroy+0x49/0xf0 mlxsw_sp_acl_tcam_vregion_rehash_work+0x1f1/0x470 process_one_work+0x151/0x370 worker_thread+0x2cb/0x3e0 kthread+0xd0/0x100 ret_from_fork+0x34/0x50 ret_from_fork_asm+0x1a/0x30 Fixes: 843500518509 ("mlxsw: spectrum_acl: Do rollback as another call to mlxsw_sp_acl_tcam_vchunk_migrate_all()") Signed-off-by: Ido Schimmel Tested-by: Alexander Zubkov Reviewed-by: Petr Machata Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/d5edd4f4503934186ae5cfe268503b16345b4e0f.1713797103.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 568ae7092fe0e..0902eb7651e14 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -1200,6 +1200,8 @@ mlxsw_sp_acl_tcam_vchunk_migrate_start(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_acl_tcam_chunk *new_chunk; + WARN_ON(vchunk->chunk2); + new_chunk = mlxsw_sp_acl_tcam_chunk_create(mlxsw_sp, vchunk, region); if (IS_ERR(new_chunk)) return PTR_ERR(new_chunk); @@ -1334,6 +1336,8 @@ mlxsw_sp_acl_tcam_vregion_migrate(struct mlxsw_sp *mlxsw_sp, err = mlxsw_sp_acl_tcam_vchunk_migrate_all(mlxsw_sp, vregion, ctx, credits); if (err) { + if (ctx->this_is_rollback) + return err; /* In case migration was not successful, we need to swap * so the original region pointer is assigned again * to vregion->region. From 743edc8547a92b6192aa1f1b6bb78233fa21dc9b Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 22 Apr 2024 17:26:00 +0200 Subject: [PATCH 351/606] mlxsw: spectrum_acl_tcam: Fix warning during rehash As previously explained, the rehash delayed work migrates filters from one region to another. This is done by iterating over all chunks (all the filters with the same priority) in the region and in each chunk iterating over all the filters. When the work runs out of credits it stores the current chunk and entry as markers in the per-work context so that it would know where to resume the migration from the next time the work is scheduled. Upon error, the chunk marker is reset to NULL, but without resetting the entry markers despite being relative to it. This can result in migration being resumed from an entry that does not belong to the chunk being migrated. In turn, this will eventually lead to a chunk being iterated over as if it is an entry. Because of how the two structures happen to be defined, this does not lead to KASAN splats, but to warnings such as [1]. Fix by creating a helper that resets all the markers and call it from all the places the currently only reset the chunk marker. For good measures also call it when starting a completely new rehash. Add a warning to avoid future cases. [1] WARNING: CPU: 7 PID: 1076 at drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c:407 mlxsw_afk_encode+0x242/0x2f0 Modules linked in: CPU: 7 PID: 1076 Comm: kworker/7:24 Tainted: G W 6.9.0-rc3-custom-00880-g29e61d91b77b #29 Hardware name: Mellanox Technologies Ltd. MSN3700/VMOD0005, BIOS 5.11 01/06/2019 Workqueue: mlxsw_core mlxsw_sp_acl_tcam_vregion_rehash_work RIP: 0010:mlxsw_afk_encode+0x242/0x2f0 [...] Call Trace: mlxsw_sp_acl_atcam_entry_add+0xd9/0x3c0 mlxsw_sp_acl_tcam_entry_create+0x5e/0xa0 mlxsw_sp_acl_tcam_vchunk_migrate_all+0x109/0x290 mlxsw_sp_acl_tcam_vregion_rehash_work+0x6c/0x470 process_one_work+0x151/0x370 worker_thread+0x2cb/0x3e0 kthread+0xd0/0x100 ret_from_fork+0x34/0x50 Fixes: 6f9579d4e302 ("mlxsw: spectrum_acl: Remember where to continue rehash migration") Signed-off-by: Ido Schimmel Tested-by: Alexander Zubkov Reviewed-by: Petr Machata Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/cc17eed86b41dd829d39b07906fec074a9ce580e.1713797103.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- .../mellanox/mlxsw/spectrum_acl_tcam.c | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 0902eb7651e14..e8c6078866213 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -730,6 +730,17 @@ static void mlxsw_sp_acl_tcam_vregion_rehash_work(struct work_struct *work) mlxsw_sp_acl_tcam_vregion_rehash_work_schedule(vregion); } +static void +mlxsw_sp_acl_tcam_rehash_ctx_vchunk_reset(struct mlxsw_sp_acl_tcam_rehash_ctx *ctx) +{ + /* The entry markers are relative to the current chunk and therefore + * needs to be reset together with the chunk marker. + */ + ctx->current_vchunk = NULL; + ctx->start_ventry = NULL; + ctx->stop_ventry = NULL; +} + static void mlxsw_sp_acl_tcam_rehash_ctx_vchunk_changed(struct mlxsw_sp_acl_tcam_vchunk *vchunk) { @@ -752,7 +763,7 @@ mlxsw_sp_acl_tcam_rehash_ctx_vregion_changed(struct mlxsw_sp_acl_tcam_vregion *v * the current chunk pointer to make sure all chunks * are properly migrated. */ - vregion->rehash.ctx.current_vchunk = NULL; + mlxsw_sp_acl_tcam_rehash_ctx_vchunk_reset(&vregion->rehash.ctx); } static struct mlxsw_sp_acl_tcam_vregion * @@ -1220,7 +1231,7 @@ mlxsw_sp_acl_tcam_vchunk_migrate_end(struct mlxsw_sp *mlxsw_sp, { mlxsw_sp_acl_tcam_chunk_destroy(mlxsw_sp, vchunk->chunk2); vchunk->chunk2 = NULL; - ctx->current_vchunk = NULL; + mlxsw_sp_acl_tcam_rehash_ctx_vchunk_reset(ctx); } static int @@ -1252,6 +1263,8 @@ mlxsw_sp_acl_tcam_vchunk_migrate_one(struct mlxsw_sp *mlxsw_sp, ventry = list_first_entry(&vchunk->ventry_list, typeof(*ventry), list); + WARN_ON(ventry->vchunk != vchunk); + list_for_each_entry_from(ventry, &vchunk->ventry_list, list) { /* During rollback, once we reach the ventry that failed * to migrate, we are done. @@ -1343,7 +1356,7 @@ mlxsw_sp_acl_tcam_vregion_migrate(struct mlxsw_sp *mlxsw_sp, * to vregion->region. */ swap(vregion->region, vregion->region2); - ctx->current_vchunk = NULL; + mlxsw_sp_acl_tcam_rehash_ctx_vchunk_reset(ctx); ctx->this_is_rollback = true; err2 = mlxsw_sp_acl_tcam_vchunk_migrate_all(mlxsw_sp, vregion, ctx, credits); @@ -1402,6 +1415,7 @@ mlxsw_sp_acl_tcam_vregion_rehash_start(struct mlxsw_sp *mlxsw_sp, ctx->hints_priv = hints_priv; ctx->this_is_rollback = false; + mlxsw_sp_acl_tcam_rehash_ctx_vchunk_reset(ctx); return 0; From b377add0f0117409c418ddd6504bd682ebe0bf79 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 22 Apr 2024 17:26:01 +0200 Subject: [PATCH 352/606] mlxsw: spectrum_acl_tcam: Fix incorrect list API usage Both the function that migrates all the chunks within a region and the function that migrates all the entries within a chunk call list_first_entry() on the respective lists without checking that the lists are not empty. This is incorrect usage of the API, which leads to the following warning [1]. Fix by returning if the lists are empty as there is nothing to migrate in this case. [1] WARNING: CPU: 0 PID: 6437 at drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c:1266 mlxsw_sp_acl_tcam_vchunk_migrate_all+0x1f1/0> Modules linked in: CPU: 0 PID: 6437 Comm: kworker/0:37 Not tainted 6.9.0-rc3-custom-00883-g94a65f079ef6 #39 Hardware name: Mellanox Technologies Ltd. MSN3700/VMOD0005, BIOS 5.11 01/06/2019 Workqueue: mlxsw_core mlxsw_sp_acl_tcam_vregion_rehash_work RIP: 0010:mlxsw_sp_acl_tcam_vchunk_migrate_all+0x1f1/0x2c0 [...] Call Trace: mlxsw_sp_acl_tcam_vregion_rehash_work+0x6c/0x4a0 process_one_work+0x151/0x370 worker_thread+0x2cb/0x3e0 kthread+0xd0/0x100 ret_from_fork+0x34/0x50 ret_from_fork_asm+0x1a/0x30 Fixes: 6f9579d4e302 ("mlxsw: spectrum_acl: Remember where to continue rehash migration") Signed-off-by: Ido Schimmel Tested-by: Alexander Zubkov Reviewed-by: Petr Machata Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/4628e9a22d1d84818e28310abbbc498e7bc31bc9.1713797103.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index e8c6078866213..89a5ebc3463ff 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -1254,6 +1254,9 @@ mlxsw_sp_acl_tcam_vchunk_migrate_one(struct mlxsw_sp *mlxsw_sp, return 0; } + if (list_empty(&vchunk->ventry_list)) + goto out; + /* If the migration got interrupted, we have the ventry to start from * stored in context. */ @@ -1305,6 +1308,7 @@ mlxsw_sp_acl_tcam_vchunk_migrate_one(struct mlxsw_sp *mlxsw_sp, } } +out: mlxsw_sp_acl_tcam_vchunk_migrate_end(mlxsw_sp, vchunk, ctx); return 0; } @@ -1318,6 +1322,9 @@ mlxsw_sp_acl_tcam_vchunk_migrate_all(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam_vchunk *vchunk; int err; + if (list_empty(&vregion->vchunk_list)) + return 0; + /* If the migration got interrupted, we have the vchunk * we are working on stored in context. */ From fb4e2b70a7194b209fc7320bbf33b375f7114bd5 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 22 Apr 2024 17:26:02 +0200 Subject: [PATCH 353/606] mlxsw: spectrum_acl_tcam: Fix memory leak when canceling rehash work The rehash delayed work is rescheduled with a delay if the number of credits at end of the work is not negative as supposedly it means that the migration ended. Otherwise, it is rescheduled immediately. After "mlxsw: spectrum_acl_tcam: Fix possible use-after-free during rehash" the above is no longer accurate as a non-negative number of credits is no longer indicative of the migration being done. It can also happen if the work encountered an error in which case the migration will resume the next time the work is scheduled. The significance of the above is that it is possible for the work to be pending and associated with hints that were allocated when the migration started. This leads to the hints being leaked [1] when the work is canceled while pending as part of ACL region dismantle. Fix by freeing the hints if hints are associated with a work that was canceled while pending. Blame the original commit since the reliance on not having a pending work associated with hints is fragile. [1] unreferenced object 0xffff88810e7c3000 (size 256): comm "kworker/0:16", pid 176, jiffies 4295460353 hex dump (first 32 bytes): 00 30 95 11 81 88 ff ff 61 00 00 00 00 00 00 80 .0......a....... 00 00 61 00 40 00 00 00 00 00 00 00 04 00 00 00 ..a.@........... backtrace (crc 2544ddb9): [<00000000cf8cfab3>] kmalloc_trace+0x23f/0x2a0 [<000000004d9a1ad9>] objagg_hints_get+0x42/0x390 [<000000000b143cf3>] mlxsw_sp_acl_erp_rehash_hints_get+0xca/0x400 [<0000000059bdb60a>] mlxsw_sp_acl_tcam_vregion_rehash_work+0x868/0x1160 [<00000000e81fd734>] process_one_work+0x59c/0xf20 [<00000000ceee9e81>] worker_thread+0x799/0x12c0 [<00000000bda6fe39>] kthread+0x246/0x300 [<0000000070056d23>] ret_from_fork+0x34/0x70 [<00000000dea2b93e>] ret_from_fork_asm+0x1a/0x30 Fixes: c9c9af91f1d9 ("mlxsw: spectrum_acl: Allow to interrupt/continue rehash work") Signed-off-by: Ido Schimmel Tested-by: Alexander Zubkov Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/0cc12ebb07c4d4c41a1265ee2c28b392ff997a86.1713797103.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 89a5ebc3463ff..92a406f02eae7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -836,10 +836,14 @@ mlxsw_sp_acl_tcam_vregion_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam *tcam = vregion->tcam; if (vgroup->vregion_rehash_enabled && ops->region_rehash_hints_get) { + struct mlxsw_sp_acl_tcam_rehash_ctx *ctx = &vregion->rehash.ctx; + mutex_lock(&tcam->lock); list_del(&vregion->tlist); mutex_unlock(&tcam->lock); - cancel_delayed_work_sync(&vregion->rehash.dw); + if (cancel_delayed_work_sync(&vregion->rehash.dw) && + ctx->hints_priv) + ops->region_rehash_hints_put(ctx->hints_priv); } mlxsw_sp_acl_tcam_vgroup_vregion_detach(mlxsw_sp, vregion); if (vregion->region2) From 8092162335554c8ef5e7f50eff68aa9cfbdbf865 Mon Sep 17 00:00:00 2001 From: Edward Liaw Date: Thu, 11 Apr 2024 23:19:49 +0000 Subject: [PATCH 354/606] selftests/harness: remove use of LINE_MAX Android was seeing a compliation error because its C library does not define LINE_MAX. This replaces the use of LINE_MAX / snprintf with asprintf, which will change the behavior to not truncate the test name if it is over 2048 chars long. See also: https://github.com/llvm/llvm-project/issues/88119 [akpm@linux-foundation.org: remove limits.h include, per Edward] [akpm@linux-foundation.org: check asprintf() return] [usama.anjum@collabora.com: fix undeclared function error] Link: https://lkml.kernel.org/r/20240417075530.3807625-1-usama.anjum@collabora.com Link: https://lkml.kernel.org/r/20240411231954.62156-1-edliaw@google.com Fixes: 38c957f07038 ("selftests: kselftest_harness: generate test name once") Signed-off-by: Edward Liaw Signed-off-by: Muhammad Usama Anjum Cc: Andy Lutomirski Cc: Axel Rasmussen Cc: Bill Wendling Cc: David Hildenbrand Cc: Edward Liaw Cc: Justin Stitt Cc: Kees Cook Cc: "Mike Rapoport (IBM)" Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Peter Xu Cc: Shuah Khan Cc: Will Drewry Signed-off-by: Andrew Morton --- tools/testing/selftests/kselftest_harness.h | 12 ++++++++---- tools/testing/selftests/mm/mdwe_test.c | 1 + 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index 4fd735e48ee7e..f0ae1f6466db4 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -56,7 +56,6 @@ #include #include #include -#include #include #include #include @@ -1156,7 +1155,7 @@ void __run_test(struct __fixture_metadata *f, struct __test_metadata *t) { struct __test_xfail *xfail; - char test_name[LINE_MAX]; + char *test_name; const char *diagnostic; /* reset test struct */ @@ -1164,8 +1163,12 @@ void __run_test(struct __fixture_metadata *f, t->trigger = 0; memset(t->results->reason, 0, sizeof(t->results->reason)); - snprintf(test_name, sizeof(test_name), "%s%s%s.%s", - f->name, variant->name[0] ? "." : "", variant->name, t->name); + if (asprintf(&test_name, "%s%s%s.%s", f->name, + variant->name[0] ? "." : "", variant->name, t->name) == -1) { + ksft_print_msg("ERROR ALLOCATING MEMORY\n"); + t->exit_code = KSFT_FAIL; + _exit(t->exit_code); + } ksft_print_msg(" RUN %s ...\n", test_name); @@ -1203,6 +1206,7 @@ void __run_test(struct __fixture_metadata *f, ksft_test_result_code(t->exit_code, test_name, diagnostic ? "%s" : "", diagnostic); + free(test_name); } static int test_harness_run(int argc, char **argv) diff --git a/tools/testing/selftests/mm/mdwe_test.c b/tools/testing/selftests/mm/mdwe_test.c index 200bedcdc32e9..1e01d3ddc11c5 100644 --- a/tools/testing/selftests/mm/mdwe_test.c +++ b/tools/testing/selftests/mm/mdwe_test.c @@ -7,6 +7,7 @@ #include #include +#define _GNU_SOURCE #include #include #include From 6db7412c142006985a15765785cf6c0c0dd75374 Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Tue, 16 Apr 2024 21:26:58 +0500 Subject: [PATCH 355/606] selftests: mm: fix unused and uninitialized variable warning Fix the warnings by initializing and marking the variable as unused. I've caught the warnings by using clang. split_huge_page_test.c:303:6: warning: variable 'dummy' set but not used [-Wunused-but-set-variable] 303 | int dummy; | ^ split_huge_page_test.c:343:3: warning: variable 'dummy' is uninitialized when used here [-Wuninitialized] 343 | dummy += *(*addr + i); | ^~~~~ split_huge_page_test.c:303:11: note: initialize the variable 'dummy' to silence this warning 303 | int dummy; | ^ | = 0 2 warnings generated. Link: https://lkml.kernel.org/r/20240416162658.3353622-1-usama.anjum@collabora.com Fixes: fc4d182316bd ("mm: huge_memory: enable debugfs to split huge pages to any order") Signed-off-by: Muhammad Usama Anjum Reviewed-by: Zi Yan Cc: Bill Wendling Cc: Justin Stitt Cc: Muhammad Usama Anjum Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/split_huge_page_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c index 6c988bd2f3356..d3c7f5fb3e7b7 100644 --- a/tools/testing/selftests/mm/split_huge_page_test.c +++ b/tools/testing/selftests/mm/split_huge_page_test.c @@ -300,7 +300,7 @@ int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd, char **addr) { size_t i; - int dummy; + int __attribute__((unused)) dummy = 0; srand(time(NULL)); From b76b46902c2d0395488c8412e1116c2486cdfcb2 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 17 Apr 2024 17:18:35 -0400 Subject: [PATCH 356/606] mm/hugetlb: fix missing hugetlb_lock for resv uncharge There is a recent report on UFFDIO_COPY over hugetlb: https://lore.kernel.org/all/000000000000ee06de0616177560@google.com/ 350: lockdep_assert_held(&hugetlb_lock); Should be an issue in hugetlb but triggered in an userfault context, where it goes into the unlikely path where two threads modifying the resv map together. Mike has a fix in that path for resv uncharge but it looks like the locking criteria was overlooked: hugetlb_cgroup_uncharge_folio_rsvd() will update the cgroup pointer, so it requires to be called with the lock held. Link: https://lkml.kernel.org/r/20240417211836.2742593-3-peterx@redhat.com Fixes: 79aa925bf239 ("hugetlb_cgroup: fix reservation accounting") Signed-off-by: Peter Xu Reported-by: syzbot+4b8077a5fccc61c385a1@syzkaller.appspotmail.com Reviewed-by: Mina Almasry Cc: David Hildenbrand Cc: Signed-off-by: Andrew Morton --- mm/hugetlb.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 31d00eee028f1..53e0ab5c0845c 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3268,9 +3268,12 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, rsv_adjust = hugepage_subpool_put_pages(spool, 1); hugetlb_acct_memory(h, -rsv_adjust); - if (deferred_reserve) + if (deferred_reserve) { + spin_lock_irq(&hugetlb_lock); hugetlb_cgroup_uncharge_folio_rsvd(hstate_index(h), pages_per_huge_page(h), folio); + spin_unlock_irq(&hugetlb_lock); + } } if (!memcg_charge_ret) From 12bbaae7635a56049779db3bef6e7140d9aa5f67 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 21 Mar 2024 14:24:40 +0000 Subject: [PATCH 357/606] mm: create FOLIO_FLAG_FALSE and FOLIO_TYPE_OPS macros Following the separation of FOLIO_FLAGS from PAGEFLAGS, separate FOLIO_FLAG_FALSE from PAGEFLAG_FALSE and FOLIO_TYPE_OPS from PAGE_TYPE_OPS. Link: https://lkml.kernel.org/r/20240321142448.1645400-3-willy@infradead.org Fixes: 9c5ccf2db04b ("mm: remove HUGETLB_PAGE_DTOR") Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: David Hildenbrand Acked-by: Vlastimil Babka Cc: Miaohe Lin Cc: Muchun Song Cc: Oscar Salvador Cc: Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 70 +++++++++++++++++++++++++------------- 1 file changed, 47 insertions(+), 23 deletions(-) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 652d77805e99d..dc1607f1415eb 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -458,30 +458,51 @@ static __always_inline int TestClearPage##uname(struct page *page) \ TESTSETFLAG(uname, lname, policy) \ TESTCLEARFLAG(uname, lname, policy) +#define FOLIO_TEST_FLAG_FALSE(name) \ +static inline bool folio_test_##name(const struct folio *folio) \ +{ return false; } +#define FOLIO_SET_FLAG_NOOP(name) \ +static inline void folio_set_##name(struct folio *folio) { } +#define FOLIO_CLEAR_FLAG_NOOP(name) \ +static inline void folio_clear_##name(struct folio *folio) { } +#define __FOLIO_SET_FLAG_NOOP(name) \ +static inline void __folio_set_##name(struct folio *folio) { } +#define __FOLIO_CLEAR_FLAG_NOOP(name) \ +static inline void __folio_clear_##name(struct folio *folio) { } +#define FOLIO_TEST_SET_FLAG_FALSE(name) \ +static inline bool folio_test_set_##name(struct folio *folio) \ +{ return false; } +#define FOLIO_TEST_CLEAR_FLAG_FALSE(name) \ +static inline bool folio_test_clear_##name(struct folio *folio) \ +{ return false; } + +#define FOLIO_FLAG_FALSE(name) \ +FOLIO_TEST_FLAG_FALSE(name) \ +FOLIO_SET_FLAG_NOOP(name) \ +FOLIO_CLEAR_FLAG_NOOP(name) + #define TESTPAGEFLAG_FALSE(uname, lname) \ -static inline bool folio_test_##lname(const struct folio *folio) { return false; } \ +FOLIO_TEST_FLAG_FALSE(lname) \ static inline int Page##uname(const struct page *page) { return 0; } #define SETPAGEFLAG_NOOP(uname, lname) \ -static inline void folio_set_##lname(struct folio *folio) { } \ +FOLIO_SET_FLAG_NOOP(lname) \ static inline void SetPage##uname(struct page *page) { } #define CLEARPAGEFLAG_NOOP(uname, lname) \ -static inline void folio_clear_##lname(struct folio *folio) { } \ +FOLIO_CLEAR_FLAG_NOOP(lname) \ static inline void ClearPage##uname(struct page *page) { } #define __CLEARPAGEFLAG_NOOP(uname, lname) \ -static inline void __folio_clear_##lname(struct folio *folio) { } \ +__FOLIO_CLEAR_FLAG_NOOP(lname) \ static inline void __ClearPage##uname(struct page *page) { } #define TESTSETFLAG_FALSE(uname, lname) \ -static inline bool folio_test_set_##lname(struct folio *folio) \ -{ return 0; } \ +FOLIO_TEST_SET_FLAG_FALSE(lname) \ static inline int TestSetPage##uname(struct page *page) { return 0; } #define TESTCLEARFLAG_FALSE(uname, lname) \ -static inline bool folio_test_clear_##lname(struct folio *folio) \ -{ return 0; } \ +FOLIO_TEST_CLEAR_FLAG_FALSE(lname) \ static inline int TestClearPage##uname(struct page *page) { return 0; } #define PAGEFLAG_FALSE(uname, lname) TESTPAGEFLAG_FALSE(uname, lname) \ @@ -977,35 +998,38 @@ static inline int page_has_type(const struct page *page) return page_type_has_type(page->page_type); } +#define FOLIO_TYPE_OPS(lname, fname) \ +static __always_inline bool folio_test_##fname(const struct folio *folio)\ +{ \ + return folio_test_type(folio, PG_##lname); \ +} \ +static __always_inline void __folio_set_##fname(struct folio *folio) \ +{ \ + VM_BUG_ON_FOLIO(!folio_test_type(folio, 0), folio); \ + folio->page.page_type &= ~PG_##lname; \ +} \ +static __always_inline void __folio_clear_##fname(struct folio *folio) \ +{ \ + VM_BUG_ON_FOLIO(!folio_test_##fname(folio), folio); \ + folio->page.page_type |= PG_##lname; \ +} + #define PAGE_TYPE_OPS(uname, lname, fname) \ +FOLIO_TYPE_OPS(lname, fname) \ static __always_inline int Page##uname(const struct page *page) \ { \ return PageType(page, PG_##lname); \ } \ -static __always_inline int folio_test_##fname(const struct folio *folio)\ -{ \ - return folio_test_type(folio, PG_##lname); \ -} \ static __always_inline void __SetPage##uname(struct page *page) \ { \ VM_BUG_ON_PAGE(!PageType(page, 0), page); \ page->page_type &= ~PG_##lname; \ } \ -static __always_inline void __folio_set_##fname(struct folio *folio) \ -{ \ - VM_BUG_ON_FOLIO(!folio_test_type(folio, 0), folio); \ - folio->page.page_type &= ~PG_##lname; \ -} \ static __always_inline void __ClearPage##uname(struct page *page) \ { \ VM_BUG_ON_PAGE(!Page##uname(page), page); \ page->page_type |= PG_##lname; \ -} \ -static __always_inline void __folio_clear_##fname(struct folio *folio) \ -{ \ - VM_BUG_ON_FOLIO(!folio_test_##fname(folio), folio); \ - folio->page.page_type |= PG_##lname; \ -} \ +} /* * PageBuddy() indicates that the page is free and in the buddy system From fd1a745ce03e37945674c14833870a9af0882e2d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 21 Mar 2024 14:24:42 +0000 Subject: [PATCH 358/606] mm: support page_mapcount() on page_has_type() pages Return 0 for pages which can't be mapped. This matches how page_mapped() works. It is more convenient for users to not have to filter out these pages. Link: https://lkml.kernel.org/r/20240321142448.1645400-5-willy@infradead.org Fixes: 9c5ccf2db04b ("mm: remove HUGETLB_PAGE_DTOR") Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: David Hildenbrand Acked-by: Vlastimil Babka Cc: Miaohe Lin Cc: Muchun Song Cc: Oscar Salvador Cc: Signed-off-by: Andrew Morton --- fs/proc/page.c | 7 ++----- include/linux/mm.h | 8 +++++--- include/linux/page-flags.h | 4 ++-- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/fs/proc/page.c b/fs/proc/page.c index 195b077c0facb..9223856c934b4 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -67,7 +67,7 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf, */ ppage = pfn_to_online_page(pfn); - if (!ppage || PageSlab(ppage) || page_has_type(ppage)) + if (!ppage) pcount = 0; else pcount = page_mapcount(ppage); @@ -124,11 +124,8 @@ u64 stable_page_flags(struct page *page) /* * pseudo flags for the well known (anonymous) memory mapped pages - * - * Note that page->_mapcount is overloaded in SLAB, so the - * simple test in page_mapped() is not enough. */ - if (!PageSlab(page) && page_mapped(page)) + if (page_mapped(page)) u |= 1 << KPF_MMAP; if (PageAnon(page)) u |= 1 << KPF_ANON; diff --git a/include/linux/mm.h b/include/linux/mm.h index 7b0ee64225de9..b6bdaa18b9e9d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1223,14 +1223,16 @@ static inline void page_mapcount_reset(struct page *page) * a large folio, it includes the number of times this page is mapped * as part of that folio. * - * The result is undefined for pages which cannot be mapped into userspace. - * For example SLAB or special types of pages. See function page_has_type(). - * They use this field in struct page differently. + * Will report 0 for pages which cannot be mapped into userspace, eg + * slab, page tables and similar. */ static inline int page_mapcount(struct page *page) { int mapcount = atomic_read(&page->_mapcount) + 1; + /* Handle page_has_type() pages */ + if (mapcount < 0) + mapcount = 0; if (unlikely(PageCompound(page))) mapcount += folio_entire_mapcount(page_folio(page)); diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index dc1607f1415eb..35a0087d09108 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -971,12 +971,12 @@ static inline bool is_page_hwpoison(struct page *page) * page_type may be used. Because it is initialised to -1, we invert the * sense of the bit, so __SetPageFoo *clears* the bit used for PageFoo, and * __ClearPageFoo *sets* the bit used for PageFoo. We reserve a few high and - * low bits so that an underflow or overflow of page_mapcount() won't be + * low bits so that an underflow or overflow of _mapcount won't be * mistaken for a page type value. */ #define PAGE_TYPE_BASE 0xf0000000 -/* Reserve 0x0000007f to catch underflows of page_mapcount */ +/* Reserve 0x0000007f to catch underflows of _mapcount */ #define PAGE_MAPCOUNT_RESERVE -128 #define PG_buddy 0x00000080 #define PG_offline 0x00000100 From d99e3140a4d33e26066183ff727d8f02f56bec64 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 21 Mar 2024 14:24:43 +0000 Subject: [PATCH 359/606] mm: turn folio_test_hugetlb into a PageType The current folio_test_hugetlb() can be fooled by a concurrent folio split into returning true for a folio which has never belonged to hugetlbfs. This can't happen if the caller holds a refcount on it, but we have a few places (memory-failure, compaction, procfs) which do not and should not take a speculative reference. Since hugetlb pages do not use individual page mapcounts (they are always fully mapped and use the entire_mapcount field to record the number of mappings), the PageType field is available now that page_mapcount() ignores the value in this field. In compaction and with CONFIG_DEBUG_VM enabled, the current implementation can result in an oops, as reported by Luis. This happens since 9c5ccf2db04b ("mm: remove HUGETLB_PAGE_DTOR") effectively added some VM_BUG_ON() checks in the PageHuge() testing path. [willy@infradead.org: update vmcoreinfo] Link: https://lkml.kernel.org/r/ZgGZUvsdhaT1Va-T@casper.infradead.org Link: https://lkml.kernel.org/r/20240321142448.1645400-6-willy@infradead.org Fixes: 9c5ccf2db04b ("mm: remove HUGETLB_PAGE_DTOR") Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: David Hildenbrand Acked-by: Vlastimil Babka Reported-by: Luis Chamberlain Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218227 Cc: Miaohe Lin Cc: Muchun Song Cc: Oscar Salvador Cc: Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 70 ++++++++++++++++------------------ include/trace/events/mmflags.h | 1 + kernel/vmcore_info.c | 5 +-- mm/hugetlb.c | 22 ++--------- 4 files changed, 39 insertions(+), 59 deletions(-) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 35a0087d09108..4bf1c25fd1dc5 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -190,7 +190,6 @@ enum pageflags { /* At least one page in this folio has the hwpoison flag set */ PG_has_hwpoisoned = PG_error, - PG_hugetlb = PG_active, PG_large_rmappable = PG_workingset, /* anon or file-backed */ }; @@ -876,29 +875,6 @@ TESTPAGEFLAG_FALSE(LargeRmappable, large_rmappable) #define PG_head_mask ((1UL << PG_head)) -#ifdef CONFIG_HUGETLB_PAGE -int PageHuge(const struct page *page); -SETPAGEFLAG(HugeTLB, hugetlb, PF_SECOND) -CLEARPAGEFLAG(HugeTLB, hugetlb, PF_SECOND) - -/** - * folio_test_hugetlb - Determine if the folio belongs to hugetlbfs - * @folio: The folio to test. - * - * Context: Any context. Caller should have a reference on the folio to - * prevent it from being turned into a tail page. - * Return: True for hugetlbfs folios, false for anon folios or folios - * belonging to other filesystems. - */ -static inline bool folio_test_hugetlb(const struct folio *folio) -{ - return folio_test_large(folio) && - test_bit(PG_hugetlb, const_folio_flags(folio, 1)); -} -#else -TESTPAGEFLAG_FALSE(Huge, hugetlb) -#endif - #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* * PageHuge() only returns true for hugetlbfs pages, but not for @@ -954,18 +930,6 @@ PAGEFLAG_FALSE(HasHWPoisoned, has_hwpoisoned) TESTSCFLAG_FALSE(HasHWPoisoned, has_hwpoisoned) #endif -/* - * Check if a page is currently marked HWPoisoned. Note that this check is - * best effort only and inherently racy: there is no way to synchronize with - * failing hardware. - */ -static inline bool is_page_hwpoison(struct page *page) -{ - if (PageHWPoison(page)) - return true; - return PageHuge(page) && PageHWPoison(compound_head(page)); -} - /* * For pages that are never mapped to userspace (and aren't PageSlab), * page_type may be used. Because it is initialised to -1, we invert the @@ -982,6 +946,7 @@ static inline bool is_page_hwpoison(struct page *page) #define PG_offline 0x00000100 #define PG_table 0x00000200 #define PG_guard 0x00000400 +#define PG_hugetlb 0x00000800 #define PageType(page, flag) \ ((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) @@ -1076,6 +1041,37 @@ PAGE_TYPE_OPS(Table, table, pgtable) */ PAGE_TYPE_OPS(Guard, guard, guard) +#ifdef CONFIG_HUGETLB_PAGE +FOLIO_TYPE_OPS(hugetlb, hugetlb) +#else +FOLIO_TEST_FLAG_FALSE(hugetlb) +#endif + +/** + * PageHuge - Determine if the page belongs to hugetlbfs + * @page: The page to test. + * + * Context: Any context. + * Return: True for hugetlbfs pages, false for anon pages or pages + * belonging to other filesystems. + */ +static inline bool PageHuge(const struct page *page) +{ + return folio_test_hugetlb(page_folio(page)); +} + +/* + * Check if a page is currently marked HWPoisoned. Note that this check is + * best effort only and inherently racy: there is no way to synchronize with + * failing hardware. + */ +static inline bool is_page_hwpoison(struct page *page) +{ + if (PageHWPoison(page)) + return true; + return PageHuge(page) && PageHWPoison(compound_head(page)); +} + extern bool is_free_buddy_page(struct page *page); PAGEFLAG(Isolated, isolated, PF_ANY); @@ -1142,7 +1138,7 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page) */ #define PAGE_FLAGS_SECOND \ (0xffUL /* order */ | 1UL << PG_has_hwpoisoned | \ - 1UL << PG_hugetlb | 1UL << PG_large_rmappable) + 1UL << PG_large_rmappable) #define PAGE_FLAGS_PRIVATE \ (1UL << PG_private | 1UL << PG_private_2) diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index d801409b33cfe..d55e53ac91bd2 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -135,6 +135,7 @@ IF_HAVE_PG_ARCH_X(arch_3) #define DEF_PAGETYPE_NAME(_name) { PG_##_name, __stringify(_name) } #define __def_pagetype_names \ + DEF_PAGETYPE_NAME(hugetlb), \ DEF_PAGETYPE_NAME(offline), \ DEF_PAGETYPE_NAME(guard), \ DEF_PAGETYPE_NAME(table), \ diff --git a/kernel/vmcore_info.c b/kernel/vmcore_info.c index f95516cd45bbe..23c125c2e2436 100644 --- a/kernel/vmcore_info.c +++ b/kernel/vmcore_info.c @@ -205,11 +205,10 @@ static int __init crash_save_vmcoreinfo_init(void) VMCOREINFO_NUMBER(PG_head_mask); #define PAGE_BUDDY_MAPCOUNT_VALUE (~PG_buddy) VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE); -#ifdef CONFIG_HUGETLB_PAGE - VMCOREINFO_NUMBER(PG_hugetlb); +#define PAGE_HUGETLB_MAPCOUNT_VALUE (~PG_hugetlb) + VMCOREINFO_NUMBER(PAGE_HUGETLB_MAPCOUNT_VALUE); #define PAGE_OFFLINE_MAPCOUNT_VALUE (~PG_offline) VMCOREINFO_NUMBER(PAGE_OFFLINE_MAPCOUNT_VALUE); -#endif #ifdef CONFIG_KALLSYMS VMCOREINFO_SYMBOL(kallsyms_names); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 53e0ab5c0845c..4553241f0fb28 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1624,7 +1624,7 @@ static inline void __clear_hugetlb_destructor(struct hstate *h, { lockdep_assert_held(&hugetlb_lock); - folio_clear_hugetlb(folio); + __folio_clear_hugetlb(folio); } /* @@ -1711,7 +1711,7 @@ static void add_hugetlb_folio(struct hstate *h, struct folio *folio, h->surplus_huge_pages_node[nid]++; } - folio_set_hugetlb(folio); + __folio_set_hugetlb(folio); folio_change_private(folio, NULL); /* * We have to set hugetlb_vmemmap_optimized again as above @@ -2049,7 +2049,7 @@ static void __prep_account_new_huge_page(struct hstate *h, int nid) static void init_new_hugetlb_folio(struct hstate *h, struct folio *folio) { - folio_set_hugetlb(folio); + __folio_set_hugetlb(folio); INIT_LIST_HEAD(&folio->lru); hugetlb_set_folio_subpool(folio, NULL); set_hugetlb_cgroup(folio, NULL); @@ -2159,22 +2159,6 @@ static bool prep_compound_gigantic_folio_for_demote(struct folio *folio, return __prep_compound_gigantic_folio(folio, order, true); } -/* - * PageHuge() only returns true for hugetlbfs pages, but not for normal or - * transparent huge pages. See the PageTransHuge() documentation for more - * details. - */ -int PageHuge(const struct page *page) -{ - const struct folio *folio; - - if (!PageCompound(page)) - return 0; - folio = page_folio(page); - return folio_test_hugetlb(folio); -} -EXPORT_SYMBOL_GPL(PageHuge); - /* * Find and lock address space (mapping) in write mode. * From 682886ec69d22363819a83ddddd5d66cb5c791e1 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 18 Apr 2024 08:26:28 -0400 Subject: [PATCH 360/606] mm: zswap: fix shrinker NULL crash with cgroup_disable=memory Christian reports a NULL deref in zswap that he bisected down to the zswap shrinker. The issue also cropped up in the bug trackers of libguestfs [1] and the Red Hat bugzilla [2]. The problem is that when memcg is disabled with the boot time flag, the zswap shrinker might get called with sc->memcg == NULL. This is okay in many places, like the lruvec operations. But it crashes in memcg_page_state() - which is only used due to the non-node accounting of cgroup's the zswap memory to begin with. Nhat spotted that the memcg can be NULL in the memcg-disabled case, and I was then able to reproduce the crash locally as well. [1] https://github.com/libguestfs/libguestfs/issues/139 [2] https://bugzilla.redhat.com/show_bug.cgi?id=2275252 Link: https://lkml.kernel.org/r/20240418124043.GC1055428@cmpxchg.org Link: https://lkml.kernel.org/r/20240417143324.GA1055428@cmpxchg.org Fixes: b5ba474f3f51 ("zswap: shrink zswap pool based on memory pressure") Signed-off-by: Johannes Weiner Reported-by: Christian Heusel Debugged-by: Nhat Pham Suggested-by: Nhat Pham Tested-by: Christian Heusel Acked-by: Yosry Ahmed Cc: Chengming Zhou Cc: Dan Streetman Cc: Richard W.M. Jones Cc: Seth Jennings Cc: Vitaly Wool Cc: [v6.8] Signed-off-by: Andrew Morton --- mm/zswap.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/mm/zswap.c b/mm/zswap.c index caed028945b04..6f8850c44b616 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -1331,15 +1331,22 @@ static unsigned long zswap_shrinker_count(struct shrinker *shrinker, if (!gfp_has_io_fs(sc->gfp_mask)) return 0; -#ifdef CONFIG_MEMCG_KMEM - mem_cgroup_flush_stats(memcg); - nr_backing = memcg_page_state(memcg, MEMCG_ZSWAP_B) >> PAGE_SHIFT; - nr_stored = memcg_page_state(memcg, MEMCG_ZSWAPPED); -#else - /* use pool stats instead of memcg stats */ - nr_backing = zswap_pool_total_size >> PAGE_SHIFT; - nr_stored = atomic_read(&zswap_nr_stored); -#endif + /* + * For memcg, use the cgroup-wide ZSWAP stats since we don't + * have them per-node and thus per-lruvec. Careful if memcg is + * runtime-disabled: we can get sc->memcg == NULL, which is ok + * for the lruvec, but not for memcg_page_state(). + * + * Without memcg, use the zswap pool-wide metrics. + */ + if (!mem_cgroup_disabled()) { + mem_cgroup_flush_stats(memcg); + nr_backing = memcg_page_state(memcg, MEMCG_ZSWAP_B) >> PAGE_SHIFT; + nr_stored = memcg_page_state(memcg, MEMCG_ZSWAPPED); + } else { + nr_backing = zswap_pool_total_size >> PAGE_SHIFT; + nr_stored = atomic_read(&zswap_nr_stored); + } if (!nr_stored) return 0; From 37641efaa3faa4b8292aba4bbd7d71c0b703a239 Mon Sep 17 00:00:00 2001 From: "Vishal Moola (Oracle)" Date: Mon, 15 Apr 2024 14:17:47 -0700 Subject: [PATCH 361/606] hugetlb: check for anon_vma prior to folio allocation Commit 9acad7ba3e25 ("hugetlb: use vmf_anon_prepare() instead of anon_vma_prepare()") may bailout after allocating a folio if we do not hold the mmap lock. When this occurs, vmf_anon_prepare() will release the vma lock. Hugetlb then attempts to call restore_reserve_on_error(), which depends on the vma lock being held. We can move vmf_anon_prepare() prior to the folio allocation in order to avoid calling restore_reserve_on_error() without the vma lock. Link: https://lkml.kernel.org/r/ZiFqSrSRLhIV91og@fedora Fixes: 9acad7ba3e25 ("hugetlb: use vmf_anon_prepare() instead of anon_vma_prepare()") Reported-by: syzbot+ad1b592fc4483655438b@syzkaller.appspotmail.com Signed-off-by: Vishal Moola (Oracle) Cc: Muchun Song Cc: Signed-off-by: Andrew Morton --- mm/hugetlb.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 4553241f0fb28..05371bf54f96d 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -6261,6 +6261,12 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm, VM_UFFD_MISSING); } + if (!(vma->vm_flags & VM_MAYSHARE)) { + ret = vmf_anon_prepare(vmf); + if (unlikely(ret)) + goto out; + } + folio = alloc_hugetlb_folio(vma, haddr, 0); if (IS_ERR(folio)) { /* @@ -6297,15 +6303,12 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm, */ restore_reserve_on_error(h, vma, haddr, folio); folio_put(folio); + ret = VM_FAULT_SIGBUS; goto out; } new_pagecache_folio = true; } else { folio_lock(folio); - - ret = vmf_anon_prepare(vmf); - if (unlikely(ret)) - goto backout_unlocked; anon_rmap = 1; } } else { From 6fe60465e1d53ea321ee909be26d97529e8f746c Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 18 Apr 2024 16:11:33 +0200 Subject: [PATCH 362/606] stackdepot: respect __GFP_NOLOCKDEP allocation flag If stack_depot_save_flags() allocates memory it always drops __GFP_NOLOCKDEP flag. So when KASAN tries to track __GFP_NOLOCKDEP allocation we may end up with lockdep splat like bellow: ====================================================== WARNING: possible circular locking dependency detected 6.9.0-rc3+ #49 Not tainted ------------------------------------------------------ kswapd0/149 is trying to acquire lock: ffff88811346a920 (&xfs_nondir_ilock_class){++++}-{4:4}, at: xfs_reclaim_inode+0x3ac/0x590 [xfs] but task is already holding lock: ffffffff8bb33100 (fs_reclaim){+.+.}-{0:0}, at: balance_pgdat+0x5d9/0xad0 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (fs_reclaim){+.+.}-{0:0}: __lock_acquire+0x7da/0x1030 lock_acquire+0x15d/0x400 fs_reclaim_acquire+0xb5/0x100 prepare_alloc_pages.constprop.0+0xc5/0x230 __alloc_pages+0x12a/0x3f0 alloc_pages_mpol+0x175/0x340 stack_depot_save_flags+0x4c5/0x510 kasan_save_stack+0x30/0x40 kasan_save_track+0x10/0x30 __kasan_slab_alloc+0x83/0x90 kmem_cache_alloc+0x15e/0x4a0 __alloc_object+0x35/0x370 __create_object+0x22/0x90 __kmalloc_node_track_caller+0x477/0x5b0 krealloc+0x5f/0x110 xfs_iext_insert_raw+0x4b2/0x6e0 [xfs] xfs_iext_insert+0x2e/0x130 [xfs] xfs_iread_bmbt_block+0x1a9/0x4d0 [xfs] xfs_btree_visit_block+0xfb/0x290 [xfs] xfs_btree_visit_blocks+0x215/0x2c0 [xfs] xfs_iread_extents+0x1a2/0x2e0 [xfs] xfs_buffered_write_iomap_begin+0x376/0x10a0 [xfs] iomap_iter+0x1d1/0x2d0 iomap_file_buffered_write+0x120/0x1a0 xfs_file_buffered_write+0x128/0x4b0 [xfs] vfs_write+0x675/0x890 ksys_write+0xc3/0x160 do_syscall_64+0x94/0x170 entry_SYSCALL_64_after_hwframe+0x71/0x79 Always preserve __GFP_NOLOCKDEP to fix this. Link: https://lkml.kernel.org/r/20240418141133.22950-1-ryabinin.a.a@gmail.com Fixes: cd11016e5f52 ("mm, kasan: stackdepot implementation. Enable stackdepot for SLAB") Signed-off-by: Andrey Ryabinin Reported-by: Xiubo Li Closes: https://lore.kernel.org/all/a0caa289-ca02-48eb-9bf2-d86fd47b71f4@redhat.com/ Reported-by: Damien Le Moal Closes: https://lore.kernel.org/all/f9ff999a-e170-b66b-7caf-293f2b147ac2@opensource.wdc.com/ Suggested-by: Dave Chinner Tested-by: Xiubo Li Cc: Christoph Hellwig Cc: Alexander Potapenko Cc: Signed-off-by: Andrew Morton --- lib/stackdepot.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 68c97387aa54e..cd8f234552851 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -627,10 +627,10 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries, /* * Zero out zone modifiers, as we don't have specific zone * requirements. Keep the flags related to allocation in atomic - * contexts and I/O. + * contexts, I/O, nolockdep. */ alloc_flags &= ~GFP_ZONEMASK; - alloc_flags &= (GFP_ATOMIC | GFP_KERNEL); + alloc_flags &= (GFP_ATOMIC | GFP_KERNEL | __GFP_NOLOCKDEP); alloc_flags |= __GFP_NOWARN; page = alloc_pages(alloc_flags, DEPOT_POOL_ORDER); if (page) From 8d6bf83f6740ba52a59e25dad360e1e87ef47666 Mon Sep 17 00:00:00 2001 From: Duanqiang Wen Date: Mon, 22 Apr 2024 16:41:08 +0800 Subject: [PATCH 363/606] Revert "net: txgbe: fix i2c dev name cannot match clkdev" This reverts commit c644920ce9220d83e070f575a4df711741c07f07. when register i2c dev, txgbe shorten "i2c_designware" to "i2c_dw", will cause this i2c dev can't match platfom driver i2c_designware_platform. Signed-off-by: Duanqiang Wen Reviewed-by: Jacob Keller Link: https://lore.kernel.org/r/20240422084109.3201-1-duanqiangwen@net-swift.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c index 2fa511227eac8..5b5d5e4310d12 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c @@ -20,8 +20,6 @@ #include "txgbe_phy.h" #include "txgbe_hw.h" -#define TXGBE_I2C_CLK_DEV_NAME "i2c_dw" - static int txgbe_swnodes_register(struct txgbe *txgbe) { struct txgbe_nodes *nodes = &txgbe->nodes; @@ -573,8 +571,8 @@ static int txgbe_clock_register(struct txgbe *txgbe) char clk_name[32]; struct clk *clk; - snprintf(clk_name, sizeof(clk_name), "%s.%d", - TXGBE_I2C_CLK_DEV_NAME, pci_dev_id(pdev)); + snprintf(clk_name, sizeof(clk_name), "i2c_dw.%d", + pci_dev_id(pdev)); clk = clk_register_fixed_rate(NULL, clk_name, NULL, 0, 156250000); if (IS_ERR(clk)) @@ -636,7 +634,7 @@ static int txgbe_i2c_register(struct txgbe *txgbe) info.parent = &pdev->dev; info.fwnode = software_node_fwnode(txgbe->nodes.group[SWNODE_I2C]); - info.name = TXGBE_I2C_CLK_DEV_NAME; + info.name = "i2c_designware"; info.id = pci_dev_id(pdev); info.res = &DEFINE_RES_IRQ(pdev->irq); From edd2d250fb3bb5d70419ae82c1f9dbb9684dffd3 Mon Sep 17 00:00:00 2001 From: Duanqiang Wen Date: Mon, 22 Apr 2024 16:41:09 +0800 Subject: [PATCH 364/606] Revert "net: txgbe: fix clk_name exceed MAX_DEV_ID limits" This reverts commit e30cef001da259e8df354b813015d0e5acc08740. commit 99f4570cfba1 ("clkdev: Update clkdev id usage to allow for longer names") can fix clk_name exceed MAX_DEV_ID limits, so this commit is meaningless. Signed-off-by: Duanqiang Wen Reviewed-by: Jacob Keller Link: https://lore.kernel.org/r/20240422084109.3201-2-duanqiangwen@net-swift.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c index 5b5d5e4310d12..93295916b1d2b 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c @@ -571,7 +571,7 @@ static int txgbe_clock_register(struct txgbe *txgbe) char clk_name[32]; struct clk *clk; - snprintf(clk_name, sizeof(clk_name), "i2c_dw.%d", + snprintf(clk_name, sizeof(clk_name), "i2c_designware.%d", pci_dev_id(pdev)); clk = clk_register_fixed_rate(NULL, clk_name, NULL, 0, 156250000); From c04d1b9ecce565455652ac3c6b17043cd475cf47 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Mon, 22 Apr 2024 13:45:02 -0700 Subject: [PATCH 365/606] igc: Fix LED-related deadlock on driver unbind MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Roman reports a deadlock on unplug of a Thunderbolt docking station containing an Intel I225 Ethernet adapter. The root cause is that led_classdev's for LEDs on the adapter are registered such that they're device-managed by the netdev. That results in recursive acquisition of the rtnl_lock() mutex on unplug: When the driver calls unregister_netdev(), it acquires rtnl_lock(), then frees the device-managed resources. Upon unregistering the LEDs, netdev_trig_deactivate() invokes unregister_netdevice_notifier(), which tries to acquire rtnl_lock() again. Avoid by using non-device-managed LED registration. Stack trace for posterity: schedule+0x6e/0xf0 schedule_preempt_disabled+0x15/0x20 __mutex_lock+0x2a0/0x750 unregister_netdevice_notifier+0x40/0x150 netdev_trig_deactivate+0x1f/0x60 [ledtrig_netdev] led_trigger_set+0x102/0x330 led_classdev_unregister+0x4b/0x110 release_nodes+0x3d/0xb0 devres_release_all+0x8b/0xc0 device_del+0x34f/0x3c0 unregister_netdevice_many_notify+0x80b/0xaf0 unregister_netdev+0x7c/0xd0 igc_remove+0xd8/0x1e0 [igc] pci_device_remove+0x3f/0xb0 Fixes: ea578703b03d ("igc: Add support for LEDs on i225/i226") Reported-by: Roman Lozko Closes: https://lore.kernel.org/r/CAEhC_B=ksywxCG_+aQqXUrGEgKq+4mqnSV8EBHOKbC3-Obj9+Q@mail.gmail.com/ Reported-by: "Marek Marczykowski-Górecki" Closes: https://lore.kernel.org/r/ZhRD3cOtz5i-61PB@mail-itl/ Signed-off-by: Kurt Kanzenbach Signed-off-by: Lukas Wunner Cc: Heiner Kallweit Reviewed-by: Simon Horman Reviewed-by: Kurt Kanzenbach Tested-by: Kurt Kanzenbach # Intel i225 Tested-by: Naama Meir Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20240422204503.225448-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/igc/igc.h | 2 ++ drivers/net/ethernet/intel/igc/igc_leds.c | 38 ++++++++++++++++++----- drivers/net/ethernet/intel/igc/igc_main.c | 3 ++ 3 files changed, 35 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 90316dc586308..6bc56c7c181e4 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -298,6 +298,7 @@ struct igc_adapter { /* LEDs */ struct mutex led_mutex; + struct igc_led_classdev *leds; }; void igc_up(struct igc_adapter *adapter); @@ -723,6 +724,7 @@ void igc_ptp_read(struct igc_adapter *adapter, struct timespec64 *ts); void igc_ptp_tx_tstamp_event(struct igc_adapter *adapter); int igc_led_setup(struct igc_adapter *adapter); +void igc_led_free(struct igc_adapter *adapter); #define igc_rx_pg_size(_ring) (PAGE_SIZE << igc_rx_pg_order(_ring)) diff --git a/drivers/net/ethernet/intel/igc/igc_leds.c b/drivers/net/ethernet/intel/igc/igc_leds.c index bf240c5daf865..3929b25b6ae6e 100644 --- a/drivers/net/ethernet/intel/igc/igc_leds.c +++ b/drivers/net/ethernet/intel/igc/igc_leds.c @@ -236,8 +236,8 @@ static void igc_led_get_name(struct igc_adapter *adapter, int index, char *buf, pci_dev_id(adapter->pdev), index); } -static void igc_setup_ldev(struct igc_led_classdev *ldev, - struct net_device *netdev, int index) +static int igc_setup_ldev(struct igc_led_classdev *ldev, + struct net_device *netdev, int index) { struct igc_adapter *adapter = netdev_priv(netdev); struct led_classdev *led_cdev = &ldev->led; @@ -257,24 +257,46 @@ static void igc_setup_ldev(struct igc_led_classdev *ldev, led_cdev->hw_control_get = igc_led_hw_control_get; led_cdev->hw_control_get_device = igc_led_hw_control_get_device; - devm_led_classdev_register(&netdev->dev, led_cdev); + return led_classdev_register(&netdev->dev, led_cdev); } int igc_led_setup(struct igc_adapter *adapter) { struct net_device *netdev = adapter->netdev; - struct device *dev = &netdev->dev; struct igc_led_classdev *leds; - int i; + int i, err; mutex_init(&adapter->led_mutex); - leds = devm_kcalloc(dev, IGC_NUM_LEDS, sizeof(*leds), GFP_KERNEL); + leds = kcalloc(IGC_NUM_LEDS, sizeof(*leds), GFP_KERNEL); if (!leds) return -ENOMEM; - for (i = 0; i < IGC_NUM_LEDS; i++) - igc_setup_ldev(leds + i, netdev, i); + for (i = 0; i < IGC_NUM_LEDS; i++) { + err = igc_setup_ldev(leds + i, netdev, i); + if (err) + goto err; + } + + adapter->leds = leds; return 0; + +err: + for (i--; i >= 0; i--) + led_classdev_unregister(&((leds + i)->led)); + + kfree(leds); + return err; +} + +void igc_led_free(struct igc_adapter *adapter) +{ + struct igc_led_classdev *leds = adapter->leds; + int i; + + for (i = 0; i < IGC_NUM_LEDS; i++) + led_classdev_unregister(&((leds + i)->led)); + + kfree(leds); } diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 35ad40a803cb6..4d975d620a8e4 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -7021,6 +7021,9 @@ static void igc_remove(struct pci_dev *pdev) cancel_work_sync(&adapter->watchdog_task); hrtimer_cancel(&adapter->hrtimer); + if (IS_ENABLED(CONFIG_IGC_LEDS)) + igc_led_free(adapter); + /* Release control of h/w to f/w. If f/w is AMT enabled, this * would have already happened in close and is redundant. */ From 730117730709992c9f6535dd7b47638ee561ec45 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 23 Apr 2024 17:21:48 -0700 Subject: [PATCH 366/606] eth: bnxt: fix counting packets discarded due to OOM and netpoll I added OOM and netpoll discard counters, naively assuming that the cpr pointer is pointing to a common completion ring. Turns out that is usually *a* completion ring but not *the* completion ring which bnapi->cp_ring points to. bnapi->cp_ring is where the stats are read from, so we end up reporting 0 thru ethtool -S and qstat even though the drop events have happened. Make 100% sure we're recording statistics in the correct structure. Fixes: 907fd4a294db ("bnxt: count discards due to memory allocation errors") Reviewed-by: Michael Chan Link: https://lore.kernel.org/r/20240424002148.3937059-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 44 ++++++++++------------- 1 file changed, 18 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index ed04a90a4fdde..2c2ee79c4d779 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1778,7 +1778,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp, skb = bnxt_copy_skb(bnapi, data_ptr, len, mapping); if (!skb) { bnxt_abort_tpa(cpr, idx, agg_bufs); - cpr->sw_stats.rx.rx_oom_discards += 1; + cpr->bnapi->cp_ring.sw_stats.rx.rx_oom_discards += 1; return NULL; } } else { @@ -1788,7 +1788,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp, new_data = __bnxt_alloc_rx_frag(bp, &new_mapping, GFP_ATOMIC); if (!new_data) { bnxt_abort_tpa(cpr, idx, agg_bufs); - cpr->sw_stats.rx.rx_oom_discards += 1; + cpr->bnapi->cp_ring.sw_stats.rx.rx_oom_discards += 1; return NULL; } @@ -1804,7 +1804,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp, if (!skb) { skb_free_frag(data); bnxt_abort_tpa(cpr, idx, agg_bufs); - cpr->sw_stats.rx.rx_oom_discards += 1; + cpr->bnapi->cp_ring.sw_stats.rx.rx_oom_discards += 1; return NULL; } skb_reserve(skb, bp->rx_offset); @@ -1815,7 +1815,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp, skb = bnxt_rx_agg_pages_skb(bp, cpr, skb, idx, agg_bufs, true); if (!skb) { /* Page reuse already handled by bnxt_rx_pages(). */ - cpr->sw_stats.rx.rx_oom_discards += 1; + cpr->bnapi->cp_ring.sw_stats.rx.rx_oom_discards += 1; return NULL; } } @@ -2094,11 +2094,8 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, u32 frag_len = bnxt_rx_agg_pages_xdp(bp, cpr, &xdp, cp_cons, agg_bufs, false); - if (!frag_len) { - cpr->sw_stats.rx.rx_oom_discards += 1; - rc = -ENOMEM; - goto next_rx; - } + if (!frag_len) + goto oom_next_rx; } xdp_active = true; } @@ -2121,9 +2118,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, else bnxt_xdp_buff_frags_free(rxr, &xdp); } - cpr->sw_stats.rx.rx_oom_discards += 1; - rc = -ENOMEM; - goto next_rx; + goto oom_next_rx; } } else { u32 payload; @@ -2134,29 +2129,21 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, payload = 0; skb = bp->rx_skb_func(bp, rxr, cons, data, data_ptr, dma_addr, payload | len); - if (!skb) { - cpr->sw_stats.rx.rx_oom_discards += 1; - rc = -ENOMEM; - goto next_rx; - } + if (!skb) + goto oom_next_rx; } if (agg_bufs) { if (!xdp_active) { skb = bnxt_rx_agg_pages_skb(bp, cpr, skb, cp_cons, agg_bufs, false); - if (!skb) { - cpr->sw_stats.rx.rx_oom_discards += 1; - rc = -ENOMEM; - goto next_rx; - } + if (!skb) + goto oom_next_rx; } else { skb = bnxt_xdp_build_skb(bp, skb, agg_bufs, rxr->page_pool, &xdp, rxcmp1); if (!skb) { /* we should be able to free the old skb here */ bnxt_xdp_buff_frags_free(rxr, &xdp); - cpr->sw_stats.rx.rx_oom_discards += 1; - rc = -ENOMEM; - goto next_rx; + goto oom_next_rx; } } } @@ -2234,6 +2221,11 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, *raw_cons = tmp_raw_cons; return rc; + +oom_next_rx: + cpr->bnapi->cp_ring.sw_stats.rx.rx_oom_discards += 1; + rc = -ENOMEM; + goto next_rx; } /* In netpoll mode, if we are using a combined completion ring, we need to @@ -2280,7 +2272,7 @@ static int bnxt_force_rx_discard(struct bnxt *bp, } rc = bnxt_rx_pkt(bp, cpr, raw_cons, event); if (rc && rc != -EBUSY) - cpr->sw_stats.rx.rx_netpoll_discards += 1; + cpr->bnapi->cp_ring.sw_stats.rx.rx_netpoll_discards += 1; return rc; } From d7f3040a565214a30e2f07dc9b91566d316e2d36 Mon Sep 17 00:00:00 2001 From: Michael Heimpold Date: Tue, 16 Apr 2024 21:06:58 +0200 Subject: [PATCH 367/606] ARM: dts: imx6ull-tarragon: fix USB over-current polarity Our Tarragon platform uses a active-low signal to inform the i.MX6ULL about the over-current detection. Fixes: 5e4f393ccbf0 ("ARM: dts: imx6ull: Add chargebyte Tarragon support") Signed-off-by: Michael Heimpold Signed-off-by: Stefan Wahren Signed-off-by: Shawn Guo --- arch/arm/boot/dts/nxp/imx/imx6ull-tarragon-common.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/nxp/imx/imx6ull-tarragon-common.dtsi b/arch/arm/boot/dts/nxp/imx/imx6ull-tarragon-common.dtsi index 3fdece5bd31f9..5248a058230c8 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6ull-tarragon-common.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6ull-tarragon-common.dtsi @@ -805,6 +805,7 @@ &pinctrl_usb_pwr>; dr_mode = "host"; power-active-high; + over-current-active-low; disable-over-current; status = "okay"; }; From 8e30abc9ace4f0add4cd761dfdbfaebae5632dd2 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 24 Apr 2024 20:45:01 +0200 Subject: [PATCH 368/606] netfilter: nf_tables: honor table dormant flag from netdev release event path Check for table dormant flag otherwise netdev release event path tries to unregister an already unregistered hook. [524854.857999] ------------[ cut here ]------------ [524854.858010] WARNING: CPU: 0 PID: 3386599 at net/netfilter/core.c:501 __nf_unregister_net_hook+0x21a/0x260 [...] [524854.858848] CPU: 0 PID: 3386599 Comm: kworker/u32:2 Not tainted 6.9.0-rc3+ #365 [524854.858869] Workqueue: netns cleanup_net [524854.858886] RIP: 0010:__nf_unregister_net_hook+0x21a/0x260 [524854.858903] Code: 24 e8 aa 73 83 ff 48 63 43 1c 83 f8 01 0f 85 3d ff ff ff e8 98 d1 f0 ff 48 8b 3c 24 e8 8f 73 83 ff 48 63 43 1c e9 26 ff ff ff <0f> 0b 48 83 c4 18 48 c7 c7 00 68 e9 82 5b 5d 41 5c 41 5d 41 5e 41 [524854.858914] RSP: 0018:ffff8881e36d79e0 EFLAGS: 00010246 [524854.858926] RAX: 0000000000000000 RBX: ffff8881339ae790 RCX: ffffffff81ba524a [524854.858936] RDX: dffffc0000000000 RSI: 0000000000000008 RDI: ffff8881c8a16438 [524854.858945] RBP: ffff8881c8a16438 R08: 0000000000000001 R09: ffffed103c6daf34 [524854.858954] R10: ffff8881e36d79a7 R11: 0000000000000000 R12: 0000000000000005 [524854.858962] R13: ffff8881c8a16000 R14: 0000000000000000 R15: ffff8881351b5a00 [524854.858971] FS: 0000000000000000(0000) GS:ffff888390800000(0000) knlGS:0000000000000000 [524854.858982] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [524854.858991] CR2: 00007fc9be0f16f4 CR3: 00000001437cc004 CR4: 00000000001706f0 [524854.859000] Call Trace: [524854.859006] [524854.859013] ? __warn+0x9f/0x1a0 [524854.859027] ? __nf_unregister_net_hook+0x21a/0x260 [524854.859044] ? report_bug+0x1b1/0x1e0 [524854.859060] ? handle_bug+0x3c/0x70 [524854.859071] ? exc_invalid_op+0x17/0x40 [524854.859083] ? asm_exc_invalid_op+0x1a/0x20 [524854.859100] ? __nf_unregister_net_hook+0x6a/0x260 [524854.859116] ? __nf_unregister_net_hook+0x21a/0x260 [524854.859135] nf_tables_netdev_event+0x337/0x390 [nf_tables] [524854.859304] ? __pfx_nf_tables_netdev_event+0x10/0x10 [nf_tables] [524854.859461] ? packet_notifier+0xb3/0x360 [524854.859476] ? _raw_spin_unlock_irqrestore+0x11/0x40 [524854.859489] ? dcbnl_netdevice_event+0x35/0x140 [524854.859507] ? __pfx_nf_tables_netdev_event+0x10/0x10 [nf_tables] [524854.859661] notifier_call_chain+0x7d/0x140 [524854.859677] unregister_netdevice_many_notify+0x5e1/0xae0 Fixes: d54725cd11a5 ("netfilter: nf_tables: support for multiple devices per netdev hook") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_chain_filter.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c index 274b6f7e6bb57..d170758a1eb5d 100644 --- a/net/netfilter/nft_chain_filter.c +++ b/net/netfilter/nft_chain_filter.c @@ -338,7 +338,9 @@ static void nft_netdev_event(unsigned long event, struct net_device *dev, return; if (n > 1) { - nf_unregister_net_hook(ctx->net, &found->ops); + if (!(ctx->chain->table->flags & NFT_TABLE_F_DORMANT)) + nf_unregister_net_hook(ctx->net, &found->ops); + list_del_rcu(&found->list); kfree_rcu(found, rcu); return; From 6c9cd59dbcb09a2122b5ce0dfc07c74e6fc00dc0 Mon Sep 17 00:00:00 2001 From: MD Danish Anwar Date: Tue, 23 Apr 2024 14:18:28 +0530 Subject: [PATCH 369/606] net: phy: dp83869: Fix MII mode failure The DP83869 driver sets the MII bit (needed for PHY to work in MII mode) only if the op-mode is either DP83869_100M_MEDIA_CONVERT or DP83869_RGMII_100_BASE. Some drivers i.e. ICSSG support MII mode with op-mode as DP83869_RGMII_COPPER_ETHERNET for which the MII bit is not set in dp83869 driver. As a result MII mode on ICSSG doesn't work and below log is seen. TI DP83869 300b2400.mdio:0f: selected op-mode is not valid with MII mode icssg-prueth icssg1-eth: couldn't connect to phy ethernet-phy@0 icssg-prueth icssg1-eth: can't phy connect port MII0 Fix this by setting MII bit for DP83869_RGMII_COPPER_ETHERNET op-mode as well. Fixes: 94e86ef1b801 ("net: phy: dp83869: support mii mode when rgmii strap cfg is used") Signed-off-by: MD Danish Anwar Reviewed-by: Ravi Gunasekaran Signed-off-by: David S. Miller --- drivers/net/phy/dp83869.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/dp83869.c b/drivers/net/phy/dp83869.c index fa8c6fdcf3018..d7aaefb5226b6 100644 --- a/drivers/net/phy/dp83869.c +++ b/drivers/net/phy/dp83869.c @@ -695,7 +695,8 @@ static int dp83869_configure_mode(struct phy_device *phydev, phy_ctrl_val = dp83869->mode; if (phydev->interface == PHY_INTERFACE_MODE_MII) { if (dp83869->mode == DP83869_100M_MEDIA_CONVERT || - dp83869->mode == DP83869_RGMII_100_BASE) { + dp83869->mode == DP83869_RGMII_100_BASE || + dp83869->mode == DP83869_RGMII_COPPER_ETHERNET) { phy_ctrl_val |= DP83869_OP_MODE_MII; } else { phydev_err(phydev, "selected op-mode is not valid with MII mode\n"); From c26591afd33adce296c022e3480dea4282b7ef91 Mon Sep 17 00:00:00 2001 From: Guanrui Huang Date: Thu, 18 Apr 2024 14:10:52 +0800 Subject: [PATCH 370/606] irqchip/gic-v3-its: Prevent double free on error The error handling path in its_vpe_irq_domain_alloc() causes a double free when its_vpe_init() fails after successfully allocating at least one interrupt. This happens because its_vpe_irq_domain_free() frees the interrupts along with the area bitmap and the vprop_page and its_vpe_irq_domain_alloc() subsequently frees the area bitmap and the vprop_page again. Fix this by unconditionally invoking its_vpe_irq_domain_free() which handles all cases correctly and by removing the bitmap/vprop_page freeing from its_vpe_irq_domain_alloc(). [ tglx: Massaged change log ] Fixes: 7d75bbb4bc1a ("irqchip/gic-v3-its: Add VPE irq domain allocation/teardown") Signed-off-by: Guanrui Huang Signed-off-by: Thomas Gleixner Reviewed-by: Marc Zyngier Reviewed-by: Zenghui Yu Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240418061053.96803-2-guanrui.huang@linux.alibaba.com --- drivers/irqchip/irq-gic-v3-its.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 2a537cbfcb077..5f7d3db3afd82 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -4567,13 +4567,8 @@ static int its_vpe_irq_domain_alloc(struct irq_domain *domain, unsigned int virq irqd_set_resend_when_in_progress(irq_get_irq_data(virq + i)); } - if (err) { - if (i > 0) - its_vpe_irq_domain_free(domain, virq, i); - - its_lpi_free(bitmap, base, nr_ids); - its_free_prop_table(vprop_page); - } + if (err) + its_vpe_irq_domain_free(domain, virq, i); return err; } From fe42754b94a42d08cf9501790afc25c4f6a5f631 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 19 Apr 2024 17:05:54 -0700 Subject: [PATCH 371/606] cpu: Re-enable CPU mitigations by default for !X86 architectures Rename x86's to CPU_MITIGATIONS, define it in generic code, and force it on for all architectures exception x86. A recent commit to turn mitigations off by default if SPECULATION_MITIGATIONS=n kinda sorta missed that "cpu_mitigations" is completely generic, whereas SPECULATION_MITIGATIONS is x86-specific. Rename x86's SPECULATIVE_MITIGATIONS instead of keeping both and have it select CPU_MITIGATIONS, as having two configs for the same thing is unnecessary and confusing. This will also allow x86 to use the knob to manage mitigations that aren't strictly related to speculative execution. Use another Kconfig to communicate to common code that CPU_MITIGATIONS is already defined instead of having x86's menu depend on the common CPU_MITIGATIONS. This allows keeping a single point of contact for all of x86's mitigations, and it's not clear that other architectures *want* to allow disabling mitigations at compile-time. Fixes: f337a6a21e2f ("x86/cpu: Actually turn off mitigations by default for SPECULATION_MITIGATIONS=n") Closes: https://lkml.kernel.org/r/20240413115324.53303a68%40canb.auug.org.au Reported-by: Stephen Rothwell Reported-by: Michael Ellerman Reported-by: Geert Uytterhoeven Signed-off-by: Sean Christopherson Signed-off-by: Borislav Petkov (AMD) Acked-by: Josh Poimboeuf Acked-by: Borislav Petkov (AMD) Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240420000556.2645001-2-seanjc@google.com --- arch/Kconfig | 8 ++++++++ arch/x86/Kconfig | 11 ++++++----- kernel/cpu.c | 4 ++-- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 65afb1de48b36..30f7930275d83 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -9,6 +9,14 @@ # source "arch/$(SRCARCH)/Kconfig" +config ARCH_CONFIGURES_CPU_MITIGATIONS + bool + +if !ARCH_CONFIGURES_CPU_MITIGATIONS +config CPU_MITIGATIONS + def_bool y +endif + menu "General architecture-dependent options" config ARCH_HAS_SUBPAGE_FAULTS diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 4474bf32d0a49..619a04d5c1310 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -62,6 +62,7 @@ config X86 select ACPI_HOTPLUG_CPU if ACPI_PROCESSOR && HOTPLUG_CPU select ARCH_32BIT_OFF_T if X86_32 select ARCH_CLOCKSOURCE_INIT + select ARCH_CONFIGURES_CPU_MITIGATIONS select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE select ARCH_ENABLE_HUGEPAGE_MIGRATION if X86_64 && HUGETLB_PAGE && MIGRATION select ARCH_ENABLE_MEMORY_HOTPLUG if X86_64 @@ -2488,17 +2489,17 @@ config PREFIX_SYMBOLS def_bool y depends on CALL_PADDING && !CFI_CLANG -menuconfig SPECULATION_MITIGATIONS - bool "Mitigations for speculative execution vulnerabilities" +menuconfig CPU_MITIGATIONS + bool "Mitigations for CPU vulnerabilities" default y help - Say Y here to enable options which enable mitigations for - speculative execution hardware vulnerabilities. + Say Y here to enable options which enable mitigations for hardware + vulnerabilities (usually related to speculative execution). If you say N, all mitigations will be disabled. You really should know what you are doing to say so. -if SPECULATION_MITIGATIONS +if CPU_MITIGATIONS config MITIGATION_PAGE_TABLE_ISOLATION bool "Remove the kernel mapping in user mode" diff --git a/kernel/cpu.c b/kernel/cpu.c index 07ad53b7f1195..bb0ff275fb467 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -3207,8 +3207,8 @@ enum cpu_mitigations { }; static enum cpu_mitigations cpu_mitigations __ro_after_init = - IS_ENABLED(CONFIG_SPECULATION_MITIGATIONS) ? CPU_MITIGATIONS_AUTO : - CPU_MITIGATIONS_OFF; + IS_ENABLED(CONFIG_CPU_MITIGATIONS) ? CPU_MITIGATIONS_AUTO : + CPU_MITIGATIONS_OFF; static int __init mitigations_parse_cmdline(char *arg) { From ce0abef6a1d540acef85068e0e82bdf1fbeeb0e9 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 19 Apr 2024 17:05:55 -0700 Subject: [PATCH 372/606] cpu: Ignore "mitigations" kernel parameter if CPU_MITIGATIONS=n Explicitly disallow enabling mitigations at runtime for kernels that were built with CONFIG_CPU_MITIGATIONS=n, as some architectures may omit code entirely if mitigations are disabled at compile time. E.g. on x86, a large pile of Kconfigs are buried behind CPU_MITIGATIONS, and trying to provide sane behavior for retroactively enabling mitigations is extremely difficult, bordering on impossible. E.g. page table isolation and call depth tracking require build-time support, BHI mitigations will still be off without additional kernel parameters, etc. [ bp: Touchups. ] Signed-off-by: Sean Christopherson Signed-off-by: Borislav Petkov (AMD) Acked-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240420000556.2645001-3-seanjc@google.com --- Documentation/admin-guide/kernel-parameters.txt | 3 +++ arch/x86/Kconfig | 8 ++++++-- include/linux/cpu.h | 11 +++++++++++ kernel/cpu.c | 14 ++++++++++---- 4 files changed, 30 insertions(+), 6 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 902ecd92a29fb..213d0719e2b7a 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3423,6 +3423,9 @@ arch-independent options, each of which is an aggregation of existing arch-specific options. + Note, "mitigations" is supported if and only if the + kernel was built with CPU_MITIGATIONS=y. + off Disable all optional CPU mitigations. This improves system performance, but it may also diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 619a04d5c1310..928820e61cb50 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2495,9 +2495,13 @@ menuconfig CPU_MITIGATIONS help Say Y here to enable options which enable mitigations for hardware vulnerabilities (usually related to speculative execution). + Mitigations can be disabled or restricted to SMT systems at runtime + via the "mitigations" kernel parameter. - If you say N, all mitigations will be disabled. You really - should know what you are doing to say so. + If you say N, all mitigations will be disabled. This CANNOT be + overridden at runtime. + + Say 'Y', unless you really know what you are doing. if CPU_MITIGATIONS diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 272e4e79e15c4..861c3bfc5f170 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -221,7 +221,18 @@ void cpuhp_report_idle_dead(void); static inline void cpuhp_report_idle_dead(void) { } #endif /* #ifdef CONFIG_HOTPLUG_CPU */ +#ifdef CONFIG_CPU_MITIGATIONS extern bool cpu_mitigations_off(void); extern bool cpu_mitigations_auto_nosmt(void); +#else +static inline bool cpu_mitigations_off(void) +{ + return true; +} +static inline bool cpu_mitigations_auto_nosmt(void) +{ + return false; +} +#endif #endif /* _LINUX_CPU_H_ */ diff --git a/kernel/cpu.c b/kernel/cpu.c index bb0ff275fb467..63447eb85dab6 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -3196,6 +3196,7 @@ void __init boot_cpu_hotplug_init(void) this_cpu_write(cpuhp_state.target, CPUHP_ONLINE); } +#ifdef CONFIG_CPU_MITIGATIONS /* * These are used for a global "mitigations=" cmdline option for toggling * optional CPU mitigations. @@ -3206,9 +3207,7 @@ enum cpu_mitigations { CPU_MITIGATIONS_AUTO_NOSMT, }; -static enum cpu_mitigations cpu_mitigations __ro_after_init = - IS_ENABLED(CONFIG_CPU_MITIGATIONS) ? CPU_MITIGATIONS_AUTO : - CPU_MITIGATIONS_OFF; +static enum cpu_mitigations cpu_mitigations __ro_after_init = CPU_MITIGATIONS_AUTO; static int __init mitigations_parse_cmdline(char *arg) { @@ -3224,7 +3223,6 @@ static int __init mitigations_parse_cmdline(char *arg) return 0; } -early_param("mitigations", mitigations_parse_cmdline); /* mitigations=off */ bool cpu_mitigations_off(void) @@ -3239,3 +3237,11 @@ bool cpu_mitigations_auto_nosmt(void) return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT; } EXPORT_SYMBOL_GPL(cpu_mitigations_auto_nosmt); +#else +static int __init mitigations_parse_cmdline(char *arg) +{ + pr_crit("Kernel compiled without mitigations, ignoring 'mitigations'; system may still be vulnerable\n"); + return 0; +} +#endif +early_param("mitigations", mitigations_parse_cmdline); From f3334ebb8a2a1841c2824594dd992e66de19deb2 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Thu, 25 Apr 2024 22:17:52 +0800 Subject: [PATCH 373/606] LoongArch: Lately init pmu after smp is online There is an smp function call named reset_counters() to init PMU registers of every CPU in PMU initialization state. It requires that all CPUs are online. However there is an early_initcall() wrapper for the PMU init funciton init_hw_perf_events(), so that pmu init funciton is called in do_pre_smp_initcalls() which before function smp_init(). Function reset_counters() cannot work on other CPUs since they haven't boot up still. Here replace the wrapper early_initcall() with pure_initcall(), so that the PMU init function is called after every cpu is online. Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/kernel/perf_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/kernel/perf_event.c b/arch/loongarch/kernel/perf_event.c index 0491bf453cd49..cac7cba81b65f 100644 --- a/arch/loongarch/kernel/perf_event.c +++ b/arch/loongarch/kernel/perf_event.c @@ -884,4 +884,4 @@ static int __init init_hw_perf_events(void) return 0; } -early_initcall(init_hw_perf_events); +pure_initcall(init_hw_perf_events); From 9af503d91298c3f2945e73703f0e00995be08c30 Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Fri, 19 Apr 2024 11:22:48 +0900 Subject: [PATCH 374/606] btrfs: add missing mutex_unlock in btrfs_relocate_sys_chunks() The previous patch that replaced BUG_ON by error handling forgot to unlock the mutex in the error path. Link: https://lore.kernel.org/all/Zh%2fHpAGFqa7YAFuM@duo.ucw.cz Reported-by: Pavel Machek Fixes: 7411055db5ce ("btrfs: handle chunk tree lookup error in btrfs_relocate_sys_chunks()") CC: stable@vger.kernel.org Reviewed-by: Pavel Machek Signed-off-by: Dominique Martinet Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index dedec3d9b1117..c72c351fe7eb6 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3419,6 +3419,7 @@ static int btrfs_relocate_sys_chunks(struct btrfs_fs_info *fs_info) * alignment and size). */ ret = -EUCLEAN; + mutex_unlock(&fs_info->reclaim_bgs_lock); goto error; } From 0f2b8098d72a93890e69aa24ec549ef4bc34f4db Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 19 Apr 2024 14:38:48 -0400 Subject: [PATCH 375/606] btrfs: take the cleaner_mutex earlier in qgroup disable One of my CI runs popped the following lockdep splat ====================================================== WARNING: possible circular locking dependency detected 6.9.0-rc4+ #1 Not tainted ------------------------------------------------------ btrfs/471533 is trying to acquire lock: ffff92ba46980850 (&fs_info->cleaner_mutex){+.+.}-{3:3}, at: btrfs_quota_disable+0x54/0x4c0 but task is already holding lock: ffff92ba46980bd0 (&fs_info->subvol_sem){++++}-{3:3}, at: btrfs_ioctl+0x1c8f/0x2600 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (&fs_info->subvol_sem){++++}-{3:3}: down_read+0x42/0x170 btrfs_rename+0x607/0xb00 btrfs_rename2+0x2e/0x70 vfs_rename+0xaf8/0xfc0 do_renameat2+0x586/0x600 __x64_sys_rename+0x43/0x50 do_syscall_64+0x95/0x180 entry_SYSCALL_64_after_hwframe+0x76/0x7e -> #1 (&sb->s_type->i_mutex_key#16){++++}-{3:3}: down_write+0x3f/0xc0 btrfs_inode_lock+0x40/0x70 prealloc_file_extent_cluster+0x1b0/0x370 relocate_file_extent_cluster+0xb2/0x720 relocate_data_extent+0x107/0x160 relocate_block_group+0x442/0x550 btrfs_relocate_block_group+0x2cb/0x4b0 btrfs_relocate_chunk+0x50/0x1b0 btrfs_balance+0x92f/0x13d0 btrfs_ioctl+0x1abf/0x2600 __x64_sys_ioctl+0x97/0xd0 do_syscall_64+0x95/0x180 entry_SYSCALL_64_after_hwframe+0x76/0x7e -> #0 (&fs_info->cleaner_mutex){+.+.}-{3:3}: __lock_acquire+0x13e7/0x2180 lock_acquire+0xcb/0x2e0 __mutex_lock+0xbe/0xc00 btrfs_quota_disable+0x54/0x4c0 btrfs_ioctl+0x206b/0x2600 __x64_sys_ioctl+0x97/0xd0 do_syscall_64+0x95/0x180 entry_SYSCALL_64_after_hwframe+0x76/0x7e other info that might help us debug this: Chain exists of: &fs_info->cleaner_mutex --> &sb->s_type->i_mutex_key#16 --> &fs_info->subvol_sem Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&fs_info->subvol_sem); lock(&sb->s_type->i_mutex_key#16); lock(&fs_info->subvol_sem); lock(&fs_info->cleaner_mutex); *** DEADLOCK *** 2 locks held by btrfs/471533: #0: ffff92ba4319e420 (sb_writers#14){.+.+}-{0:0}, at: btrfs_ioctl+0x3b5/0x2600 #1: ffff92ba46980bd0 (&fs_info->subvol_sem){++++}-{3:3}, at: btrfs_ioctl+0x1c8f/0x2600 stack backtrace: CPU: 1 PID: 471533 Comm: btrfs Kdump: loaded Not tainted 6.9.0-rc4+ #1 Call Trace: dump_stack_lvl+0x77/0xb0 check_noncircular+0x148/0x160 ? lock_acquire+0xcb/0x2e0 __lock_acquire+0x13e7/0x2180 lock_acquire+0xcb/0x2e0 ? btrfs_quota_disable+0x54/0x4c0 ? lock_is_held_type+0x9a/0x110 __mutex_lock+0xbe/0xc00 ? btrfs_quota_disable+0x54/0x4c0 ? srso_return_thunk+0x5/0x5f ? lock_acquire+0xcb/0x2e0 ? btrfs_quota_disable+0x54/0x4c0 ? btrfs_quota_disable+0x54/0x4c0 btrfs_quota_disable+0x54/0x4c0 btrfs_ioctl+0x206b/0x2600 ? srso_return_thunk+0x5/0x5f ? __do_sys_statfs+0x61/0x70 __x64_sys_ioctl+0x97/0xd0 do_syscall_64+0x95/0x180 ? srso_return_thunk+0x5/0x5f ? reacquire_held_locks+0xd1/0x1f0 ? do_user_addr_fault+0x307/0x8a0 ? srso_return_thunk+0x5/0x5f ? lock_acquire+0xcb/0x2e0 ? srso_return_thunk+0x5/0x5f ? srso_return_thunk+0x5/0x5f ? find_held_lock+0x2b/0x80 ? srso_return_thunk+0x5/0x5f ? lock_release+0xca/0x2a0 ? srso_return_thunk+0x5/0x5f ? do_user_addr_fault+0x35c/0x8a0 ? srso_return_thunk+0x5/0x5f ? trace_hardirqs_off+0x4b/0xc0 ? srso_return_thunk+0x5/0x5f ? lockdep_hardirqs_on_prepare+0xde/0x190 ? srso_return_thunk+0x5/0x5f This happens because when we call rename we already have the inode mutex held, and then we acquire the subvol_sem if we are a subvolume. This makes the dependency inode lock -> subvol sem When we're running data relocation we will preallocate space for the data relocation inode, and we always run the relocation under the ->cleaner_mutex. This now creates the dependency of cleaner_mutex -> inode lock (from the prealloc) -> subvol_sem Qgroup delete is doing this in the opposite order, it is acquiring the subvol_sem and then it is acquiring the cleaner_mutex, which results in this lockdep splat. This deadlock can't happen in reality, because we won't ever rename the data reloc inode, nor is the data reloc inode a subvolume. However this is fairly easy to fix, simply take the cleaner mutex in the case where we are disabling qgroups before we take the subvol_sem. This resolves the lockdep splat. Reviewed-by: Filipe Manana Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 33 ++++++++++++++++++++++++++++++--- fs/btrfs/qgroup.c | 21 ++++++++------------- 2 files changed, 38 insertions(+), 16 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 5a507237c4fa3..12ede85018bf0 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3758,15 +3758,43 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg) goto drop_write; } - down_write(&fs_info->subvol_sem); - switch (sa->cmd) { case BTRFS_QUOTA_CTL_ENABLE: case BTRFS_QUOTA_CTL_ENABLE_SIMPLE_QUOTA: + down_write(&fs_info->subvol_sem); ret = btrfs_quota_enable(fs_info, sa); + up_write(&fs_info->subvol_sem); break; case BTRFS_QUOTA_CTL_DISABLE: + /* + * Lock the cleaner mutex to prevent races with concurrent + * relocation, because relocation may be building backrefs for + * blocks of the quota root while we are deleting the root. This + * is like dropping fs roots of deleted snapshots/subvolumes, we + * need the same protection. + * + * This also prevents races between concurrent tasks trying to + * disable quotas, because we will unlock and relock + * qgroup_ioctl_lock across BTRFS_FS_QUOTA_ENABLED changes. + * + * We take this here because we have the dependency of + * + * inode_lock -> subvol_sem + * + * because of rename. With relocation we can prealloc extents, + * so that makes the dependency chain + * + * cleaner_mutex -> inode_lock -> subvol_sem + * + * so we must take the cleaner_mutex here before we take the + * subvol_sem. The deadlock can't actually happen, but this + * quiets lockdep. + */ + mutex_lock(&fs_info->cleaner_mutex); + down_write(&fs_info->subvol_sem); ret = btrfs_quota_disable(fs_info); + up_write(&fs_info->subvol_sem); + mutex_unlock(&fs_info->cleaner_mutex); break; default: ret = -EINVAL; @@ -3774,7 +3802,6 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg) } kfree(sa); - up_write(&fs_info->subvol_sem); drop_write: mnt_drop_write_file(file); return ret; diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index cf8820ce7aa29..364acc9bbe730 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1342,16 +1342,10 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info) lockdep_assert_held_write(&fs_info->subvol_sem); /* - * Lock the cleaner mutex to prevent races with concurrent relocation, - * because relocation may be building backrefs for blocks of the quota - * root while we are deleting the root. This is like dropping fs roots - * of deleted snapshots/subvolumes, we need the same protection. - * - * This also prevents races between concurrent tasks trying to disable - * quotas, because we will unlock and relock qgroup_ioctl_lock across - * BTRFS_FS_QUOTA_ENABLED changes. + * Relocation will mess with backrefs, so make sure we have the + * cleaner_mutex held to protect us from relocate. */ - mutex_lock(&fs_info->cleaner_mutex); + lockdep_assert_held(&fs_info->cleaner_mutex); mutex_lock(&fs_info->qgroup_ioctl_lock); if (!fs_info->quota_root) @@ -1373,9 +1367,13 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info) clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); btrfs_qgroup_wait_for_completion(fs_info, false); + /* + * We have nothing held here and no trans handle, just return the error + * if there is one. + */ ret = flush_reservations(fs_info); if (ret) - goto out_unlock_cleaner; + return ret; /* * 1 For the root item @@ -1439,9 +1437,6 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info) btrfs_end_transaction(trans); else if (trans) ret = btrfs_commit_transaction(trans); -out_unlock_cleaner: - mutex_unlock(&fs_info->cleaner_mutex); - return ret; } From d7a5c9de99b3a9a43dce49f2084eb69b5f6a9752 Mon Sep 17 00:00:00 2001 From: Derek Foreman Date: Mon, 18 Mar 2024 07:32:07 -0500 Subject: [PATCH 376/606] drm/etnaviv: fix tx clock gating on some GC7000 variants commit 4bce244272513 ("drm/etnaviv: disable tx clock gating for GC7000 rev6203") accidentally applied the fix for i.MX8MN errata ERR050226 to GC2000 instead of GC7000, failing to disable tx clock gating for GC7000 rev 0x6023 as intended. Additional clean-up further propagated this issue, partially breaking the clock gating fixes added for GC7000 rev 6202 in commit 432f51e7deeda ("drm/etnaviv: add clock gating workaround for GC7000 r6202"). Signed-off-by: Derek Foreman Signed-off-by: Lucas Stach --- drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index 734412aae94dd..6f763038c21a5 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -663,8 +663,8 @@ static void etnaviv_gpu_enable_mlcg(struct etnaviv_gpu *gpu) /* Disable TX clock gating on affected core revisions. */ if (etnaviv_is_model_rev(gpu, GC4000, 0x5222) || etnaviv_is_model_rev(gpu, GC2000, 0x5108) || - etnaviv_is_model_rev(gpu, GC2000, 0x6202) || - etnaviv_is_model_rev(gpu, GC2000, 0x6203)) + etnaviv_is_model_rev(gpu, GC7000, 0x6202) || + etnaviv_is_model_rev(gpu, GC7000, 0x6203)) pmc |= VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_TX; /* Disable SE and RA clock gating on affected core revisions. */ From e877d705704d7c8fe17b6b5ebdfdb14b84c207a7 Mon Sep 17 00:00:00 2001 From: Christian Gmeiner Date: Sat, 20 Apr 2024 15:41:58 +0200 Subject: [PATCH 377/606] Revert "drm/etnaviv: Expose a few more chipspecs to userspace" This reverts commit 1dccdba084897443d116508a8ed71e0ac8a031a4. In userspace a different approach was choosen - hwdb. As a result, there is no need for these values. Signed-off-by: Christian Gmeiner Reviewed-by: Tomeu Vizoso Signed-off-by: Lucas Stach --- drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 20 --------------- drivers/gpu/drm/etnaviv/etnaviv_gpu.h | 12 --------- drivers/gpu/drm/etnaviv/etnaviv_hwdb.c | 34 -------------------------- include/uapi/drm/etnaviv_drm.h | 5 ---- 4 files changed, 71 deletions(-) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index 6f763038c21a5..a9bf426f69b36 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -164,26 +164,6 @@ int etnaviv_gpu_get_param(struct etnaviv_gpu *gpu, u32 param, u64 *value) *value = gpu->identity.eco_id; break; - case ETNAVIV_PARAM_GPU_NN_CORE_COUNT: - *value = gpu->identity.nn_core_count; - break; - - case ETNAVIV_PARAM_GPU_NN_MAD_PER_CORE: - *value = gpu->identity.nn_mad_per_core; - break; - - case ETNAVIV_PARAM_GPU_TP_CORE_COUNT: - *value = gpu->identity.tp_core_count; - break; - - case ETNAVIV_PARAM_GPU_ON_CHIP_SRAM_SIZE: - *value = gpu->identity.on_chip_sram_size; - break; - - case ETNAVIV_PARAM_GPU_AXI_SRAM_SIZE: - *value = gpu->identity.axi_sram_size; - break; - default: DBG("%s: invalid param: %u", dev_name(gpu->dev), param); return -EINVAL; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h index 7d5e9158e13c1..197e0037732ec 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h @@ -54,18 +54,6 @@ struct etnaviv_chip_identity { /* Number of Neural Network cores. */ u32 nn_core_count; - /* Number of MAD units per Neural Network core. */ - u32 nn_mad_per_core; - - /* Number of Tensor Processing cores. */ - u32 tp_core_count; - - /* Size in bytes of the SRAM inside the NPU. */ - u32 on_chip_sram_size; - - /* Size in bytes of the SRAM across the AXI bus. */ - u32 axi_sram_size; - /* Size of the vertex cache. */ u32 vertex_cache_size; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c b/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c index d8e7334de8cea..8665f2658d51b 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c @@ -17,10 +17,6 @@ static const struct etnaviv_chip_identity etnaviv_chip_identities[] = { .thread_count = 128, .shader_core_count = 1, .nn_core_count = 0, - .nn_mad_per_core = 0, - .tp_core_count = 0, - .on_chip_sram_size = 0, - .axi_sram_size = 0, .vertex_cache_size = 8, .vertex_output_buffer_size = 1024, .pixel_pipes = 1, @@ -52,11 +48,6 @@ static const struct etnaviv_chip_identity etnaviv_chip_identities[] = { .register_max = 64, .thread_count = 256, .shader_core_count = 1, - .nn_core_count = 0, - .nn_mad_per_core = 0, - .tp_core_count = 0, - .on_chip_sram_size = 0, - .axi_sram_size = 0, .vertex_cache_size = 8, .vertex_output_buffer_size = 512, .pixel_pipes = 1, @@ -89,10 +80,6 @@ static const struct etnaviv_chip_identity etnaviv_chip_identities[] = { .thread_count = 512, .shader_core_count = 2, .nn_core_count = 0, - .nn_mad_per_core = 0, - .tp_core_count = 0, - .on_chip_sram_size = 0, - .axi_sram_size = 0, .vertex_cache_size = 16, .vertex_output_buffer_size = 1024, .pixel_pipes = 1, @@ -125,10 +112,6 @@ static const struct etnaviv_chip_identity etnaviv_chip_identities[] = { .thread_count = 512, .shader_core_count = 2, .nn_core_count = 0, - .nn_mad_per_core = 0, - .tp_core_count = 0, - .on_chip_sram_size = 0, - .axi_sram_size = 0, .vertex_cache_size = 16, .vertex_output_buffer_size = 1024, .pixel_pipes = 1, @@ -160,11 +143,6 @@ static const struct etnaviv_chip_identity etnaviv_chip_identities[] = { .register_max = 64, .thread_count = 512, .shader_core_count = 2, - .nn_core_count = 0, - .nn_mad_per_core = 0, - .tp_core_count = 0, - .on_chip_sram_size = 0, - .axi_sram_size = 0, .vertex_cache_size = 16, .vertex_output_buffer_size = 1024, .pixel_pipes = 1, @@ -197,10 +175,6 @@ static const struct etnaviv_chip_identity etnaviv_chip_identities[] = { .thread_count = 1024, .shader_core_count = 4, .nn_core_count = 0, - .nn_mad_per_core = 0, - .tp_core_count = 0, - .on_chip_sram_size = 0, - .axi_sram_size = 0, .vertex_cache_size = 16, .vertex_output_buffer_size = 1024, .pixel_pipes = 2, @@ -233,10 +207,6 @@ static const struct etnaviv_chip_identity etnaviv_chip_identities[] = { .thread_count = 256, .shader_core_count = 1, .nn_core_count = 8, - .nn_mad_per_core = 64, - .tp_core_count = 4, - .on_chip_sram_size = 524288, - .axi_sram_size = 1048576, .vertex_cache_size = 16, .vertex_output_buffer_size = 1024, .pixel_pipes = 1, @@ -269,10 +239,6 @@ static const struct etnaviv_chip_identity etnaviv_chip_identities[] = { .thread_count = 256, .shader_core_count = 1, .nn_core_count = 6, - .nn_mad_per_core = 64, - .tp_core_count = 3, - .on_chip_sram_size = 262144, - .axi_sram_size = 0, .vertex_cache_size = 16, .vertex_output_buffer_size = 1024, .pixel_pipes = 1, diff --git a/include/uapi/drm/etnaviv_drm.h b/include/uapi/drm/etnaviv_drm.h index d87410a8443aa..af024d90453dd 100644 --- a/include/uapi/drm/etnaviv_drm.h +++ b/include/uapi/drm/etnaviv_drm.h @@ -77,11 +77,6 @@ struct drm_etnaviv_timespec { #define ETNAVIV_PARAM_GPU_PRODUCT_ID 0x1c #define ETNAVIV_PARAM_GPU_CUSTOMER_ID 0x1d #define ETNAVIV_PARAM_GPU_ECO_ID 0x1e -#define ETNAVIV_PARAM_GPU_NN_CORE_COUNT 0x1f -#define ETNAVIV_PARAM_GPU_NN_MAD_PER_CORE 0x20 -#define ETNAVIV_PARAM_GPU_TP_CORE_COUNT 0x21 -#define ETNAVIV_PARAM_GPU_ON_CHIP_SRAM_SIZE 0x22 -#define ETNAVIV_PARAM_GPU_AXI_SRAM_SIZE 0x23 #define ETNA_MAX_PIPES 4 From 475747a19316b08e856c666a20503e73d7ed67ed Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Tue, 23 Apr 2024 11:13:02 -0700 Subject: [PATCH 378/606] macsec: Enable devices to advertise whether they update sk_buff md_dst during offloads Cannot know whether a Rx skb missing md_dst is intended for MACsec or not without knowing whether the device is able to update this field during an offload. Assume that an offload to a MACsec device cannot support updating md_dst by default. Capable devices can advertise that they do indicate that an skb is related to a MACsec offloaded packet using the md_dst. Cc: Sabrina Dubroca Cc: stable@vger.kernel.org Fixes: 860ead89b851 ("net/macsec: Add MACsec skb_metadata_dst Rx Data path support") Signed-off-by: Rahul Rameshbabu Reviewed-by: Benjamin Poirier Reviewed-by: Cosmin Ratiu Reviewed-by: Sabrina Dubroca Link: https://lore.kernel.org/r/20240423181319.115860-2-rrameshbabu@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/macsec.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/net/macsec.h b/include/net/macsec.h index dbd22180cc5c3..de216cbc6b059 100644 --- a/include/net/macsec.h +++ b/include/net/macsec.h @@ -321,6 +321,7 @@ struct macsec_context { * for the TX tag * @needed_tailroom: number of bytes reserved at the end of the sk_buff for the * TX tag + * @rx_uses_md_dst: whether MACsec device offload supports sk_buff md_dst */ struct macsec_ops { /* Device wide */ @@ -352,6 +353,7 @@ struct macsec_ops { struct sk_buff *skb); unsigned int needed_headroom; unsigned int needed_tailroom; + bool rx_uses_md_dst; }; void macsec_pn_wrapped(struct macsec_secy *secy, struct macsec_tx_sa *tx_sa); From 6e159fd653d7ebf6290358e0330a0cb8a75cf73b Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Tue, 23 Apr 2024 11:13:03 -0700 Subject: [PATCH 379/606] ethernet: Add helper for assigning packet type when dest address does not match device address Enable reuse of logic in eth_type_trans for determining packet type. Suggested-by: Sabrina Dubroca Cc: stable@vger.kernel.org Signed-off-by: Rahul Rameshbabu Reviewed-by: Sabrina Dubroca Link: https://lore.kernel.org/r/20240423181319.115860-3-rrameshbabu@nvidia.com Signed-off-by: Jakub Kicinski --- include/linux/etherdevice.h | 25 +++++++++++++++++++++++++ net/ethernet/eth.c | 12 +----------- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 224645f17c333..297231854ada5 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -607,6 +607,31 @@ static inline void eth_hw_addr_gen(struct net_device *dev, const u8 *base_addr, eth_hw_addr_set(dev, addr); } +/** + * eth_skb_pkt_type - Assign packet type if destination address does not match + * @skb: Assigned a packet type if address does not match @dev address + * @dev: Network device used to compare packet address against + * + * If the destination MAC address of the packet does not match the network + * device address, assign an appropriate packet type. + */ +static inline void eth_skb_pkt_type(struct sk_buff *skb, + const struct net_device *dev) +{ + const struct ethhdr *eth = eth_hdr(skb); + + if (unlikely(!ether_addr_equal_64bits(eth->h_dest, dev->dev_addr))) { + if (unlikely(is_multicast_ether_addr_64bits(eth->h_dest))) { + if (ether_addr_equal_64bits(eth->h_dest, dev->broadcast)) + skb->pkt_type = PACKET_BROADCAST; + else + skb->pkt_type = PACKET_MULTICAST; + } else { + skb->pkt_type = PACKET_OTHERHOST; + } + } +} + /** * eth_skb_pad - Pad buffer to mininum number of octets for Ethernet frame * @skb: Buffer to pad diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 2edc8b796a4e7..049c3adeb8504 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -164,17 +164,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) eth = (struct ethhdr *)skb->data; skb_pull_inline(skb, ETH_HLEN); - if (unlikely(!ether_addr_equal_64bits(eth->h_dest, - dev->dev_addr))) { - if (unlikely(is_multicast_ether_addr_64bits(eth->h_dest))) { - if (ether_addr_equal_64bits(eth->h_dest, dev->broadcast)) - skb->pkt_type = PACKET_BROADCAST; - else - skb->pkt_type = PACKET_MULTICAST; - } else { - skb->pkt_type = PACKET_OTHERHOST; - } - } + eth_skb_pkt_type(skb, dev); /* * Some variants of DSA tagging don't have an ethertype field From 642c984dd0e37dbaec9f87bd1211e5fac1f142bf Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Tue, 23 Apr 2024 11:13:04 -0700 Subject: [PATCH 380/606] macsec: Detect if Rx skb is macsec-related for offloading devices that update md_dst Can now correctly identify where the packets should be delivered by using md_dst or its absence on devices that provide it. This detection is not possible without device drivers that update md_dst. A fallback pattern should be used for supporting such device drivers. This fallback mode causes multicast messages to be cloned to both the non-macsec and macsec ports, independent of whether the multicast message received was encrypted over MACsec or not. Other non-macsec traffic may also fail to be handled correctly for devices in promiscuous mode. Link: https://lore.kernel.org/netdev/ZULRxX9eIbFiVi7v@hog/ Cc: Sabrina Dubroca Cc: stable@vger.kernel.org Fixes: 860ead89b851 ("net/macsec: Add MACsec skb_metadata_dst Rx Data path support") Signed-off-by: Rahul Rameshbabu Reviewed-by: Benjamin Poirier Reviewed-by: Cosmin Ratiu Reviewed-by: Sabrina Dubroca Link: https://lore.kernel.org/r/20240423181319.115860-4-rrameshbabu@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/macsec.c | 46 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 10 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 0206b84284ab5..ff016c11b4a03 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -999,10 +999,12 @@ static enum rx_handler_result handle_not_macsec(struct sk_buff *skb) struct metadata_dst *md_dst; struct macsec_rxh_data *rxd; struct macsec_dev *macsec; + bool is_macsec_md_dst; rcu_read_lock(); rxd = macsec_data_rcu(skb->dev); md_dst = skb_metadata_dst(skb); + is_macsec_md_dst = md_dst && md_dst->type == METADATA_MACSEC; list_for_each_entry_rcu(macsec, &rxd->secys, secys) { struct sk_buff *nskb; @@ -1013,14 +1015,42 @@ static enum rx_handler_result handle_not_macsec(struct sk_buff *skb) * the SecTAG, so we have to deduce which port to deliver to. */ if (macsec_is_offloaded(macsec) && netif_running(ndev)) { - struct macsec_rx_sc *rx_sc = NULL; + const struct macsec_ops *ops; - if (md_dst && md_dst->type == METADATA_MACSEC) - rx_sc = find_rx_sc(&macsec->secy, md_dst->u.macsec_info.sci); + ops = macsec_get_ops(macsec, NULL); - if (md_dst && md_dst->type == METADATA_MACSEC && !rx_sc) + if (ops->rx_uses_md_dst && !is_macsec_md_dst) continue; + if (is_macsec_md_dst) { + struct macsec_rx_sc *rx_sc; + + /* All drivers that implement MACsec offload + * support using skb metadata destinations must + * indicate that they do so. + */ + DEBUG_NET_WARN_ON_ONCE(!ops->rx_uses_md_dst); + rx_sc = find_rx_sc(&macsec->secy, + md_dst->u.macsec_info.sci); + if (!rx_sc) + continue; + /* device indicated macsec offload occurred */ + skb->dev = ndev; + skb->pkt_type = PACKET_HOST; + eth_skb_pkt_type(skb, ndev); + ret = RX_HANDLER_ANOTHER; + goto out; + } + + /* This datapath is insecure because it is unable to + * enforce isolation of broadcast/multicast traffic and + * unicast traffic with promiscuous mode on the macsec + * netdev. Since the core stack has no mechanism to + * check that the hardware did indeed receive MACsec + * traffic, it is possible that the response handling + * done by the MACsec port was to a plaintext packet. + * This violates the MACsec protocol standard. + */ if (ether_addr_equal_64bits(hdr->h_dest, ndev->dev_addr)) { /* exact match, divert skb to this port */ @@ -1036,14 +1066,10 @@ static enum rx_handler_result handle_not_macsec(struct sk_buff *skb) break; nskb->dev = ndev; - if (ether_addr_equal_64bits(hdr->h_dest, - ndev->broadcast)) - nskb->pkt_type = PACKET_BROADCAST; - else - nskb->pkt_type = PACKET_MULTICAST; + eth_skb_pkt_type(nskb, ndev); __netif_rx(nskb); - } else if (rx_sc || ndev->flags & IFF_PROMISC) { + } else if (ndev->flags & IFF_PROMISC) { skb->dev = ndev; skb->pkt_type = PACKET_HOST; ret = RX_HANDLER_ANOTHER; From 39d26a8f2efcb8b5665fe7d54a7dba306a8f1dff Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Tue, 23 Apr 2024 11:13:05 -0700 Subject: [PATCH 381/606] net/mlx5e: Advertise mlx5 ethernet driver updates sk_buff md_dst for MACsec mlx5 Rx flow steering and CQE handling enable the driver to be able to update an skb's md_dst attribute as MACsec when MACsec traffic arrives when a device is configured for offloading. Advertise this to the core stack to take advantage of this capability. Cc: stable@vger.kernel.org Fixes: b7c9400cbc48 ("net/mlx5e: Implement MACsec Rx data path using MACsec skb_metadata_dst") Signed-off-by: Rahul Rameshbabu Reviewed-by: Benjamin Poirier Reviewed-by: Cosmin Ratiu Reviewed-by: Sabrina Dubroca Link: https://lore.kernel.org/r/20240423181319.115860-5-rrameshbabu@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c index b2cabd6ab86cb..cc9bcc4200324 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -1640,6 +1640,7 @@ static const struct macsec_ops macsec_offload_ops = { .mdo_add_secy = mlx5e_macsec_add_secy, .mdo_upd_secy = mlx5e_macsec_upd_secy, .mdo_del_secy = mlx5e_macsec_del_secy, + .rx_uses_md_dst = true, }; bool mlx5e_macsec_handle_tx_skb(struct mlx5e_macsec *macsec, struct sk_buff *skb) From 4dcd0e83ea1d1df9b2e0174a6d3e795b3477d64e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 23 Apr 2024 19:15:22 +0300 Subject: [PATCH 382/606] net: ti: icssg-prueth: Fix signedness bug in prueth_init_rx_chns() The rx_chn->irq[] array is unsigned int but it should be signed for the error handling to work. Also if k3_udma_glue_rx_get_irq() returns zero then we should return -ENXIO instead of success. Fixes: 128d5874c082 ("net: ti: icssg-prueth: Add ICSSG ethernet driver") Signed-off-by: Dan Carpenter Reviewed-by: Roger Quadros Reviewed-by: MD Danish Anwar Link: https://lore.kernel.org/r/05282415-e7f4-42f3-99f8-32fde8f30936@moroto.mountain Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/icssg/icssg_prueth.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index cf7b73f8f4507..b69af69a1ccd3 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -421,12 +421,14 @@ static int prueth_init_rx_chns(struct prueth_emac *emac, if (!i) fdqring_id = k3_udma_glue_rx_flow_get_fdq_id(rx_chn->rx_chn, i); - rx_chn->irq[i] = k3_udma_glue_rx_get_irq(rx_chn->rx_chn, i); - if (rx_chn->irq[i] <= 0) { - ret = rx_chn->irq[i]; + ret = k3_udma_glue_rx_get_irq(rx_chn->rx_chn, i); + if (ret <= 0) { + if (!ret) + ret = -ENXIO; netdev_err(ndev, "Failed to get rx dma irq"); goto fail; } + rx_chn->irq[i] = ret; } return 0; From 2cc7d150550cc981aceedf008f5459193282425c Mon Sep 17 00:00:00 2001 From: Sindhu Devale Date: Tue, 23 Apr 2024 11:27:17 -0700 Subject: [PATCH 383/606] i40e: Do not use WQ_MEM_RECLAIM flag for workqueue Issue reported by customer during SRIOV testing, call trace: When both i40e and the i40iw driver are loaded, a warning in check_flush_dependency is being triggered. This seems to be because of the i40e driver workqueue is allocated with the WQ_MEM_RECLAIM flag, and the i40iw one is not. Similar error was encountered on ice too and it was fixed by removing the flag. Do the same for i40e too. [Feb 9 09:08] ------------[ cut here ]------------ [ +0.000004] workqueue: WQ_MEM_RECLAIM i40e:i40e_service_task [i40e] is flushing !WQ_MEM_RECLAIM infiniband:0x0 [ +0.000060] WARNING: CPU: 0 PID: 937 at kernel/workqueue.c:2966 check_flush_dependency+0x10b/0x120 [ +0.000007] Modules linked in: snd_seq_dummy snd_hrtimer snd_seq snd_timer snd_seq_device snd soundcore nls_utf8 cifs cifs_arc4 nls_ucs2_utils rdma_cm iw_cm ib_cm cifs_md4 dns_resolver netfs qrtr rfkill sunrpc vfat fat intel_rapl_msr intel_rapl_common irdma intel_uncore_frequency intel_uncore_frequency_common ice ipmi_ssif isst_if_common skx_edac nfit libnvdimm x86_pkg_temp_thermal intel_powerclamp gnss coretemp ib_uverbs rapl intel_cstate ib_core iTCO_wdt iTCO_vendor_support acpi_ipmi mei_me ipmi_si intel_uncore ioatdma i2c_i801 joydev pcspkr mei ipmi_devintf lpc_ich intel_pch_thermal i2c_smbus ipmi_msghandler acpi_power_meter acpi_pad xfs libcrc32c ast sd_mod drm_shmem_helper t10_pi drm_kms_helper sg ixgbe drm i40e ahci crct10dif_pclmul libahci crc32_pclmul igb crc32c_intel libata ghash_clmulni_intel i2c_algo_bit mdio dca wmi dm_mirror dm_region_hash dm_log dm_mod fuse [ +0.000050] CPU: 0 PID: 937 Comm: kworker/0:3 Kdump: loaded Not tainted 6.8.0-rc2-Feb-net_dev-Qiueue-00279-gbd43c5687e05 #1 [ +0.000003] Hardware name: Intel Corporation S2600BPB/S2600BPB, BIOS SE5C620.86B.02.01.0013.121520200651 12/15/2020 [ +0.000001] Workqueue: i40e i40e_service_task [i40e] [ +0.000024] RIP: 0010:check_flush_dependency+0x10b/0x120 [ +0.000003] Code: ff 49 8b 54 24 18 48 8d 8b b0 00 00 00 49 89 e8 48 81 c6 b0 00 00 00 48 c7 c7 b0 97 fa 9f c6 05 8a cc 1f 02 01 e8 35 b3 fd ff <0f> 0b e9 10 ff ff ff 80 3d 78 cc 1f 02 00 75 94 e9 46 ff ff ff 90 [ +0.000002] RSP: 0018:ffffbd294976bcf8 EFLAGS: 00010282 [ +0.000002] RAX: 0000000000000000 RBX: ffff94d4c483c000 RCX: 0000000000000027 [ +0.000001] RDX: ffff94d47f620bc8 RSI: 0000000000000001 RDI: ffff94d47f620bc0 [ +0.000001] RBP: 0000000000000000 R08: 0000000000000000 R09: 00000000ffff7fff [ +0.000001] R10: ffffbd294976bb98 R11: ffffffffa0be65e8 R12: ffff94c5451ea180 [ +0.000001] R13: ffff94c5ab5e8000 R14: ffff94c5c20b6e05 R15: ffff94c5f1330ab0 [ +0.000001] FS: 0000000000000000(0000) GS:ffff94d47f600000(0000) knlGS:0000000000000000 [ +0.000002] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ +0.000001] CR2: 00007f9e6f1fca70 CR3: 0000000038e20004 CR4: 00000000007706f0 [ +0.000000] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ +0.000001] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ +0.000001] PKRU: 55555554 [ +0.000001] Call Trace: [ +0.000001] [ +0.000002] ? __warn+0x80/0x130 [ +0.000003] ? check_flush_dependency+0x10b/0x120 [ +0.000002] ? report_bug+0x195/0x1a0 [ +0.000005] ? handle_bug+0x3c/0x70 [ +0.000003] ? exc_invalid_op+0x14/0x70 [ +0.000002] ? asm_exc_invalid_op+0x16/0x20 [ +0.000006] ? check_flush_dependency+0x10b/0x120 [ +0.000002] ? check_flush_dependency+0x10b/0x120 [ +0.000002] __flush_workqueue+0x126/0x3f0 [ +0.000015] ib_cache_cleanup_one+0x1c/0xe0 [ib_core] [ +0.000056] __ib_unregister_device+0x6a/0xb0 [ib_core] [ +0.000023] ib_unregister_device_and_put+0x34/0x50 [ib_core] [ +0.000020] i40iw_close+0x4b/0x90 [irdma] [ +0.000022] i40e_notify_client_of_netdev_close+0x54/0xc0 [i40e] [ +0.000035] i40e_service_task+0x126/0x190 [i40e] [ +0.000024] process_one_work+0x174/0x340 [ +0.000003] worker_thread+0x27e/0x390 [ +0.000001] ? __pfx_worker_thread+0x10/0x10 [ +0.000002] kthread+0xdf/0x110 [ +0.000002] ? __pfx_kthread+0x10/0x10 [ +0.000002] ret_from_fork+0x2d/0x50 [ +0.000003] ? __pfx_kthread+0x10/0x10 [ +0.000001] ret_from_fork_asm+0x1b/0x30 [ +0.000004] [ +0.000001] ---[ end trace 0000000000000000 ]--- Fixes: 4d5957cbdecd ("i40e: remove WQ_UNBOUND and the task limit of our workqueue") Signed-off-by: Sindhu Devale Reviewed-by: Arkadiusz Kubalewski Reviewed-by: Mateusz Polchlopek Signed-off-by: Aleksandr Loktionov Tested-by: Robert Ganzynkowicz Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20240423182723.740401-2-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 48b9ddb2b1b38..1792491d8d2dd 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -16650,7 +16650,7 @@ static int __init i40e_init_module(void) * since we need to be able to guarantee forward progress even under * memory pressure. */ - i40e_wq = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, i40e_driver_name); + i40e_wq = alloc_workqueue("%s", 0, 0, i40e_driver_name); if (!i40e_wq) { pr_err("%s: Failed to create workqueue\n", i40e_driver_name); return -ENOMEM; From ef3c313119ea448c22da10366faa26b5b4b1a18e Mon Sep 17 00:00:00 2001 From: Erwan Velu Date: Tue, 23 Apr 2024 11:27:18 -0700 Subject: [PATCH 384/606] i40e: Report MFS in decimal base instead of hex If the MFS is set below the default (0x2600), a warning message is reported like the following : MFS for port 1 has been set below the default: 600 This message is a bit confusing as the number shown here (600) is in fact an hexa number: 0x600 = 1536 Without any explicit "0x" prefix, this message is read like the MFS is set to 600 bytes. MFS, as per MTUs, are usually expressed in decimal base. This commit reports both current and default MFS values in decimal so it's less confusing for end-users. A typical warning message looks like the following : MFS for port 1 (1536) has been set below the default (9728) Signed-off-by: Erwan Velu Reviewed-by: Simon Horman Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen Fixes: 3a2c6ced90e1 ("i40e: Add a check to see if MFS is set") Link: https://lore.kernel.org/r/20240423182723.740401-3-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/i40e/i40e_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 1792491d8d2dd..ffb9f9f15c523 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -16107,8 +16107,8 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) val = FIELD_GET(I40E_PRTGL_SAH_MFS_MASK, rd32(&pf->hw, I40E_PRTGL_SAH)); if (val < MAX_FRAME_SIZE_DEFAULT) - dev_warn(&pdev->dev, "MFS for port %x has been set below the default: %x\n", - pf->hw.port, val); + dev_warn(&pdev->dev, "MFS for port %x (%d) has been set below the default (%d)\n", + pf->hw.port, val, MAX_FRAME_SIZE_DEFAULT); /* Add a filter to drop all Flow control frames from any VSI from being * transmitted. By doing so we stop a malicious VF from sending out From 54976cf58d6168b8d15cebb395069f23b2f34b31 Mon Sep 17 00:00:00 2001 From: Sudheer Mogilappagari Date: Tue, 23 Apr 2024 11:27:19 -0700 Subject: [PATCH 385/606] iavf: Fix TC config comparison with existing adapter TC config Same number of TCs doesn't imply that underlying TC configs are same. The config could be different due to difference in number of queues in each TC. Add utility function to determine if TC configs are same. Fixes: d5b33d024496 ("i40evf: add ndo_setup_tc callback to i40evf") Signed-off-by: Sudheer Mogilappagari Tested-by: Mineri Bhange (A Contingent Worker at Intel) Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20240423182723.740401-4-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/iavf/iavf_main.c | 30 ++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index ef2440f3abf8b..166832a4213a2 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -3502,6 +3502,34 @@ static void iavf_del_all_cloud_filters(struct iavf_adapter *adapter) spin_unlock_bh(&adapter->cloud_filter_list_lock); } +/** + * iavf_is_tc_config_same - Compare the mqprio TC config with the + * TC config already configured on this adapter. + * @adapter: board private structure + * @mqprio_qopt: TC config received from kernel. + * + * This function compares the TC config received from the kernel + * with the config already configured on the adapter. + * + * Return: True if configuration is same, false otherwise. + **/ +static bool iavf_is_tc_config_same(struct iavf_adapter *adapter, + struct tc_mqprio_qopt *mqprio_qopt) +{ + struct virtchnl_channel_info *ch = &adapter->ch_config.ch_info[0]; + int i; + + if (adapter->num_tc != mqprio_qopt->num_tc) + return false; + + for (i = 0; i < adapter->num_tc; i++) { + if (ch[i].count != mqprio_qopt->count[i] || + ch[i].offset != mqprio_qopt->offset[i]) + return false; + } + return true; +} + /** * __iavf_setup_tc - configure multiple traffic classes * @netdev: network interface device structure @@ -3559,7 +3587,7 @@ static int __iavf_setup_tc(struct net_device *netdev, void *type_data) if (ret) return ret; /* Return if same TC config is requested */ - if (adapter->num_tc == num_tc) + if (iavf_is_tc_config_same(adapter, &mqprio_qopt->qopt)) return 0; adapter->num_tc = num_tc; From 96fdd1f6b4ed72a741fb0eb705c0e13049b8721f Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Tue, 23 Apr 2024 11:27:20 -0700 Subject: [PATCH 386/606] ice: fix LAG and VF lock dependency in ice_reset_vf() 9f74a3dfcf83 ("ice: Fix VF Reset paths when interface in a failed over aggregate"), the ice driver has acquired the LAG mutex in ice_reset_vf(). The commit placed this lock acquisition just prior to the acquisition of the VF configuration lock. If ice_reset_vf() acquires the configuration lock via the ICE_VF_RESET_LOCK flag, this could deadlock with ice_vc_cfg_qs_msg() because it always acquires the locks in the order of the VF configuration lock and then the LAG mutex. Lockdep reports this violation almost immediately on creating and then removing 2 VF: ====================================================== WARNING: possible circular locking dependency detected 6.8.0-rc6 #54 Tainted: G W O ------------------------------------------------------ kworker/60:3/6771 is trying to acquire lock: ff40d43e099380a0 (&vf->cfg_lock){+.+.}-{3:3}, at: ice_reset_vf+0x22f/0x4d0 [ice] but task is already holding lock: ff40d43ea1961210 (&pf->lag_mutex){+.+.}-{3:3}, at: ice_reset_vf+0xb7/0x4d0 [ice] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&pf->lag_mutex){+.+.}-{3:3}: __lock_acquire+0x4f8/0xb40 lock_acquire+0xd4/0x2d0 __mutex_lock+0x9b/0xbf0 ice_vc_cfg_qs_msg+0x45/0x690 [ice] ice_vc_process_vf_msg+0x4f5/0x870 [ice] __ice_clean_ctrlq+0x2b5/0x600 [ice] ice_service_task+0x2c9/0x480 [ice] process_one_work+0x1e9/0x4d0 worker_thread+0x1e1/0x3d0 kthread+0x104/0x140 ret_from_fork+0x31/0x50 ret_from_fork_asm+0x1b/0x30 -> #0 (&vf->cfg_lock){+.+.}-{3:3}: check_prev_add+0xe2/0xc50 validate_chain+0x558/0x800 __lock_acquire+0x4f8/0xb40 lock_acquire+0xd4/0x2d0 __mutex_lock+0x9b/0xbf0 ice_reset_vf+0x22f/0x4d0 [ice] ice_process_vflr_event+0x98/0xd0 [ice] ice_service_task+0x1cc/0x480 [ice] process_one_work+0x1e9/0x4d0 worker_thread+0x1e1/0x3d0 kthread+0x104/0x140 ret_from_fork+0x31/0x50 ret_from_fork_asm+0x1b/0x30 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&pf->lag_mutex); lock(&vf->cfg_lock); lock(&pf->lag_mutex); lock(&vf->cfg_lock); *** DEADLOCK *** 4 locks held by kworker/60:3/6771: #0: ff40d43e05428b38 ((wq_completion)ice){+.+.}-{0:0}, at: process_one_work+0x176/0x4d0 #1: ff50d06e05197e58 ((work_completion)(&pf->serv_task)){+.+.}-{0:0}, at: process_one_work+0x176/0x4d0 #2: ff40d43ea1960e50 (&pf->vfs.table_lock){+.+.}-{3:3}, at: ice_process_vflr_event+0x48/0xd0 [ice] #3: ff40d43ea1961210 (&pf->lag_mutex){+.+.}-{3:3}, at: ice_reset_vf+0xb7/0x4d0 [ice] stack backtrace: CPU: 60 PID: 6771 Comm: kworker/60:3 Tainted: G W O 6.8.0-rc6 #54 Hardware name: Workqueue: ice ice_service_task [ice] Call Trace: dump_stack_lvl+0x4a/0x80 check_noncircular+0x12d/0x150 check_prev_add+0xe2/0xc50 ? save_trace+0x59/0x230 ? add_chain_cache+0x109/0x450 validate_chain+0x558/0x800 __lock_acquire+0x4f8/0xb40 ? lockdep_hardirqs_on+0x7d/0x100 lock_acquire+0xd4/0x2d0 ? ice_reset_vf+0x22f/0x4d0 [ice] ? lock_is_held_type+0xc7/0x120 __mutex_lock+0x9b/0xbf0 ? ice_reset_vf+0x22f/0x4d0 [ice] ? ice_reset_vf+0x22f/0x4d0 [ice] ? rcu_is_watching+0x11/0x50 ? ice_reset_vf+0x22f/0x4d0 [ice] ice_reset_vf+0x22f/0x4d0 [ice] ? process_one_work+0x176/0x4d0 ice_process_vflr_event+0x98/0xd0 [ice] ice_service_task+0x1cc/0x480 [ice] process_one_work+0x1e9/0x4d0 worker_thread+0x1e1/0x3d0 ? __pfx_worker_thread+0x10/0x10 kthread+0x104/0x140 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x31/0x50 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1b/0x30 To avoid deadlock, we must acquire the LAG mutex only after acquiring the VF configuration lock. Fix the ice_reset_vf() to acquire the LAG mutex only after we either acquire or check that the VF configuration lock is held. Fixes: 9f74a3dfcf83 ("ice: Fix VF Reset paths when interface in a failed over aggregate") Signed-off-by: Jacob Keller Reviewed-by: Dave Ertman Reviewed-by: Mateusz Polchlopek Tested-by: Przemek Kitszel Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20240423182723.740401-5-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_vf_lib.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c index 21d26e19338a6..d10a4be965b59 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c @@ -856,6 +856,11 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags) return 0; } + if (flags & ICE_VF_RESET_LOCK) + mutex_lock(&vf->cfg_lock); + else + lockdep_assert_held(&vf->cfg_lock); + lag = pf->lag; mutex_lock(&pf->lag_mutex); if (lag && lag->bonded && lag->primary) { @@ -867,11 +872,6 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags) act_prt = ICE_LAG_INVALID_PORT; } - if (flags & ICE_VF_RESET_LOCK) - mutex_lock(&vf->cfg_lock); - else - lockdep_assert_held(&vf->cfg_lock); - if (ice_is_vf_disabled(vf)) { vsi = ice_get_vf_vsi(vf); if (!vsi) { @@ -956,14 +956,14 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags) ice_mbx_clear_malvf(&vf->mbx_info); out_unlock: - if (flags & ICE_VF_RESET_LOCK) - mutex_unlock(&vf->cfg_lock); - if (lag && lag->bonded && lag->primary && act_prt != ICE_LAG_INVALID_PORT) ice_lag_move_vf_nodes_cfg(lag, pri_prt, act_prt); mutex_unlock(&pf->lag_mutex); + if (flags & ICE_VF_RESET_LOCK) + mutex_unlock(&vf->cfg_lock); + return err; } From 1b9e743e923b256e353a9a644195372285e5a6c0 Mon Sep 17 00:00:00 2001 From: Jason Reeder Date: Wed, 24 Apr 2024 12:46:26 +0530 Subject: [PATCH 387/606] net: ethernet: ti: am65-cpts: Fix PTPv1 message type on TX packets The CPTS, by design, captures the messageType (Sync, Delay_Req, etc.) field from the second nibble of the PTP header which is defined in the PTPv2 (1588-2008) specification. In the PTPv1 (1588-2002) specification the first two bytes of the PTP header are defined as the versionType which is always 0x0001. This means that any PTPv1 packets that are tagged for TX timestamping by the CPTS will have their messageType set to 0x0 which corresponds to a Sync message type. This causes issues when a PTPv1 stack is expecting a Delay_Req (messageType: 0x1) timestamp that never appears. Fix this by checking if the ptp_class of the timestamped TX packet is PTP_CLASS_V1 and then matching the PTP sequence ID to the stored sequence ID in the skb->cb data structure. If the sequence IDs match and the packet is of type PTPv1 then there is a chance that the messageType has been incorrectly stored by the CPTS so overwrite the messageType stored by the CPTS with the messageType from the skb->cb data structure. This allows the PTPv1 stack to receive TX timestamps for Delay_Req packets which are necessary to lock onto a PTP Leader. Signed-off-by: Jason Reeder Signed-off-by: Ravi Gunasekaran Tested-by: Ed Trexel Fixes: f6bd59526ca5 ("net: ethernet: ti: introduce am654 common platform time sync driver") Link: https://lore.kernel.org/r/20240424071626.32558-1-r-gunasekaran@ti.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/am65-cpts.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/ti/am65-cpts.c b/drivers/net/ethernet/ti/am65-cpts.c index c66618d91c28f..f89716b1cfb64 100644 --- a/drivers/net/ethernet/ti/am65-cpts.c +++ b/drivers/net/ethernet/ti/am65-cpts.c @@ -784,6 +784,11 @@ static bool am65_cpts_match_tx_ts(struct am65_cpts *cpts, struct am65_cpts_skb_cb_data *skb_cb = (struct am65_cpts_skb_cb_data *)skb->cb; + if ((ptp_classify_raw(skb) & PTP_CLASS_V1) && + ((mtype_seqid & AM65_CPTS_EVENT_1_SEQUENCE_ID_MASK) == + (skb_cb->skb_mtype_seqid & AM65_CPTS_EVENT_1_SEQUENCE_ID_MASK))) + mtype_seqid = skb_cb->skb_mtype_seqid; + if (mtype_seqid == skb_cb->skb_mtype_seqid) { u64 ns = event->timestamp; From 6e965eba43e9724f3e603d7b7cc83e53b23d155e Mon Sep 17 00:00:00 2001 From: Su Hui Date: Wed, 24 Apr 2024 10:27:25 +0800 Subject: [PATCH 388/606] octeontx2-af: fix the double free in rvu_npc_freemem() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clang static checker(scan-build) warning: drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c:line 2184, column 2 Attempt to free released memory. npc_mcam_rsrcs_deinit() has released 'mcam->counters.bmap'. Deleted this redundant kfree() to fix this double free problem. Fixes: dd7842878633 ("octeontx2-af: Add new devlink param to configure maximum usable NIX block LFs") Signed-off-by: Su Hui Reviewed-by: Geetha sowjanya Reviewed-by: Kalesh AP Reviewed-by: Hariprasad Kelam Link: https://lore.kernel.org/r/20240424022724.144587-1-suhui@nfschina.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index be709f83f3318..e8b73b9d75e31 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -2181,7 +2181,6 @@ void rvu_npc_freemem(struct rvu *rvu) kfree(pkind->rsrc.bmap); npc_mcam_rsrcs_deinit(rvu); - kfree(mcam->counters.bmap); if (rvu->kpu_prfl_addr) iounmap(rvu->kpu_prfl_addr); else From 0c81ea5a8e231fa120e3f76aa9ea99fa3950cc59 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 24 Apr 2024 09:45:21 +0200 Subject: [PATCH 389/606] net: ravb: Fix registered interrupt names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As interrupts are now requested from ravb_probe(), before calling register_netdev(), ndev->name still contains the template "eth%d", leading to funny names in /proc/interrupts. E.g. on R-Car E3: 89: 0 0 GICv2 93 Level eth%d:ch22:multi 90: 0 3 GICv2 95 Level eth%d:ch24:emac 91: 0 23484 GICv2 71 Level eth%d:ch0:rx_be 92: 0 0 GICv2 72 Level eth%d:ch1:rx_nc 93: 0 13735 GICv2 89 Level eth%d:ch18:tx_be 94: 0 0 GICv2 90 Level eth%d:ch19:tx_nc Worse, on platforms with multiple RAVB instances (e.g. R-Car V4H), all interrupts have similar names. Fix this by using the device name instead, like is done in several other drivers: 89: 0 0 GICv2 93 Level e6800000.ethernet:ch22:multi 90: 0 1 GICv2 95 Level e6800000.ethernet:ch24:emac 91: 0 28578 GICv2 71 Level e6800000.ethernet:ch0:rx_be 92: 0 0 GICv2 72 Level e6800000.ethernet:ch1:rx_nc 93: 0 14044 GICv2 89 Level e6800000.ethernet:ch18:tx_be 94: 0 0 GICv2 90 Level e6800000.ethernet:ch19:tx_nc Rename the local variable dev_name, as it shadows the dev_name() function, and pre-initialize it, to simplify the code. Fixes: 32f012b8c01ca9fd ("net: ravb: Move getting/requesting IRQs in the probe() method") Signed-off-by: Geert Uytterhoeven Reviewed-by: Niklas Söderlund Reviewed-by: Sergey Shtylyov Reviewed-by: Claudiu Beznea Tested-by: Claudiu Beznea # on RZ/G3S Link: https://lore.kernel.org/r/cde67b68adf115b3cf0b44c32334ae00b2fbb321.1713944647.git.geert+renesas@glider.be Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/ravb_main.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index fcb756d77681c..9b1f639f64a10 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -2722,19 +2722,18 @@ static int ravb_setup_irq(struct ravb_private *priv, const char *irq_name, struct platform_device *pdev = priv->pdev; struct net_device *ndev = priv->ndev; struct device *dev = &pdev->dev; - const char *dev_name; + const char *devname = dev_name(dev); unsigned long flags; int error, irq_num; if (irq_name) { - dev_name = devm_kasprintf(dev, GFP_KERNEL, "%s:%s", ndev->name, ch); - if (!dev_name) + devname = devm_kasprintf(dev, GFP_KERNEL, "%s:%s", devname, ch); + if (!devname) return -ENOMEM; irq_num = platform_get_irq_byname(pdev, irq_name); flags = 0; } else { - dev_name = ndev->name; irq_num = platform_get_irq(pdev, 0); flags = IRQF_SHARED; } @@ -2744,9 +2743,9 @@ static int ravb_setup_irq(struct ravb_private *priv, const char *irq_name, if (irq) *irq = irq_num; - error = devm_request_irq(dev, irq_num, handler, flags, dev_name, ndev); + error = devm_request_irq(dev, irq_num, handler, flags, devname, ndev); if (error) - netdev_err(ndev, "cannot request IRQ %s\n", dev_name); + netdev_err(ndev, "cannot request IRQ %s\n", devname); return error; } From 38d7b94e81d068b8d8c8392f421cfd2c3bbfd1a6 Mon Sep 17 00:00:00 2001 From: Arkadiusz Kubalewski Date: Wed, 24 Apr 2024 12:16:36 +0200 Subject: [PATCH 390/606] dpll: fix dpll_pin_on_pin_register() for multiple parent pins In scenario where pin is registered with multiple parent pins via dpll_pin_on_pin_register(..), all belonging to the same dpll device. A second call to dpll_pin_on_pin_unregister(..) would cause a call trace, as it tries to use already released registration resources (due to fix introduced in b446631f355e). In this scenario pin was registered twice, so resources are not yet expected to be release until each registered pin/pin pair is unregistered. Currently, the following crash/call trace is produced when ice driver is removed on the system with installed E810T NIC which includes dpll device: WARNING: CPU: 51 PID: 9155 at drivers/dpll/dpll_core.c:809 dpll_pin_ops+0x20/0x30 RIP: 0010:dpll_pin_ops+0x20/0x30 Call Trace: ? __warn+0x7f/0x130 ? dpll_pin_ops+0x20/0x30 dpll_msg_add_pin_freq+0x37/0x1d0 dpll_cmd_pin_get_one+0x1c0/0x400 ? __nlmsg_put+0x63/0x80 dpll_pin_event_send+0x93/0x140 dpll_pin_on_pin_unregister+0x3f/0x100 ice_dpll_deinit_pins+0xa1/0x230 [ice] ice_remove+0xf1/0x210 [ice] Fix by adding a parent pointer as a cookie when creating a registration, also when searching for it. For the regular pins pass NULL, this allows to create separated registration for each parent the pin is registered with. Fixes: b446631f355e ("dpll: fix dpll_xa_ref_*_del() for multiple registrations") Signed-off-by: Arkadiusz Kubalewski Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240424101636.1491424-1-arkadiusz.kubalewski@intel.com Signed-off-by: Jakub Kicinski --- drivers/dpll/dpll_core.c | 58 +++++++++++++++++++++++----------------- 1 file changed, 33 insertions(+), 25 deletions(-) diff --git a/drivers/dpll/dpll_core.c b/drivers/dpll/dpll_core.c index 64eaca80d736c..d0f6693ca1426 100644 --- a/drivers/dpll/dpll_core.c +++ b/drivers/dpll/dpll_core.c @@ -42,6 +42,7 @@ struct dpll_pin_registration { struct list_head list; const struct dpll_pin_ops *ops; void *priv; + void *cookie; }; struct dpll_device *dpll_device_get_by_id(int id) @@ -54,12 +55,14 @@ struct dpll_device *dpll_device_get_by_id(int id) static struct dpll_pin_registration * dpll_pin_registration_find(struct dpll_pin_ref *ref, - const struct dpll_pin_ops *ops, void *priv) + const struct dpll_pin_ops *ops, void *priv, + void *cookie) { struct dpll_pin_registration *reg; list_for_each_entry(reg, &ref->registration_list, list) { - if (reg->ops == ops && reg->priv == priv) + if (reg->ops == ops && reg->priv == priv && + reg->cookie == cookie) return reg; } return NULL; @@ -67,7 +70,8 @@ dpll_pin_registration_find(struct dpll_pin_ref *ref, static int dpll_xa_ref_pin_add(struct xarray *xa_pins, struct dpll_pin *pin, - const struct dpll_pin_ops *ops, void *priv) + const struct dpll_pin_ops *ops, void *priv, + void *cookie) { struct dpll_pin_registration *reg; struct dpll_pin_ref *ref; @@ -78,7 +82,7 @@ dpll_xa_ref_pin_add(struct xarray *xa_pins, struct dpll_pin *pin, xa_for_each(xa_pins, i, ref) { if (ref->pin != pin) continue; - reg = dpll_pin_registration_find(ref, ops, priv); + reg = dpll_pin_registration_find(ref, ops, priv, cookie); if (reg) { refcount_inc(&ref->refcount); return 0; @@ -111,6 +115,7 @@ dpll_xa_ref_pin_add(struct xarray *xa_pins, struct dpll_pin *pin, } reg->ops = ops; reg->priv = priv; + reg->cookie = cookie; if (ref_exists) refcount_inc(&ref->refcount); list_add_tail(®->list, &ref->registration_list); @@ -119,7 +124,8 @@ dpll_xa_ref_pin_add(struct xarray *xa_pins, struct dpll_pin *pin, } static int dpll_xa_ref_pin_del(struct xarray *xa_pins, struct dpll_pin *pin, - const struct dpll_pin_ops *ops, void *priv) + const struct dpll_pin_ops *ops, void *priv, + void *cookie) { struct dpll_pin_registration *reg; struct dpll_pin_ref *ref; @@ -128,7 +134,7 @@ static int dpll_xa_ref_pin_del(struct xarray *xa_pins, struct dpll_pin *pin, xa_for_each(xa_pins, i, ref) { if (ref->pin != pin) continue; - reg = dpll_pin_registration_find(ref, ops, priv); + reg = dpll_pin_registration_find(ref, ops, priv, cookie); if (WARN_ON(!reg)) return -EINVAL; list_del(®->list); @@ -146,7 +152,7 @@ static int dpll_xa_ref_pin_del(struct xarray *xa_pins, struct dpll_pin *pin, static int dpll_xa_ref_dpll_add(struct xarray *xa_dplls, struct dpll_device *dpll, - const struct dpll_pin_ops *ops, void *priv) + const struct dpll_pin_ops *ops, void *priv, void *cookie) { struct dpll_pin_registration *reg; struct dpll_pin_ref *ref; @@ -157,7 +163,7 @@ dpll_xa_ref_dpll_add(struct xarray *xa_dplls, struct dpll_device *dpll, xa_for_each(xa_dplls, i, ref) { if (ref->dpll != dpll) continue; - reg = dpll_pin_registration_find(ref, ops, priv); + reg = dpll_pin_registration_find(ref, ops, priv, cookie); if (reg) { refcount_inc(&ref->refcount); return 0; @@ -190,6 +196,7 @@ dpll_xa_ref_dpll_add(struct xarray *xa_dplls, struct dpll_device *dpll, } reg->ops = ops; reg->priv = priv; + reg->cookie = cookie; if (ref_exists) refcount_inc(&ref->refcount); list_add_tail(®->list, &ref->registration_list); @@ -199,7 +206,7 @@ dpll_xa_ref_dpll_add(struct xarray *xa_dplls, struct dpll_device *dpll, static void dpll_xa_ref_dpll_del(struct xarray *xa_dplls, struct dpll_device *dpll, - const struct dpll_pin_ops *ops, void *priv) + const struct dpll_pin_ops *ops, void *priv, void *cookie) { struct dpll_pin_registration *reg; struct dpll_pin_ref *ref; @@ -208,7 +215,7 @@ dpll_xa_ref_dpll_del(struct xarray *xa_dplls, struct dpll_device *dpll, xa_for_each(xa_dplls, i, ref) { if (ref->dpll != dpll) continue; - reg = dpll_pin_registration_find(ref, ops, priv); + reg = dpll_pin_registration_find(ref, ops, priv, cookie); if (WARN_ON(!reg)) return; list_del(®->list); @@ -594,14 +601,14 @@ EXPORT_SYMBOL_GPL(dpll_pin_put); static int __dpll_pin_register(struct dpll_device *dpll, struct dpll_pin *pin, - const struct dpll_pin_ops *ops, void *priv) + const struct dpll_pin_ops *ops, void *priv, void *cookie) { int ret; - ret = dpll_xa_ref_pin_add(&dpll->pin_refs, pin, ops, priv); + ret = dpll_xa_ref_pin_add(&dpll->pin_refs, pin, ops, priv, cookie); if (ret) return ret; - ret = dpll_xa_ref_dpll_add(&pin->dpll_refs, dpll, ops, priv); + ret = dpll_xa_ref_dpll_add(&pin->dpll_refs, dpll, ops, priv, cookie); if (ret) goto ref_pin_del; xa_set_mark(&dpll_pin_xa, pin->id, DPLL_REGISTERED); @@ -610,7 +617,7 @@ __dpll_pin_register(struct dpll_device *dpll, struct dpll_pin *pin, return ret; ref_pin_del: - dpll_xa_ref_pin_del(&dpll->pin_refs, pin, ops, priv); + dpll_xa_ref_pin_del(&dpll->pin_refs, pin, ops, priv, cookie); return ret; } @@ -642,7 +649,7 @@ dpll_pin_register(struct dpll_device *dpll, struct dpll_pin *pin, dpll->clock_id == pin->clock_id))) ret = -EINVAL; else - ret = __dpll_pin_register(dpll, pin, ops, priv); + ret = __dpll_pin_register(dpll, pin, ops, priv, NULL); mutex_unlock(&dpll_lock); return ret; @@ -651,11 +658,11 @@ EXPORT_SYMBOL_GPL(dpll_pin_register); static void __dpll_pin_unregister(struct dpll_device *dpll, struct dpll_pin *pin, - const struct dpll_pin_ops *ops, void *priv) + const struct dpll_pin_ops *ops, void *priv, void *cookie) { ASSERT_DPLL_PIN_REGISTERED(pin); - dpll_xa_ref_pin_del(&dpll->pin_refs, pin, ops, priv); - dpll_xa_ref_dpll_del(&pin->dpll_refs, dpll, ops, priv); + dpll_xa_ref_pin_del(&dpll->pin_refs, pin, ops, priv, cookie); + dpll_xa_ref_dpll_del(&pin->dpll_refs, dpll, ops, priv, cookie); if (xa_empty(&pin->dpll_refs)) xa_clear_mark(&dpll_pin_xa, pin->id, DPLL_REGISTERED); } @@ -680,7 +687,7 @@ void dpll_pin_unregister(struct dpll_device *dpll, struct dpll_pin *pin, mutex_lock(&dpll_lock); dpll_pin_delete_ntf(pin); - __dpll_pin_unregister(dpll, pin, ops, priv); + __dpll_pin_unregister(dpll, pin, ops, priv, NULL); mutex_unlock(&dpll_lock); } EXPORT_SYMBOL_GPL(dpll_pin_unregister); @@ -716,12 +723,12 @@ int dpll_pin_on_pin_register(struct dpll_pin *parent, struct dpll_pin *pin, return -EINVAL; mutex_lock(&dpll_lock); - ret = dpll_xa_ref_pin_add(&pin->parent_refs, parent, ops, priv); + ret = dpll_xa_ref_pin_add(&pin->parent_refs, parent, ops, priv, pin); if (ret) goto unlock; refcount_inc(&pin->refcount); xa_for_each(&parent->dpll_refs, i, ref) { - ret = __dpll_pin_register(ref->dpll, pin, ops, priv); + ret = __dpll_pin_register(ref->dpll, pin, ops, priv, parent); if (ret) { stop = i; goto dpll_unregister; @@ -735,11 +742,12 @@ int dpll_pin_on_pin_register(struct dpll_pin *parent, struct dpll_pin *pin, dpll_unregister: xa_for_each(&parent->dpll_refs, i, ref) if (i < stop) { - __dpll_pin_unregister(ref->dpll, pin, ops, priv); + __dpll_pin_unregister(ref->dpll, pin, ops, priv, + parent); dpll_pin_delete_ntf(pin); } refcount_dec(&pin->refcount); - dpll_xa_ref_pin_del(&pin->parent_refs, parent, ops, priv); + dpll_xa_ref_pin_del(&pin->parent_refs, parent, ops, priv, pin); unlock: mutex_unlock(&dpll_lock); return ret; @@ -764,10 +772,10 @@ void dpll_pin_on_pin_unregister(struct dpll_pin *parent, struct dpll_pin *pin, mutex_lock(&dpll_lock); dpll_pin_delete_ntf(pin); - dpll_xa_ref_pin_del(&pin->parent_refs, parent, ops, priv); + dpll_xa_ref_pin_del(&pin->parent_refs, parent, ops, priv, pin); refcount_dec(&pin->refcount); xa_for_each(&pin->dpll_refs, i, ref) - __dpll_pin_unregister(ref->dpll, pin, ops, priv); + __dpll_pin_unregister(ref->dpll, pin, ops, priv, parent); mutex_unlock(&dpll_lock); } EXPORT_SYMBOL_GPL(dpll_pin_on_pin_unregister); From 0844370f8945086eb9335739d10205dcea8d707b Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 24 Apr 2024 12:25:47 +0200 Subject: [PATCH 391/606] tls: fix lockless read of strp->msg_ready in ->poll tls_sk_poll is called without locking the socket, and needs to read strp->msg_ready (via tls_strp_msg_ready). Convert msg_ready to a bool and use READ_ONCE/WRITE_ONCE where needed. The remaining reads are only performed when the socket is locked. Fixes: 121dca784fc0 ("tls: suppress wakeups unless we have a full record") Signed-off-by: Sabrina Dubroca Link: https://lore.kernel.org/r/0b7ee062319037cf86af6b317b3d72f7bfcd2e97.1713797701.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski --- include/net/tls.h | 3 ++- net/tls/tls.h | 2 +- net/tls/tls_strp.c | 6 +++--- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/include/net/tls.h b/include/net/tls.h index 340ad43971e47..33f657d3c0510 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -111,7 +111,8 @@ struct tls_strparser { u32 stopped : 1; u32 copy_mode : 1; u32 mixed_decrypted : 1; - u32 msg_ready : 1; + + bool msg_ready; struct strp_msg stm; diff --git a/net/tls/tls.h b/net/tls/tls.h index 762f424ff2d59..e5e47452308ab 100644 --- a/net/tls/tls.h +++ b/net/tls/tls.h @@ -215,7 +215,7 @@ static inline struct sk_buff *tls_strp_msg(struct tls_sw_context_rx *ctx) static inline bool tls_strp_msg_ready(struct tls_sw_context_rx *ctx) { - return ctx->strp.msg_ready; + return READ_ONCE(ctx->strp.msg_ready); } static inline bool tls_strp_msg_mixed_decrypted(struct tls_sw_context_rx *ctx) diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c index ca1e0e198ceb4..5df08d848b5c9 100644 --- a/net/tls/tls_strp.c +++ b/net/tls/tls_strp.c @@ -360,7 +360,7 @@ static int tls_strp_copyin(read_descriptor_t *desc, struct sk_buff *in_skb, if (strp->stm.full_len && strp->stm.full_len == skb->len) { desc->count = 0; - strp->msg_ready = 1; + WRITE_ONCE(strp->msg_ready, 1); tls_rx_msg_ready(strp); } @@ -528,7 +528,7 @@ static int tls_strp_read_sock(struct tls_strparser *strp) if (!tls_strp_check_queue_ok(strp)) return tls_strp_read_copy(strp, false); - strp->msg_ready = 1; + WRITE_ONCE(strp->msg_ready, 1); tls_rx_msg_ready(strp); return 0; @@ -580,7 +580,7 @@ void tls_strp_msg_done(struct tls_strparser *strp) else tls_strp_flush_anchor_copy(strp); - strp->msg_ready = 0; + WRITE_ONCE(strp->msg_ready, 0); memset(&strp->stm, 0, sizeof(strp->stm)); tls_strp_check_rcv(strp); From e3eb7dd47bd4806f00e104eb6da092c435f9fb21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20M=C3=BCnster?= Date: Wed, 24 Apr 2024 15:51:52 +0200 Subject: [PATCH 392/606] net: b44: set pause params only when interface is up MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit b44_free_rings() accesses b44::rx_buffers (and ::tx_buffers) unconditionally, but b44::rx_buffers is only valid when the device is up (they get allocated in b44_open(), and deallocated again in b44_close()), any other time these are just a NULL pointers. So if you try to change the pause params while the network interface is disabled/administratively down, everything explodes (which likely netifd tries to do). Link: https://github.com/openwrt/openwrt/issues/13789 Fixes: 1da177e4c3f4 (Linux-2.6.12-rc2) Cc: stable@vger.kernel.org Reported-by: Peter Münster Suggested-by: Jonas Gorski Signed-off-by: Vaclav Svoboda Tested-by: Peter Münster Reviewed-by: Andrew Lunn Signed-off-by: Peter Münster Reviewed-by: Michael Chan Link: https://lore.kernel.org/r/87y192oolj.fsf@a16n.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/b44.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index 3e4fb3c3e8342..1be6d14030bcf 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -2009,12 +2009,14 @@ static int b44_set_pauseparam(struct net_device *dev, bp->flags |= B44_FLAG_TX_PAUSE; else bp->flags &= ~B44_FLAG_TX_PAUSE; - if (bp->flags & B44_FLAG_PAUSE_AUTO) { - b44_halt(bp); - b44_init_rings(bp); - b44_init_hw(bp, B44_FULL_RESET); - } else { - __b44_set_flow_ctrl(bp, bp->flags); + if (netif_running(dev)) { + if (bp->flags & B44_FLAG_PAUSE_AUTO) { + b44_halt(bp); + b44_init_rings(bp); + b44_init_hw(bp, B44_FULL_RESET); + } else { + __b44_set_flow_ctrl(bp, bp->flags); + } } spin_unlock_irq(&bp->lock); From ded103c7eb23753f22597afa500a7c1ad34116ba Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 22 Apr 2024 11:06:44 +0200 Subject: [PATCH 393/606] kbuild: rust: force `alloc` extern to allow "empty" Rust files If one attempts to build an essentially empty file somewhere in the kernel tree, it leads to a build error because the compiler does not recognize the `new_uninit` unstable feature: error[E0635]: unknown feature `new_uninit` --> :1:9 | 1 | feature(new_uninit) | ^^^^^^^^^^ The reason is that we pass `-Zcrate-attr='feature(new_uninit)'` (together with `-Zallow-features=new_uninit`) to let non-`rust/` code use that unstable feature. However, the compiler only recognizes the feature if the `alloc` crate is resolved (the feature is an `alloc` one). `--extern alloc`, which we pass, is not enough to resolve the crate. Introducing a reference like `use alloc;` or `extern crate alloc;` solves the issue, thus this is not seen in normal files. For instance, `use`ing the `kernel` prelude introduces such a reference, since `alloc` is used inside. While normal use of the build system is not impacted by this, it can still be fairly confusing for kernel developers [1], thus use the unstable `force` option of `--extern` [2] (added in Rust 1.71 [3]) to force the compiler to resolve `alloc`. This new unstable feature is only needed meanwhile we use the other unstable feature, since then we will not need `-Zcrate-attr`. Cc: stable@vger.kernel.org # v6.6+ Reported-by: Daniel Almeida Reported-by: Julian Stecklina Closes: https://rust-for-linux.zulipchat.com/#narrow/stream/288089-General/topic/x/near/424096982 [1] Fixes: 2f7ab1267dc9 ("Kbuild: add Rust support") Link: https://github.com/rust-lang/rust/issues/111302 [2] Link: https://github.com/rust-lang/rust/pull/109421 [3] Reviewed-by: Alice Ryhl Reviewed-by: Gary Guo Link: https://lore.kernel.org/r/20240422090644.525520-1-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- scripts/Makefile.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index baf86c0880b6d..533a7799fdfe6 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -273,7 +273,7 @@ rust_common_cmd = \ -Zallow-features=$(rust_allowed_features) \ -Zcrate-attr=no_std \ -Zcrate-attr='feature($(rust_allowed_features))' \ - --extern alloc --extern kernel \ + -Zunstable-options --extern force:alloc --extern kernel \ --crate-type rlib -L $(objtree)/rust/ \ --crate-name $(basename $(notdir $@)) \ --sysroot=/dev/null \ From 19843452dca40e28d6d3f4793d998b681d505c7f Mon Sep 17 00:00:00 2001 From: Aswin Unnikrishnan Date: Fri, 19 Apr 2024 21:50:13 +0000 Subject: [PATCH 394/606] rust: remove `params` from `module` macro example Remove argument `params` from the `module` macro example, because the macro does not currently support module parameters since it was not sent with the initial merge. Signed-off-by: Aswin Unnikrishnan Reviewed-by: Alice Ryhl Cc: stable@vger.kernel.org Fixes: 1fbde52bde73 ("rust: add `macros` crate") Link: https://lore.kernel.org/r/20240419215015.157258-1-aswinunni01@gmail.com [ Reworded slightly. ] Signed-off-by: Miguel Ojeda --- rust/macros/lib.rs | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/rust/macros/lib.rs b/rust/macros/lib.rs index f489f31573832..520eae5fd7928 100644 --- a/rust/macros/lib.rs +++ b/rust/macros/lib.rs @@ -35,18 +35,6 @@ use proc_macro::TokenStream; /// author: "Rust for Linux Contributors", /// description: "My very own kernel module!", /// license: "GPL", -/// params: { -/// my_i32: i32 { -/// default: 42, -/// permissions: 0o000, -/// description: "Example of i32", -/// }, -/// writeable_i32: i32 { -/// default: 42, -/// permissions: 0o644, -/// description: "Example of i32", -/// }, -/// }, /// } /// /// struct MyModule; From 1971d13ffa84a551d29a81fdf5b5ec5be166ac83 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 24 Apr 2024 10:04:43 -0700 Subject: [PATCH 395/606] af_unix: Suppress false-positive lockdep splat for spin_lock() in __unix_gc(). syzbot reported a lockdep splat regarding unix_gc_lock and unix_state_lock(). One is called from recvmsg() for a connected socket, and another is called from GC for TCP_LISTEN socket. So, the splat is false-positive. Let's add a dedicated lock class for the latter to suppress the splat. Note that this change is not necessary for net-next.git as the issue is only applied to the old GC impl. [0]: WARNING: possible circular locking dependency detected 6.9.0-rc5-syzkaller-00007-g4d2008430ce8 #0 Not tainted ----------------------------------------------------- kworker/u8:1/11 is trying to acquire lock: ffff88807cea4e70 (&u->lock){+.+.}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline] ffff88807cea4e70 (&u->lock){+.+.}-{2:2}, at: __unix_gc+0x40e/0xf70 net/unix/garbage.c:302 but task is already holding lock: ffffffff8f6ab638 (unix_gc_lock){+.+.}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline] ffffffff8f6ab638 (unix_gc_lock){+.+.}-{2:2}, at: __unix_gc+0x117/0xf70 net/unix/garbage.c:261 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (unix_gc_lock){+.+.}-{2:2}: lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5754 __raw_spin_lock include/linux/spinlock_api_smp.h:133 [inline] _raw_spin_lock+0x2e/0x40 kernel/locking/spinlock.c:154 spin_lock include/linux/spinlock.h:351 [inline] unix_notinflight+0x13d/0x390 net/unix/garbage.c:140 unix_detach_fds net/unix/af_unix.c:1819 [inline] unix_destruct_scm+0x221/0x350 net/unix/af_unix.c:1876 skb_release_head_state+0x100/0x250 net/core/skbuff.c:1188 skb_release_all net/core/skbuff.c:1200 [inline] __kfree_skb net/core/skbuff.c:1216 [inline] kfree_skb_reason+0x16d/0x3b0 net/core/skbuff.c:1252 kfree_skb include/linux/skbuff.h:1262 [inline] manage_oob net/unix/af_unix.c:2672 [inline] unix_stream_read_generic+0x1125/0x2700 net/unix/af_unix.c:2749 unix_stream_splice_read+0x239/0x320 net/unix/af_unix.c:2981 do_splice_read fs/splice.c:985 [inline] splice_file_to_pipe+0x299/0x500 fs/splice.c:1295 do_splice+0xf2d/0x1880 fs/splice.c:1379 __do_splice fs/splice.c:1436 [inline] __do_sys_splice fs/splice.c:1652 [inline] __se_sys_splice+0x331/0x4a0 fs/splice.c:1634 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf5/0x240 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f -> #0 (&u->lock){+.+.}-{2:2}: check_prev_add kernel/locking/lockdep.c:3134 [inline] check_prevs_add kernel/locking/lockdep.c:3253 [inline] validate_chain+0x18cb/0x58e0 kernel/locking/lockdep.c:3869 __lock_acquire+0x1346/0x1fd0 kernel/locking/lockdep.c:5137 lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5754 __raw_spin_lock include/linux/spinlock_api_smp.h:133 [inline] _raw_spin_lock+0x2e/0x40 kernel/locking/spinlock.c:154 spin_lock include/linux/spinlock.h:351 [inline] __unix_gc+0x40e/0xf70 net/unix/garbage.c:302 process_one_work kernel/workqueue.c:3254 [inline] process_scheduled_works+0xa10/0x17c0 kernel/workqueue.c:3335 worker_thread+0x86d/0xd70 kernel/workqueue.c:3416 kthread+0x2f0/0x390 kernel/kthread.c:388 ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(unix_gc_lock); lock(&u->lock); lock(unix_gc_lock); lock(&u->lock); *** DEADLOCK *** 3 locks held by kworker/u8:1/11: #0: ffff888015089148 ((wq_completion)events_unbound){+.+.}-{0:0}, at: process_one_work kernel/workqueue.c:3229 [inline] #0: ffff888015089148 ((wq_completion)events_unbound){+.+.}-{0:0}, at: process_scheduled_works+0x8e0/0x17c0 kernel/workqueue.c:3335 #1: ffffc90000107d00 (unix_gc_work){+.+.}-{0:0}, at: process_one_work kernel/workqueue.c:3230 [inline] #1: ffffc90000107d00 (unix_gc_work){+.+.}-{0:0}, at: process_scheduled_works+0x91b/0x17c0 kernel/workqueue.c:3335 #2: ffffffff8f6ab638 (unix_gc_lock){+.+.}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline] #2: ffffffff8f6ab638 (unix_gc_lock){+.+.}-{2:2}, at: __unix_gc+0x117/0xf70 net/unix/garbage.c:261 stack backtrace: CPU: 0 PID: 11 Comm: kworker/u8:1 Not tainted 6.9.0-rc5-syzkaller-00007-g4d2008430ce8 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/27/2024 Workqueue: events_unbound __unix_gc Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x241/0x360 lib/dump_stack.c:114 check_noncircular+0x36a/0x4a0 kernel/locking/lockdep.c:2187 check_prev_add kernel/locking/lockdep.c:3134 [inline] check_prevs_add kernel/locking/lockdep.c:3253 [inline] validate_chain+0x18cb/0x58e0 kernel/locking/lockdep.c:3869 __lock_acquire+0x1346/0x1fd0 kernel/locking/lockdep.c:5137 lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5754 __raw_spin_lock include/linux/spinlock_api_smp.h:133 [inline] _raw_spin_lock+0x2e/0x40 kernel/locking/spinlock.c:154 spin_lock include/linux/spinlock.h:351 [inline] __unix_gc+0x40e/0xf70 net/unix/garbage.c:302 process_one_work kernel/workqueue.c:3254 [inline] process_scheduled_works+0xa10/0x17c0 kernel/workqueue.c:3335 worker_thread+0x86d/0xd70 kernel/workqueue.c:3416 kthread+0x2f0/0x390 kernel/kthread.c:388 ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 Fixes: 47d8ac011fe1 ("af_unix: Fix garbage collector racing against connect()") Reported-and-tested-by: syzbot+fa379358c28cc87cc307@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=fa379358c28cc87cc307 Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240424170443.9832-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/af_unix.h | 3 +++ net/unix/garbage.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 627ea8e2d9159..3dee0b2721aa4 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -85,6 +85,9 @@ enum unix_socket_lock_class { U_LOCK_NORMAL, U_LOCK_SECOND, /* for double locking, see unix_state_double_lock(). */ U_LOCK_DIAG, /* used while dumping icons, see sk_diag_dump_icons(). */ + U_LOCK_GC_LISTENER, /* used for listening socket while determining gc + * candidates to close a small race window. + */ }; static inline void unix_state_lock_nested(struct sock *sk, diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 6433a414acf86..0104be9d47045 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -299,7 +299,7 @@ static void __unix_gc(struct work_struct *work) __set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags); if (sk->sk_state == TCP_LISTEN) { - unix_state_lock(sk); + unix_state_lock_nested(sk, U_LOCK_GC_LISTENER); unix_state_unlock(sk); } } From e027e72ecc1683e04f33aedf0196ad6c3278d309 Mon Sep 17 00:00:00 2001 From: Sergei Antonov Date: Mon, 22 Apr 2024 18:36:07 +0300 Subject: [PATCH 396/606] mmc: moxart: fix handling of sgm->consumed, otherwise WARN_ON triggers When e.g. 8 bytes are to be read, sgm->consumed equals 8 immediately after sg_miter_next() call. The driver then increments it as bytes are read, so sgm->consumed becomes 16 and this warning triggers in sg_miter_stop(): WARN_ON(miter->consumed > miter->length); WARNING: CPU: 0 PID: 28 at lib/scatterlist.c:925 sg_miter_stop+0x2c/0x10c CPU: 0 PID: 28 Comm: kworker/0:2 Tainted: G W 6.9.0-rc5-dirty #249 Hardware name: Generic DT based system Workqueue: events_freezable mmc_rescan Call trace:. unwind_backtrace from show_stack+0x10/0x14 show_stack from dump_stack_lvl+0x44/0x5c dump_stack_lvl from __warn+0x78/0x16c __warn from warn_slowpath_fmt+0xb0/0x160 warn_slowpath_fmt from sg_miter_stop+0x2c/0x10c sg_miter_stop from moxart_request+0xb0/0x468 moxart_request from mmc_start_request+0x94/0xa8 mmc_start_request from mmc_wait_for_req+0x60/0xa8 mmc_wait_for_req from mmc_app_send_scr+0xf8/0x150 mmc_app_send_scr from mmc_sd_setup_card+0x1c/0x420 mmc_sd_setup_card from mmc_sd_init_card+0x12c/0x4dc mmc_sd_init_card from mmc_attach_sd+0xf0/0x16c mmc_attach_sd from mmc_rescan+0x1e0/0x298 mmc_rescan from process_scheduled_works+0x2e4/0x4ec process_scheduled_works from worker_thread+0x1ec/0x24c worker_thread from kthread+0xd4/0xe0 kthread from ret_from_fork+0x14/0x38 This patch adds initial zeroing of sgm->consumed. It is then incremented as bytes are read or written. Signed-off-by: Sergei Antonov Cc: Linus Walleij Fixes: 3ee0e7c3e67c ("mmc: moxart-mmc: Use sg_miter for PIO") Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20240422153607.963672-1-saproj@gmail.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/moxart-mmc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mmc/host/moxart-mmc.c b/drivers/mmc/host/moxart-mmc.c index b88d6dec209f5..9a5f75163acae 100644 --- a/drivers/mmc/host/moxart-mmc.c +++ b/drivers/mmc/host/moxart-mmc.c @@ -300,6 +300,7 @@ static void moxart_transfer_pio(struct moxart_host *host) remain = sgm->length; if (remain > host->data_len) remain = host->data_len; + sgm->consumed = 0; if (data->flags & MMC_DATA_WRITE) { while (remain > 0) { From edbe59428eb0da09958769326a6566d4c9242ae7 Mon Sep 17 00:00:00 2001 From: Shubhrajyoti Datta Date: Thu, 25 Apr 2024 17:49:40 +0530 Subject: [PATCH 397/606] EDAC/versal: Do not register for NOC errors The NOC errors are not handled in the driver. Remove the request for registration. Signed-off-by: Shubhrajyoti Datta Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240425121942.26378-2-shubhrajyoti.datta@amd.com --- drivers/edac/versal_edac.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/edac/versal_edac.c b/drivers/edac/versal_edac.c index 1688a5050f63a..873e7377a105f 100644 --- a/drivers/edac/versal_edac.c +++ b/drivers/edac/versal_edac.c @@ -1135,8 +1135,7 @@ static int mc_probe(struct platform_device *pdev) } rc = xlnx_register_event(PM_NOTIFY_CB, VERSAL_EVENT_ERROR_PMC_ERR1, - XPM_EVENT_ERROR_MASK_DDRMC_CR | XPM_EVENT_ERROR_MASK_DDRMC_NCR | - XPM_EVENT_ERROR_MASK_NOC_CR | XPM_EVENT_ERROR_MASK_NOC_NCR, + XPM_EVENT_ERROR_MASK_DDRMC_CR | XPM_EVENT_ERROR_MASK_DDRMC_NCR, false, err_callback, mci); if (rc) { if (rc == -EACCES) @@ -1173,8 +1172,6 @@ static void mc_remove(struct platform_device *pdev) xlnx_unregister_event(PM_NOTIFY_CB, VERSAL_EVENT_ERROR_PMC_ERR1, XPM_EVENT_ERROR_MASK_DDRMC_CR | - XPM_EVENT_ERROR_MASK_NOC_CR | - XPM_EVENT_ERROR_MASK_NOC_NCR | XPM_EVENT_ERROR_MASK_DDRMC_NCR, err_callback, mci); edac_mc_del_mc(&pdev->dev); edac_mc_free(mci); From de87ba848d5e4c861b7357dd7a91698aed7a5a18 Mon Sep 17 00:00:00 2001 From: Shubhrajyoti Datta Date: Thu, 25 Apr 2024 17:49:41 +0530 Subject: [PATCH 398/606] EDAC/versal: Check user-supplied data before injecting an error The function inject_data_ue_store() lacks a NULL check for the user passed values. To prevent below kernel crash include a NULL check. Call trace: kstrtoull kstrtou8 inject_data_ue_store full_proxy_write vfs_write ksys_write __arm64_sys_write invoke_syscall el0_svc_common.constprop.0 do_el0_svc el0_svc el0t_64_sync_handler el0t_64_sync Fixes: 83bf24051a60 ("EDAC/versal: Make the bit position of injected errors configurable") Signed-off-by: Shubhrajyoti Datta Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240425121942.26378-3-shubhrajyoti.datta@amd.com --- drivers/edac/versal_edac.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/edac/versal_edac.c b/drivers/edac/versal_edac.c index 873e7377a105f..0c50afafc3e23 100644 --- a/drivers/edac/versal_edac.c +++ b/drivers/edac/versal_edac.c @@ -865,6 +865,9 @@ static ssize_t inject_data_ue_store(struct file *file, const char __user *data, for (i = 0; i < NUM_UE_BITPOS; i++) token[i] = strsep(&pbuf, ","); + if (!token[0] || !token[1]) + return -EFAULT; + ret = kstrtou8(token[0], 0, &ue0); if (ret) return ret; From 1a24733e80771d8eef656e515306a560519856a9 Mon Sep 17 00:00:00 2001 From: Shubhrajyoti Datta Date: Thu, 25 Apr 2024 17:49:42 +0530 Subject: [PATCH 399/606] EDAC/versal: Do not log total error counts When logging errors, the driver currently logs the total error count. However, it should log the current error only. Fix it. [ bp: Rewrite text. ] Fixes: 6f15b178cd63 ("EDAC/versal: Add a Xilinx Versal memory controller driver") Signed-off-by: Shubhrajyoti Datta Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240425121942.26378-4-shubhrajyoti.datta@amd.com --- drivers/edac/versal_edac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/edac/versal_edac.c b/drivers/edac/versal_edac.c index 0c50afafc3e23..a556d23e8261c 100644 --- a/drivers/edac/versal_edac.c +++ b/drivers/edac/versal_edac.c @@ -425,7 +425,7 @@ static void handle_error(struct mem_ctl_info *mci, struct ecc_status *stat) convert_to_physical(priv, pinf), pinf.burstpos); edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, - priv->ce_cnt, 0, 0, 0, 0, 0, -1, + 1, 0, 0, 0, 0, 0, -1, priv->message, ""); } @@ -438,7 +438,7 @@ static void handle_error(struct mem_ctl_info *mci, struct ecc_status *stat) convert_to_physical(priv, pinf), pinf.burstpos); edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, - priv->ue_cnt, 0, 0, 0, 0, 0, -1, + 1, 0, 0, 0, 0, 0, -1, priv->message, ""); } From 17c67ed752d6a456602b3dbb25c5ae4d3de5deab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Wed, 6 Dec 2023 14:44:37 +0100 Subject: [PATCH 400/606] selftests: sud_test: return correct emulated syscall value on RISC-V MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, the sud_test expects the emulated syscall to return the emulated syscall number. This assumption only works on architectures were the syscall calling convention use the same register for syscall number/syscall return value. This is not the case for RISC-V and thus the return value must be also emulated using the provided ucontext. Signed-off-by: Clément Léger Reviewed-by: Palmer Dabbelt Acked-by: Palmer Dabbelt Link: https://lore.kernel.org/r/20231206134438.473166-1-cleger@rivosinc.com Signed-off-by: Palmer Dabbelt --- .../selftests/syscall_user_dispatch/sud_test.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tools/testing/selftests/syscall_user_dispatch/sud_test.c b/tools/testing/selftests/syscall_user_dispatch/sud_test.c index b5d592d4099e8..d975a67673299 100644 --- a/tools/testing/selftests/syscall_user_dispatch/sud_test.c +++ b/tools/testing/selftests/syscall_user_dispatch/sud_test.c @@ -158,6 +158,20 @@ static void handle_sigsys(int sig, siginfo_t *info, void *ucontext) /* In preparation for sigreturn. */ SYSCALL_DISPATCH_OFF(glob_sel); + + /* + * The tests for argument handling assume that `syscall(x) == x`. This + * is a NOP on x86 because the syscall number is passed in %rax, which + * happens to also be the function ABI return register. Other + * architectures may need to swizzle the arguments around. + */ +#if defined(__riscv) +/* REG_A7 is not defined in libc headers */ +# define REG_A7 (REG_A0 + 7) + + ((ucontext_t *)ucontext)->uc_mcontext.__gregs[REG_A0] = + ((ucontext_t *)ucontext)->uc_mcontext.__gregs[REG_A7]; +#endif } TEST(dispatch_and_return) From ed74abcd1da0244c3c3be865587dc2727148ee83 Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Fri, 19 Apr 2024 16:50:27 +0500 Subject: [PATCH 401/606] selftests: mm: protection_keys: save/restore nr_hugepages value from launch script The save/restore of nr_hugepages was added to the test itself by using the atexit() functionality. But it is broken as parent exits after creating child. Hence calling the atexit() function early. That's not it. The child exits after creating its child and so on. The parent cannot wait to get the termination status for its children as it'll keep on holding the resources until the new pkey allocation fails. It is impossible to wait for exits of all the grand and great grand children. Hence the restoring of nr_hugepages value from parent is wrong. Let's save/restore the nr_hugepages settings in the launch script instead of doing it in the test. Link: https://lkml.kernel.org/r/20240419115027.3848958-1-usama.anjum@collabora.com Fixes: c52eb6db7b7d ("selftests: mm: restore settings from only parent process") Signed-off-by: Muhammad Usama Anjum Reported-by: Joey Gouly Closes: https://lore.kernel.org/all/20240418125250.GA2941398@e124191.cambridge.arm.com Cc: Joey Gouly Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/protection_keys.c | 38 -------------------- tools/testing/selftests/mm/run_vmtests.sh | 2 ++ 2 files changed, 2 insertions(+), 38 deletions(-) diff --git a/tools/testing/selftests/mm/protection_keys.c b/tools/testing/selftests/mm/protection_keys.c index 374a308174d2b..48dc151f8fca8 100644 --- a/tools/testing/selftests/mm/protection_keys.c +++ b/tools/testing/selftests/mm/protection_keys.c @@ -54,7 +54,6 @@ int test_nr; u64 shadow_pkey_reg; int dprint_in_signal; char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE]; -char buf[256]; void cat_into_file(char *str, char *file) { @@ -1745,42 +1744,6 @@ void pkey_setup_shadow(void) shadow_pkey_reg = __read_pkey_reg(); } -pid_t parent_pid; - -void restore_settings_atexit(void) -{ - if (parent_pid == getpid()) - cat_into_file(buf, "/proc/sys/vm/nr_hugepages"); -} - -void save_settings(void) -{ - int fd; - int err; - - if (geteuid()) - return; - - fd = open("/proc/sys/vm/nr_hugepages", O_RDONLY); - if (fd < 0) { - fprintf(stderr, "error opening\n"); - perror("error: "); - exit(__LINE__); - } - - /* -1 to guarantee leaving the trailing \0 */ - err = read(fd, buf, sizeof(buf)-1); - if (err < 0) { - fprintf(stderr, "error reading\n"); - perror("error: "); - exit(__LINE__); - } - - parent_pid = getpid(); - atexit(restore_settings_atexit); - close(fd); -} - int main(void) { int nr_iterations = 22; @@ -1788,7 +1751,6 @@ int main(void) srand((unsigned int)time(NULL)); - save_settings(); setup_handlers(); printf("has pkeys: %d\n", pkeys_supported); diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index c2c542fe7b17b..4bdb3a0c7a606 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -385,6 +385,7 @@ CATEGORY="ksm_numa" run_test ./ksm_tests -N -m 0 CATEGORY="ksm" run_test ./ksm_functional_tests # protection_keys tests +nr_hugepgs=$(cat /proc/sys/vm/nr_hugepages) if [ -x ./protection_keys_32 ] then CATEGORY="pkey" run_test ./protection_keys_32 @@ -394,6 +395,7 @@ if [ -x ./protection_keys_64 ] then CATEGORY="pkey" run_test ./protection_keys_64 fi +echo "$nr_hugepgs" > /proc/sys/vm/nr_hugepages if [ -x ./soft-dirty ] then From 52ccdde16b6540abe43b6f8d8e1e1ec90b0983af Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Fri, 19 Apr 2024 16:58:19 +0800 Subject: [PATCH 402/606] mm/hugetlb: fix DEBUG_LOCKS_WARN_ON(1) when dissolve_free_hugetlb_folio() When I did memory failure tests recently, below warning occurs: DEBUG_LOCKS_WARN_ON(1) WARNING: CPU: 8 PID: 1011 at kernel/locking/lockdep.c:232 __lock_acquire+0xccb/0x1ca0 Modules linked in: mce_inject hwpoison_inject CPU: 8 PID: 1011 Comm: bash Kdump: loaded Not tainted 6.9.0-rc3-next-20240410-00012-gdb69f219f4be #3 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 RIP: 0010:__lock_acquire+0xccb/0x1ca0 RSP: 0018:ffffa7a1c7fe3bd0 EFLAGS: 00000082 RAX: 0000000000000000 RBX: eb851eb853975fcf RCX: ffffa1ce5fc1c9c8 RDX: 00000000ffffffd8 RSI: 0000000000000027 RDI: ffffa1ce5fc1c9c0 RBP: ffffa1c6865d3280 R08: ffffffffb0f570a8 R09: 0000000000009ffb R10: 0000000000000286 R11: ffffffffb0f2ad50 R12: ffffa1c6865d3d10 R13: ffffa1c6865d3c70 R14: 0000000000000000 R15: 0000000000000004 FS: 00007ff9f32aa740(0000) GS:ffffa1ce5fc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ff9f3134ba0 CR3: 00000008484e4000 CR4: 00000000000006f0 Call Trace: lock_acquire+0xbe/0x2d0 _raw_spin_lock_irqsave+0x3a/0x60 hugepage_subpool_put_pages.part.0+0xe/0xc0 free_huge_folio+0x253/0x3f0 dissolve_free_huge_page+0x147/0x210 __page_handle_poison+0x9/0x70 memory_failure+0x4e6/0x8c0 hard_offline_page_store+0x55/0xa0 kernfs_fop_write_iter+0x12c/0x1d0 vfs_write+0x380/0x540 ksys_write+0x64/0xe0 do_syscall_64+0xbc/0x1d0 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7ff9f3114887 RSP: 002b:00007ffecbacb458 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 000000000000000c RCX: 00007ff9f3114887 RDX: 000000000000000c RSI: 0000564494164e10 RDI: 0000000000000001 RBP: 0000564494164e10 R08: 00007ff9f31d1460 R09: 000000007fffffff R10: 0000000000000000 R11: 0000000000000246 R12: 000000000000000c R13: 00007ff9f321b780 R14: 00007ff9f3217600 R15: 00007ff9f3216a00 Kernel panic - not syncing: kernel: panic_on_warn set ... CPU: 8 PID: 1011 Comm: bash Kdump: loaded Not tainted 6.9.0-rc3-next-20240410-00012-gdb69f219f4be #3 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 Call Trace: panic+0x326/0x350 check_panic_on_warn+0x4f/0x50 __warn+0x98/0x190 report_bug+0x18e/0x1a0 handle_bug+0x3d/0x70 exc_invalid_op+0x18/0x70 asm_exc_invalid_op+0x1a/0x20 RIP: 0010:__lock_acquire+0xccb/0x1ca0 RSP: 0018:ffffa7a1c7fe3bd0 EFLAGS: 00000082 RAX: 0000000000000000 RBX: eb851eb853975fcf RCX: ffffa1ce5fc1c9c8 RDX: 00000000ffffffd8 RSI: 0000000000000027 RDI: ffffa1ce5fc1c9c0 RBP: ffffa1c6865d3280 R08: ffffffffb0f570a8 R09: 0000000000009ffb R10: 0000000000000286 R11: ffffffffb0f2ad50 R12: ffffa1c6865d3d10 R13: ffffa1c6865d3c70 R14: 0000000000000000 R15: 0000000000000004 lock_acquire+0xbe/0x2d0 _raw_spin_lock_irqsave+0x3a/0x60 hugepage_subpool_put_pages.part.0+0xe/0xc0 free_huge_folio+0x253/0x3f0 dissolve_free_huge_page+0x147/0x210 __page_handle_poison+0x9/0x70 memory_failure+0x4e6/0x8c0 hard_offline_page_store+0x55/0xa0 kernfs_fop_write_iter+0x12c/0x1d0 vfs_write+0x380/0x540 ksys_write+0x64/0xe0 do_syscall_64+0xbc/0x1d0 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7ff9f3114887 RSP: 002b:00007ffecbacb458 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 000000000000000c RCX: 00007ff9f3114887 RDX: 000000000000000c RSI: 0000564494164e10 RDI: 0000000000000001 RBP: 0000564494164e10 R08: 00007ff9f31d1460 R09: 000000007fffffff R10: 0000000000000000 R11: 0000000000000246 R12: 000000000000000c R13: 00007ff9f321b780 R14: 00007ff9f3217600 R15: 00007ff9f3216a00 After git bisecting and digging into the code, I believe the root cause is that _deferred_list field of folio is unioned with _hugetlb_subpool field. In __update_and_free_hugetlb_folio(), folio->_deferred_list is initialized leading to corrupted folio->_hugetlb_subpool when folio is hugetlb. Later free_huge_folio() will use _hugetlb_subpool and above warning happens. But it is assumed hugetlb flag must have been cleared when calling folio_put() in update_and_free_hugetlb_folio(). This assumption is broken due to below race: CPU1 CPU2 dissolve_free_huge_page update_and_free_pages_bulk update_and_free_hugetlb_folio hugetlb_vmemmap_restore_folios folio_clear_hugetlb_vmemmap_optimized clear_flag = folio_test_hugetlb_vmemmap_optimized if (clear_flag) <-- False, it's already cleared. __folio_clear_hugetlb(folio) <-- Hugetlb is not cleared. folio_put free_huge_folio <-- free_the_page is expected. list_for_each_entry() __folio_clear_hugetlb <-- Too late. Fix this issue by checking whether folio is hugetlb directly instead of checking clear_flag to close the race window. Link: https://lkml.kernel.org/r/20240419085819.1901645-1-linmiaohe@huawei.com Fixes: 32c877191e02 ("hugetlb: do not clear hugetlb dtor until allocating vmemmap") Signed-off-by: Miaohe Lin Reviewed-by: Oscar Salvador Cc: Signed-off-by: Andrew Morton --- mm/hugetlb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 05371bf54f96d..ce7be5c244429 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1781,7 +1781,7 @@ static void __update_and_free_hugetlb_folio(struct hstate *h, * If vmemmap pages were allocated above, then we need to clear the * hugetlb destructor under the hugetlb lock. */ - if (clear_dtor) { + if (folio_test_hugetlb(folio)) { spin_lock_irq(&hugetlb_lock); __clear_hugetlb_destructor(h, folio); spin_unlock_irq(&hugetlb_lock); From 6179d4a213006491ff0d50073256f21fad22149b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20M=C3=BCllner?= Date: Sun, 7 Apr 2024 23:32:35 +0200 Subject: [PATCH 403/606] riscv: thead: Rename T-Head PBMT to MAE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit T-Head's vendor extension to set page attributes has the name MAE (memory attribute extension). Let's rename it, so it is clear what this referes to. Link: https://github.com/T-head-Semi/thead-extension-spec/blob/master/xtheadmae.adoc Reviewed-by: Conor Dooley Signed-off-by: Christoph Müllner Link: https://lore.kernel.org/r/20240407213236.2121592-2-christoph.muellner@vrull.eu Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig.errata | 8 ++++---- arch/riscv/errata/thead/errata.c | 10 +++++----- arch/riscv/include/asm/errata_list.h | 20 ++++++++++---------- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/arch/riscv/Kconfig.errata b/arch/riscv/Kconfig.errata index 910ba8837add8..2acc7d876e1fb 100644 --- a/arch/riscv/Kconfig.errata +++ b/arch/riscv/Kconfig.errata @@ -82,14 +82,14 @@ config ERRATA_THEAD Otherwise, please say "N" here to avoid unnecessary overhead. -config ERRATA_THEAD_PBMT - bool "Apply T-Head memory type errata" +config ERRATA_THEAD_MAE + bool "Apply T-Head's memory attribute extension (XTheadMae) errata" depends on ERRATA_THEAD && 64BIT && MMU select RISCV_ALTERNATIVE_EARLY default y help - This will apply the memory type errata to handle the non-standard - memory type bits in page-table-entries on T-Head SoCs. + This will apply the memory attribute extension errata to handle the + non-standard PTE utilization on T-Head SoCs (XTheadMae). If you don't know what to do here, say "Y". diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c index b1c410bbc1aec..6e7ee1f16bee3 100644 --- a/arch/riscv/errata/thead/errata.c +++ b/arch/riscv/errata/thead/errata.c @@ -19,10 +19,10 @@ #include #include -static bool errata_probe_pbmt(unsigned int stage, - unsigned long arch_id, unsigned long impid) +static bool errata_probe_mae(unsigned int stage, + unsigned long arch_id, unsigned long impid) { - if (!IS_ENABLED(CONFIG_ERRATA_THEAD_PBMT)) + if (!IS_ENABLED(CONFIG_ERRATA_THEAD_MAE)) return false; if (arch_id != 0 || impid != 0) @@ -140,8 +140,8 @@ static u32 thead_errata_probe(unsigned int stage, { u32 cpu_req_errata = 0; - if (errata_probe_pbmt(stage, archid, impid)) - cpu_req_errata |= BIT(ERRATA_THEAD_PBMT); + if (errata_probe_mae(stage, archid, impid)) + cpu_req_errata |= BIT(ERRATA_THEAD_MAE); errata_probe_cmo(stage, archid, impid); diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h index ea33288f8a25b..9bad9dfa2f7a1 100644 --- a/arch/riscv/include/asm/errata_list.h +++ b/arch/riscv/include/asm/errata_list.h @@ -23,7 +23,7 @@ #endif #ifdef CONFIG_ERRATA_THEAD -#define ERRATA_THEAD_PBMT 0 +#define ERRATA_THEAD_MAE 0 #define ERRATA_THEAD_PMU 1 #define ERRATA_THEAD_NUMBER 2 #endif @@ -53,20 +53,20 @@ asm(ALTERNATIVE("sfence.vma %0", "sfence.vma", SIFIVE_VENDOR_ID, \ * in the default case. */ #define ALT_SVPBMT_SHIFT 61 -#define ALT_THEAD_PBMT_SHIFT 59 +#define ALT_THEAD_MAE_SHIFT 59 #define ALT_SVPBMT(_val, prot) \ asm(ALTERNATIVE_2("li %0, 0\t\nnop", \ "li %0, %1\t\nslli %0,%0,%3", 0, \ RISCV_ISA_EXT_SVPBMT, CONFIG_RISCV_ISA_SVPBMT, \ "li %0, %2\t\nslli %0,%0,%4", THEAD_VENDOR_ID, \ - ERRATA_THEAD_PBMT, CONFIG_ERRATA_THEAD_PBMT) \ + ERRATA_THEAD_MAE, CONFIG_ERRATA_THEAD_MAE) \ : "=r"(_val) \ : "I"(prot##_SVPBMT >> ALT_SVPBMT_SHIFT), \ - "I"(prot##_THEAD >> ALT_THEAD_PBMT_SHIFT), \ + "I"(prot##_THEAD >> ALT_THEAD_MAE_SHIFT), \ "I"(ALT_SVPBMT_SHIFT), \ - "I"(ALT_THEAD_PBMT_SHIFT)) + "I"(ALT_THEAD_MAE_SHIFT)) -#ifdef CONFIG_ERRATA_THEAD_PBMT +#ifdef CONFIG_ERRATA_THEAD_MAE /* * IO/NOCACHE memory types are handled together with svpbmt, * so on T-Head chips, check if no other memory type is set, @@ -83,11 +83,11 @@ asm volatile(ALTERNATIVE( \ "slli t3, t3, %3\n\t" \ "or %0, %0, t3\n\t" \ "2:", THEAD_VENDOR_ID, \ - ERRATA_THEAD_PBMT, CONFIG_ERRATA_THEAD_PBMT) \ + ERRATA_THEAD_MAE, CONFIG_ERRATA_THEAD_MAE) \ : "+r"(_val) \ - : "I"(_PAGE_MTMASK_THEAD >> ALT_THEAD_PBMT_SHIFT), \ - "I"(_PAGE_PMA_THEAD >> ALT_THEAD_PBMT_SHIFT), \ - "I"(ALT_THEAD_PBMT_SHIFT) \ + : "I"(_PAGE_MTMASK_THEAD >> ALT_THEAD_MAE_SHIFT), \ + "I"(_PAGE_PMA_THEAD >> ALT_THEAD_MAE_SHIFT), \ + "I"(ALT_THEAD_MAE_SHIFT) \ : "t3") #else #define ALT_THEAD_PMA(_val) From 65b71cc35cc6631cb0a5b24f961fe64c085cb40b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20M=C3=BCllner?= Date: Sun, 7 Apr 2024 23:32:36 +0200 Subject: [PATCH 404/606] riscv: T-Head: Test availability bit before enabling MAE errata MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit T-Head's memory attribute extension (XTheadMae) (non-compatible equivalent of RVI's Svpbmt) is currently assumed for all T-Head harts. However, QEMU recently decided to drop acceptance of guests that write reserved bits in PTEs. As XTheadMae uses reserved bits in PTEs and Linux applies the MAE errata for all T-Head harts, this broke the Linux startup on QEMU emulations of the C906 emulation. This patch attempts to address this issue by testing the MAE-enable bit in the th.sxstatus CSR. This CSR is available in HW and can be emulated in QEMU. This patch also makes the XTheadMae probing mechanism reliable, because a test for the right combination of mvendorid, marchid, and mimpid is not sufficient to enable MAE. Reviewed-by: Conor Dooley Signed-off-by: Christoph Müllner Link: https://lore.kernel.org/r/20240407213236.2121592-3-christoph.muellner@vrull.eu Signed-off-by: Palmer Dabbelt --- arch/riscv/errata/thead/errata.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c index 6e7ee1f16bee3..bf6a0a6318ee3 100644 --- a/arch/riscv/errata/thead/errata.c +++ b/arch/riscv/errata/thead/errata.c @@ -19,6 +19,9 @@ #include #include +#define CSR_TH_SXSTATUS 0x5c0 +#define SXSTATUS_MAEE _AC(0x200000, UL) + static bool errata_probe_mae(unsigned int stage, unsigned long arch_id, unsigned long impid) { @@ -28,11 +31,14 @@ static bool errata_probe_mae(unsigned int stage, if (arch_id != 0 || impid != 0) return false; - if (stage == RISCV_ALTERNATIVES_EARLY_BOOT || - stage == RISCV_ALTERNATIVES_MODULE) - return true; + if (stage != RISCV_ALTERNATIVES_EARLY_BOOT && + stage != RISCV_ALTERNATIVES_MODULE) + return false; - return false; + if (!(csr_read(CSR_TH_SXSTATUS) & SXSTATUS_MAEE)) + return false; + + return true; } /* From 8094a600245e9b28eb36a13036f202ad67c1f887 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 25 Apr 2024 11:30:16 -0500 Subject: [PATCH 405/606] smb3: missing lock when picking channel Coverity spotted a place where we should have been holding the channel lock when accessing the ses channel index. Addresses-Coverity: 1582039 ("Data race condition (MISSING_LOCK)") Cc: stable@vger.kernel.org Reviewed-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/transport.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index 994d701934329..e1a79e031b28c 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -1057,9 +1057,11 @@ struct TCP_Server_Info *cifs_pick_channel(struct cifs_ses *ses) index = (uint)atomic_inc_return(&ses->chan_seq); index %= ses->chan_count; } + + server = ses->chans[index].server; spin_unlock(&ses->chan_lock); - return ses->chans[index].server; + return server; } int From 8861fd5180476f45f9e8853db154600469a0284f Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 25 Apr 2024 12:49:50 -0500 Subject: [PATCH 406/606] smb3: fix lock ordering potential deadlock in cifs_sync_mid_result Coverity spotted that the cifs_sync_mid_result function could deadlock "Thread deadlock (ORDER_REVERSAL) lock_order: Calling spin_lock acquires lock TCP_Server_Info.srv_lock while holding lock TCP_Server_Info.mid_lock" Addresses-Coverity: 1590401 ("Thread deadlock (ORDER_REVERSAL)") Cc: stable@vger.kernel.org Reviewed-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/transport.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index e1a79e031b28c..ddf1a3aafee5c 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -909,12 +909,15 @@ cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server) list_del_init(&mid->qhead); mid->mid_flags |= MID_DELETED; } + spin_unlock(&server->mid_lock); cifs_server_dbg(VFS, "%s: invalid mid state mid=%llu state=%d\n", __func__, mid->mid, mid->mid_state); rc = -EIO; + goto sync_mid_done; } spin_unlock(&server->mid_lock); +sync_mid_done: release_mid(mid); return rc; } From 18180a4550d08be4eb0387fe83f02f703f92d4e7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 25 Apr 2024 17:46:26 -0400 Subject: [PATCH 407/606] NFSD: Fix nfsd4_encode_fattr4() crasher Ensure that args.acl is initialized early. It is used in an unconditional call to kfree() on the way out of nfsd4_encode_fattr4(). Reported-by: Scott Mayhew Fixes: 83ab8678ad0c ("NFSD: Add struct nfsd4_fattr_args") Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 1955481832e03..a644460f3a5e7 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3515,6 +3515,7 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, args.exp = exp; args.dentry = dentry; args.ignore_crossmnt = (ignore_crossmnt != 0); + args.acl = NULL; /* * Make a local copy of the attribute bitmap that can be modified. @@ -3573,7 +3574,6 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, } else args.fhp = fhp; - args.acl = NULL; if (attrmask[0] & FATTR4_WORD0_ACL) { err = nfsd4_get_nfs4_acl(rqstp, dentry, &args.acl); if (err == -EOPNOTSUPP) From 782e5e7925880f737963444f141a0320a12104a5 Mon Sep 17 00:00:00 2001 From: Ian Forbes Date: Thu, 25 Apr 2024 15:07:00 -0500 Subject: [PATCH 408/606] drm/vmwgfx: Fix Legacy Display Unit Legacy DU was broken by the referenced fixes commit because the placement and the busy_placement no longer pointed to the same object. This was later fixed indirectly by commit a78a8da51b36c7a0c0c16233f91d60aac03a5a49 ("drm/ttm: replace busy placement with flags v6") in v6.9. Fixes: 39985eea5a6d ("drm/vmwgfx: Abstract placement selection") Signed-off-by: Ian Forbes Cc: # v6.4+ Reviewed-by: Zack Rusin Signed-off-by: Zack Rusin Link: https://patchwork.freedesktop.org/patch/msgid/20240425200700.24403-1-ian.forbes@broadcom.com --- drivers/gpu/drm/vmwgfx/vmwgfx_bo.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index e5eb21a471a60..00144632c600e 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -204,6 +204,7 @@ int vmw_bo_pin_in_start_of_vram(struct vmw_private *dev_priv, VMW_BO_DOMAIN_VRAM, VMW_BO_DOMAIN_VRAM); buf->places[0].lpfn = PFN_UP(bo->resource->size); + buf->busy_places[0].lpfn = PFN_UP(bo->resource->size); ret = ttm_bo_validate(bo, &buf->placement, &ctx); /* For some reason we didn't end up at the start of vram */ From 27906e5d78248b19bcdfdae72049338c828897bb Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Mon, 8 Apr 2024 11:56:05 -0400 Subject: [PATCH 409/606] drm/ttm: Print the memory decryption status just once MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stop printing the TT memory decryption status info each time tt is created and instead print it just once. Reduces the spam in the system logs when running guests with SEV enabled. Signed-off-by: Zack Rusin Fixes: 71ce046327cf ("drm/ttm: Make sure the mapped tt pages are decrypted when needed") Reviewed-by: Christian König Cc: Thomas Hellström Cc: dri-devel@lists.freedesktop.org Cc: linux-kernel@vger.kernel.org Cc: # v5.14+ Link: https://patchwork.freedesktop.org/patch/msgid/20240408155605.1398631-1-zack.rusin@broadcom.com --- drivers/gpu/drm/ttm/ttm_tt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 578a7c37f00bd..d776e3f87064f 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -92,7 +92,7 @@ int ttm_tt_create(struct ttm_buffer_object *bo, bool zero_alloc) */ if (bdev->pool.use_dma_alloc && cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) { page_flags |= TTM_TT_FLAG_DECRYPTED; - drm_info(ddev, "TT memory decryption enabled."); + drm_info_once(ddev, "TT memory decryption enabled."); } bo->ttm = bdev->funcs->ttm_tt_create(bo, page_flags); From 666854ea9cad844f75a068f32812a2d78004914a Mon Sep 17 00:00:00 2001 From: Bui Quang Minh Date: Wed, 24 Apr 2024 21:44:18 +0700 Subject: [PATCH 410/606] ice: ensure the copied buf is NUL terminated Currently, we allocate a count-sized kernel buffer and copy count bytes from userspace to that buffer. Later, we use sscanf on this buffer but we don't ensure that the string is terminated inside the buffer, this can lead to OOB read when using sscanf. Fix this issue by using memdup_user_nul instead of memdup_user. Fixes: 96a9a9341cda ("ice: configure FW logging") Fixes: 73671c3162c8 ("ice: enable FW logging") Reviewed-by: Przemek Kitszel Signed-off-by: Bui Quang Minh Link: https://lore.kernel.org/r/20240424-fix-oob-read-v2-1-f1f1b53a10f4@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_debugfs.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_debugfs.c b/drivers/net/ethernet/intel/ice/ice_debugfs.c index d252d98218d08..9fc0fd95a13d8 100644 --- a/drivers/net/ethernet/intel/ice/ice_debugfs.c +++ b/drivers/net/ethernet/intel/ice/ice_debugfs.c @@ -171,7 +171,7 @@ ice_debugfs_module_write(struct file *filp, const char __user *buf, if (*ppos != 0 || count > 8) return -EINVAL; - cmd_buf = memdup_user(buf, count); + cmd_buf = memdup_user_nul(buf, count); if (IS_ERR(cmd_buf)) return PTR_ERR(cmd_buf); @@ -257,7 +257,7 @@ ice_debugfs_nr_messages_write(struct file *filp, const char __user *buf, if (*ppos != 0 || count > 4) return -EINVAL; - cmd_buf = memdup_user(buf, count); + cmd_buf = memdup_user_nul(buf, count); if (IS_ERR(cmd_buf)) return PTR_ERR(cmd_buf); @@ -332,7 +332,7 @@ ice_debugfs_enable_write(struct file *filp, const char __user *buf, if (*ppos != 0 || count > 2) return -EINVAL; - cmd_buf = memdup_user(buf, count); + cmd_buf = memdup_user_nul(buf, count); if (IS_ERR(cmd_buf)) return PTR_ERR(cmd_buf); @@ -428,7 +428,7 @@ ice_debugfs_log_size_write(struct file *filp, const char __user *buf, if (*ppos != 0 || count > 5) return -EINVAL; - cmd_buf = memdup_user(buf, count); + cmd_buf = memdup_user_nul(buf, count); if (IS_ERR(cmd_buf)) return PTR_ERR(cmd_buf); From 8c34096c7fdf272fd4c0c37fe411cd2e3ed0ee9f Mon Sep 17 00:00:00 2001 From: Bui Quang Minh Date: Wed, 24 Apr 2024 21:44:19 +0700 Subject: [PATCH 411/606] bna: ensure the copied buf is NUL terminated Currently, we allocate a nbytes-sized kernel buffer and copy nbytes from userspace to that buffer. Later, we use sscanf on this buffer but we don't ensure that the string is terminated inside the buffer, this can lead to OOB read when using sscanf. Fix this issue by using memdup_user_nul instead of memdup_user. Fixes: 7afc5dbde091 ("bna: Add debugfs interface.") Signed-off-by: Bui Quang Minh Link: https://lore.kernel.org/r/20240424-fix-oob-read-v2-2-f1f1b53a10f4@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/brocade/bna/bnad_debugfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/brocade/bna/bnad_debugfs.c b/drivers/net/ethernet/brocade/bna/bnad_debugfs.c index 7246e13dd559f..97291bfbeea58 100644 --- a/drivers/net/ethernet/brocade/bna/bnad_debugfs.c +++ b/drivers/net/ethernet/brocade/bna/bnad_debugfs.c @@ -312,7 +312,7 @@ bnad_debugfs_write_regrd(struct file *file, const char __user *buf, void *kern_buf; /* Copy the user space buf */ - kern_buf = memdup_user(buf, nbytes); + kern_buf = memdup_user_nul(buf, nbytes); if (IS_ERR(kern_buf)) return PTR_ERR(kern_buf); @@ -372,7 +372,7 @@ bnad_debugfs_write_regwr(struct file *file, const char __user *buf, void *kern_buf; /* Copy the user space buf */ - kern_buf = memdup_user(buf, nbytes); + kern_buf = memdup_user_nul(buf, nbytes); if (IS_ERR(kern_buf)) return PTR_ERR(kern_buf); From f299ee709fb45036454ca11e90cb2810fe771878 Mon Sep 17 00:00:00 2001 From: Bui Quang Minh Date: Wed, 24 Apr 2024 21:44:23 +0700 Subject: [PATCH 412/606] octeontx2-af: avoid off-by-one read from userspace We try to access count + 1 byte from userspace with memdup_user(buffer, count + 1). However, the userspace only provides buffer of count bytes and only these count bytes are verified to be okay to access. To ensure the copied buffer is NUL terminated, we use memdup_user_nul instead. Fixes: 3a2eb515d136 ("octeontx2-af: Fix an off by one in rvu_dbg_qsize_write()") Signed-off-by: Bui Quang Minh Link: https://lore.kernel.org/r/20240424-fix-oob-read-v2-6-f1f1b53a10f4@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index 2500f5ba4f5a4..881d704644fbe 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -999,12 +999,10 @@ static ssize_t rvu_dbg_qsize_write(struct file *filp, u16 pcifunc; int ret, lf; - cmd_buf = memdup_user(buffer, count + 1); + cmd_buf = memdup_user_nul(buffer, count); if (IS_ERR(cmd_buf)) return -ENOMEM; - cmd_buf[count] = '\0'; - cmd_buf_tmp = strchr(cmd_buf, '\n'); if (cmd_buf_tmp) { *cmd_buf_tmp = '\0'; From bb1dbeceb1c20cfd81271e1bd69892ebd1ee38e0 Mon Sep 17 00:00:00 2001 From: Peter Colberg Date: Mon, 22 Apr 2024 19:02:57 -0400 Subject: [PATCH 413/606] fpga: dfl-pci: add PCI subdevice ID for Intel D5005 card Add PCI subdevice ID for the Intel D5005 Stratix 10 FPGA card as used with the Open FPGA Stack (OFS) FPGA Interface Manager (FIM). Unlike the Intel D5005 PAC FIM which exposed a separate PCI device ID, the OFS FIM reuses the same device ID for all DFL-based FPGA cards and differentiates on the subdevice ID. The subdevice ID values were chosen as the numeric part of the FPGA card names in hexadecimal. Signed-off-by: Peter Colberg Reviewed-by: Matthew Gerlach Acked-by: Xu Yilun Link: https://lore.kernel.org/r/20240422230257.1959-1-peter.colberg@intel.com Signed-off-by: Xu Yilun --- drivers/fpga/dfl-pci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/fpga/dfl-pci.c b/drivers/fpga/dfl-pci.c index 98b8fd16183e4..80cac3a5f9767 100644 --- a/drivers/fpga/dfl-pci.c +++ b/drivers/fpga/dfl-pci.c @@ -78,6 +78,7 @@ static void cci_pci_free_irq(struct pci_dev *pcidev) #define PCIE_DEVICE_ID_SILICOM_PAC_N5011 0x1001 #define PCIE_DEVICE_ID_INTEL_DFL 0xbcce /* PCI Subdevice ID for PCIE_DEVICE_ID_INTEL_DFL */ +#define PCIE_SUBDEVICE_ID_INTEL_D5005 0x138d #define PCIE_SUBDEVICE_ID_INTEL_N6000 0x1770 #define PCIE_SUBDEVICE_ID_INTEL_N6001 0x1771 #define PCIE_SUBDEVICE_ID_INTEL_C6100 0x17d4 @@ -101,6 +102,8 @@ static struct pci_device_id cci_pcie_id_tbl[] = { {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCIE_DEVICE_ID_INTEL_PAC_D5005_VF),}, {PCI_DEVICE(PCI_VENDOR_ID_SILICOM_DENMARK, PCIE_DEVICE_ID_SILICOM_PAC_N5010),}, {PCI_DEVICE(PCI_VENDOR_ID_SILICOM_DENMARK, PCIE_DEVICE_ID_SILICOM_PAC_N5011),}, + {PCI_DEVICE_SUB(PCI_VENDOR_ID_INTEL, PCIE_DEVICE_ID_INTEL_DFL, + PCI_VENDOR_ID_INTEL, PCIE_SUBDEVICE_ID_INTEL_D5005),}, {PCI_DEVICE_SUB(PCI_VENDOR_ID_INTEL, PCIE_DEVICE_ID_INTEL_DFL, PCI_VENDOR_ID_INTEL, PCIE_SUBDEVICE_ID_INTEL_N6000),}, {PCI_DEVICE_SUB(PCI_VENDOR_ID_INTEL, PCIE_DEVICE_ID_INTEL_DFL_VF, From 1d422e44e17c234cef262599e8e5dce6cd3ce28d Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 25 Apr 2024 13:57:19 -0700 Subject: [PATCH 414/606] MAINTAINERS: Drop entry for PCA9541 bus master selector I no longer have access to PCA9541 hardware, and I am no longer involved in related development. Listing me as PCA9541 maintainer does not make sense anymore. Remove PCA9541 from MAINTAINERS to let its support default to the generic I2C multiplexer entry. Signed-off-by: Guenter Roeck Acked-by: Peter Rosin Signed-off-by: Wolfram Sang --- MAINTAINERS | 6 ------ 1 file changed, 6 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index ebf03f5f0619a..652cce4992739 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16799,12 +16799,6 @@ S: Maintained F: drivers/leds/leds-pca9532.c F: include/linux/leds-pca9532.h -PCA9541 I2C BUS MASTER SELECTOR DRIVER -M: Guenter Roeck -L: linux-i2c@vger.kernel.org -S: Maintained -F: drivers/i2c/muxes/i2c-mux-pca9541.c - PCI DRIVER FOR AARDVARK (Marvell Armada 3700) M: Thomas Petazzoni M: Pali Rohár From 190f1f46ede17ca0d7153ac115d6518ec1be2ba3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 24 Apr 2024 23:26:27 +0200 Subject: [PATCH 415/606] MAINTAINERS: Update Uwe's email address, drop SIOX maintenance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the context of changing my career path, my Pengutronix email address will soon stop to be available to me. Update the PWM maintainer entry to my kernel.org identity. I drop my co-maintenance of SIOX. Thorsten will continue to care for it with the support of the Pengutronix kernel team. Signed-off-by: Uwe Kleine-König Acked-by: Thorsten Scherer Link: https://lore.kernel.org/r/20240424212626.603631-2-ukleinek@kernel.org Signed-off-by: Uwe Kleine-König --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index ebf03f5f0619a..85a32423e9eda 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17873,7 +17873,7 @@ F: Documentation/devicetree/bindings/leds/irled/pwm-ir-tx.yaml F: drivers/media/rc/pwm-ir-tx.c PWM SUBSYSTEM -M: Uwe Kleine-König +M: Uwe Kleine-König L: linux-pwm@vger.kernel.org S: Maintained Q: https://patchwork.ozlabs.org/project/linux-pwm/list/ @@ -20177,7 +20177,6 @@ F: include/linux/platform_data/simplefb.h SIOX M: Thorsten Scherer -M: Uwe Kleine-König R: Pengutronix Kernel Team S: Supported F: drivers/gpio/gpio-siox.c From 72c1afffa4c645fe0e0f1c03e5f34395ed65b5f4 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 25 Apr 2024 19:52:12 +0200 Subject: [PATCH 416/606] thermal/debugfs: Free all thermal zone debug memory on zone removal Because thermal_debug_tz_remove() does not free all memory allocated for thermal zone diagnostics, some of that memory becomes unreachable after freeing the thermal zone's struct thermal_debugfs object. Address this by making thermal_debug_tz_remove() free all of the memory in question. Fixes: 7ef01f228c9f ("thermal/debugfs: Add thermal debugfs information for mitigation episodes") Cc :6.8+ # 6.8+ Signed-off-by: Rafael J. Wysocki Reviewed-by: Lukasz Luba --- drivers/thermal/thermal_debugfs.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/thermal/thermal_debugfs.c b/drivers/thermal/thermal_debugfs.c index d78d54ae2605e..8d66b3a96be1a 100644 --- a/drivers/thermal/thermal_debugfs.c +++ b/drivers/thermal/thermal_debugfs.c @@ -826,15 +826,28 @@ void thermal_debug_tz_add(struct thermal_zone_device *tz) void thermal_debug_tz_remove(struct thermal_zone_device *tz) { struct thermal_debugfs *thermal_dbg = tz->debugfs; + struct tz_episode *tze, *tmp; + struct tz_debugfs *tz_dbg; + int *trips_crossed; if (!thermal_dbg) return; + tz_dbg = &thermal_dbg->tz_dbg; + mutex_lock(&thermal_dbg->lock); + trips_crossed = tz_dbg->trips_crossed; + + list_for_each_entry_safe(tze, tmp, &tz_dbg->tz_episodes, node) { + list_del(&tze->node); + kfree(tze); + } + tz->debugfs = NULL; mutex_unlock(&thermal_dbg->lock); thermal_debugfs_remove_id(thermal_dbg); + kfree(trips_crossed); } From c7f7c37271787a7f77d7eedc132b0b419a76b4c8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 25 Apr 2024 20:00:33 +0200 Subject: [PATCH 417/606] thermal/debugfs: Fix two locking issues with thermal zone debug With the current thermal zone locking arrangement in the debugfs code, user space can open the "mitigations" file for a thermal zone before the zone's debugfs pointer is set which will result in a NULL pointer dereference in tze_seq_start(). Moreover, thermal_debug_tz_remove() is not called under the thermal zone lock, so it can run in parallel with the other functions accessing the thermal zone's struct thermal_debugfs object. Then, it may clear tz->debugfs after one of those functions has checked it and the struct thermal_debugfs object may be freed prematurely. To address the first problem, pass a pointer to the thermal zone's struct thermal_debugfs object to debugfs_create_file() in thermal_debug_tz_add() and make tze_seq_start(), tze_seq_next(), tze_seq_stop(), and tze_seq_show() retrieve it from s->private instead of a pointer to the thermal zone object. This will ensure that tz_debugfs will be valid across the "mitigations" file accesses until thermal_debugfs_remove_id() called by thermal_debug_tz_remove() removes that file. To address the second problem, use tz->lock in thermal_debug_tz_remove() around the tz->debugfs value check (in case the same thermal zone is removed at the same time in two different threads) and its reset to NULL. Fixes: 7ef01f228c9f ("thermal/debugfs: Add thermal debugfs information for mitigation episodes") Cc :6.8+ # 6.8+ Signed-off-by: Rafael J. Wysocki Reviewed-by: Lukasz Luba --- drivers/thermal/thermal_debugfs.c | 34 ++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/drivers/thermal/thermal_debugfs.c b/drivers/thermal/thermal_debugfs.c index 8d66b3a96be1a..1fe2914a8853b 100644 --- a/drivers/thermal/thermal_debugfs.c +++ b/drivers/thermal/thermal_debugfs.c @@ -139,11 +139,13 @@ struct tz_episode { * we keep track of the current position in the history array. * * @tz_episodes: a list of thermal mitigation episodes + * @tz: thermal zone this object belongs to * @trips_crossed: an array of trip points crossed by id * @nr_trips: the number of trip points currently being crossed */ struct tz_debugfs { struct list_head tz_episodes; + struct thermal_zone_device *tz; int *trips_crossed; int nr_trips; }; @@ -716,8 +718,7 @@ void thermal_debug_update_temp(struct thermal_zone_device *tz) static void *tze_seq_start(struct seq_file *s, loff_t *pos) { - struct thermal_zone_device *tz = s->private; - struct thermal_debugfs *thermal_dbg = tz->debugfs; + struct thermal_debugfs *thermal_dbg = s->private; struct tz_debugfs *tz_dbg = &thermal_dbg->tz_dbg; mutex_lock(&thermal_dbg->lock); @@ -727,8 +728,7 @@ static void *tze_seq_start(struct seq_file *s, loff_t *pos) static void *tze_seq_next(struct seq_file *s, void *v, loff_t *pos) { - struct thermal_zone_device *tz = s->private; - struct thermal_debugfs *thermal_dbg = tz->debugfs; + struct thermal_debugfs *thermal_dbg = s->private; struct tz_debugfs *tz_dbg = &thermal_dbg->tz_dbg; return seq_list_next(v, &tz_dbg->tz_episodes, pos); @@ -736,15 +736,15 @@ static void *tze_seq_next(struct seq_file *s, void *v, loff_t *pos) static void tze_seq_stop(struct seq_file *s, void *v) { - struct thermal_zone_device *tz = s->private; - struct thermal_debugfs *thermal_dbg = tz->debugfs; + struct thermal_debugfs *thermal_dbg = s->private; mutex_unlock(&thermal_dbg->lock); } static int tze_seq_show(struct seq_file *s, void *v) { - struct thermal_zone_device *tz = s->private; + struct thermal_debugfs *thermal_dbg = s->private; + struct thermal_zone_device *tz = thermal_dbg->tz_dbg.tz; struct thermal_trip *trip; struct tz_episode *tze; const char *type; @@ -810,6 +810,8 @@ void thermal_debug_tz_add(struct thermal_zone_device *tz) tz_dbg = &thermal_dbg->tz_dbg; + tz_dbg->tz = tz; + tz_dbg->trips_crossed = kzalloc(sizeof(int) * tz->num_trips, GFP_KERNEL); if (!tz_dbg->trips_crossed) { thermal_debugfs_remove_id(thermal_dbg); @@ -818,20 +820,30 @@ void thermal_debug_tz_add(struct thermal_zone_device *tz) INIT_LIST_HEAD(&tz_dbg->tz_episodes); - debugfs_create_file("mitigations", 0400, thermal_dbg->d_top, tz, &tze_fops); + debugfs_create_file("mitigations", 0400, thermal_dbg->d_top, + thermal_dbg, &tze_fops); tz->debugfs = thermal_dbg; } void thermal_debug_tz_remove(struct thermal_zone_device *tz) { - struct thermal_debugfs *thermal_dbg = tz->debugfs; + struct thermal_debugfs *thermal_dbg; struct tz_episode *tze, *tmp; struct tz_debugfs *tz_dbg; int *trips_crossed; - if (!thermal_dbg) + mutex_lock(&tz->lock); + + thermal_dbg = tz->debugfs; + if (!thermal_dbg) { + mutex_unlock(&tz->lock); return; + } + + tz->debugfs = NULL; + + mutex_unlock(&tz->lock); tz_dbg = &thermal_dbg->tz_dbg; @@ -844,8 +856,6 @@ void thermal_debug_tz_remove(struct thermal_zone_device *tz) kfree(tze); } - tz->debugfs = NULL; - mutex_unlock(&thermal_dbg->lock); thermal_debugfs_remove_id(thermal_dbg); From 4b911a9690d72641879ea6d13cce1de31d346d79 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 23 Apr 2024 19:35:49 -0700 Subject: [PATCH 418/606] nsh: Restore skb->{protocol,data,mac_header} for outer header in nsh_gso_segment(). syzbot triggered various splats (see [0] and links) by a crafted GSO packet of VIRTIO_NET_HDR_GSO_UDP layering the following protocols: ETH_P_8021AD + ETH_P_NSH + ETH_P_IPV6 + IPPROTO_UDP NSH can encapsulate IPv4, IPv6, Ethernet, NSH, and MPLS. As the inner protocol can be Ethernet, NSH GSO handler, nsh_gso_segment(), calls skb_mac_gso_segment() to invoke inner protocol GSO handlers. nsh_gso_segment() does the following for the original skb before calling skb_mac_gso_segment() 1. reset skb->network_header 2. save the original skb->{mac_heaeder,mac_len} in a local variable 3. pull the NSH header 4. resets skb->mac_header 5. set up skb->mac_len and skb->protocol for the inner protocol. and does the following for the segmented skb 6. set ntohs(ETH_P_NSH) to skb->protocol 7. push the NSH header 8. restore skb->mac_header 9. set skb->mac_header + mac_len to skb->network_header 10. restore skb->mac_len There are two problems in 6-7 and 8-9. (a) After 6 & 7, skb->data points to the NSH header, so the outer header (ETH_P_8021AD in this case) is stripped when skb is sent out of netdev. Also, if NSH is encapsulated by NSH + Ethernet (so NSH-Ethernet-NSH), skb_pull() in the first nsh_gso_segment() will make skb->data point to the middle of the outer NSH or Ethernet header because the Ethernet header is not pulled by the second nsh_gso_segment(). (b) While restoring skb->{mac_header,network_header} in 8 & 9, nsh_gso_segment() does not assume that the data in the linear buffer is shifted. However, udp6_ufo_fragment() could shift the data and change skb->mac_header accordingly as demonstrated by syzbot. If this happens, even the restored skb->mac_header points to the middle of the outer header. It seems nsh_gso_segment() has never worked with outer headers so far. At the end of nsh_gso_segment(), the outer header must be restored for the segmented skb, instead of the NSH header. To do that, let's calculate the outer header position relatively from the inner header and set skb->{data,mac_header,protocol} properly. [0]: BUG: KMSAN: uninit-value in ipvlan_process_outbound drivers/net/ipvlan/ipvlan_core.c:524 [inline] BUG: KMSAN: uninit-value in ipvlan_xmit_mode_l3 drivers/net/ipvlan/ipvlan_core.c:602 [inline] BUG: KMSAN: uninit-value in ipvlan_queue_xmit+0xf44/0x16b0 drivers/net/ipvlan/ipvlan_core.c:668 ipvlan_process_outbound drivers/net/ipvlan/ipvlan_core.c:524 [inline] ipvlan_xmit_mode_l3 drivers/net/ipvlan/ipvlan_core.c:602 [inline] ipvlan_queue_xmit+0xf44/0x16b0 drivers/net/ipvlan/ipvlan_core.c:668 ipvlan_start_xmit+0x5c/0x1a0 drivers/net/ipvlan/ipvlan_main.c:222 __netdev_start_xmit include/linux/netdevice.h:4989 [inline] netdev_start_xmit include/linux/netdevice.h:5003 [inline] xmit_one net/core/dev.c:3547 [inline] dev_hard_start_xmit+0x244/0xa10 net/core/dev.c:3563 __dev_queue_xmit+0x33ed/0x51c0 net/core/dev.c:4351 dev_queue_xmit include/linux/netdevice.h:3171 [inline] packet_xmit+0x9c/0x6b0 net/packet/af_packet.c:276 packet_snd net/packet/af_packet.c:3081 [inline] packet_sendmsg+0x8aef/0x9f10 net/packet/af_packet.c:3113 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] __sys_sendto+0x735/0xa10 net/socket.c:2191 __do_sys_sendto net/socket.c:2203 [inline] __se_sys_sendto net/socket.c:2199 [inline] __x64_sys_sendto+0x125/0x1c0 net/socket.c:2199 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcf/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b Uninit was created at: slab_post_alloc_hook mm/slub.c:3819 [inline] slab_alloc_node mm/slub.c:3860 [inline] __do_kmalloc_node mm/slub.c:3980 [inline] __kmalloc_node_track_caller+0x705/0x1000 mm/slub.c:4001 kmalloc_reserve+0x249/0x4a0 net/core/skbuff.c:582 __alloc_skb+0x352/0x790 net/core/skbuff.c:651 skb_segment+0x20aa/0x7080 net/core/skbuff.c:4647 udp6_ufo_fragment+0xcab/0x1150 net/ipv6/udp_offload.c:109 ipv6_gso_segment+0x14be/0x2ca0 net/ipv6/ip6_offload.c:152 skb_mac_gso_segment+0x3e8/0x760 net/core/gso.c:53 nsh_gso_segment+0x6f4/0xf70 net/nsh/nsh.c:108 skb_mac_gso_segment+0x3e8/0x760 net/core/gso.c:53 __skb_gso_segment+0x4b0/0x730 net/core/gso.c:124 skb_gso_segment include/net/gso.h:83 [inline] validate_xmit_skb+0x107f/0x1930 net/core/dev.c:3628 __dev_queue_xmit+0x1f28/0x51c0 net/core/dev.c:4343 dev_queue_xmit include/linux/netdevice.h:3171 [inline] packet_xmit+0x9c/0x6b0 net/packet/af_packet.c:276 packet_snd net/packet/af_packet.c:3081 [inline] packet_sendmsg+0x8aef/0x9f10 net/packet/af_packet.c:3113 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] __sys_sendto+0x735/0xa10 net/socket.c:2191 __do_sys_sendto net/socket.c:2203 [inline] __se_sys_sendto net/socket.c:2199 [inline] __x64_sys_sendto+0x125/0x1c0 net/socket.c:2199 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcf/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b CPU: 1 PID: 5101 Comm: syz-executor421 Not tainted 6.8.0-rc5-syzkaller-00297-gf2e367d6ad3b #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/25/2024 Fixes: c411ed854584 ("nsh: add GSO support") Reported-and-tested-by: syzbot+42a0dc856239de4de60e@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=42a0dc856239de4de60e Reported-and-tested-by: syzbot+c298c9f0e46a3c86332b@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=c298c9f0e46a3c86332b Link: https://lore.kernel.org/netdev/20240415222041.18537-1-kuniyu@amazon.com/ Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240424023549.21862-1-kuniyu@amazon.com Signed-off-by: Paolo Abeni --- net/nsh/nsh.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/net/nsh/nsh.c b/net/nsh/nsh.c index f4a38bd6a7e04..bfb7758063f31 100644 --- a/net/nsh/nsh.c +++ b/net/nsh/nsh.c @@ -77,13 +77,15 @@ EXPORT_SYMBOL_GPL(nsh_pop); static struct sk_buff *nsh_gso_segment(struct sk_buff *skb, netdev_features_t features) { + unsigned int outer_hlen, mac_len, nsh_len; struct sk_buff *segs = ERR_PTR(-EINVAL); u16 mac_offset = skb->mac_header; - unsigned int nsh_len, mac_len; - __be16 proto; + __be16 outer_proto, proto; skb_reset_network_header(skb); + outer_proto = skb->protocol; + outer_hlen = skb_mac_header_len(skb); mac_len = skb->mac_len; if (unlikely(!pskb_may_pull(skb, NSH_BASE_HDR_LEN))) @@ -113,10 +115,10 @@ static struct sk_buff *nsh_gso_segment(struct sk_buff *skb, } for (skb = segs; skb; skb = skb->next) { - skb->protocol = htons(ETH_P_NSH); - __skb_push(skb, nsh_len); - skb->mac_header = mac_offset; - skb->network_header = skb->mac_header + mac_len; + skb->protocol = outer_proto; + __skb_push(skb, nsh_len + outer_hlen); + skb_reset_mac_header(skb); + skb_set_network_header(skb, outer_hlen); skb->mac_len = mac_len; } From 42f853b42899d9b445763b55c3c8adc72be0f0e1 Mon Sep 17 00:00:00 2001 From: David Bauer Date: Wed, 24 Apr 2024 19:11:10 +0200 Subject: [PATCH 419/606] net l2tp: drop flow hash on forward Drop the flow-hash of the skb when forwarding to the L2TP netdev. This avoids the L2TP qdisc from using the flow-hash from the outer packet, which is identical for every flow within the tunnel. This does not affect every platform but is specific for the ethernet driver. It depends on the platform including L4 information in the flow-hash. One such example is the Mediatek Filogic MT798x family of networking processors. Fixes: d9e31d17ceba ("l2tp: Add L2TP ethernet pseudowire support") Acked-by: James Chapman Signed-off-by: David Bauer Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240424171110.13701-1-mail@david-bauer.net Signed-off-by: Paolo Abeni --- net/l2tp/l2tp_eth.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index 39e487ccc4688..8ba00ad433c21 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -127,6 +127,9 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, /* checksums verified by L2TP */ skb->ip_summed = CHECKSUM_NONE; + /* drop outer flow-hash */ + skb_clear_hash(skb); + skb_dst_drop(skb); nf_reset_ct(skb); From c97f59e276d4e93480f29a70accbd0d7273cf3f5 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 26 Apr 2024 12:15:15 +0100 Subject: [PATCH 420/606] netfs: Fix the pre-flush when appending to a file in writethrough mode In netfs_perform_write(), when the file is marked NETFS_ICTX_WRITETHROUGH or O_*SYNC or RWF_*SYNC was specified, write-through caching is performed on a buffered file. When setting up for write-through, we flush any conflicting writes in the region and wait for the write to complete, failing if there's a write error to return. The issue arises if we're writing at or above the EOF position because we skip the flush and - more importantly - the wait. This becomes a problem if there's a partial folio at the end of the file that is being written out and we want to make a write to it too. Both the already-running write and the write we start both want to clear the writeback mark, but whoever is second causes a warning looking something like: ------------[ cut here ]------------ R=00000012: folio 11 is not under writeback WARNING: CPU: 34 PID: 654 at fs/netfs/write_collect.c:105 ... CPU: 34 PID: 654 Comm: kworker/u386:27 Tainted: G S ... ... Workqueue: events_unbound netfs_write_collection_worker ... RIP: 0010:netfs_writeback_lookup_folio Fix this by making the flush-and-wait unconditional. It will do nothing if there are no folios in the pagecache and will return quickly if there are no folios in the region specified. Further, move the WBC attachment above the flush call as the flush is going to attach a WBC and detach it again if it is not present - and since we need one anyway we might as well share it. Fixes: 41d8e7673a77 ("netfs: Implement a write-through caching option") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202404161031.468b84f-oliver.sang@intel.com Signed-off-by: David Howells Link: https://lore.kernel.org/r/2150448.1714130115@warthog.procyon.org.uk Reviewed-by: Jeffrey Layton cc: Eric Van Hensbergen cc: Latchesar Ionkov cc: Dominique Martinet cc: Christian Schoenebeck cc: Marc Dionne cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org cc: linux-mm@kvack.org cc: v9fs@lists.linux.dev cc: linux-afs@lists.infradead.org cc: linux-cifs@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/buffered_write.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c index 8f13ca8fbc74d..267b622d923b1 100644 --- a/fs/netfs/buffered_write.c +++ b/fs/netfs/buffered_write.c @@ -172,15 +172,14 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, if (unlikely(test_bit(NETFS_ICTX_WRITETHROUGH, &ctx->flags) || iocb->ki_flags & (IOCB_DSYNC | IOCB_SYNC)) ) { - if (pos < i_size_read(inode)) { - ret = filemap_write_and_wait_range(mapping, pos, pos + iter->count); - if (ret < 0) { - goto out; - } - } - wbc_attach_fdatawrite_inode(&wbc, mapping->host); + ret = filemap_write_and_wait_range(mapping, pos, pos + iter->count); + if (ret < 0) { + wbc_detach_inode(&wbc); + goto out; + } + wreq = netfs_begin_writethrough(iocb, iter->count); if (IS_ERR(wreq)) { wbc_detach_inode(&wbc); From d351eb0ab04c3e8109895fc33250cebbce9c11da Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 26 Apr 2024 11:10:10 +0200 Subject: [PATCH 421/606] thermal/debugfs: Prevent use-after-free from occurring after cdev removal Since thermal_debug_cdev_remove() does not run under cdev->lock, it can run in parallel with thermal_debug_cdev_state_update() and it may free the struct thermal_debugfs object used by the latter after it has been checked against NULL. If that happens, thermal_debug_cdev_state_update() will access memory that has been freed already causing the kernel to crash. Address this by using cdev->lock in thermal_debug_cdev_remove() around the cdev->debugfs value check (in case the same cdev is removed at the same time in two different threads) and its reset to NULL. Fixes: 755113d76786 ("thermal/debugfs: Add thermal cooling device debugfs information") Cc :6.8+ # 6.8+ Signed-off-by: Rafael J. Wysocki Reviewed-by: Lukasz Luba --- drivers/thermal/thermal_debugfs.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/thermal/thermal_debugfs.c b/drivers/thermal/thermal_debugfs.c index 1fe2914a8853b..5693cc8b231aa 100644 --- a/drivers/thermal/thermal_debugfs.c +++ b/drivers/thermal/thermal_debugfs.c @@ -505,15 +505,23 @@ void thermal_debug_cdev_add(struct thermal_cooling_device *cdev) */ void thermal_debug_cdev_remove(struct thermal_cooling_device *cdev) { - struct thermal_debugfs *thermal_dbg = cdev->debugfs; + struct thermal_debugfs *thermal_dbg; - if (!thermal_dbg) + mutex_lock(&cdev->lock); + + thermal_dbg = cdev->debugfs; + if (!thermal_dbg) { + mutex_unlock(&cdev->lock); return; + } + + cdev->debugfs = NULL; + + mutex_unlock(&cdev->lock); mutex_lock(&thermal_dbg->lock); thermal_debugfs_cdev_clear(&thermal_dbg->cdev_dbg); - cdev->debugfs = NULL; mutex_unlock(&thermal_dbg->lock); From 5e1a99cf22a65bd91cb43c5380cc14a44b85ad2a Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Tue, 23 Apr 2024 07:57:49 +0200 Subject: [PATCH 422/606] s390/3270: Fix buffer assignment Since commit 1b2ac5a6d61f ("s390/3270: use new address translation helpers") rq->buffer is passed unconditionally to virt_to_dma32(). The 3270 driver allocates requests without buffer, so the value passed to virt_to_dma32 might be NULL. Check for NULL before assigning. Fixes: 1b2ac5a6d61f ("s390/3270: use new address translation helpers") Reviewed-by: Heiko Carstens Signed-off-by: Sven Schnelle Signed-off-by: Alexander Gordeev --- drivers/s390/char/raw3270.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/s390/char/raw3270.c b/drivers/s390/char/raw3270.c index 37173cb0f5f5a..c57694be9bd32 100644 --- a/drivers/s390/char/raw3270.c +++ b/drivers/s390/char/raw3270.c @@ -162,7 +162,8 @@ struct raw3270_request *raw3270_request_alloc(size_t size) /* * Setup ccw. */ - rq->ccw.cda = virt_to_dma32(rq->buffer); + if (rq->buffer) + rq->ccw.cda = virt_to_dma32(rq->buffer); rq->ccw.flags = CCW_FLAG_SLI; return rq; @@ -188,7 +189,8 @@ int raw3270_request_reset(struct raw3270_request *rq) return -EBUSY; rq->ccw.cmd_code = 0; rq->ccw.count = 0; - rq->ccw.cda = virt_to_dma32(rq->buffer); + if (rq->buffer) + rq->ccw.cda = virt_to_dma32(rq->buffer); rq->ccw.flags = CCW_FLAG_SLI; rq->rescnt = 0; rq->rc = 0; From b961ec10b9f9719987470236feb50c967db5a652 Mon Sep 17 00:00:00 2001 From: Jens Remus Date: Tue, 23 Apr 2024 17:35:52 +0200 Subject: [PATCH 423/606] s390/vdso: Add CFI for RA register to asm macro vdso_func The return-address (RA) register r14 is specified as volatile in the s390x ELF ABI [1]. Nevertheless proper CFI directives must be provided for an unwinder to restore the return address, if the RA register value is changed from its value at function entry, as it is the case. [1]: s390x ELF ABI, https://github.com/IBM/s390x-abi/releases Fixes: 4bff8cb54502 ("s390: convert to GENERIC_VDSO") Signed-off-by: Jens Remus Acked-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/dwarf.h | 1 + arch/s390/kernel/vdso64/vdso_user_wrapper.S | 2 ++ 2 files changed, 3 insertions(+) diff --git a/arch/s390/include/asm/dwarf.h b/arch/s390/include/asm/dwarf.h index 4f21ae561e4dd..390906b8e386e 100644 --- a/arch/s390/include/asm/dwarf.h +++ b/arch/s390/include/asm/dwarf.h @@ -9,6 +9,7 @@ #define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset #define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset #define CFI_RESTORE .cfi_restore +#define CFI_REL_OFFSET .cfi_rel_offset #ifdef CONFIG_AS_CFI_VAL_OFFSET #define CFI_VAL_OFFSET .cfi_val_offset diff --git a/arch/s390/kernel/vdso64/vdso_user_wrapper.S b/arch/s390/kernel/vdso64/vdso_user_wrapper.S index 57f62596e53b9..85247ef5a41b8 100644 --- a/arch/s390/kernel/vdso64/vdso_user_wrapper.S +++ b/arch/s390/kernel/vdso64/vdso_user_wrapper.S @@ -24,8 +24,10 @@ __kernel_\func: CFI_DEF_CFA_OFFSET (STACK_FRAME_OVERHEAD + WRAPPER_FRAME_SIZE) CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD stg %r14,STACK_FRAME_OVERHEAD(%r15) + CFI_REL_OFFSET 14, STACK_FRAME_OVERHEAD brasl %r14,__s390_vdso_\func lg %r14,STACK_FRAME_OVERHEAD(%r15) + CFI_RESTORE 14 aghi %r15,WRAPPER_FRAME_SIZE CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD CFI_RESTORE 15 From 66e13b615a0ce76b785d780ecc9776ba71983629 Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Wed, 24 Apr 2024 10:02:08 +0000 Subject: [PATCH 424/606] bpf: verifier: prevent userspace memory access With BPF_PROBE_MEM, BPF allows de-referencing an untrusted pointer. To thwart invalid memory accesses, the JITs add an exception table entry for all such accesses. But in case the src_reg + offset is a userspace address, the BPF program might read that memory if the user has mapped it. Make the verifier add guard instructions around such memory accesses and skip the load if the address falls into the userspace region. The JITs need to implement bpf_arch_uaddress_limit() to define where the userspace addresses end for that architecture or TASK_SIZE is taken as default. The implementation is as follows: REG_AX = SRC_REG if(offset) REG_AX += offset; REG_AX >>= 32; if (REG_AX <= (uaddress_limit >> 32)) DST_REG = 0; else DST_REG = *(size *)(SRC_REG + offset); Comparing just the upper 32 bits of the load address with the upper 32 bits of uaddress_limit implies that the values are being aligned down to a 4GB boundary before comparison. The above means that all loads with address <= uaddress_limit + 4GB are skipped. This is acceptable because there is a large hole (much larger than 4GB) between userspace and kernel space memory, therefore a correctly functioning BPF program should not access this 4GB memory above the userspace. Let's analyze what this patch does to the following fentry program dereferencing an untrusted pointer: SEC("fentry/tcp_v4_connect") int BPF_PROG(fentry_tcp_v4_connect, struct sock *sk) { *(volatile long *)sk; return 0; } BPF Program before | BPF Program after ------------------ | ----------------- 0: (79) r1 = *(u64 *)(r1 +0) 0: (79) r1 = *(u64 *)(r1 +0) ----------------------------------------------------------------------- 1: (79) r1 = *(u64 *)(r1 +0) --\ 1: (bf) r11 = r1 ----------------------------\ \ 2: (77) r11 >>= 32 2: (b7) r0 = 0 \ \ 3: (b5) if r11 <= 0x8000 goto pc+2 3: (95) exit \ \-> 4: (79) r1 = *(u64 *)(r1 +0) \ 5: (05) goto pc+1 \ 6: (b7) r1 = 0 \-------------------------------------- 7: (b7) r0 = 0 8: (95) exit As you can see from above, in the best case (off=0), 5 extra instructions are emitted. Now, we analyze the same program after it has gone through the JITs of ARM64 and RISC-V architectures. We follow the single load instruction that has the untrusted pointer and see what instrumentation has been added around it. x86-64 JIT ========== JIT's Instrumentation (upstream) --------------------- 0: nopl 0x0(%rax,%rax,1) 5: xchg %ax,%ax 7: push %rbp 8: mov %rsp,%rbp b: mov 0x0(%rdi),%rdi --------------------------------- f: movabs $0x800000000000,%r11 19: cmp %r11,%rdi 1c: jb 0x000000000000002a 1e: mov %rdi,%r11 21: add $0x0,%r11 28: jae 0x000000000000002e 2a: xor %edi,%edi 2c: jmp 0x0000000000000032 2e: mov 0x0(%rdi),%rdi --------------------------------- 32: xor %eax,%eax 34: leave 35: ret The x86-64 JIT already emits some instructions to protect against user memory access. This patch doesn't make any changes for the x86-64 JIT. ARM64 JIT ========= No Intrumentation Verifier's Instrumentation (upstream) (This patch) ----------------- -------------------------- 0: add x9, x30, #0x0 0: add x9, x30, #0x0 4: nop 4: nop 8: paciasp 8: paciasp c: stp x29, x30, [sp, #-16]! c: stp x29, x30, [sp, #-16]! 10: mov x29, sp 10: mov x29, sp 14: stp x19, x20, [sp, #-16]! 14: stp x19, x20, [sp, #-16]! 18: stp x21, x22, [sp, #-16]! 18: stp x21, x22, [sp, #-16]! 1c: stp x25, x26, [sp, #-16]! 1c: stp x25, x26, [sp, #-16]! 20: stp x27, x28, [sp, #-16]! 20: stp x27, x28, [sp, #-16]! 24: mov x25, sp 24: mov x25, sp 28: mov x26, #0x0 28: mov x26, #0x0 2c: sub x27, x25, #0x0 2c: sub x27, x25, #0x0 30: sub sp, sp, #0x0 30: sub sp, sp, #0x0 34: ldr x0, [x0] 34: ldr x0, [x0] -------------------------------------------------------------------------------- 38: ldr x0, [x0] ----------\ 38: add x9, x0, #0x0 -----------------------------------\\ 3c: lsr x9, x9, #32 3c: mov x7, #0x0 \\ 40: cmp x9, #0x10, lsl #12 40: mov sp, sp \\ 44: b.ls 0x0000000000000050 44: ldp x27, x28, [sp], #16 \\--> 48: ldr x0, [x0] 48: ldp x25, x26, [sp], #16 \ 4c: b 0x0000000000000054 4c: ldp x21, x22, [sp], #16 \ 50: mov x0, #0x0 50: ldp x19, x20, [sp], #16 \--------------------------------------- 54: ldp x29, x30, [sp], #16 54: mov x7, #0x0 58: add x0, x7, #0x0 58: mov sp, sp 5c: autiasp 5c: ldp x27, x28, [sp], #16 60: ret 60: ldp x25, x26, [sp], #16 64: nop 64: ldp x21, x22, [sp], #16 68: ldr x10, 0x0000000000000070 68: ldp x19, x20, [sp], #16 6c: br x10 6c: ldp x29, x30, [sp], #16 70: add x0, x7, #0x0 74: autiasp 78: ret 7c: nop 80: ldr x10, 0x0000000000000088 84: br x10 There are 6 extra instructions added in ARM64 in the best case. This will become 7 in the worst case (off != 0). RISC-V JIT (RISCV_ISA_C Disabled) ========== No Intrumentation Verifier's Instrumentation (upstream) (This patch) ----------------- -------------------------- 0: nop 0: nop 4: nop 4: nop 8: li a6, 33 8: li a6, 33 c: addi sp, sp, -16 c: addi sp, sp, -16 10: sd s0, 8(sp) 10: sd s0, 8(sp) 14: addi s0, sp, 16 14: addi s0, sp, 16 18: ld a0, 0(a0) 18: ld a0, 0(a0) --------------------------------------------------------------- 1c: ld a0, 0(a0) --\ 1c: mv t0, a0 --------------------------\ \ 20: srli t0, t0, 32 20: li a5, 0 \ \ 24: lui t1, 4096 24: ld s0, 8(sp) \ \ 28: sext.w t1, t1 28: addi sp, sp, 16 \ \ 2c: bgeu t1, t0, 12 2c: sext.w a0, a5 \ \--> 30: ld a0, 0(a0) 30: ret \ 34: j 8 \ 38: li a0, 0 \------------------------------ 3c: li a5, 0 40: ld s0, 8(sp) 44: addi sp, sp, 16 48: sext.w a0, a5 4c: ret There are 7 extra instructions added in RISC-V. Fixes: 800834285361 ("bpf, arm64: Add BPF exception tables") Reported-by: Breno Leitao Suggested-by: Alexei Starovoitov Acked-by: Ilya Leoshkevich Signed-off-by: Puranjay Mohan Link: https://lore.kernel.org/r/20240424100210.11982-2-puranjay@kernel.org Signed-off-by: Alexei Starovoitov --- arch/x86/net/bpf_jit_comp.c | 6 ++++++ include/linux/filter.h | 1 + kernel/bpf/core.c | 9 +++++++++ kernel/bpf/verifier.c | 30 ++++++++++++++++++++++++++++++ 4 files changed, 46 insertions(+) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index df5fac428408f..b520b66ad14b1 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -3473,3 +3473,9 @@ bool bpf_jit_supports_ptr_xchg(void) { return true; } + +/* x86-64 JIT emits its own code to filter user addresses so return 0 here */ +u64 bpf_arch_uaddress_limit(void) +{ + return 0; +} diff --git a/include/linux/filter.h b/include/linux/filter.h index c99bc3df2d28e..219ee7a768744 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -963,6 +963,7 @@ bool bpf_jit_supports_far_kfunc_call(void); bool bpf_jit_supports_exceptions(void); bool bpf_jit_supports_ptr_xchg(void); bool bpf_jit_supports_arena(void); +u64 bpf_arch_uaddress_limit(void); void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie); bool bpf_helper_changes_pkt_data(void *func); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 696bc55de8e82..1ea5ce5bb5993 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2942,6 +2942,15 @@ bool __weak bpf_jit_supports_arena(void) return false; } +u64 __weak bpf_arch_uaddress_limit(void) +{ +#if defined(CONFIG_64BIT) && defined(CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE) + return TASK_SIZE; +#else + return 0; +#endif +} + /* Return TRUE if the JIT backend satisfies the following two conditions: * 1) JIT backend supports atomic_xchg() on pointer-sized words. * 2) Under the specific arch, the implementation of xchg() is the same diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index aa24beb65393c..cb7ad1f795e18 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -19675,6 +19675,36 @@ static int do_misc_fixups(struct bpf_verifier_env *env) goto next_insn; } + /* Make it impossible to de-reference a userspace address */ + if (BPF_CLASS(insn->code) == BPF_LDX && + (BPF_MODE(insn->code) == BPF_PROBE_MEM || + BPF_MODE(insn->code) == BPF_PROBE_MEMSX)) { + struct bpf_insn *patch = &insn_buf[0]; + u64 uaddress_limit = bpf_arch_uaddress_limit(); + + if (!uaddress_limit) + goto next_insn; + + *patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg); + if (insn->off) + *patch++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_AX, insn->off); + *patch++ = BPF_ALU64_IMM(BPF_RSH, BPF_REG_AX, 32); + *patch++ = BPF_JMP_IMM(BPF_JLE, BPF_REG_AX, uaddress_limit >> 32, 2); + *patch++ = *insn; + *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1); + *patch++ = BPF_MOV64_IMM(insn->dst_reg, 0); + + cnt = patch - insn_buf; + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; + goto next_insn; + } + /* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */ if (BPF_CLASS(insn->code) == BPF_LD && (BPF_MODE(insn->code) == BPF_ABS || From b599d7d26d6ad1fc9975218574bc2ca6d0293cfd Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Wed, 24 Apr 2024 10:02:09 +0000 Subject: [PATCH 425/606] bpf, x86: Fix PROBE_MEM runtime load check When a load is marked PROBE_MEM - e.g. due to PTR_UNTRUSTED access - the address being loaded from is not necessarily valid. The BPF jit sets up exception handlers for each such load which catch page faults and 0 out the destination register. If the address for the load is outside kernel address space, the load will escape the exception handling and crash the kernel. To prevent this from happening, the emits some instruction to verify that addr is > end of userspace addresses. x86 has a legacy vsyscall ABI where a page at address 0xffffffffff600000 is mapped with user accessible permissions. The addresses in this page are considered userspace addresses by the fault handler. Therefore, a BPF program accessing this page will crash the kernel. This patch fixes the runtime checks to also check that the PROBE_MEM address is below VSYSCALL_ADDR. Example BPF program: SEC("fentry/tcp_v4_connect") int BPF_PROG(fentry_tcp_v4_connect, struct sock *sk) { *(volatile unsigned long *)&sk->sk_tsq_flags; return 0; } BPF Assembly: 0: (79) r1 = *(u64 *)(r1 +0) 1: (79) r1 = *(u64 *)(r1 +344) 2: (b7) r0 = 0 3: (95) exit x86-64 JIT ========== BEFORE AFTER ------ ----- 0: nopl 0x0(%rax,%rax,1) 0: nopl 0x0(%rax,%rax,1) 5: xchg %ax,%ax 5: xchg %ax,%ax 7: push %rbp 7: push %rbp 8: mov %rsp,%rbp 8: mov %rsp,%rbp b: mov 0x0(%rdi),%rdi b: mov 0x0(%rdi),%rdi ------------------------------------------------------------------------------- f: movabs $0x100000000000000,%r11 f: movabs $0xffffffffff600000,%r10 19: add $0x2a0,%rdi 19: mov %rdi,%r11 20: cmp %r11,%rdi 1c: add $0x2a0,%r11 23: jae 0x0000000000000029 23: sub %r10,%r11 25: xor %edi,%edi 26: movabs $0x100000000a00000,%r10 27: jmp 0x000000000000002d 30: cmp %r10,%r11 29: mov 0x0(%rdi),%rdi 33: ja 0x0000000000000039 --------------------------------\ 35: xor %edi,%edi 2d: xor %eax,%eax \ 37: jmp 0x0000000000000040 2f: leave \ 39: mov 0x2a0(%rdi),%rdi 30: ret \-------------------------------------------- 40: xor %eax,%eax 42: leave 43: ret Signed-off-by: Puranjay Mohan Link: https://lore.kernel.org/r/20240424100210.11982-3-puranjay@kernel.org Signed-off-by: Alexei Starovoitov --- arch/x86/net/bpf_jit_comp.c | 57 ++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 32 deletions(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index b520b66ad14b1..59cbc94b6e690 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1807,36 +1807,41 @@ st: if (is_imm8(insn->off)) if (BPF_MODE(insn->code) == BPF_PROBE_MEM || BPF_MODE(insn->code) == BPF_PROBE_MEMSX) { /* Conservatively check that src_reg + insn->off is a kernel address: - * src_reg + insn->off >= TASK_SIZE_MAX + PAGE_SIZE - * src_reg is used as scratch for src_reg += insn->off and restored - * after emit_ldx if necessary + * src_reg + insn->off > TASK_SIZE_MAX + PAGE_SIZE + * and + * src_reg + insn->off < VSYSCALL_ADDR */ - u64 limit = TASK_SIZE_MAX + PAGE_SIZE; + u64 limit = TASK_SIZE_MAX + PAGE_SIZE - VSYSCALL_ADDR; u8 *end_of_jmp; - /* At end of these emitted checks, insn->off will have been added - * to src_reg, so no need to do relative load with insn->off offset - */ - insn_off = 0; + /* movabsq r10, VSYSCALL_ADDR */ + emit_mov_imm64(&prog, BPF_REG_AX, (long)VSYSCALL_ADDR >> 32, + (u32)(long)VSYSCALL_ADDR); - /* movabsq r11, limit */ - EMIT2(add_1mod(0x48, AUX_REG), add_1reg(0xB8, AUX_REG)); - EMIT((u32)limit, 4); - EMIT(limit >> 32, 4); + /* mov src_reg, r11 */ + EMIT_mov(AUX_REG, src_reg); if (insn->off) { - /* add src_reg, insn->off */ - maybe_emit_1mod(&prog, src_reg, true); - EMIT2_off32(0x81, add_1reg(0xC0, src_reg), insn->off); + /* add r11, insn->off */ + maybe_emit_1mod(&prog, AUX_REG, true); + EMIT2_off32(0x81, add_1reg(0xC0, AUX_REG), insn->off); } - /* cmp src_reg, r11 */ - maybe_emit_mod(&prog, src_reg, AUX_REG, true); - EMIT2(0x39, add_2reg(0xC0, src_reg, AUX_REG)); + /* sub r11, r10 */ + maybe_emit_mod(&prog, AUX_REG, BPF_REG_AX, true); + EMIT2(0x29, add_2reg(0xC0, AUX_REG, BPF_REG_AX)); + + /* movabsq r10, limit */ + emit_mov_imm64(&prog, BPF_REG_AX, (long)limit >> 32, + (u32)(long)limit); + + /* cmp r10, r11 */ + maybe_emit_mod(&prog, AUX_REG, BPF_REG_AX, true); + EMIT2(0x39, add_2reg(0xC0, AUX_REG, BPF_REG_AX)); - /* if unsigned '>=', goto load */ - EMIT2(X86_JAE, 0); + /* if unsigned '>', goto load */ + EMIT2(X86_JA, 0); end_of_jmp = prog; /* xor dst_reg, dst_reg */ @@ -1862,18 +1867,6 @@ st: if (is_imm8(insn->off)) /* populate jmp_offset for JMP above */ start_of_ldx[-1] = prog - start_of_ldx; - if (insn->off && src_reg != dst_reg) { - /* sub src_reg, insn->off - * Restore src_reg after "add src_reg, insn->off" in prev - * if statement. But if src_reg == dst_reg, emit_ldx - * above already clobbered src_reg, so no need to restore. - * If add src_reg, insn->off was unnecessary, no need to - * restore either. - */ - maybe_emit_1mod(&prog, src_reg, true); - EMIT2_off32(0x81, add_1reg(0xE8, src_reg), insn->off); - } - if (!bpf_prog->aux->extable) break; From 7cd6750d9a560fa69bb640a7280479d6a67999ad Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Wed, 24 Apr 2024 10:02:10 +0000 Subject: [PATCH 426/606] selftests/bpf: Test PROBE_MEM of VSYSCALL_ADDR on x86-64 The vsyscall is a legacy API for fast execution of system calls. It maps a page at address VSYSCALL_ADDR into the userspace program. This address is in the top 10MB of the address space: ffffffffff600000 - ffffffffff600fff | 4 kB | legacy vsyscall ABI The last commit fixes the x86-64 BPF JIT to skip accessing addresses in this memory region. Add this address to bpf_testmod_return_ptr() so we can make sure that it is fixed. After this change and without the previous commit, subprogs_extable selftest will crash the kernel. Signed-off-by: Puranjay Mohan Link: https://lore.kernel.org/r/20240424100210.11982-4-puranjay@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index 39ad96a18123f..edcd26106557b 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -205,6 +205,9 @@ __weak noinline struct file *bpf_testmod_return_ptr(int arg) case 5: return (void *)~(1ull << 30); /* trigger extable */ case 6: return &f; /* valid addr */ case 7: return (void *)((long)&f | 1); /* kernel tricks */ +#ifdef CONFIG_X86_64 + case 8: return (void *)VSYSCALL_ADDR; /* vsyscall page address */ +#endif default: return NULL; } } From 9c49085d69ec8ca4eea254d0f426676232549f84 Mon Sep 17 00:00:00 2001 From: Ben Zong-You Xie Date: Tue, 5 Mar 2024 20:05:01 +0800 Subject: [PATCH 427/606] perf riscv: Fix the warning due to the incompatible type In the 32-bit platform, the second argument of getline is expectd to be 'size_t *'(aka 'unsigned int *'), but line_sz is of type 'unsigned long *'. Therefore, declare line_sz as size_t. Signed-off-by: Ben Zong-You Xie Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20240305120501.1785084-3-ben717@andestech.com Signed-off-by: Palmer Dabbelt --- tools/perf/arch/riscv/util/header.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/arch/riscv/util/header.c b/tools/perf/arch/riscv/util/header.c index 4a41856938a88..1b29030021eeb 100644 --- a/tools/perf/arch/riscv/util/header.c +++ b/tools/perf/arch/riscv/util/header.c @@ -41,7 +41,7 @@ static char *_get_cpuid(void) char *mimpid = NULL; char *cpuid = NULL; int read; - unsigned long line_sz; + size_t line_sz; FILE *cpuinfo; cpuinfo = fopen(CPUINFO, "r"); From 49408400d683ae4f41e414dfcb615166cc93be5c Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Fri, 22 Mar 2024 14:47:28 +0100 Subject: [PATCH 428/606] RISC-V: selftests: cbo: Ensure asm operands match constraints, take 2 Commit 0de65288d75f ("RISC-V: selftests: cbo: Ensure asm operands match constraints") attempted to ensure MK_CBO() would always provide to a compile-time constant when given a constant, but cpu_to_le32() isn't necessarily going to do that. Switch to manually shifting the bytes, when needed, to finally get this right. Reported-by: Woodrow Shen Closes: https://lore.kernel.org/all/CABquHATcBTUwfLpd9sPObBgNobqQKEAZ2yxk+TWSpyO5xvpXpg@mail.gmail.com/ Fixes: a29e2a48afe3 ("RISC-V: selftests: Add CBO tests") Fixes: 0de65288d75f ("RISC-V: selftests: cbo: Ensure asm operands match constraints") Signed-off-by: Andrew Jones Link: https://lore.kernel.org/r/20240322134728.151255-2-ajones@ventanamicro.com Signed-off-by: Palmer Dabbelt --- tools/testing/selftests/riscv/hwprobe/cbo.c | 2 +- tools/testing/selftests/riscv/hwprobe/hwprobe.h | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/riscv/hwprobe/cbo.c b/tools/testing/selftests/riscv/hwprobe/cbo.c index c537d52fafc58..a40541bb7c7de 100644 --- a/tools/testing/selftests/riscv/hwprobe/cbo.c +++ b/tools/testing/selftests/riscv/hwprobe/cbo.c @@ -19,7 +19,7 @@ #include "hwprobe.h" #include "../../kselftest.h" -#define MK_CBO(fn) cpu_to_le32((fn) << 20 | 10 << 15 | 2 << 12 | 0 << 7 | 15) +#define MK_CBO(fn) le32_bswap((uint32_t)(fn) << 20 | 10 << 15 | 2 << 12 | 0 << 7 | 15) static char mem[4096] __aligned(4096) = { [0 ... 4095] = 0xa5 }; diff --git a/tools/testing/selftests/riscv/hwprobe/hwprobe.h b/tools/testing/selftests/riscv/hwprobe/hwprobe.h index e3fccb390c4dc..f3de970c32227 100644 --- a/tools/testing/selftests/riscv/hwprobe/hwprobe.h +++ b/tools/testing/selftests/riscv/hwprobe/hwprobe.h @@ -4,6 +4,16 @@ #include #include +#if __BYTE_ORDER == __BIG_ENDIAN +# define le32_bswap(_x) \ + ((((_x) & 0x000000ffU) << 24) | \ + (((_x) & 0x0000ff00U) << 8) | \ + (((_x) & 0x00ff0000U) >> 8) | \ + (((_x) & 0xff000000U) >> 24)) +#else +# define le32_bswap(_x) (_x) +#endif + /* * Rather than relying on having a new enough libc to define this, just do it * ourselves. This way we don't need to be coupled to a new-enough libc to From 6a30653b604aaad1bf0f2e74b068ceb8b6fc7aea Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 25 Apr 2024 09:39:32 +0100 Subject: [PATCH 429/606] Fix a potential infinite loop in extract_user_to_sg() Fix extract_user_to_sg() so that it will break out of the loop if iov_iter_extract_pages() returns 0 rather than looping around forever. [Note that I've included two fixes lines as the function got moved to a different file and renamed] Fixes: 85dd2c8ff368 ("netfs: Add a function to extract a UBUF or IOVEC into a BVEC iterator") Fixes: f5f82cd18732 ("Move netfs_extract_iter_to_sg() to lib/scatterlist.c") Signed-off-by: David Howells cc: Jeff Layton cc: Steve French cc: Herbert Xu cc: netfs@lists.linux.dev Link: https://lore.kernel.org/r/1967121.1714034372@warthog.procyon.org.uk Signed-off-by: Jakub Kicinski --- lib/scatterlist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 68b45c82c37a6..7bc2220fea805 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -1124,7 +1124,7 @@ static ssize_t extract_user_to_sg(struct iov_iter *iter, do { res = iov_iter_extract_pages(iter, &pages, maxsize, sg_max, extraction_flags, &off); - if (res < 0) + if (res <= 0) goto failed; len = res; From 91811a31b68d3765b3065f4bb6d7d6d84a7cfc9f Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 26 Apr 2024 08:44:08 +0200 Subject: [PATCH 430/606] i2c: smbus: fix NULL function pointer dereference Baruch reported an OOPS when using the designware controller as target only. Target-only modes break the assumption of one transfer function always being available. Fix this by always checking the pointer in __i2c_transfer. Reported-by: Baruch Siach Closes: https://lore.kernel.org/r/4269631780e5ba789cf1ae391eec1b959def7d99.1712761976.git.baruch@tkos.co.il Fixes: 4b1acc43331d ("i2c: core changes for slave support") [wsa: dropped the simplification in core-smbus to avoid theoretical regressions] Signed-off-by: Wolfram Sang Tested-by: Baruch Siach --- drivers/i2c/i2c-core-base.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index ff5c486a1dbb1..db0d1ac82910e 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -2200,13 +2200,18 @@ static int i2c_check_for_quirks(struct i2c_adapter *adap, struct i2c_msg *msgs, * Returns negative errno, else the number of messages executed. * * Adapter lock must be held when calling this function. No debug logging - * takes place. adap->algo->master_xfer existence isn't checked. + * takes place. */ int __i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) { unsigned long orig_jiffies; int ret, try; + if (!adap->algo->master_xfer) { + dev_dbg(&adap->dev, "I2C level transfers not supported\n"); + return -EOPNOTSUPP; + } + if (WARN_ON(!msgs || num < 1)) return -EINVAL; @@ -2273,11 +2278,6 @@ int i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) { int ret; - if (!adap->algo->master_xfer) { - dev_dbg(&adap->dev, "I2C level transfers not supported\n"); - return -EOPNOTSUPP; - } - /* REVISIT the fault reporting model here is weak: * * - When we get an error after receiving N bytes from a slave, From 2e5449f4f21a1b0bd9beec4c4b580eb1f9b9ed7f Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sat, 27 Apr 2024 15:27:58 +0900 Subject: [PATCH 431/606] profiling: Remove create_prof_cpu_mask(). create_prof_cpu_mask() is no longer used after commit 1f44a225777e ("s390: convert interrupt handling to use generic hardirq"). Signed-off-by: Tetsuo Handa Signed-off-by: Linus Torvalds --- include/linux/profile.h | 5 ----- kernel/profile.c | 43 ----------------------------------------- 2 files changed, 48 deletions(-) diff --git a/include/linux/profile.h b/include/linux/profile.h index 11db1ec516e27..04ae5ebcb637a 100644 --- a/include/linux/profile.h +++ b/include/linux/profile.h @@ -18,13 +18,8 @@ struct proc_dir_entry; struct notifier_block; #if defined(CONFIG_PROFILING) && defined(CONFIG_PROC_FS) -void create_prof_cpu_mask(void); int create_proc_profile(void); #else -static inline void create_prof_cpu_mask(void) -{ -} - static inline int create_proc_profile(void) { return 0; diff --git a/kernel/profile.c b/kernel/profile.c index 8a77769bc4b4c..2b775cc5c28f9 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -344,49 +344,6 @@ void profile_tick(int type) #include #include -static int prof_cpu_mask_proc_show(struct seq_file *m, void *v) -{ - seq_printf(m, "%*pb\n", cpumask_pr_args(prof_cpu_mask)); - return 0; -} - -static int prof_cpu_mask_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, prof_cpu_mask_proc_show, NULL); -} - -static ssize_t prof_cpu_mask_proc_write(struct file *file, - const char __user *buffer, size_t count, loff_t *pos) -{ - cpumask_var_t new_value; - int err; - - if (!zalloc_cpumask_var(&new_value, GFP_KERNEL)) - return -ENOMEM; - - err = cpumask_parse_user(buffer, count, new_value); - if (!err) { - cpumask_copy(prof_cpu_mask, new_value); - err = count; - } - free_cpumask_var(new_value); - return err; -} - -static const struct proc_ops prof_cpu_mask_proc_ops = { - .proc_open = prof_cpu_mask_proc_open, - .proc_read = seq_read, - .proc_lseek = seq_lseek, - .proc_release = single_release, - .proc_write = prof_cpu_mask_proc_write, -}; - -void create_prof_cpu_mask(void) -{ - /* create /proc/irq/prof_cpu_mask */ - proc_create("irq/prof_cpu_mask", 0600, NULL, &prof_cpu_mask_proc_ops); -} - /* * This function accesses profiling information. The returned data is * binary: the sampling step and the actual contents of the profile From 5097cbcb38e6e0d2627c9dde1985e91d2c9f880e Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 11 Apr 2024 16:39:05 +0200 Subject: [PATCH 432/606] sched/isolation: Prevent boot crash when the boot CPU is nohz_full Documentation/timers/no_hz.rst states that the "nohz_full=" mask must not include the boot CPU, which is no longer true after: 08ae95f4fd3b ("nohz_full: Allow the boot CPU to be nohz_full"). However after: aae17ebb53cd ("workqueue: Avoid using isolated cpus' timers on queue_delayed_work") the kernel will crash at boot time in this case; housekeeping_any_cpu() returns an invalid CPU number until smp_init() brings the first housekeeping CPU up. Change housekeeping_any_cpu() to check the result of cpumask_any_and() and return smp_processor_id() in this case. This is just the simple and backportable workaround which fixes the symptom, but smp_processor_id() at boot time should be safe at least for type == HK_TYPE_TIMER, this more or less matches the tick_do_timer_boot_cpu logic. There is no worry about cpu_down(); tick_nohz_cpu_down() will not allow to offline tick_do_timer_cpu (the 1st online housekeeping CPU). Fixes: aae17ebb53cd ("workqueue: Avoid using isolated cpus' timers on queue_delayed_work") Reported-by: Chris von Recklinghausen Signed-off-by: Oleg Nesterov Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Reviewed-by: Phil Auld Acked-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20240411143905.GA19288@redhat.com Closes: https://lore.kernel.org/all/20240402105847.GA24832@redhat.com/ --- Documentation/timers/no_hz.rst | 7 ++----- kernel/sched/isolation.c | 11 ++++++++++- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/Documentation/timers/no_hz.rst b/Documentation/timers/no_hz.rst index f8786be15183c..7fe8ef9718d8e 100644 --- a/Documentation/timers/no_hz.rst +++ b/Documentation/timers/no_hz.rst @@ -129,11 +129,8 @@ adaptive-tick CPUs: At least one non-adaptive-tick CPU must remain online to handle timekeeping tasks in order to ensure that system calls like gettimeofday() returns accurate values on adaptive-tick CPUs. (This is not an issue for CONFIG_NO_HZ_IDLE=y because there are no running -user processes to observe slight drifts in clock rate.) Therefore, the -boot CPU is prohibited from entering adaptive-ticks mode. Specifying a -"nohz_full=" mask that includes the boot CPU will result in a boot-time -error message, and the boot CPU will be removed from the mask. Note that -this means that your system must have at least two CPUs in order for +user processes to observe slight drifts in clock rate.) Note that this +means that your system must have at least two CPUs in order for CONFIG_NO_HZ_FULL=y to do anything for you. Finally, adaptive-ticks CPUs must have their RCU callbacks offloaded. diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c index 373d42c707bc5..2a262d3ecb3da 100644 --- a/kernel/sched/isolation.c +++ b/kernel/sched/isolation.c @@ -46,7 +46,16 @@ int housekeeping_any_cpu(enum hk_type type) if (cpu < nr_cpu_ids) return cpu; - return cpumask_any_and(housekeeping.cpumasks[type], cpu_online_mask); + cpu = cpumask_any_and(housekeeping.cpumasks[type], cpu_online_mask); + if (likely(cpu < nr_cpu_ids)) + return cpu; + /* + * Unless we have another problem this can only happen + * at boot time before start_secondary() brings the 1st + * housekeeping CPU up. + */ + WARN_ON_ONCE(system_state == SYSTEM_RUNNING || + type != HK_TYPE_TIMER); } } return smp_processor_id(); From 257bf89d84121280904800acd25cc2c444c717ae Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 13 Apr 2024 16:17:46 +0200 Subject: [PATCH 433/606] sched/isolation: Fix boot crash when maxcpus < first housekeeping CPU housekeeping_setup() checks cpumask_intersects(present, online) to ensure that the kernel will have at least one housekeeping CPU after smp_init(), but this doesn't work if the maxcpus= kernel parameter limits the number of processors available after bootup. For example, a kernel with "maxcpus=2 nohz_full=0-2" parameters crashes at boot time on a virtual machine with 4 CPUs. Change housekeeping_setup() to use cpumask_first_and() and check that the returned CPU number is valid and less than setup_max_cpus. Another corner case is "nohz_full=0" on a machine with a single CPU or with the maxcpus=1 kernel argument. In this case non_housekeeping_mask is empty and tick_nohz_full_setup() makes no sense. And indeed, the kernel hits the WARN_ON(tick_nohz_full_running) in tick_sched_do_timer(). And how should the kernel interpret the "nohz_full=" parameter? It should be silently ignored, but currently cpulist_parse() happily returns the empty cpumask and this leads to the same problem. Change housekeeping_setup() to check cpumask_empty(non_housekeeping_mask) and do nothing in this case. Signed-off-by: Oleg Nesterov Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Reviewed-by: Phil Auld Acked-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20240413141746.GA10008@redhat.com --- kernel/sched/isolation.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c index 2a262d3ecb3da..5891e715f00d0 100644 --- a/kernel/sched/isolation.c +++ b/kernel/sched/isolation.c @@ -118,6 +118,7 @@ static void __init housekeeping_setup_type(enum hk_type type, static int __init housekeeping_setup(char *str, unsigned long flags) { cpumask_var_t non_housekeeping_mask, housekeeping_staging; + unsigned int first_cpu; int err = 0; if ((flags & HK_FLAG_TICK) && !(housekeeping.flags & HK_FLAG_TICK)) { @@ -138,7 +139,8 @@ static int __init housekeeping_setup(char *str, unsigned long flags) cpumask_andnot(housekeeping_staging, cpu_possible_mask, non_housekeeping_mask); - if (!cpumask_intersects(cpu_present_mask, housekeeping_staging)) { + first_cpu = cpumask_first_and(cpu_present_mask, housekeeping_staging); + if (first_cpu >= nr_cpu_ids || first_cpu >= setup_max_cpus) { __cpumask_set_cpu(smp_processor_id(), housekeeping_staging); __cpumask_clear_cpu(smp_processor_id(), non_housekeeping_mask); if (!housekeeping.flags) { @@ -147,6 +149,9 @@ static int __init housekeeping_setup(char *str, unsigned long flags) } } + if (cpumask_empty(non_housekeeping_mask)) + goto free_housekeeping_staging; + if (!housekeeping.flags) { /* First setup call ("nohz_full=" or "isolcpus=") */ enum hk_type type; From 398321d7531963b95841865eb371fe65c44c6921 Mon Sep 17 00:00:00 2001 From: Oswald Buddenhagen Date: Sun, 28 Apr 2024 11:37:11 +0200 Subject: [PATCH 434/606] ALSA: emu10k1: fix E-MU card dock presence monitoring While there are two separate IRQ status bits for dock attach and detach, the hardware appears to mix them up more or less randomly, making them useless for tracking what actually happened. It is much safer to check the dock status separately and proceed based on that, as the old polling code did. Note that the code assumes that only the dock can be hot-plugged - if other option card bits changed, the logic would break. Fixes: fbb64eedf5a3 ("ALSA: emu10k1: make E-MU dock monitoring interrupt-driven") Link: https://bugzilla.kernel.org/show_bug.cgi?id=218584 Signed-off-by: Oswald Buddenhagen Signed-off-by: Takashi Iwai Message-ID: <20240428093716.3198666-2-oswald.buddenhagen@gmx.de> --- sound/pci/emu10k1/emu10k1_main.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/sound/pci/emu10k1/emu10k1_main.c b/sound/pci/emu10k1/emu10k1_main.c index de5c41e578e1f..85f70368a27db 100644 --- a/sound/pci/emu10k1/emu10k1_main.c +++ b/sound/pci/emu10k1/emu10k1_main.c @@ -778,6 +778,11 @@ static void emu1010_firmware_work(struct work_struct *work) msleep(10); /* Unmute all. Default is muted after a firmware load */ snd_emu1010_fpga_write(emu, EMU_HANA_UNMUTE, EMU_UNMUTE); + } else if (!(reg & EMU_HANA_OPTION_DOCK_ONLINE)) { + /* Audio Dock removed */ + dev_info(emu->card->dev, "emu1010: Audio Dock detached\n"); + /* The hardware auto-mutes all, so we unmute again */ + snd_emu1010_fpga_write(emu, EMU_HANA_UNMUTE, EMU_UNMUTE); } } @@ -810,14 +815,12 @@ static void emu1010_interrupt(struct snd_emu10k1 *emu) u32 sts; snd_emu1010_fpga_read(emu, EMU_HANA_IRQ_STATUS, &sts); - if (sts & EMU_HANA_IRQ_DOCK_LOST) { - /* Audio Dock removed */ - dev_info(emu->card->dev, "emu1010: Audio Dock detached\n"); - /* The hardware auto-mutes all, so we unmute again */ - snd_emu1010_fpga_write(emu, EMU_HANA_UNMUTE, EMU_UNMUTE); - } else if (sts & EMU_HANA_IRQ_DOCK) { + + // The distinction of the IRQ status bits is unreliable, + // so we dispatch later based on option card status. + if (sts & (EMU_HANA_IRQ_DOCK | EMU_HANA_IRQ_DOCK_LOST)) schedule_work(&emu->emu1010.firmware_work); - } + if (sts & EMU_HANA_IRQ_WCLK_CHANGED) schedule_work(&emu->emu1010.clock_work); } From 28deafd0fbdc45cc9c63bd7dd4efc35137958862 Mon Sep 17 00:00:00 2001 From: Oswald Buddenhagen Date: Sun, 28 Apr 2024 11:37:12 +0200 Subject: [PATCH 435/606] ALSA: emu10k1: factor out snd_emu1010_load_dock_firmware() Pulled out of the next patch to improve its legibility. As the function is now available, call it directly from snd_emu10k1_emu1010_init(), thus making the MicroDock firmware loading synchronous - there isn't really a reason not to. Note that this does not affect the AudioDocks of rev1 cards, as these have no independent power supplies, and thus come up only a while after the main card is initialized. As a drive-by, adjust the priorities of two messages to better reflect their impact. Signed-off-by: Oswald Buddenhagen Signed-off-by: Takashi Iwai Message-ID: <20240428093716.3198666-3-oswald.buddenhagen@gmx.de> --- sound/pci/emu10k1/emu10k1_main.c | 66 +++++++++++++++++--------------- 1 file changed, 36 insertions(+), 30 deletions(-) diff --git a/sound/pci/emu10k1/emu10k1_main.c b/sound/pci/emu10k1/emu10k1_main.c index 85f70368a27db..6265fc9ae2606 100644 --- a/sound/pci/emu10k1/emu10k1_main.c +++ b/sound/pci/emu10k1/emu10k1_main.c @@ -732,11 +732,43 @@ static int snd_emu1010_load_firmware(struct snd_emu10k1 *emu, int dock, return snd_emu1010_load_firmware_entry(emu, *fw); } +static void snd_emu1010_load_dock_firmware(struct snd_emu10k1 *emu) +{ + u32 tmp, tmp2; + int err; + + dev_info(emu->card->dev, "emu1010: Loading Audio Dock Firmware\n"); + /* Return to Audio Dock programming mode */ + snd_emu1010_fpga_write(emu, EMU_HANA_FPGA_CONFIG, + EMU_HANA_FPGA_CONFIG_AUDIODOCK); + err = snd_emu1010_load_firmware(emu, 1, &emu->dock_fw); + if (err < 0) + return; + snd_emu1010_fpga_write(emu, EMU_HANA_FPGA_CONFIG, 0); + + snd_emu1010_fpga_read(emu, EMU_HANA_ID, &tmp); + dev_dbg(emu->card->dev, "emu1010: EMU_HANA+DOCK_ID = 0x%x\n", tmp); + if ((tmp & 0x1f) != 0x15) { + /* FPGA failed to be programmed */ + dev_err(emu->card->dev, + "emu1010: Loading Audio Dock Firmware failed, reg = 0x%x\n", + tmp); + return; + } + dev_info(emu->card->dev, "emu1010: Audio Dock Firmware loaded\n"); + + snd_emu1010_fpga_read(emu, EMU_DOCK_MAJOR_REV, &tmp); + snd_emu1010_fpga_read(emu, EMU_DOCK_MINOR_REV, &tmp2); + dev_info(emu->card->dev, "Audio Dock ver: %u.%u\n", tmp, tmp2); + + /* Allow DLL to settle, to sync clocking between 1010 and Dock */ + msleep(10); +} + static void emu1010_firmware_work(struct work_struct *work) { struct snd_emu10k1 *emu; - u32 tmp, tmp2, reg; - int err; + u32 reg; emu = container_of(work, struct snd_emu10k1, emu1010.firmware_work); @@ -749,33 +781,7 @@ static void emu1010_firmware_work(struct work_struct *work) snd_emu1010_fpga_read(emu, EMU_HANA_OPTION_CARDS, ®); /* OPTIONS: Which cards are attached to the EMU */ if (reg & EMU_HANA_OPTION_DOCK_OFFLINE) { /* Audio Dock attached */ - /* Return to Audio Dock programming mode */ - dev_info(emu->card->dev, - "emu1010: Loading Audio Dock Firmware\n"); - snd_emu1010_fpga_write(emu, EMU_HANA_FPGA_CONFIG, - EMU_HANA_FPGA_CONFIG_AUDIODOCK); - err = snd_emu1010_load_firmware(emu, 1, &emu->dock_fw); - if (err < 0) - return; - snd_emu1010_fpga_write(emu, EMU_HANA_FPGA_CONFIG, 0); - snd_emu1010_fpga_read(emu, EMU_HANA_ID, &tmp); - dev_info(emu->card->dev, - "emu1010: EMU_HANA+DOCK_ID = 0x%x\n", tmp); - if ((tmp & 0x1f) != 0x15) { - /* FPGA failed to be programmed */ - dev_info(emu->card->dev, - "emu1010: Loading Audio Dock Firmware file failed, reg = 0x%x\n", - tmp); - return; - } - dev_info(emu->card->dev, - "emu1010: Audio Dock Firmware loaded\n"); - snd_emu1010_fpga_read(emu, EMU_DOCK_MAJOR_REV, &tmp); - snd_emu1010_fpga_read(emu, EMU_DOCK_MINOR_REV, &tmp2); - dev_info(emu->card->dev, "Audio Dock ver: %u.%u\n", tmp, tmp2); - /* Sync clocking between 1010 and Dock */ - /* Allow DLL to settle */ - msleep(10); + snd_emu1010_load_dock_firmware(emu); /* Unmute all. Default is muted after a firmware load */ snd_emu1010_fpga_write(emu, EMU_HANA_UNMUTE, EMU_UNMUTE); } else if (!(reg & EMU_HANA_OPTION_DOCK_ONLINE)) { @@ -892,7 +898,7 @@ static int snd_emu10k1_emu1010_init(struct snd_emu10k1 *emu) snd_emu1010_fpga_read(emu, EMU_HANA_OPTION_CARDS, ®); dev_info(emu->card->dev, "emu1010: Card options = 0x%x\n", reg); if (reg & EMU_HANA_OPTION_DOCK_OFFLINE) - schedule_work(&emu->emu1010.firmware_work); + snd_emu1010_load_dock_firmware(emu); if (emu->card_capabilities->no_adat) { emu->emu1010.optical_in = 0; /* IN_SPDIF */ emu->emu1010.optical_out = 0; /* OUT_SPDIF */ From f848337cd801c7106a4ec0d61765771dab2a5909 Mon Sep 17 00:00:00 2001 From: Oswald Buddenhagen Date: Sun, 28 Apr 2024 11:37:13 +0200 Subject: [PATCH 436/606] ALSA: emu10k1: move the whole GPIO event handling to the workqueue The actual event processing was already done by workqueue items. We can move the event dispatching there as well, rather than doing it already in the interrupt handler callback. This change has a rather profound "side effect" on the reliability of the FPGA programming: once we enter programming mode, we must not issue any snd_emu1010_fpga_{read,write}() calls until we're done, as these would badly mess up the programming protocol. But exactly that would happen when trying to program the dock, as that triggers GPIO interrupts as a side effect. This is mitigated by deferring the actual interrupt handling, as workqueue items are not re-entrant. To avoid scheduling the dispatcher on non-events, we now explicitly ignore GPIO IRQs triggered by "uninteresting" pins, which happens a lot as a side effect of calling snd_emu1010_fpga_{read,write}(). Fixes: fbb64eedf5a3 ("ALSA: emu10k1: make E-MU dock monitoring interrupt-driven") Link: https://bugzilla.kernel.org/show_bug.cgi?id=218584 Signed-off-by: Oswald Buddenhagen Signed-off-by: Takashi Iwai Message-ID: <20240428093716.3198666-4-oswald.buddenhagen@gmx.de> --- include/sound/emu10k1.h | 3 +- sound/pci/emu10k1/emu10k1.c | 3 +- sound/pci/emu10k1/emu10k1_main.c | 56 ++++++++++++++++---------------- 3 files changed, 30 insertions(+), 32 deletions(-) diff --git a/include/sound/emu10k1.h b/include/sound/emu10k1.h index 1af9e68193920..9cc10fab01a8c 100644 --- a/include/sound/emu10k1.h +++ b/include/sound/emu10k1.h @@ -1684,8 +1684,7 @@ struct snd_emu1010 { unsigned int clock_fallback; unsigned int optical_in; /* 0:SPDIF, 1:ADAT */ unsigned int optical_out; /* 0:SPDIF, 1:ADAT */ - struct work_struct firmware_work; - struct work_struct clock_work; + struct work_struct work; }; struct snd_emu10k1 { diff --git a/sound/pci/emu10k1/emu10k1.c b/sound/pci/emu10k1/emu10k1.c index fe72e7d772412..dadeda7758cee 100644 --- a/sound/pci/emu10k1/emu10k1.c +++ b/sound/pci/emu10k1/emu10k1.c @@ -189,8 +189,7 @@ static int snd_emu10k1_suspend(struct device *dev) emu->suspend = 1; - cancel_work_sync(&emu->emu1010.firmware_work); - cancel_work_sync(&emu->emu1010.clock_work); + cancel_work_sync(&emu->emu1010.work); snd_ac97_suspend(emu->ac97); diff --git a/sound/pci/emu10k1/emu10k1_main.c b/sound/pci/emu10k1/emu10k1_main.c index 6265fc9ae2606..86eaf5963502c 100644 --- a/sound/pci/emu10k1/emu10k1_main.c +++ b/sound/pci/emu10k1/emu10k1_main.c @@ -765,19 +765,10 @@ static void snd_emu1010_load_dock_firmware(struct snd_emu10k1 *emu) msleep(10); } -static void emu1010_firmware_work(struct work_struct *work) +static void emu1010_dock_event(struct snd_emu10k1 *emu) { - struct snd_emu10k1 *emu; u32 reg; - emu = container_of(work, struct snd_emu10k1, - emu1010.firmware_work); - if (emu->card->shutdown) - return; -#ifdef CONFIG_PM_SLEEP - if (emu->suspend) - return; -#endif snd_emu1010_fpga_read(emu, EMU_HANA_OPTION_CARDS, ®); /* OPTIONS: Which cards are attached to the EMU */ if (reg & EMU_HANA_OPTION_DOCK_OFFLINE) { /* Audio Dock attached */ @@ -792,20 +783,10 @@ static void emu1010_firmware_work(struct work_struct *work) } } -static void emu1010_clock_work(struct work_struct *work) +static void emu1010_clock_event(struct snd_emu10k1 *emu) { - struct snd_emu10k1 *emu; struct snd_ctl_elem_id id; - emu = container_of(work, struct snd_emu10k1, - emu1010.clock_work); - if (emu->card->shutdown) - return; -#ifdef CONFIG_PM_SLEEP - if (emu->suspend) - return; -#endif - spin_lock_irq(&emu->reg_lock); // This is the only thing that can actually happen. emu->emu1010.clock_source = emu->emu1010.clock_fallback; @@ -816,19 +797,40 @@ static void emu1010_clock_work(struct work_struct *work) snd_ctl_notify(emu->card, SNDRV_CTL_EVENT_MASK_VALUE, &id); } -static void emu1010_interrupt(struct snd_emu10k1 *emu) +static void emu1010_work(struct work_struct *work) { + struct snd_emu10k1 *emu; u32 sts; + emu = container_of(work, struct snd_emu10k1, emu1010.work); + if (emu->card->shutdown) + return; +#ifdef CONFIG_PM_SLEEP + if (emu->suspend) + return; +#endif + snd_emu1010_fpga_read(emu, EMU_HANA_IRQ_STATUS, &sts); // The distinction of the IRQ status bits is unreliable, // so we dispatch later based on option card status. if (sts & (EMU_HANA_IRQ_DOCK | EMU_HANA_IRQ_DOCK_LOST)) - schedule_work(&emu->emu1010.firmware_work); + emu1010_dock_event(emu); if (sts & EMU_HANA_IRQ_WCLK_CHANGED) - schedule_work(&emu->emu1010.clock_work); + emu1010_clock_event(emu); +} + +static void emu1010_interrupt(struct snd_emu10k1 *emu) +{ + // We get an interrupt on each GPIO input pin change, but we + // care only about the ones triggered by the dedicated pin. + u16 sts = inw(emu->port + A_GPIO); + u16 bit = emu->card_capabilities->ca0108_chip ? 0x2000 : 0x8000; + if (!(sts & bit)) + return; + + schedule_work(&emu->emu1010.work); } /* @@ -969,8 +971,7 @@ static void snd_emu10k1_free(struct snd_card *card) /* Disable 48Volt power to Audio Dock */ snd_emu1010_fpga_write(emu, EMU_HANA_DOCK_PWR, 0); } - cancel_work_sync(&emu->emu1010.firmware_work); - cancel_work_sync(&emu->emu1010.clock_work); + cancel_work_sync(&emu->emu1010.work); release_firmware(emu->firmware); release_firmware(emu->dock_fw); snd_util_memhdr_free(emu->memhdr); @@ -1549,8 +1550,7 @@ int snd_emu10k1_create(struct snd_card *card, emu->irq = -1; emu->synth = NULL; emu->get_synth_voice = NULL; - INIT_WORK(&emu->emu1010.firmware_work, emu1010_firmware_work); - INIT_WORK(&emu->emu1010.clock_work, emu1010_clock_work); + INIT_WORK(&emu->emu1010.work, emu1010_work); /* read revision & serial */ emu->revision = pci->revision; pci_read_config_dword(pci, PCI_SUBSYSTEM_VENDOR_ID, &emu->serial); From 2d3f4810886eb7c319cec41b6d725d2953bfa88a Mon Sep 17 00:00:00 2001 From: Oswald Buddenhagen Date: Sun, 28 Apr 2024 11:37:14 +0200 Subject: [PATCH 437/606] ALSA: emu10k1: use mutex for E-MU FPGA access locking The FPGA access through the GPIO port does not interfere with other sound processor register access, so there is no need to subject it to emu_lock. And after moving all FPGA access out of the interrupt handler, it does not need to be IRQ-safe, either. What's more, attaching the dock causes a firmware upload, which takes several seconds. We really don't want to disable IRQs for this long, and even less also have someone else spin with IRQs disabled waiting for us. Therefore, use a mutex for FPGA access locking. This makes the code somewhat more noisy, as we need to wrap bigger sections into the mutex, as it needs to enclose the spinlocks. The latter has the "side effect" of fixing dock FPGA programming in a corner case: a really badly timed mixer access right between entering FPGA programming mode and uploading the netlist would mess up the protocol. Signed-off-by: Oswald Buddenhagen Signed-off-by: Takashi Iwai Message-ID: <20240428093716.3198666-5-oswald.buddenhagen@gmx.de> --- include/sound/emu10k1.h | 4 +++ sound/pci/emu10k1/emu10k1_main.c | 19 ++++++++++--- sound/pci/emu10k1/emumixer.c | 18 ++++++++---- sound/pci/emu10k1/emuproc.c | 9 ++++++ sound/pci/emu10k1/io.c | 48 +++++++++++++------------------- 5 files changed, 61 insertions(+), 37 deletions(-) diff --git a/include/sound/emu10k1.h b/include/sound/emu10k1.h index 9cc10fab01a8c..234b5baea69c8 100644 --- a/include/sound/emu10k1.h +++ b/include/sound/emu10k1.h @@ -1685,6 +1685,7 @@ struct snd_emu1010 { unsigned int optical_in; /* 0:SPDIF, 1:ADAT */ unsigned int optical_out; /* 0:SPDIF, 1:ADAT */ struct work_struct work; + struct mutex lock; }; struct snd_emu10k1 { @@ -1833,6 +1834,9 @@ unsigned int snd_emu10k1_ptr20_read(struct snd_emu10k1 * emu, unsigned int reg, void snd_emu10k1_ptr20_write(struct snd_emu10k1 *emu, unsigned int reg, unsigned int chn, unsigned int data); int snd_emu10k1_spi_write(struct snd_emu10k1 * emu, unsigned int data); int snd_emu10k1_i2c_write(struct snd_emu10k1 *emu, u32 reg, u32 value); +static inline void snd_emu1010_fpga_lock(struct snd_emu10k1 *emu) { mutex_lock(&emu->emu1010.lock); }; +static inline void snd_emu1010_fpga_unlock(struct snd_emu10k1 *emu) { mutex_unlock(&emu->emu1010.lock); }; +void snd_emu1010_fpga_write_lock(struct snd_emu10k1 *emu, u32 reg, u32 value); void snd_emu1010_fpga_write(struct snd_emu10k1 *emu, u32 reg, u32 value); void snd_emu1010_fpga_read(struct snd_emu10k1 *emu, u32 reg, u32 *value); void snd_emu1010_fpga_link_dst_src_write(struct snd_emu10k1 *emu, u32 dst, u32 src); diff --git a/sound/pci/emu10k1/emu10k1_main.c b/sound/pci/emu10k1/emu10k1_main.c index 86eaf5963502c..1a2905e8672bd 100644 --- a/sound/pci/emu10k1/emu10k1_main.c +++ b/sound/pci/emu10k1/emu10k1_main.c @@ -810,6 +810,8 @@ static void emu1010_work(struct work_struct *work) return; #endif + snd_emu1010_fpga_lock(emu); + snd_emu1010_fpga_read(emu, EMU_HANA_IRQ_STATUS, &sts); // The distinction of the IRQ status bits is unreliable, @@ -819,6 +821,8 @@ static void emu1010_work(struct work_struct *work) if (sts & EMU_HANA_IRQ_WCLK_CHANGED) emu1010_clock_event(emu); + + snd_emu1010_fpga_unlock(emu); } static void emu1010_interrupt(struct snd_emu10k1 *emu) @@ -852,6 +856,8 @@ static int snd_emu10k1_emu1010_init(struct snd_emu10k1 *emu) * Proper init follows in snd_emu10k1_init(). */ outl(HCFG_LOCKSOUNDCACHE | HCFG_LOCKTANKCACHE_MASK, emu->port + HCFG); + snd_emu1010_fpga_lock(emu); + /* Disable 48Volt power to Audio Dock */ snd_emu1010_fpga_write(emu, EMU_HANA_DOCK_PWR, 0); @@ -877,7 +883,7 @@ static int snd_emu10k1_emu1010_init(struct snd_emu10k1 *emu) err = snd_emu1010_load_firmware(emu, 0, &emu->firmware); if (err < 0) { dev_info(emu->card->dev, "emu1010: Loading Firmware failed\n"); - return err; + goto fail; } /* ID, should read & 0x7f = 0x55 when FPGA programmed. */ @@ -887,7 +893,8 @@ static int snd_emu10k1_emu1010_init(struct snd_emu10k1 *emu) dev_info(emu->card->dev, "emu1010: Loading Hana Firmware file failed, reg = 0x%x\n", reg); - return -ENODEV; + err = -ENODEV; + goto fail; } dev_info(emu->card->dev, "emu1010: Hana Firmware loaded\n"); @@ -947,7 +954,9 @@ static int snd_emu10k1_emu1010_init(struct snd_emu10k1 *emu) // so it is safe to simply enable the outputs. snd_emu1010_fpga_write(emu, EMU_HANA_UNMUTE, EMU_UNMUTE); - return 0; +fail: + snd_emu1010_fpga_unlock(emu); + return err; } /* * Create the EMU10K1 instance @@ -969,9 +978,10 @@ static void snd_emu10k1_free(struct snd_card *card) } if (emu->card_capabilities->emu_model == EMU_MODEL_EMU1010) { /* Disable 48Volt power to Audio Dock */ - snd_emu1010_fpga_write(emu, EMU_HANA_DOCK_PWR, 0); + snd_emu1010_fpga_write_lock(emu, EMU_HANA_DOCK_PWR, 0); } cancel_work_sync(&emu->emu1010.work); + mutex_destroy(&emu->emu1010.lock); release_firmware(emu->firmware); release_firmware(emu->dock_fw); snd_util_memhdr_free(emu->memhdr); @@ -1551,6 +1561,7 @@ int snd_emu10k1_create(struct snd_card *card, emu->synth = NULL; emu->get_synth_voice = NULL; INIT_WORK(&emu->emu1010.work, emu1010_work); + mutex_init(&emu->emu1010.lock); /* read revision & serial */ emu->revision = pci->revision; pci_read_config_dword(pci, PCI_SUBSYSTEM_VENDOR_ID, &emu->serial); diff --git a/sound/pci/emu10k1/emumixer.c b/sound/pci/emu10k1/emumixer.c index 0a32ea53d8c64..05b98d9b547b2 100644 --- a/sound/pci/emu10k1/emumixer.c +++ b/sound/pci/emu10k1/emumixer.c @@ -661,7 +661,9 @@ static int snd_emu1010_output_source_put(struct snd_kcontrol *kcontrol, change = (emu->emu1010.output_source[channel] != val); if (change) { emu->emu1010.output_source[channel] = val; + snd_emu1010_fpga_lock(emu); snd_emu1010_output_source_apply(emu, channel, val); + snd_emu1010_fpga_unlock(emu); } return change; } @@ -705,7 +707,9 @@ static int snd_emu1010_input_source_put(struct snd_kcontrol *kcontrol, change = (emu->emu1010.input_source[channel] != val); if (change) { emu->emu1010.input_source[channel] = val; + snd_emu1010_fpga_lock(emu); snd_emu1010_input_source_apply(emu, channel, val); + snd_emu1010_fpga_unlock(emu); } return change; } @@ -774,7 +778,7 @@ static int snd_emu1010_adc_pads_put(struct snd_kcontrol *kcontrol, struct snd_ct cache = cache & ~mask; change = (cache != emu->emu1010.adc_pads); if (change) { - snd_emu1010_fpga_write(emu, EMU_HANA_ADC_PADS, cache ); + snd_emu1010_fpga_write_lock(emu, EMU_HANA_ADC_PADS, cache ); emu->emu1010.adc_pads = cache; } @@ -832,7 +836,7 @@ static int snd_emu1010_dac_pads_put(struct snd_kcontrol *kcontrol, struct snd_ct cache = cache & ~mask; change = (cache != emu->emu1010.dac_pads); if (change) { - snd_emu1010_fpga_write(emu, EMU_HANA_DAC_PADS, cache ); + snd_emu1010_fpga_write_lock(emu, EMU_HANA_DAC_PADS, cache ); emu->emu1010.dac_pads = cache; } @@ -980,6 +984,7 @@ static int snd_emu1010_clock_source_put(struct snd_kcontrol *kcontrol, val = ucontrol->value.enumerated.item[0] ; if (val >= emu_ci->num) return -EINVAL; + snd_emu1010_fpga_lock(emu); spin_lock_irq(&emu->reg_lock); change = (emu->emu1010.clock_source != val); if (change) { @@ -996,6 +1001,7 @@ static int snd_emu1010_clock_source_put(struct snd_kcontrol *kcontrol, } else { spin_unlock_irq(&emu->reg_lock); } + snd_emu1010_fpga_unlock(emu); return change; } @@ -1041,7 +1047,7 @@ static int snd_emu1010_clock_fallback_put(struct snd_kcontrol *kcontrol, change = (emu->emu1010.clock_fallback != val); if (change) { emu->emu1010.clock_fallback = val; - snd_emu1010_fpga_write(emu, EMU_HANA_DEFCLOCK, 1 - val); + snd_emu1010_fpga_write_lock(emu, EMU_HANA_DEFCLOCK, 1 - val); } return change; } @@ -1093,7 +1099,7 @@ static int snd_emu1010_optical_out_put(struct snd_kcontrol *kcontrol, emu->emu1010.optical_out = val; tmp = (emu->emu1010.optical_in ? EMU_HANA_OPTICAL_IN_ADAT : EMU_HANA_OPTICAL_IN_SPDIF) | (emu->emu1010.optical_out ? EMU_HANA_OPTICAL_OUT_ADAT : EMU_HANA_OPTICAL_OUT_SPDIF); - snd_emu1010_fpga_write(emu, EMU_HANA_OPTICAL_TYPE, tmp); + snd_emu1010_fpga_write_lock(emu, EMU_HANA_OPTICAL_TYPE, tmp); } return change; } @@ -1144,7 +1150,7 @@ static int snd_emu1010_optical_in_put(struct snd_kcontrol *kcontrol, emu->emu1010.optical_in = val; tmp = (emu->emu1010.optical_in ? EMU_HANA_OPTICAL_IN_ADAT : EMU_HANA_OPTICAL_IN_SPDIF) | (emu->emu1010.optical_out ? EMU_HANA_OPTICAL_OUT_ADAT : EMU_HANA_OPTICAL_OUT_SPDIF); - snd_emu1010_fpga_write(emu, EMU_HANA_OPTICAL_TYPE, tmp); + snd_emu1010_fpga_write_lock(emu, EMU_HANA_OPTICAL_TYPE, tmp); } return change; } @@ -2323,7 +2329,9 @@ int snd_emu10k1_mixer(struct snd_emu10k1 *emu, for (i = 0; i < emu_ri->n_outs; i++) emu->emu1010.output_source[i] = emu1010_map_source(emu_ri, emu_ri->out_dflts[i]); + snd_emu1010_fpga_lock(emu); snd_emu1010_apply_sources(emu); + snd_emu1010_fpga_unlock(emu); kctl = emu->ctl_clock_source = snd_ctl_new1(&snd_emu1010_clock_source, emu); err = snd_ctl_add(card, kctl); diff --git a/sound/pci/emu10k1/emuproc.c b/sound/pci/emu10k1/emuproc.c index 2f80fd91017c3..737c28d31b41a 100644 --- a/sound/pci/emu10k1/emuproc.c +++ b/sound/pci/emu10k1/emuproc.c @@ -165,6 +165,8 @@ static void snd_emu10k1_proc_spdif_read(struct snd_info_entry *entry, u32 value2; if (emu->card_capabilities->emu_model) { + snd_emu1010_fpga_lock(emu); + // This represents the S/PDIF lock status on 0404b, which is // kinda weird and unhelpful, because monitoring it via IRQ is // impractical (one gets an IRQ flood as long as it is desynced). @@ -197,6 +199,8 @@ static void snd_emu10k1_proc_spdif_read(struct snd_info_entry *entry, snd_iprintf(buffer, "\nS/PDIF mode: %s%s\n", value & EMU_HANA_SPDIF_MODE_RX_PRO ? "professional" : "consumer", value & EMU_HANA_SPDIF_MODE_RX_NOCOPY ? ", no copy" : ""); + + snd_emu1010_fpga_unlock(emu); } else { snd_emu10k1_proc_spdif_status(emu, buffer, "CD-ROM S/PDIF In", CDCS, CDSRCS); snd_emu10k1_proc_spdif_status(emu, buffer, "Optical or Coax S/PDIF In", GPSCS, GPSRCS); @@ -458,6 +462,9 @@ static void snd_emu_proc_emu1010_reg_read(struct snd_info_entry *entry, struct snd_emu10k1 *emu = entry->private_data; u32 value; int i; + + snd_emu1010_fpga_lock(emu); + snd_iprintf(buffer, "EMU1010 Registers:\n\n"); for(i = 0; i < 0x40; i+=1) { @@ -496,6 +503,8 @@ static void snd_emu_proc_emu1010_reg_read(struct snd_info_entry *entry, snd_emu_proc_emu1010_link_read(emu, buffer, 0x701); } } + + snd_emu1010_fpga_unlock(emu); } static void snd_emu_proc_io_reg_read(struct snd_info_entry *entry, diff --git a/sound/pci/emu10k1/io.c b/sound/pci/emu10k1/io.c index 74df2330015f6..f3260a81e47b7 100644 --- a/sound/pci/emu10k1/io.c +++ b/sound/pci/emu10k1/io.c @@ -289,20 +289,28 @@ static void snd_emu1010_fpga_write_locked(struct snd_emu10k1 *emu, u32 reg, u32 void snd_emu1010_fpga_write(struct snd_emu10k1 *emu, u32 reg, u32 value) { - unsigned long flags; + if (snd_BUG_ON(!mutex_is_locked(&emu->emu1010.lock))) + return; + snd_emu1010_fpga_write_locked(emu, reg, value); +} - spin_lock_irqsave(&emu->emu_lock, flags); +void snd_emu1010_fpga_write_lock(struct snd_emu10k1 *emu, u32 reg, u32 value) +{ + snd_emu1010_fpga_lock(emu); snd_emu1010_fpga_write_locked(emu, reg, value); - spin_unlock_irqrestore(&emu->emu_lock, flags); + snd_emu1010_fpga_unlock(emu); } -static void snd_emu1010_fpga_read_locked(struct snd_emu10k1 *emu, u32 reg, u32 *value) +void snd_emu1010_fpga_read(struct snd_emu10k1 *emu, u32 reg, u32 *value) { // The higest input pin is used as the designated interrupt trigger, // so it needs to be masked out. // But note that any other input pin change will also cause an IRQ, // so using this function often causes an IRQ as a side effect. u32 mask = emu->card_capabilities->ca0108_chip ? 0x1f : 0x7f; + + if (snd_BUG_ON(!mutex_is_locked(&emu->emu1010.lock))) + return; if (snd_BUG_ON(reg > 0x3f)) return; reg += 0x40; /* 0x40 upwards are registers. */ @@ -313,47 +321,31 @@ static void snd_emu1010_fpga_read_locked(struct snd_emu10k1 *emu, u32 reg, u32 * *value = ((inw(emu->port + A_GPIO) >> 8) & mask); } -void snd_emu1010_fpga_read(struct snd_emu10k1 *emu, u32 reg, u32 *value) -{ - unsigned long flags; - - spin_lock_irqsave(&emu->emu_lock, flags); - snd_emu1010_fpga_read_locked(emu, reg, value); - spin_unlock_irqrestore(&emu->emu_lock, flags); -} - /* Each Destination has one and only one Source, * but one Source can feed any number of Destinations simultaneously. */ void snd_emu1010_fpga_link_dst_src_write(struct snd_emu10k1 *emu, u32 dst, u32 src) { - unsigned long flags; - if (snd_BUG_ON(dst & ~0x71f)) return; if (snd_BUG_ON(src & ~0x71f)) return; - spin_lock_irqsave(&emu->emu_lock, flags); - snd_emu1010_fpga_write_locked(emu, EMU_HANA_DESTHI, dst >> 8); - snd_emu1010_fpga_write_locked(emu, EMU_HANA_DESTLO, dst & 0x1f); - snd_emu1010_fpga_write_locked(emu, EMU_HANA_SRCHI, src >> 8); - snd_emu1010_fpga_write_locked(emu, EMU_HANA_SRCLO, src & 0x1f); - spin_unlock_irqrestore(&emu->emu_lock, flags); + snd_emu1010_fpga_write(emu, EMU_HANA_DESTHI, dst >> 8); + snd_emu1010_fpga_write(emu, EMU_HANA_DESTLO, dst & 0x1f); + snd_emu1010_fpga_write(emu, EMU_HANA_SRCHI, src >> 8); + snd_emu1010_fpga_write(emu, EMU_HANA_SRCLO, src & 0x1f); } u32 snd_emu1010_fpga_link_dst_src_read(struct snd_emu10k1 *emu, u32 dst) { - unsigned long flags; u32 hi, lo; if (snd_BUG_ON(dst & ~0x71f)) return 0; - spin_lock_irqsave(&emu->emu_lock, flags); - snd_emu1010_fpga_write_locked(emu, EMU_HANA_DESTHI, dst >> 8); - snd_emu1010_fpga_write_locked(emu, EMU_HANA_DESTLO, dst & 0x1f); - snd_emu1010_fpga_read_locked(emu, EMU_HANA_SRCHI, &hi); - snd_emu1010_fpga_read_locked(emu, EMU_HANA_SRCLO, &lo); - spin_unlock_irqrestore(&emu->emu_lock, flags); + snd_emu1010_fpga_write(emu, EMU_HANA_DESTHI, dst >> 8); + snd_emu1010_fpga_write(emu, EMU_HANA_DESTLO, dst & 0x1f); + snd_emu1010_fpga_read(emu, EMU_HANA_SRCHI, &hi); + snd_emu1010_fpga_read(emu, EMU_HANA_SRCLO, &lo); return (hi << 8) | lo; } From e8289fd3fa65d60cf04dab6f7845eda352c04ea6 Mon Sep 17 00:00:00 2001 From: Oswald Buddenhagen Date: Sun, 28 Apr 2024 11:37:15 +0200 Subject: [PATCH 438/606] ALSA: emu10k1: fix E-MU dock initialization A side effect of making the dock monitoring interrupt-driven was that we'd be very quick to program a freshly connected dock. However, for unclear reasons, the dock does not work when we do that - despite the FPGA netlist upload going just fine. We work around this by adding a delay before programming the dock; for safety, the value is several times as much as was determined empirically. Note that a badly timed dock hot-plug would have triggered the problem even before the referenced commit - but now it would happen 100% instead of about 3% of the time, thus making it impossible to work around by re-plugging. Fixes: fbb64eedf5a3 ("ALSA: emu10k1: make E-MU dock monitoring interrupt-driven") Link: https://bugzilla.kernel.org/show_bug.cgi?id=218584 Signed-off-by: Oswald Buddenhagen Signed-off-by: Takashi Iwai Message-ID: <20240428093716.3198666-6-oswald.buddenhagen@gmx.de> --- sound/pci/emu10k1/emu10k1_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/pci/emu10k1/emu10k1_main.c b/sound/pci/emu10k1/emu10k1_main.c index 1a2905e8672bd..8ccc0178360ce 100644 --- a/sound/pci/emu10k1/emu10k1_main.c +++ b/sound/pci/emu10k1/emu10k1_main.c @@ -737,6 +737,12 @@ static void snd_emu1010_load_dock_firmware(struct snd_emu10k1 *emu) u32 tmp, tmp2; int err; + // The docking events clearly arrive prematurely - while the + // Dock's FPGA seems to be successfully programmed, the Dock + // fails to initialize subsequently if we don't give it some + // time to "warm up" here. + msleep(200); + dev_info(emu->card->dev, "emu1010: Loading Audio Dock Firmware\n"); /* Return to Audio Dock programming mode */ snd_emu1010_fpga_write(emu, EMU_HANA_FPGA_CONFIG, From 15c7e87aa88f0ab2d51c2e2123b127a6d693ca21 Mon Sep 17 00:00:00 2001 From: Oswald Buddenhagen Date: Sun, 28 Apr 2024 11:37:16 +0200 Subject: [PATCH 439/606] ALSA: emu10k1: make E-MU FPGA writes potentially more reliable We did not delay after the second strobe signal, so another immediately following access could potentially corrupt the written value. This is a purely speculative fix with no supporting evidence, but after taking out the spinlocks around the writes, it seems plausible that a modern processor could be actually too fast. Also, it's just cleaner to be consistent. Signed-off-by: Oswald Buddenhagen Signed-off-by: Takashi Iwai Message-ID: <20240428093716.3198666-7-oswald.buddenhagen@gmx.de> --- sound/pci/emu10k1/io.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/emu10k1/io.c b/sound/pci/emu10k1/io.c index f3260a81e47b7..f4a1c2d4b0787 100644 --- a/sound/pci/emu10k1/io.c +++ b/sound/pci/emu10k1/io.c @@ -285,6 +285,7 @@ static void snd_emu1010_fpga_write_locked(struct snd_emu10k1 *emu, u32 reg, u32 outw(value, emu->port + A_GPIO); udelay(10); outw(value | 0x80 , emu->port + A_GPIO); /* High bit clocks the value into the fpga. */ + udelay(10); } void snd_emu1010_fpga_write(struct snd_emu10k1 *emu, u32 reg, u32 value) From 17597b1e18d2fafef2230c987479eccaeddb4628 Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Wed, 24 Apr 2024 16:42:47 +0800 Subject: [PATCH 440/606] erofs: modify the error message when prepare_ondemand_read failed When prepare_ondemand_read failed, wrong error message is printed. The prepare_read is also implemented in cachefiles, so we amend it. Reviewed-by: Gao Xiang Signed-off-by: Hongbo Li Reviewed-by: Jingbo Xu Reviewed-by: Chao Yu Link: https://lore.kernel.org/r/20240424084247.759432-1-lihongbo22@huawei.com Signed-off-by: Gao Xiang --- fs/erofs/fscache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c index 8aff1a724805a..62da538d91cbd 100644 --- a/fs/erofs/fscache.c +++ b/fs/erofs/fscache.c @@ -151,7 +151,7 @@ static int erofs_fscache_read_io_async(struct fscache_cookie *cookie, if (WARN_ON(len == 0)) source = NETFS_INVALID_READ; if (source != NETFS_READ_FROM_CACHE) { - erofs_err(NULL, "prepare_read failed (source %d)", source); + erofs_err(NULL, "prepare_ondemand_read failed (source %d)", source); return -EIO; } From 07abe43a28b2c660f726d66f5470f7f114f9643a Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Fri, 19 Apr 2024 20:36:10 +0800 Subject: [PATCH 441/606] erofs: get rid of erofs_fs_context Instead of allocating the erofs_sb_info in fill_super() allocate it during erofs_init_fs_context() and ensure that erofs can always have the info available during erofs_kill_sb(). After this erofs_fs_context is no longer needed, replace ctx with sbi, no functional changes. Suggested-by: Jingbo Xu Signed-off-by: Baokun Li Reviewed-by: Jingbo Xu Reviewed-by: Gao Xiang Reviewed-by: Chao Yu Link: https://lore.kernel.org/r/20240419123611.947084-2-libaokun1@huawei.com Signed-off-by: Gao Xiang --- fs/erofs/internal.h | 7 --- fs/erofs/super.c | 116 ++++++++++++++++++++------------------------ 2 files changed, 53 insertions(+), 70 deletions(-) diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index 39c67119f43bf..d28ccfc0352b1 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -84,13 +84,6 @@ struct erofs_dev_context { bool flatdev; }; -struct erofs_fs_context { - struct erofs_mount_opts opt; - struct erofs_dev_context *devs; - char *fsid; - char *domain_id; -}; - /* all filesystem-wide lz4 configurations */ struct erofs_sb_lz4_info { /* # of pages needed for EROFS lz4 rolling decompression */ diff --git a/fs/erofs/super.c b/fs/erofs/super.c index c0eb139adb07a..21faa49bc9703 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -370,18 +370,18 @@ static int erofs_read_superblock(struct super_block *sb) return ret; } -static void erofs_default_options(struct erofs_fs_context *ctx) +static void erofs_default_options(struct erofs_sb_info *sbi) { #ifdef CONFIG_EROFS_FS_ZIP - ctx->opt.cache_strategy = EROFS_ZIP_CACHE_READAROUND; - ctx->opt.max_sync_decompress_pages = 3; - ctx->opt.sync_decompress = EROFS_SYNC_DECOMPRESS_AUTO; + sbi->opt.cache_strategy = EROFS_ZIP_CACHE_READAROUND; + sbi->opt.max_sync_decompress_pages = 3; + sbi->opt.sync_decompress = EROFS_SYNC_DECOMPRESS_AUTO; #endif #ifdef CONFIG_EROFS_FS_XATTR - set_opt(&ctx->opt, XATTR_USER); + set_opt(&sbi->opt, XATTR_USER); #endif #ifdef CONFIG_EROFS_FS_POSIX_ACL - set_opt(&ctx->opt, POSIX_ACL); + set_opt(&sbi->opt, POSIX_ACL); #endif } @@ -426,16 +426,16 @@ static const struct fs_parameter_spec erofs_fs_parameters[] = { static bool erofs_fc_set_dax_mode(struct fs_context *fc, unsigned int mode) { #ifdef CONFIG_FS_DAX - struct erofs_fs_context *ctx = fc->fs_private; + struct erofs_sb_info *sbi = fc->s_fs_info; switch (mode) { case EROFS_MOUNT_DAX_ALWAYS: - set_opt(&ctx->opt, DAX_ALWAYS); - clear_opt(&ctx->opt, DAX_NEVER); + set_opt(&sbi->opt, DAX_ALWAYS); + clear_opt(&sbi->opt, DAX_NEVER); return true; case EROFS_MOUNT_DAX_NEVER: - set_opt(&ctx->opt, DAX_NEVER); - clear_opt(&ctx->opt, DAX_ALWAYS); + set_opt(&sbi->opt, DAX_NEVER); + clear_opt(&sbi->opt, DAX_ALWAYS); return true; default: DBG_BUGON(1); @@ -450,7 +450,7 @@ static bool erofs_fc_set_dax_mode(struct fs_context *fc, unsigned int mode) static int erofs_fc_parse_param(struct fs_context *fc, struct fs_parameter *param) { - struct erofs_fs_context *ctx = fc->fs_private; + struct erofs_sb_info *sbi = fc->s_fs_info; struct fs_parse_result result; struct erofs_device_info *dif; int opt, ret; @@ -463,9 +463,9 @@ static int erofs_fc_parse_param(struct fs_context *fc, case Opt_user_xattr: #ifdef CONFIG_EROFS_FS_XATTR if (result.boolean) - set_opt(&ctx->opt, XATTR_USER); + set_opt(&sbi->opt, XATTR_USER); else - clear_opt(&ctx->opt, XATTR_USER); + clear_opt(&sbi->opt, XATTR_USER); #else errorfc(fc, "{,no}user_xattr options not supported"); #endif @@ -473,16 +473,16 @@ static int erofs_fc_parse_param(struct fs_context *fc, case Opt_acl: #ifdef CONFIG_EROFS_FS_POSIX_ACL if (result.boolean) - set_opt(&ctx->opt, POSIX_ACL); + set_opt(&sbi->opt, POSIX_ACL); else - clear_opt(&ctx->opt, POSIX_ACL); + clear_opt(&sbi->opt, POSIX_ACL); #else errorfc(fc, "{,no}acl options not supported"); #endif break; case Opt_cache_strategy: #ifdef CONFIG_EROFS_FS_ZIP - ctx->opt.cache_strategy = result.uint_32; + sbi->opt.cache_strategy = result.uint_32; #else errorfc(fc, "compression not supported, cache_strategy ignored"); #endif @@ -504,27 +504,27 @@ static int erofs_fc_parse_param(struct fs_context *fc, kfree(dif); return -ENOMEM; } - down_write(&ctx->devs->rwsem); - ret = idr_alloc(&ctx->devs->tree, dif, 0, 0, GFP_KERNEL); - up_write(&ctx->devs->rwsem); + down_write(&sbi->devs->rwsem); + ret = idr_alloc(&sbi->devs->tree, dif, 0, 0, GFP_KERNEL); + up_write(&sbi->devs->rwsem); if (ret < 0) { kfree(dif->path); kfree(dif); return ret; } - ++ctx->devs->extra_devices; + ++sbi->devs->extra_devices; break; #ifdef CONFIG_EROFS_FS_ONDEMAND case Opt_fsid: - kfree(ctx->fsid); - ctx->fsid = kstrdup(param->string, GFP_KERNEL); - if (!ctx->fsid) + kfree(sbi->fsid); + sbi->fsid = kstrdup(param->string, GFP_KERNEL); + if (!sbi->fsid) return -ENOMEM; break; case Opt_domain_id: - kfree(ctx->domain_id); - ctx->domain_id = kstrdup(param->string, GFP_KERNEL); - if (!ctx->domain_id) + kfree(sbi->domain_id); + sbi->domain_id = kstrdup(param->string, GFP_KERNEL); + if (!sbi->domain_id) return -ENOMEM; break; #else @@ -581,8 +581,7 @@ static const struct export_operations erofs_export_ops = { static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) { struct inode *inode; - struct erofs_sb_info *sbi; - struct erofs_fs_context *ctx = fc->fs_private; + struct erofs_sb_info *sbi = EROFS_SB(sb); int err; sb->s_magic = EROFS_SUPER_MAGIC; @@ -590,19 +589,6 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_op = &erofs_sops; - sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); - if (!sbi) - return -ENOMEM; - - sb->s_fs_info = sbi; - sbi->opt = ctx->opt; - sbi->devs = ctx->devs; - ctx->devs = NULL; - sbi->fsid = ctx->fsid; - ctx->fsid = NULL; - sbi->domain_id = ctx->domain_id; - ctx->domain_id = NULL; - sbi->blkszbits = PAGE_SHIFT; if (erofs_is_fscache_mode(sb)) { sb->s_blocksize = PAGE_SIZE; @@ -706,9 +692,9 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) static int erofs_fc_get_tree(struct fs_context *fc) { - struct erofs_fs_context *ctx = fc->fs_private; + struct erofs_sb_info *sbi = fc->s_fs_info; - if (IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && ctx->fsid) + if (IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && sbi->fsid) return get_tree_nodev(fc, erofs_fc_fill_super); return get_tree_bdev(fc, erofs_fc_fill_super); @@ -718,19 +704,19 @@ static int erofs_fc_reconfigure(struct fs_context *fc) { struct super_block *sb = fc->root->d_sb; struct erofs_sb_info *sbi = EROFS_SB(sb); - struct erofs_fs_context *ctx = fc->fs_private; + struct erofs_sb_info *new_sbi = fc->s_fs_info; DBG_BUGON(!sb_rdonly(sb)); - if (ctx->fsid || ctx->domain_id) + if (new_sbi->fsid || new_sbi->domain_id) erofs_info(sb, "ignoring reconfiguration for fsid|domain_id."); - if (test_opt(&ctx->opt, POSIX_ACL)) + if (test_opt(&new_sbi->opt, POSIX_ACL)) fc->sb_flags |= SB_POSIXACL; else fc->sb_flags &= ~SB_POSIXACL; - sbi->opt = ctx->opt; + sbi->opt = new_sbi->opt; fc->sb_flags |= SB_RDONLY; return 0; @@ -761,12 +747,15 @@ static void erofs_free_dev_context(struct erofs_dev_context *devs) static void erofs_fc_free(struct fs_context *fc) { - struct erofs_fs_context *ctx = fc->fs_private; + struct erofs_sb_info *sbi = fc->s_fs_info; + + if (!sbi) + return; - erofs_free_dev_context(ctx->devs); - kfree(ctx->fsid); - kfree(ctx->domain_id); - kfree(ctx); + erofs_free_dev_context(sbi->devs); + kfree(sbi->fsid); + kfree(sbi->domain_id); + kfree(sbi); } static const struct fs_context_operations erofs_context_ops = { @@ -778,21 +767,22 @@ static const struct fs_context_operations erofs_context_ops = { static int erofs_init_fs_context(struct fs_context *fc) { - struct erofs_fs_context *ctx; + struct erofs_sb_info *sbi; - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); - if (!ctx) + sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); + if (!sbi) return -ENOMEM; - ctx->devs = kzalloc(sizeof(struct erofs_dev_context), GFP_KERNEL); - if (!ctx->devs) { - kfree(ctx); + + sbi->devs = kzalloc(sizeof(struct erofs_dev_context), GFP_KERNEL); + if (!sbi->devs) { + kfree(sbi); return -ENOMEM; } - fc->fs_private = ctx; + fc->s_fs_info = sbi; - idr_init(&ctx->devs->tree); - init_rwsem(&ctx->devs->rwsem); - erofs_default_options(ctx); + idr_init(&sbi->devs->tree); + init_rwsem(&sbi->devs->rwsem); + erofs_default_options(sbi); fc->ops = &erofs_context_ops; return 0; } From 7af2ae1b1531feab5d38ec9c8f472dc6cceb4606 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 19 Apr 2024 20:36:11 +0800 Subject: [PATCH 442/606] erofs: reliably distinguish block based and fscache mode When erofs_kill_sb() is called in block dev based mode, s_bdev may not have been initialised yet, and if CONFIG_EROFS_FS_ONDEMAND is enabled, it will be mistaken for fscache mode, and then attempt to free an anon_dev that has never been allocated, triggering the following warning: ============================================ ida_free called for id=0 which is not allocated. WARNING: CPU: 14 PID: 926 at lib/idr.c:525 ida_free+0x134/0x140 Modules linked in: CPU: 14 PID: 926 Comm: mount Not tainted 6.9.0-rc3-dirty #630 RIP: 0010:ida_free+0x134/0x140 Call Trace: erofs_kill_sb+0x81/0x90 deactivate_locked_super+0x35/0x80 get_tree_bdev+0x136/0x1e0 vfs_get_tree+0x2c/0xf0 do_new_mount+0x190/0x2f0 [...] ============================================ Now when erofs_kill_sb() is called, erofs_sb_info must have been initialised, so use sbi->fsid to distinguish between the two modes. Signed-off-by: Christian Brauner Signed-off-by: Baokun Li Reviewed-by: Jingbo Xu Reviewed-by: Gao Xiang Reviewed-by: Chao Yu Link: https://lore.kernel.org/r/20240419123611.947084-3-libaokun1@huawei.com Signed-off-by: Gao Xiang --- fs/erofs/super.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/fs/erofs/super.c b/fs/erofs/super.c index 21faa49bc9703..30b49b2eee534 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -789,17 +789,13 @@ static int erofs_init_fs_context(struct fs_context *fc) static void erofs_kill_sb(struct super_block *sb) { - struct erofs_sb_info *sbi; + struct erofs_sb_info *sbi = EROFS_SB(sb); - if (erofs_is_fscache_mode(sb)) + if (IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && sbi->fsid) kill_anon_super(sb); else kill_block_super(sb); - sbi = EROFS_SB(sb); - if (!sbi) - return; - erofs_free_dev_context(sbi->devs); fs_put_dax(sbi->dax_dev, NULL); erofs_fscache_unregister_fs(sb); From e67572cd2204894179d89bd7b984072f19313b03 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 28 Apr 2024 13:47:24 -0700 Subject: [PATCH 443/606] Linux 6.9-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 43b10f3d438cf..40fb2ca6fe4c0 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 9 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* From ae92765373c3bd82575041cf2910c96e1ba03118 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 22 Apr 2024 17:33:20 -0400 Subject: [PATCH 444/606] bcachefs: Remove accidental debug assert Signed-off-by: Kent Overstreet --- fs/bcachefs/buckets.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 941401a210f56..82f179258867b 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -525,7 +525,6 @@ int bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca, "different types of data in same bucket: %s, %s", bch2_data_type_str(g->data_type), bch2_data_type_str(data_type))) { - BUG(); ret = -EIO; goto err; } @@ -629,7 +628,6 @@ int bch2_check_bucket_ref(struct btree_trans *trans, bch2_data_type_str(ptr_data_type), (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, k), buf.buf)); - BUG(); ret = -EIO; goto err; } From f7c3dc2646584cddae6fedc517cd58d97483e5cc Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 25 Apr 2024 03:55:48 -0400 Subject: [PATCH 445/606] bcachefs: btree node scan now fills in sectors_written Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_node_scan.c | 7 +++++-- fs/bcachefs/btree_node_scan_types.h | 1 + 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c index c60794264da28..45cb8149d374c 100644 --- a/fs/bcachefs/btree_node_scan.c +++ b/fs/bcachefs/btree_node_scan.c @@ -57,13 +57,14 @@ static void found_btree_node_to_key(struct bkey_i *k, const struct found_btree_n bp->v.seq = cpu_to_le64(f->cookie); bp->v.sectors_written = 0; bp->v.flags = 0; + bp->v.sectors_written = cpu_to_le16(f->sectors_written); bp->v.min_key = f->min_key; SET_BTREE_PTR_RANGE_UPDATED(&bp->v, f->range_updated); memcpy(bp->v.start, f->ptrs, sizeof(struct bch_extent_ptr) * f->nr_ptrs); } static bool found_btree_node_is_readable(struct btree_trans *trans, - const struct found_btree_node *f) + struct found_btree_node *f) { struct { __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX); } k; @@ -71,8 +72,10 @@ static bool found_btree_node_is_readable(struct btree_trans *trans, struct btree *b = bch2_btree_node_get_noiter(trans, &k.k, f->btree_id, f->level, false); bool ret = !IS_ERR_OR_NULL(b); - if (ret) + if (ret) { + f->sectors_written = b->written; six_unlock_read(&b->c.lock); + } /* * We might update this node's range; if that happens, we need the node diff --git a/fs/bcachefs/btree_node_scan_types.h b/fs/bcachefs/btree_node_scan_types.h index abb7b27d556a9..5cfaeb5ac831b 100644 --- a/fs/bcachefs/btree_node_scan_types.h +++ b/fs/bcachefs/btree_node_scan_types.h @@ -9,6 +9,7 @@ struct found_btree_node { bool overwritten:1; u8 btree_id; u8 level; + unsigned sectors_written; u32 seq; u64 cookie; From c258c08add1cc8fa7719f112c5db36c08c507f1e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 25 Apr 2024 14:41:17 -0400 Subject: [PATCH 446/606] bcachefs: fix integer conversion bug Signed-off-by: Kent Overstreet --- fs/bcachefs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index ca4a066e9a542..0f95d7fb5ec0b 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -606,7 +606,7 @@ int bch2_trigger_inode(struct btree_trans *trans, struct bkey_s new, unsigned flags) { - s64 nr = bkey_is_inode(new.k) - bkey_is_inode(old.k); + s64 nr = (s64) bkey_is_inode(new.k) - (s64) bkey_is_inode(old.k); if (flags & BTREE_TRIGGER_TRANSACTIONAL) { if (nr) { From 1dd1eff161bd55968d3d46bc36def62d71fb4785 Mon Sep 17 00:00:00 2001 From: Zqiang Date: Sat, 27 Apr 2024 18:28:08 +0800 Subject: [PATCH 447/606] softirq: Fix suspicious RCU usage in __do_softirq() Currently, the condition "__this_cpu_read(ksoftirqd) == current" is used to invoke rcu_softirq_qs() in ksoftirqd tasks context for non-RT kernels. This works correctly as long as the context is actually task context but this condition is wrong when: - the current task is ksoftirqd - the task is interrupted in a RCU read side critical section - __do_softirq() is invoked on return from interrupt Syzkaller triggered the following scenario: -> finish_task_switch() -> put_task_struct_rcu_user() -> call_rcu(&task->rcu, delayed_put_task_struct) -> __kasan_record_aux_stack() -> pfn_valid() -> rcu_read_lock_sched() __irq_exit_rcu() -> __do_softirq)() -> if (!IS_ENABLED(CONFIG_PREEMPT_RT) && __this_cpu_read(ksoftirqd) == current) -> rcu_softirq_qs() -> RCU_LOCKDEP_WARN(lock_is_held(&rcu_sched_lock_map)) The rcu quiescent state is reported in the rcu-read critical section, so the lockdep warning is triggered. Fix this by splitting out the inner working of __do_softirq() into a helper function which takes an argument to distinguish between ksoftirqd task context and interrupted context and invoke it from the relevant call sites with the proper context information and use that for the conditional invocation of rcu_softirq_qs(). Reported-by: syzbot+dce04ed6d1438ad69656@syzkaller.appspotmail.com Suggested-by: Thomas Gleixner Signed-off-by: Zqiang Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240427102808.29356-1-qiang.zhang1211@gmail.com Link: https://lore.kernel.org/lkml/8f281a10-b85a-4586-9586-5bbc12dc784f@paulmck-laptop/T/#mea8aba4abfcb97bbf499d169ce7f30c4cff1b0e3 --- kernel/softirq.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/kernel/softirq.c b/kernel/softirq.c index b315b21fb28cd..02582017759a2 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -508,7 +508,7 @@ static inline bool lockdep_softirq_start(void) { return false; } static inline void lockdep_softirq_end(bool in_hardirq) { } #endif -asmlinkage __visible void __softirq_entry __do_softirq(void) +static void handle_softirqs(bool ksirqd) { unsigned long end = jiffies + MAX_SOFTIRQ_TIME; unsigned long old_flags = current->flags; @@ -563,8 +563,7 @@ asmlinkage __visible void __softirq_entry __do_softirq(void) pending >>= softirq_bit; } - if (!IS_ENABLED(CONFIG_PREEMPT_RT) && - __this_cpu_read(ksoftirqd) == current) + if (!IS_ENABLED(CONFIG_PREEMPT_RT) && ksirqd) rcu_softirq_qs(); local_irq_disable(); @@ -584,6 +583,11 @@ asmlinkage __visible void __softirq_entry __do_softirq(void) current_restore_flags(old_flags, PF_MEMALLOC); } +asmlinkage __visible void __softirq_entry __do_softirq(void) +{ + handle_softirqs(false); +} + /** * irq_enter_rcu - Enter an interrupt context with RCU watching */ @@ -921,7 +925,7 @@ static void run_ksoftirqd(unsigned int cpu) * We can safely run softirq on inline stack, as we are not deep * in the task stack here. */ - __do_softirq(); + handle_softirqs(true); ksoftirqd_run_end(); cond_resched(); return; From b7cf2a1d9881823133acc48427815a48b35b49f4 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Tue, 19 Mar 2024 14:09:20 +1300 Subject: [PATCH 448/606] xtensa: remove redundant flush_dcache_page and ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE macros xtensa's flush_dcache_page() can be a no-op sometimes. There is a generic implementation for this case in include/asm-generic/ cacheflush.h. #ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE static inline void flush_dcache_page(struct page *page) { } #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 #endif So remove the superfluous flush_dcache_page() definition, which also helps silence potential build warnings complaining the page variable passed to flush_dcache_page() is not used. In file included from crypto/scompress.c:12: include/crypto/scatterwalk.h: In function 'scatterwalk_pagedone': include/crypto/scatterwalk.h:76:30: warning: variable 'page' set but not used [-Wunused-but-set-variable] 76 | struct page *page; | ^~~~ crypto/scompress.c: In function 'scomp_acomp_comp_decomp': >> crypto/scompress.c:174:38: warning: unused variable 'dst_page' [-Wunused-variable] 174 | struct page *dst_page = sg_page(req->dst); | The issue was originally reported on LoongArch by kernel test robot (Huacai fixed it on LoongArch), then reported by Guenter and me on xtensa. This patch also removes lots of redundant macros which have been defined by asm-generic/cacheflush.h. Cc: Huacai Chen Cc: Herbert Xu Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202403091614.NeUw5zcv-lkp@intel.com/ Reported-by: Barry Song Closes: https://lore.kernel.org/all/CAGsJ_4yDk1+axbte7FKQEwD7X2oxUCFrEc9M5YOS1BobfDFXPA@mail.gmail.com/ Reported-by: Guenter Roeck Closes: https://lore.kernel.org/all/aaa8b7d7-5abe-47bf-93f6-407942436472@roeck-us.net/ Fixes: 77292bb8ca69 ("crypto: scomp - remove memcpy if sg_nents is 1 and pages are lowmem") Signed-off-by: Barry Song Message-Id: <20240319010920.125192-1-21cnbao@gmail.com> Signed-off-by: Max Filippov --- arch/xtensa/include/asm/cacheflush.h | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/arch/xtensa/include/asm/cacheflush.h b/arch/xtensa/include/asm/cacheflush.h index 38bcecb0e457d..a2b6bb5429f5c 100644 --- a/arch/xtensa/include/asm/cacheflush.h +++ b/arch/xtensa/include/asm/cacheflush.h @@ -100,6 +100,10 @@ void flush_cache_range(struct vm_area_struct*, ulong, ulong); void flush_icache_range(unsigned long start, unsigned long end); void flush_cache_page(struct vm_area_struct*, unsigned long, unsigned long); +#define flush_cache_all flush_cache_all +#define flush_cache_range flush_cache_range +#define flush_icache_range flush_icache_range +#define flush_cache_page flush_cache_page #else #define flush_cache_all local_flush_cache_all #define flush_cache_range local_flush_cache_range @@ -136,20 +140,7 @@ void local_flush_cache_page(struct vm_area_struct *vma, #else -#define flush_cache_all() do { } while (0) -#define flush_cache_mm(mm) do { } while (0) -#define flush_cache_dup_mm(mm) do { } while (0) - -#define flush_cache_vmap(start,end) do { } while (0) -#define flush_cache_vmap_early(start,end) do { } while (0) -#define flush_cache_vunmap(start,end) do { } while (0) - -#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 -#define flush_dcache_page(page) do { } while (0) - #define flush_icache_range local_flush_icache_range -#define flush_cache_page(vma, addr, pfn) do { } while (0) -#define flush_cache_range(vma, start, end) do { } while (0) #endif @@ -162,15 +153,14 @@ void local_flush_cache_page(struct vm_area_struct *vma, __invalidate_icache_range(start,(end) - (start)); \ } while (0) -#define flush_dcache_mmap_lock(mapping) do { } while (0) -#define flush_dcache_mmap_unlock(mapping) do { } while (0) - #if defined(CONFIG_MMU) && (DCACHE_WAY_SIZE > PAGE_SIZE) extern void copy_to_user_page(struct vm_area_struct*, struct page*, unsigned long, void*, const void*, unsigned long); extern void copy_from_user_page(struct vm_area_struct*, struct page*, unsigned long, void*, const void*, unsigned long); +#define copy_to_user_page copy_to_user_page +#define copy_from_user_page copy_from_user_page #else @@ -186,4 +176,6 @@ extern void copy_from_user_page(struct vm_area_struct*, struct page*, #endif +#include + #endif /* _XTENSA_CACHEFLUSH_H */ From d85cf67a339685beae1d0aee27b7f61da95455be Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Thu, 25 Apr 2024 15:27:19 -0700 Subject: [PATCH 449/606] net: bcmgenet: synchronize EXT_RGMII_OOB_CTRL access The EXT_RGMII_OOB_CTRL register can be written from different contexts. It is predominantly written from the adjust_link handler which is synchronized by the phydev->lock, but can also be written from a different context when configuring the mii in bcmgenet_mii_config(). The chances of contention are quite low, but it is conceivable that adjust_link could occur during resume when WoL is enabled so use the phydev->lock synchronizer in bcmgenet_mii_config() to be sure. Fixes: afe3f907d20f ("net: bcmgenet: power on MII block for all MII modes") Cc: stable@vger.kernel.org Signed-off-by: Doug Berger Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmmii.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index 9ada893557475..86a4aa72b3d4b 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -2,7 +2,7 @@ /* * Broadcom GENET MDIO routines * - * Copyright (c) 2014-2017 Broadcom + * Copyright (c) 2014-2024 Broadcom */ #include @@ -275,6 +275,7 @@ int bcmgenet_mii_config(struct net_device *dev, bool init) * block for the interface to work, unconditionally clear the * Out-of-band disable since we do not need it. */ + mutex_lock(&phydev->lock); reg = bcmgenet_ext_readl(priv, EXT_RGMII_OOB_CTRL); reg &= ~OOB_DISABLE; if (priv->ext_phy) { @@ -286,6 +287,7 @@ int bcmgenet_mii_config(struct net_device *dev, bool init) reg |= RGMII_MODE_EN; } bcmgenet_ext_writel(priv, reg, EXT_RGMII_OOB_CTRL); + mutex_unlock(&phydev->lock); if (init) dev_info(kdev, "configuring instance for %s\n", phy_name); From 2dbe5f19368caae63b1f59f5bc2af78c7d522b3a Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Thu, 25 Apr 2024 15:27:20 -0700 Subject: [PATCH 450/606] net: bcmgenet: synchronize use of bcmgenet_set_rx_mode() The ndo_set_rx_mode function is synchronized with the netif_addr_lock spinlock and BHs disabled. Since this function is also invoked directly from the driver the same synchronization should be applied. Fixes: 72f96347628e ("net: bcmgenet: set Rx mode before starting netif") Cc: stable@vger.kernel.org Signed-off-by: Doug Berger Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index b1f84b37032a7..5452b7dc6e6ae 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2,7 +2,7 @@ /* * Broadcom GENET (Gigabit Ethernet) controller driver * - * Copyright (c) 2014-2020 Broadcom + * Copyright (c) 2014-2024 Broadcom */ #define pr_fmt(fmt) "bcmgenet: " fmt @@ -3334,7 +3334,9 @@ static void bcmgenet_netif_start(struct net_device *dev) struct bcmgenet_priv *priv = netdev_priv(dev); /* Start the network engine */ + netif_addr_lock_bh(dev); bcmgenet_set_rx_mode(dev); + netif_addr_unlock_bh(dev); bcmgenet_enable_rx_napi(priv); umac_enable_set(priv, CMD_TX_EN | CMD_RX_EN, true); From 0d5e2a82232605b337972fb2c7d0cbc46898aca1 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Thu, 25 Apr 2024 15:27:21 -0700 Subject: [PATCH 451/606] net: bcmgenet: synchronize UMAC_CMD access The UMAC_CMD register is written from different execution contexts and has insufficient synchronization protections to prevent possible corruption. Of particular concern are the acceses from the phy_device delayed work context used by the adjust_link call and the BH context that may be used by the ndo_set_rx_mode call. A spinlock is added to the driver to protect contended register accesses (i.e. reg_lock) and it is used to synchronize accesses to UMAC_CMD. Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file") Cc: stable@vger.kernel.org Signed-off-by: Doug Berger Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 12 +++++++++++- drivers/net/ethernet/broadcom/genet/bcmgenet.h | 4 +++- drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c | 8 +++++++- drivers/net/ethernet/broadcom/genet/bcmmii.c | 2 ++ 4 files changed, 23 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 5452b7dc6e6ae..c7e7dac057a33 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2467,14 +2467,18 @@ static void umac_enable_set(struct bcmgenet_priv *priv, u32 mask, bool enable) { u32 reg; + spin_lock_bh(&priv->reg_lock); reg = bcmgenet_umac_readl(priv, UMAC_CMD); - if (reg & CMD_SW_RESET) + if (reg & CMD_SW_RESET) { + spin_unlock_bh(&priv->reg_lock); return; + } if (enable) reg |= mask; else reg &= ~mask; bcmgenet_umac_writel(priv, reg, UMAC_CMD); + spin_unlock_bh(&priv->reg_lock); /* UniMAC stops on a packet boundary, wait for a full-size packet * to be processed @@ -2490,8 +2494,10 @@ static void reset_umac(struct bcmgenet_priv *priv) udelay(10); /* issue soft reset and disable MAC while updating its registers */ + spin_lock_bh(&priv->reg_lock); bcmgenet_umac_writel(priv, CMD_SW_RESET, UMAC_CMD); udelay(2); + spin_unlock_bh(&priv->reg_lock); } static void bcmgenet_intr_disable(struct bcmgenet_priv *priv) @@ -3597,16 +3603,19 @@ static void bcmgenet_set_rx_mode(struct net_device *dev) * 3. The number of filters needed exceeds the number filters * supported by the hardware. */ + spin_lock(&priv->reg_lock); reg = bcmgenet_umac_readl(priv, UMAC_CMD); if ((dev->flags & (IFF_PROMISC | IFF_ALLMULTI)) || (nfilter > MAX_MDF_FILTER)) { reg |= CMD_PROMISC; bcmgenet_umac_writel(priv, reg, UMAC_CMD); + spin_unlock(&priv->reg_lock); bcmgenet_umac_writel(priv, 0, UMAC_MDF_CTRL); return; } else { reg &= ~CMD_PROMISC; bcmgenet_umac_writel(priv, reg, UMAC_CMD); + spin_unlock(&priv->reg_lock); } /* update MDF filter */ @@ -4005,6 +4014,7 @@ static int bcmgenet_probe(struct platform_device *pdev) goto err; } + spin_lock_init(&priv->reg_lock); spin_lock_init(&priv->lock); /* Set default pause parameters */ diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index 7523b60b3c1c0..43b923c48b14f 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (c) 2014-2020 Broadcom + * Copyright (c) 2014-2024 Broadcom */ #ifndef __BCMGENET_H__ @@ -573,6 +573,8 @@ struct bcmgenet_rxnfc_rule { /* device context */ struct bcmgenet_priv { void __iomem *base; + /* reg_lock: lock to serialize access to shared registers */ + spinlock_t reg_lock; enum bcmgenet_version version; struct net_device *dev; diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c index 7a41cad5788f4..1248792d7fd4d 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c @@ -2,7 +2,7 @@ /* * Broadcom GENET (Gigabit Ethernet) Wake-on-LAN support * - * Copyright (c) 2014-2020 Broadcom + * Copyright (c) 2014-2024 Broadcom */ #define pr_fmt(fmt) "bcmgenet_wol: " fmt @@ -151,6 +151,7 @@ int bcmgenet_wol_power_down_cfg(struct bcmgenet_priv *priv, } /* Can't suspend with WoL if MAC is still in reset */ + spin_lock_bh(&priv->reg_lock); reg = bcmgenet_umac_readl(priv, UMAC_CMD); if (reg & CMD_SW_RESET) reg &= ~CMD_SW_RESET; @@ -158,6 +159,7 @@ int bcmgenet_wol_power_down_cfg(struct bcmgenet_priv *priv, /* disable RX */ reg &= ~CMD_RX_EN; bcmgenet_umac_writel(priv, reg, UMAC_CMD); + spin_unlock_bh(&priv->reg_lock); mdelay(10); if (priv->wolopts & (WAKE_MAGIC | WAKE_MAGICSECURE)) { @@ -203,6 +205,7 @@ int bcmgenet_wol_power_down_cfg(struct bcmgenet_priv *priv, } /* Enable CRC forward */ + spin_lock_bh(&priv->reg_lock); reg = bcmgenet_umac_readl(priv, UMAC_CMD); priv->crc_fwd_en = 1; reg |= CMD_CRC_FWD; @@ -210,6 +213,7 @@ int bcmgenet_wol_power_down_cfg(struct bcmgenet_priv *priv, /* Receiver must be enabled for WOL MP detection */ reg |= CMD_RX_EN; bcmgenet_umac_writel(priv, reg, UMAC_CMD); + spin_unlock_bh(&priv->reg_lock); reg = UMAC_IRQ_MPD_R; if (hfb_enable) @@ -256,7 +260,9 @@ void bcmgenet_wol_power_up_cfg(struct bcmgenet_priv *priv, } /* Disable CRC Forward */ + spin_lock_bh(&priv->reg_lock); reg = bcmgenet_umac_readl(priv, UMAC_CMD); reg &= ~CMD_CRC_FWD; bcmgenet_umac_writel(priv, reg, UMAC_CMD); + spin_unlock_bh(&priv->reg_lock); } diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index 86a4aa72b3d4b..c4a3698cef66f 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -76,6 +76,7 @@ static void bcmgenet_mac_config(struct net_device *dev) reg |= RGMII_LINK; bcmgenet_ext_writel(priv, reg, EXT_RGMII_OOB_CTRL); + spin_lock_bh(&priv->reg_lock); reg = bcmgenet_umac_readl(priv, UMAC_CMD); reg &= ~((CMD_SPEED_MASK << CMD_SPEED_SHIFT) | CMD_HD_EN | @@ -88,6 +89,7 @@ static void bcmgenet_mac_config(struct net_device *dev) reg |= CMD_TX_EN | CMD_RX_EN; } bcmgenet_umac_writel(priv, reg, UMAC_CMD); + spin_unlock_bh(&priv->reg_lock); active = phy_init_eee(phydev, 0) >= 0; bcmgenet_eee_enable_set(dev, From 16f50301a804a4b86c75fdd0dfb50ec263ed41b1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 24 Apr 2024 11:37:59 -0700 Subject: [PATCH 452/606] MAINTAINERS: add an explicit entry for YNL Donald has been contributing to YNL a lot. Let's create a dedicated MAINTAINERS entry and add make his involvement official :) Signed-off-by: Jakub Kicinski Acked-by: Donald Hunter Signed-off-by: David S. Miller --- MAINTAINERS | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index aff81a252865d..4a2e864fce516 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -24468,6 +24468,14 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/har F: Documentation/admin-guide/LSM/Yama.rst F: security/yama/ +YAML NETLINK (YNL) +M: Donald Hunter +M: Jakub Kicinski +F: Documentation/netlink/ +F: Documentation/userspace-api/netlink/intro-specs.rst +F: Documentation/userspace-api/netlink/specs.rst +F: tools/net/ynl/ + YEALINK PHONE DRIVER M: Henk Vergonet L: usbb2k-api-dev@nongnu.org From e25714466abd9d96901b15efddf82c60a38abd86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Asbj=C3=B8rn=20Sloth=20T=C3=B8nnesen?= Date: Fri, 26 Apr 2024 09:12:23 +0000 Subject: [PATCH 453/606] net: qede: sanitize 'rc' in qede_add_tc_flower_fltr() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Explicitly set 'rc' (return code), before jumping to the unlock and return path. By not having any code depend on that 'rc' remains at it's initial value of -EINVAL, then we can re-use 'rc' for the return code of function calls in subsequent patches. Only compile tested. Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qede/qede_filter.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index a5ac21a0ee33f..8ecdfa36a6854 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -1868,8 +1868,8 @@ int qede_add_tc_flower_fltr(struct qede_dev *edev, __be16 proto, struct flow_cls_offload *f) { struct qede_arfs_fltr_node *n; - int min_hlen, rc = -EINVAL; struct qede_arfs_tuple t; + int min_hlen, rc; __qede_lock(edev); @@ -1879,8 +1879,10 @@ int qede_add_tc_flower_fltr(struct qede_dev *edev, __be16 proto, } /* parse flower attribute and prepare filter */ - if (qede_parse_flow_attr(edev, proto, f->rule, &t)) + if (qede_parse_flow_attr(edev, proto, f->rule, &t)) { + rc = -EINVAL; goto unlock; + } /* Validate profile mode and number of filters */ if ((edev->arfs->filter_count && edev->arfs->mode != t.mode) || @@ -1888,12 +1890,15 @@ int qede_add_tc_flower_fltr(struct qede_dev *edev, __be16 proto, DP_NOTICE(edev, "Filter configuration invalidated, filter mode=0x%x, configured mode=0x%x, filter count=0x%x\n", t.mode, edev->arfs->mode, edev->arfs->filter_count); + rc = -EINVAL; goto unlock; } /* parse tc actions and get the vf_id */ - if (qede_parse_actions(edev, &f->rule->action, f->common.extack)) + if (qede_parse_actions(edev, &f->rule->action, f->common.extack)) { + rc = -EINVAL; goto unlock; + } if (qede_flow_find_fltr(edev, &t)) { rc = -EEXIST; From fcee2065a178f78be6fd516302830378b17dba3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Asbj=C3=B8rn=20Sloth=20T=C3=B8nnesen?= Date: Fri, 26 Apr 2024 09:12:24 +0000 Subject: [PATCH 454/606] net: qede: use return from qede_parse_flow_attr() for flower MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In qede_add_tc_flower_fltr(), when calling qede_parse_flow_attr() then the return code was only used for a non-zero check, and then -EINVAL was returned. qede_parse_flow_attr() can currently fail with: * -EINVAL * -EOPNOTSUPP * -EPROTONOSUPPORT This patch changes the code to use the actual return code, not just return -EINVAL. The blaimed commit introduced these functions. Only compile tested. Fixes: 2ce9c93eaca6 ("qede: Ingress tc flower offload (drop action) support.") Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qede/qede_filter.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index 8ecdfa36a6854..25ef0f4258cb1 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -1879,10 +1879,9 @@ int qede_add_tc_flower_fltr(struct qede_dev *edev, __be16 proto, } /* parse flower attribute and prepare filter */ - if (qede_parse_flow_attr(edev, proto, f->rule, &t)) { - rc = -EINVAL; + rc = qede_parse_flow_attr(edev, proto, f->rule, &t); + if (rc) goto unlock; - } /* Validate profile mode and number of filters */ if ((edev->arfs->filter_count && edev->arfs->mode != t.mode) || From 27b44414a34b108c5a37cd5b4894f606061d86e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Asbj=C3=B8rn=20Sloth=20T=C3=B8nnesen?= Date: Fri, 26 Apr 2024 09:12:25 +0000 Subject: [PATCH 455/606] net: qede: use return from qede_parse_flow_attr() for flow_spec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In qede_flow_spec_to_rule(), when calling qede_parse_flow_attr() then the return code was only used for a non-zero check, and then -EINVAL was returned. qede_parse_flow_attr() can currently fail with: * -EINVAL * -EOPNOTSUPP * -EPROTONOSUPPORT This patch changes the code to use the actual return code, not just return -EINVAL. The blaimed commit introduced qede_flow_spec_to_rule(), and this call to qede_parse_flow_attr(), it looks like it just duplicated how it was already used. Only compile tested. Fixes: 37c5d3efd7f8 ("qede: use ethtool_rx_flow_rule() to remove duplicated parser code") Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qede/qede_filter.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index 25ef0f4258cb1..377d661f70f78 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -2002,10 +2002,9 @@ static int qede_flow_spec_to_rule(struct qede_dev *edev, if (IS_ERR(flow)) return PTR_ERR(flow); - if (qede_parse_flow_attr(edev, proto, flow->rule, t)) { - err = -EINVAL; + err = qede_parse_flow_attr(edev, proto, flow->rule, t); + if (err) goto err_out; - } /* Make sure location is valid and filter isn't already set */ err = qede_flow_spec_validate(edev, &flow->rule->action, t, From f26f719a36e56381a1f4230e5364e7ad4d485888 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Asbj=C3=B8rn=20Sloth=20T=C3=B8nnesen?= Date: Fri, 26 Apr 2024 09:12:26 +0000 Subject: [PATCH 456/606] net: qede: use return from qede_parse_actions() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When calling qede_parse_actions() then the return code was only used for a non-zero check, and then -EINVAL was returned. qede_parse_actions() can currently fail with: * -EINVAL * -EOPNOTSUPP This patch changes the code to use the actual return code, not just return -EINVAL. The blaimed commit broke the implicit assumption that only -EINVAL would ever be returned. Only compile tested. Fixes: 319a1d19471e ("flow_offload: check for basic action hw stats type") Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qede/qede_filter.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index 377d661f70f78..cb6b33a228ea2 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -1894,10 +1894,9 @@ int qede_add_tc_flower_fltr(struct qede_dev *edev, __be16 proto, } /* parse tc actions and get the vf_id */ - if (qede_parse_actions(edev, &f->rule->action, f->common.extack)) { - rc = -EINVAL; + rc = qede_parse_actions(edev, &f->rule->action, f->common.extack); + if (rc) goto unlock; - } if (qede_flow_find_fltr(edev, &t)) { rc = -EEXIST; From d6d85ac15cce4dcf02cf8c96cb970562be6a3529 Mon Sep 17 00:00:00 2001 From: Ashish Kalra Date: Fri, 26 Apr 2024 00:41:56 +0000 Subject: [PATCH 457/606] x86/e820: Add a new e820 table update helper Add a new API helper e820__range_update_table() with which to update an arbitrary e820 table. Move all current users of e820__range_update_kexec() to this new helper. [ bp: Massage. ] Signed-off-by: Ashish Kalra Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/b726af213ad55053f8a7a1e793b01bb3f1ca9dd5.1714090302.git.ashish.kalra@amd.com --- arch/x86/include/asm/e820/api.h | 1 + arch/x86/kernel/e820.c | 7 ++++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/e820/api.h b/arch/x86/include/asm/e820/api.h index e8f58ddd06d97..2e74a7f0e9357 100644 --- a/arch/x86/include/asm/e820/api.h +++ b/arch/x86/include/asm/e820/api.h @@ -17,6 +17,7 @@ extern bool e820__mapped_all(u64 start, u64 end, enum e820_type type); extern void e820__range_add (u64 start, u64 size, enum e820_type type); extern u64 e820__range_update(u64 start, u64 size, enum e820_type old_type, enum e820_type new_type); extern u64 e820__range_remove(u64 start, u64 size, enum e820_type old_type, bool check_type); +extern u64 e820__range_update_table(struct e820_table *t, u64 start, u64 size, enum e820_type old_type, enum e820_type new_type); extern void e820__print_table(char *who); extern int e820__update_table(struct e820_table *table); diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 6f1b379e3b385..68b09f718f10e 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -532,9 +532,10 @@ u64 __init e820__range_update(u64 start, u64 size, enum e820_type old_type, enum return __e820__range_update(e820_table, start, size, old_type, new_type); } -static u64 __init e820__range_update_kexec(u64 start, u64 size, enum e820_type old_type, enum e820_type new_type) +u64 __init e820__range_update_table(struct e820_table *t, u64 start, u64 size, + enum e820_type old_type, enum e820_type new_type) { - return __e820__range_update(e820_table_kexec, start, size, old_type, new_type); + return __e820__range_update(t, start, size, old_type, new_type); } /* Remove a range of memory from the E820 table: */ @@ -806,7 +807,7 @@ u64 __init e820__memblock_alloc_reserved(u64 size, u64 align) addr = memblock_phys_alloc(size, align); if (addr) { - e820__range_update_kexec(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); + e820__range_update_table(e820_table_kexec, addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); pr_info("update e820_table_kexec for e820__memblock_alloc_reserved()\n"); e820__update_table_kexec(); } From ee59be35d7a8be7fcaa2d61fb89734ab5c25e4ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Thu, 11 Apr 2024 23:33:51 +0200 Subject: [PATCH 458/606] misc/pvpanic-pci: register attributes via pci_driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In __pci_register_driver(), the pci core overwrites the dev_groups field of the embedded struct device_driver with the dev_groups from the outer struct pci_driver unconditionally. Set dev_groups in the pci_driver to make sure it is used. This was broken since the introduction of pvpanic-pci. Fixes: db3a4f0abefd ("misc/pvpanic: add PCI driver") Cc: stable@vger.kernel.org Signed-off-by: Thomas Weißschuh Fixes: ded13b9cfd59 ("PCI: Add support for dev_groups to struct pci_driver") Link: https://lore.kernel.org/r/20240411-pvpanic-pci-dev-groups-v1-1-db8cb69f1b09@weissschuh.net Signed-off-by: Greg Kroah-Hartman --- drivers/misc/pvpanic/pvpanic-pci.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/misc/pvpanic/pvpanic-pci.c b/drivers/misc/pvpanic/pvpanic-pci.c index 9ad20e82785bc..b21598a18f6da 100644 --- a/drivers/misc/pvpanic/pvpanic-pci.c +++ b/drivers/misc/pvpanic/pvpanic-pci.c @@ -44,8 +44,6 @@ static struct pci_driver pvpanic_pci_driver = { .name = "pvpanic-pci", .id_table = pvpanic_pci_id_tbl, .probe = pvpanic_pci_probe, - .driver = { - .dev_groups = pvpanic_dev_groups, - }, + .dev_groups = pvpanic_dev_groups, }; module_pci_driver(pvpanic_pci_driver); From 400fea4b9651adf5d7ebd5d71e905f34f4e4e493 Mon Sep 17 00:00:00 2001 From: Ashish Kalra Date: Fri, 26 Apr 2024 00:43:18 +0000 Subject: [PATCH 459/606] x86/sev: Add callback to apply RMP table fixups for kexec Handle cases where the RMP table placement in the BIOS is not 2M aligned and the kexec-ed kernel could try to allocate from within that chunk which then causes a fatal RMP fault. The kexec failure is illustrated below: SEV-SNP: RMP table physical range [0x0000007ffe800000 - 0x000000807f0fffff] BIOS-provided physical RAM map: BIOS-e820: [mem 0x0000000000000000-0x000000000008efff] usable BIOS-e820: [mem 0x000000000008f000-0x000000000008ffff] ACPI NVS ... BIOS-e820: [mem 0x0000004080000000-0x0000007ffe7fffff] usable BIOS-e820: [mem 0x0000007ffe800000-0x000000807f0fffff] reserved BIOS-e820: [mem 0x000000807f100000-0x000000807f1fefff] usable As seen here in the e820 memory map, the end range of the RMP table is not aligned to 2MB and not reserved but it is usable as RAM. Subsequently, kexec -s (KEXEC_FILE_LOAD syscall) loads it's purgatory code and boot_param, command line and other setup data into this RAM region as seen in the kexec logs below, which leads to fatal RMP fault during kexec boot. Loaded purgatory at 0x807f1fa000 Loaded boot_param, command line and misc at 0x807f1f8000 bufsz=0x1350 memsz=0x2000 Loaded 64bit kernel at 0x7ffae00000 bufsz=0xd06200 memsz=0x3894000 Loaded initrd at 0x7ff6c89000 bufsz=0x4176014 memsz=0x4176014 E820 memmap: 0000000000000000-000000000008efff (1) 000000000008f000-000000000008ffff (4) 0000000000090000-000000000009ffff (1) ... 0000004080000000-0000007ffe7fffff (1) 0000007ffe800000-000000807f0fffff (2) 000000807f100000-000000807f1fefff (1) 000000807f1ff000-000000807fffffff (2) nr_segments = 4 segment[0]: buf=0x00000000e626d1a2 bufsz=0x4000 mem=0x807f1fa000 memsz=0x5000 segment[1]: buf=0x0000000029c67bd6 bufsz=0x1350 mem=0x807f1f8000 memsz=0x2000 segment[2]: buf=0x0000000045c60183 bufsz=0xd06200 mem=0x7ffae00000 memsz=0x3894000 segment[3]: buf=0x000000006e54f08d bufsz=0x4176014 mem=0x7ff6c89000 memsz=0x4177000 kexec_file_load: type:0, start:0x807f1fa150 head:0x1184d0002 flags:0x0 Check if RMP table start and end physical range in the e820 tables are not aligned to 2MB and in that case map this range to reserved in all the three e820 tables. [ bp: Massage. ] Fixes: c3b86e61b756 ("x86/cpufeatures: Enable/unmask SEV-SNP CPU feature") Signed-off-by: Ashish Kalra Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/df6e995ff88565262c2c7c69964883ff8aa6fc30.1714090302.git.ashish.kalra@amd.com --- arch/x86/include/asm/sev.h | 2 ++ arch/x86/mm/mem_encrypt.c | 7 +++++++ arch/x86/virt/svm/sev.c | 36 ++++++++++++++++++++++++++++++++++++ 3 files changed, 45 insertions(+) diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 7f57382afee41..93ed60080cfe7 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -269,6 +269,7 @@ int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 asid, bool immut int rmp_make_shared(u64 pfn, enum pg_level level); void snp_leak_pages(u64 pfn, unsigned int npages); void kdump_sev_callback(void); +void snp_fixup_e820_tables(void); #else static inline bool snp_probe_rmptable_info(void) { return false; } static inline int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level) { return -ENODEV; } @@ -282,6 +283,7 @@ static inline int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 as static inline int rmp_make_shared(u64 pfn, enum pg_level level) { return -ENODEV; } static inline void snp_leak_pages(u64 pfn, unsigned int npages) {} static inline void kdump_sev_callback(void) { } +static inline void snp_fixup_e820_tables(void) {} #endif #endif diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index 6f3b3e0287185..0a120d85d7bba 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -102,6 +102,13 @@ void __init mem_encrypt_setup_arch(void) phys_addr_t total_mem = memblock_phys_mem_size(); unsigned long size; + /* + * Do RMP table fixups after the e820 tables have been setup by + * e820__memory_setup(). + */ + if (cc_platform_has(CC_ATTR_HOST_SEV_SNP)) + snp_fixup_e820_tables(); + if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) return; diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c index ab0e8448bb6eb..0ae10535c6999 100644 --- a/arch/x86/virt/svm/sev.c +++ b/arch/x86/virt/svm/sev.c @@ -163,6 +163,42 @@ bool snp_probe_rmptable_info(void) return true; } +static void __init __snp_fixup_e820_tables(u64 pa) +{ + if (IS_ALIGNED(pa, PMD_SIZE)) + return; + + /* + * Handle cases where the RMP table placement by the BIOS is not + * 2M aligned and the kexec kernel could try to allocate + * from within that chunk which then causes a fatal RMP fault. + * + * The e820_table needs to be updated as it is converted to + * kernel memory resources and used by KEXEC_FILE_LOAD syscall + * to load kexec segments. + * + * The e820_table_firmware needs to be updated as it is exposed + * to sysfs and used by the KEXEC_LOAD syscall to load kexec + * segments. + * + * The e820_table_kexec needs to be updated as it passed to + * the kexec-ed kernel. + */ + pa = ALIGN_DOWN(pa, PMD_SIZE); + if (e820__mapped_any(pa, pa + PMD_SIZE, E820_TYPE_RAM)) { + pr_info("Reserving start/end of RMP table on a 2MB boundary [0x%016llx]\n", pa); + e820__range_update(pa, PMD_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED); + e820__range_update_table(e820_table_kexec, pa, PMD_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED); + e820__range_update_table(e820_table_firmware, pa, PMD_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED); + } +} + +void __init snp_fixup_e820_tables(void) +{ + __snp_fixup_e820_tables(probed_rmp_base); + __snp_fixup_e820_tables(probed_rmp_base + probed_rmp_size); +} + /* * Do the necessary preparations which are verified by the firmware as * described in the SNP_INIT_EX firmware command description in the SNP From 38762a0763c10c24a4915feee722d7aa6e73eb98 Mon Sep 17 00:00:00 2001 From: Thanassis Avgerinos Date: Wed, 17 Apr 2024 11:30:02 -0400 Subject: [PATCH 460/606] firewire: nosy: ensure user_length is taken into account when fetching packet contents Ensure that packet_buffer_get respects the user_length provided. If the length of the head packet exceeds the user_length, packet_buffer_get will now return 0 to signify to the user that no data were read and a larger buffer size is required. Helps prevent user space overflows. Signed-off-by: Thanassis Avgerinos Signed-off-by: Takashi Sakamoto --- drivers/firewire/nosy.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/firewire/nosy.c b/drivers/firewire/nosy.c index b0d671db178a8..ea31ac7ac1ca9 100644 --- a/drivers/firewire/nosy.c +++ b/drivers/firewire/nosy.c @@ -148,10 +148,12 @@ packet_buffer_get(struct client *client, char __user *data, size_t user_length) if (atomic_read(&buffer->size) == 0) return -ENODEV; - /* FIXME: Check length <= user_length. */ + length = buffer->head->length; + + if (length > user_length) + return 0; end = buffer->data + buffer->capacity; - length = buffer->head->length; if (&buffer->head->data[length] < end) { if (copy_to_user(data, buffer->head->data, length)) From 09773bf55aeabe3fd61745d900798dc1272c778a Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Mon, 29 Apr 2024 17:47:08 +0900 Subject: [PATCH 461/606] firewire: ohci: fulfill timestamp for some local asynchronous transaction 1394 OHCI driver generates packet data for the response subaction to the request subaction to some local registers. In the case, the driver should assign timestamp to them by itself. This commit fulfills the timestamp for the subaction. Cc: stable@vger.kernel.org Fixes: dcadfd7f7c74 ("firewire: core: use union for callback of transaction completion") Link: https://lore.kernel.org/r/20240429084709.707473-1-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- drivers/firewire/ohci.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index 38d19410a2be6..b9ae0340b8a70 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -1556,6 +1556,8 @@ static int handle_at_packet(struct context *context, #define HEADER_GET_DATA_LENGTH(q) (((q) >> 16) & 0xffff) #define HEADER_GET_EXTENDED_TCODE(q) (((q) >> 0) & 0xffff) +static u32 get_cycle_time(struct fw_ohci *ohci); + static void handle_local_rom(struct fw_ohci *ohci, struct fw_packet *packet, u32 csr) { @@ -1580,6 +1582,8 @@ static void handle_local_rom(struct fw_ohci *ohci, (void *) ohci->config_rom + i, length); } + // Timestamping on behalf of the hardware. + response.timestamp = cycle_time_to_ohci_tstamp(get_cycle_time(ohci)); fw_core_handle_response(&ohci->card, &response); } @@ -1628,6 +1632,8 @@ static void handle_local_lock(struct fw_ohci *ohci, fw_fill_response(&response, packet->header, RCODE_BUSY, NULL, 0); out: + // Timestamping on behalf of the hardware. + response.timestamp = cycle_time_to_ohci_tstamp(get_cycle_time(ohci)); fw_core_handle_response(&ohci->card, &response); } @@ -1670,8 +1676,6 @@ static void handle_local_request(struct context *ctx, struct fw_packet *packet) } } -static u32 get_cycle_time(struct fw_ohci *ohci); - static void at_context_transmit(struct context *ctx, struct fw_packet *packet) { unsigned long flags; From 515a3c3a5489a890c7c3c1df3855eb4868a27598 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Mon, 22 Apr 2024 14:22:22 -0700 Subject: [PATCH 462/606] platform/x86: ISST: Add Grand Ridge to HPM CPU list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add Grand Ridge (ATOM_CRESTMONT) to hpm_cpu_ids, so that MSR 0x54 can be used. Signed-off-by: Srinivas Pandruvada Link: https://lore.kernel.org/r/20240422212222.3881606-1-srinivas.pandruvada@linux.intel.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/intel/speed_select_if/isst_if_common.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/intel/speed_select_if/isst_if_common.c b/drivers/platform/x86/intel/speed_select_if/isst_if_common.c index 30951f7131cd9..1accdaaf282c5 100644 --- a/drivers/platform/x86/intel/speed_select_if/isst_if_common.c +++ b/drivers/platform/x86/intel/speed_select_if/isst_if_common.c @@ -721,6 +721,7 @@ static struct miscdevice isst_if_char_driver = { static const struct x86_cpu_id hpm_cpu_ids[] = { X86_MATCH_INTEL_FAM6_MODEL(GRANITERAPIDS_D, NULL), X86_MATCH_INTEL_FAM6_MODEL(GRANITERAPIDS_X, NULL), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_CRESTMONT, NULL), X86_MATCH_INTEL_FAM6_MODEL(ATOM_CRESTMONT_X, NULL), {} }; From e4236b14fe32a8d92686ec656c870a6bb1d6f50a Mon Sep 17 00:00:00 2001 From: Matt Coster Date: Tue, 5 Mar 2024 10:28:33 +0000 Subject: [PATCH 463/606] drm/imagination: Ensure PVR_MIPS_PT_PAGE_COUNT is never zero When the host page size was more than 4 times larger than the FW page size, this macro evaluated to zero resulting in zero-sized arrays. Use DIV_ROUND_UP() to ensure the correct behavior. Reported-by: 20240228012313.5934-1-yaolu@kylinos.cn Closes: https://lore.kernel.org/dri-devel/20240228012313.5934-1-yaolu@kylinos.cn Link: https://lore.kernel.org/dri-devel/20240228012313.5934-1-yaolu@kylinos.cn Fixes: 927f3e0253c1 ("drm/imagination: Implement MIPS firmware processor and MMU support") Cc: stable@vger.kernel.org Signed-off-by: Matt Coster Reviewed-by: Frank Binns --- drivers/gpu/drm/imagination/pvr_fw_mips.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/imagination/pvr_fw_mips.h b/drivers/gpu/drm/imagination/pvr_fw_mips.h index 408dbe63a90cf..a0c5c41c8aa24 100644 --- a/drivers/gpu/drm/imagination/pvr_fw_mips.h +++ b/drivers/gpu/drm/imagination/pvr_fw_mips.h @@ -7,13 +7,14 @@ #include "pvr_rogue_mips.h" #include +#include #include /* Forward declaration from pvr_gem.h. */ struct pvr_gem_object; -#define PVR_MIPS_PT_PAGE_COUNT ((ROGUE_MIPSFW_MAX_NUM_PAGETABLE_PAGES * ROGUE_MIPSFW_PAGE_SIZE_4K) \ - >> PAGE_SHIFT) +#define PVR_MIPS_PT_PAGE_COUNT DIV_ROUND_UP(ROGUE_MIPSFW_MAX_NUM_PAGETABLE_PAGES * ROGUE_MIPSFW_PAGE_SIZE_4K, PAGE_SIZE) + /** * struct pvr_fw_mips_data - MIPS-specific data */ From 2d5af3ab9e6f1cf1468b2a5221b5c1f7f46c3333 Mon Sep 17 00:00:00 2001 From: Aman Dhoot Date: Mon, 22 Apr 2024 18:08:23 +0530 Subject: [PATCH 464/606] ALSA: hda/realtek: Fix mute led of HP Laptop 15-da3001TU This patch simply add SND_PCI_QUIRK for HP Laptop 15-da3001TU to fixed mute led of laptop. Signed-off-by: Aman Dhoot Cc: Link: https://lore.kernel.org/r/CAMTp=B+3NG65Z684xMwHqdXDJhY+DJK-kuSw4adn6xwnG+b5JA@mail.gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 70d80b6af3fe3..e770133097954 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9937,6 +9937,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x860f, "HP ZBook 15 G6", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x861f, "HP Elite Dragonfly G1", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x869d, "HP", ALC236_FIXUP_HP_MUTE_LED), + SND_PCI_QUIRK(0x103c, 0x86c1, "HP Laptop 15-da3001TU", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x86c7, "HP Envy AiO 32", ALC274_FIXUP_HP_ENVY_GPIO), SND_PCI_QUIRK(0x103c, 0x86e7, "HP Spectre x360 15-eb0xxx", ALC285_FIXUP_HP_SPECTRE_X360_EB1), SND_PCI_QUIRK(0x103c, 0x86e8, "HP Spectre x360 15-eb0xxx", ALC285_FIXUP_HP_SPECTRE_X360_EB1), From 6dee402daba4eb8677a9438ebdcd8fe90ddd4326 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 26 Apr 2024 17:27:17 +0200 Subject: [PATCH 465/606] vxlan: Fix racy device stats updates. VXLAN devices update their stats locklessly. Therefore these counters should either be stored in per-cpu data structures or the updates should be done using atomic increments. Since the net_device_core_stats infrastructure is already used in vxlan_rcv(), use it for the other rx_dropped and tx_dropped counter updates. Update the other counters atomically using DEV_STATS_INC(). Fixes: d342894c5d2f ("vxlan: virtual extensible lan") Signed-off-by: Guillaume Nault Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/vxlan/vxlan_core.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index ba319fc219571..0cd9e44c7be85 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -1766,8 +1766,8 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) skb_reset_network_header(skb); if (!vxlan_ecn_decapsulate(vs, oiph, skb)) { - ++vxlan->dev->stats.rx_frame_errors; - ++vxlan->dev->stats.rx_errors; + DEV_STATS_INC(vxlan->dev, rx_frame_errors); + DEV_STATS_INC(vxlan->dev, rx_errors); vxlan_vnifilter_count(vxlan, vni, vninode, VXLAN_VNI_STATS_RX_ERRORS, 0); goto drop; @@ -1837,7 +1837,7 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni) goto out; if (!pskb_may_pull(skb, arp_hdr_len(dev))) { - dev->stats.tx_dropped++; + dev_core_stats_tx_dropped_inc(dev); goto out; } parp = arp_hdr(skb); @@ -1893,7 +1893,7 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni) reply->pkt_type = PACKET_HOST; if (netif_rx(reply) == NET_RX_DROP) { - dev->stats.rx_dropped++; + dev_core_stats_rx_dropped_inc(dev); vxlan_vnifilter_count(vxlan, vni, NULL, VXLAN_VNI_STATS_RX_DROPS, 0); } @@ -2052,7 +2052,7 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni) goto out; if (netif_rx(reply) == NET_RX_DROP) { - dev->stats.rx_dropped++; + dev_core_stats_rx_dropped_inc(dev); vxlan_vnifilter_count(vxlan, vni, NULL, VXLAN_VNI_STATS_RX_DROPS, 0); } @@ -2263,7 +2263,7 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan, len); } else { drop: - dev->stats.rx_dropped++; + dev_core_stats_rx_dropped_inc(dev); vxlan_vnifilter_count(dst_vxlan, vni, NULL, VXLAN_VNI_STATS_RX_DROPS, 0); } @@ -2295,7 +2295,7 @@ static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev, addr_family, dst_port, vxlan->cfg.flags); if (!dst_vxlan) { - dev->stats.tx_errors++; + DEV_STATS_INC(dev, tx_errors); vxlan_vnifilter_count(vxlan, vni, NULL, VXLAN_VNI_STATS_TX_ERRORS, 0); kfree_skb(skb); @@ -2559,7 +2559,7 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, return; drop: - dev->stats.tx_dropped++; + dev_core_stats_tx_dropped_inc(dev); vxlan_vnifilter_count(vxlan, vni, NULL, VXLAN_VNI_STATS_TX_DROPS, 0); dev_kfree_skb(skb); return; @@ -2567,11 +2567,11 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, tx_error: rcu_read_unlock(); if (err == -ELOOP) - dev->stats.collisions++; + DEV_STATS_INC(dev, collisions); else if (err == -ENETUNREACH) - dev->stats.tx_carrier_errors++; + DEV_STATS_INC(dev, tx_carrier_errors); dst_release(ndst); - dev->stats.tx_errors++; + DEV_STATS_INC(dev, tx_errors); vxlan_vnifilter_count(vxlan, vni, NULL, VXLAN_VNI_STATS_TX_ERRORS, 0); kfree_skb(skb); } @@ -2604,7 +2604,7 @@ static void vxlan_xmit_nh(struct sk_buff *skb, struct net_device *dev, return; drop: - dev->stats.tx_dropped++; + dev_core_stats_tx_dropped_inc(dev); vxlan_vnifilter_count(netdev_priv(dev), vni, NULL, VXLAN_VNI_STATS_TX_DROPS, 0); dev_kfree_skb(skb); @@ -2642,7 +2642,7 @@ static netdev_tx_t vxlan_xmit_nhid(struct sk_buff *skb, struct net_device *dev, return NETDEV_TX_OK; drop: - dev->stats.tx_dropped++; + dev_core_stats_tx_dropped_inc(dev); vxlan_vnifilter_count(netdev_priv(dev), vni, NULL, VXLAN_VNI_STATS_TX_DROPS, 0); dev_kfree_skb(skb); @@ -2739,7 +2739,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) !is_multicast_ether_addr(eth->h_dest)) vxlan_fdb_miss(vxlan, eth->h_dest); - dev->stats.tx_dropped++; + dev_core_stats_tx_dropped_inc(dev); vxlan_vnifilter_count(vxlan, vni, NULL, VXLAN_VNI_STATS_TX_DROPS, 0); kfree_skb(skb); From b22ea4ef4c3438817fcb604255b55b0058ed8c64 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 26 Apr 2024 17:27:19 +0200 Subject: [PATCH 466/606] vxlan: Add missing VNI filter counter update in arp_reduce(). VXLAN stores per-VNI statistics using vxlan_vnifilter_count(). These statistics were not updated when arp_reduce() failed its pskb_may_pull() call. Use vxlan_vnifilter_count() to update the VNI counter when that happens. Fixes: 4095e0e1328a ("drivers: vxlan: vnifilter: per vni stats") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- drivers/net/vxlan/vxlan_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 0cd9e44c7be85..c9e4e03ad214f 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -1838,6 +1838,8 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni) if (!pskb_may_pull(skb, arp_hdr_len(dev))) { dev_core_stats_tx_dropped_inc(dev); + vxlan_vnifilter_count(vxlan, vni, NULL, + VXLAN_VNI_STATS_TX_DROPS, 0); goto out; } parp = arp_hdr(skb); From dce3696271af7765f04428ec31b1b87dc7d016c6 Mon Sep 17 00:00:00 2001 From: LuMingYin Date: Sat, 27 Apr 2024 08:23:47 +0100 Subject: [PATCH 467/606] tracing/probes: Fix memory leak in traceprobe_parse_probe_arg_body() If traceprobe_parse_probe_arg_body() failed to allocate 'parg->fmt', it jumps to the label 'out' instead of 'fail' by mistake.In the result, the buffer 'tmp' is not freed in this case and leaks its memory. Thus jump to the label 'fail' in that error case. Link: https://lore.kernel.org/all/20240427072347.1421053-1-lumingyindetect@126.com/ Fixes: 032330abd08b ("tracing/probes: Cleanup probe argument parser") Signed-off-by: LuMingYin Acked-by: Masami Hiramatsu (Google) Signed-off-by: Masami Hiramatsu (Google) --- kernel/trace/trace_probe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index dfe3ee6035ecc..42bc0f3622263 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -1466,7 +1466,7 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size, parg->fmt = kmalloc(len, GFP_KERNEL); if (!parg->fmt) { ret = -ENOMEM; - goto out; + goto fail; } snprintf(parg->fmt, len, "%s[%d]", parg->type->fmttype, parg->count); From b11d26660dff8d7430892008616452dc8e5fb0f3 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Fri, 26 Apr 2024 17:29:38 +0200 Subject: [PATCH 468/606] ASoC: meson: axg-fifo: use threaded irq to check periods With the AXG audio subsystem, there is a possible random channel shift on TDM capture, when the slot number per lane is more than 2, and there is more than one lane used. The problem has been there since the introduction of the axg audio support but such scenario is pretty uncommon. This is why there is no loud complains about the problem. Solving the problem require to make the links non-atomic and use the trigger() callback to start FEs and BEs in the appropriate order. This was tried in the past and reverted because it caused the block irq to sleep while atomic. However, instead of reverting, the solution is to call snd_pcm_period_elapsed() in a non atomic context. Use the bottom half of a threaded IRQ to do so. Fixes: 6dc4fa179fb8 ("ASoC: meson: add axg fifo base driver") Signed-off-by: Jerome Brunet Link: https://lore.kernel.org/r/20240426152946.3078805-2-jbrunet@baylibre.com Signed-off-by: Mark Brown --- sound/soc/meson/axg-fifo.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/sound/soc/meson/axg-fifo.c b/sound/soc/meson/axg-fifo.c index bebee0ca8e388..ecb3eb7a9723d 100644 --- a/sound/soc/meson/axg-fifo.c +++ b/sound/soc/meson/axg-fifo.c @@ -204,18 +204,26 @@ static irqreturn_t axg_fifo_pcm_irq_block(int irq, void *dev_id) unsigned int status; regmap_read(fifo->map, FIFO_STATUS1, &status); - status = FIELD_GET(STATUS1_INT_STS, status); + axg_fifo_ack_irq(fifo, status); + + /* Use the thread to call period elapsed on nonatomic links */ if (status & FIFO_INT_COUNT_REPEAT) - snd_pcm_period_elapsed(ss); - else - dev_dbg(axg_fifo_dev(ss), "unexpected irq - STS 0x%02x\n", - status); + return IRQ_WAKE_THREAD; - /* Ack irqs */ - axg_fifo_ack_irq(fifo, status); + dev_dbg(axg_fifo_dev(ss), "unexpected irq - STS 0x%02x\n", + status); + + return IRQ_NONE; +} + +static irqreturn_t axg_fifo_pcm_irq_block_thread(int irq, void *dev_id) +{ + struct snd_pcm_substream *ss = dev_id; + + snd_pcm_period_elapsed(ss); - return IRQ_RETVAL(status); + return IRQ_HANDLED; } int axg_fifo_pcm_open(struct snd_soc_component *component, @@ -243,8 +251,9 @@ int axg_fifo_pcm_open(struct snd_soc_component *component, if (ret) return ret; - ret = request_irq(fifo->irq, axg_fifo_pcm_irq_block, 0, - dev_name(dev), ss); + ret = request_threaded_irq(fifo->irq, axg_fifo_pcm_irq_block, + axg_fifo_pcm_irq_block_thread, + IRQF_ONESHOT, dev_name(dev), ss); if (ret) return ret; From dcba52ace7d4c12e2c8c273eff55ea03a84c8baf Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Fri, 26 Apr 2024 17:29:39 +0200 Subject: [PATCH 469/606] ASoC: meson: axg-card: make links nonatomic Non atomic operations need to be performed in the trigger callback of the TDM interfaces. Those are BEs but what matters is the nonatomic flag of the FE in the DPCM context. Just set nonatomic for everything so, at least, what is done is clear. Fixes: 7864a79f37b5 ("ASoC: meson: add axg sound card support") Signed-off-by: Jerome Brunet Link: https://lore.kernel.org/r/20240426152946.3078805-3-jbrunet@baylibre.com Signed-off-by: Mark Brown --- sound/soc/meson/axg-card.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/meson/axg-card.c b/sound/soc/meson/axg-card.c index 3180aa4d3a157..8c5605c1e34e8 100644 --- a/sound/soc/meson/axg-card.c +++ b/sound/soc/meson/axg-card.c @@ -318,6 +318,7 @@ static int axg_card_add_link(struct snd_soc_card *card, struct device_node *np, dai_link->cpus = cpu; dai_link->num_cpus = 1; + dai_link->nonatomic = true; ret = meson_card_parse_dai(card, np, dai_link->cpus); if (ret) From f949ed458ad15a00d41b37c745ebadaef171aaae Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Fri, 26 Apr 2024 17:29:40 +0200 Subject: [PATCH 470/606] ASoC: meson: axg-tdm-interface: manage formatters in trigger So far, the formatters have been reset/enabled using the .prepare() callback. This was done in this callback because walking the formatters use a mutex. A mutex is used because formatter handling require dealing possibly slow clock operation. With the support of non-atomic, .trigger() callback may be used which also allows to properly enable and disable formatters on start but also pause/resume. This solve a random shift on TDMIN as well repeated samples on for TDMOUT. Fixes: d60e4f1e4be5 ("ASoC: meson: add tdm interface driver") Signed-off-by: Jerome Brunet Link: https://lore.kernel.org/r/20240426152946.3078805-4-jbrunet@baylibre.com Signed-off-by: Mark Brown --- sound/soc/meson/axg-tdm-interface.c | 34 ++++++++++++++++------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/sound/soc/meson/axg-tdm-interface.c b/sound/soc/meson/axg-tdm-interface.c index bf708717635bf..8bf3735dedaaf 100644 --- a/sound/soc/meson/axg-tdm-interface.c +++ b/sound/soc/meson/axg-tdm-interface.c @@ -349,26 +349,31 @@ static int axg_tdm_iface_hw_params(struct snd_pcm_substream *substream, return 0; } -static int axg_tdm_iface_hw_free(struct snd_pcm_substream *substream, +static int axg_tdm_iface_trigger(struct snd_pcm_substream *substream, + int cmd, struct snd_soc_dai *dai) { - struct axg_tdm_stream *ts = snd_soc_dai_get_dma_data(dai, substream); + struct axg_tdm_stream *ts = + snd_soc_dai_get_dma_data(dai, substream); - /* Stop all attached formatters */ - axg_tdm_stream_stop(ts); + switch (cmd) { + case SNDRV_PCM_TRIGGER_START: + case SNDRV_PCM_TRIGGER_RESUME: + case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: + axg_tdm_stream_start(ts); + break; + case SNDRV_PCM_TRIGGER_SUSPEND: + case SNDRV_PCM_TRIGGER_PAUSE_PUSH: + case SNDRV_PCM_TRIGGER_STOP: + axg_tdm_stream_stop(ts); + break; + default: + return -EINVAL; + } return 0; } -static int axg_tdm_iface_prepare(struct snd_pcm_substream *substream, - struct snd_soc_dai *dai) -{ - struct axg_tdm_stream *ts = snd_soc_dai_get_dma_data(dai, substream); - - /* Force all attached formatters to update */ - return axg_tdm_stream_reset(ts); -} - static int axg_tdm_iface_remove_dai(struct snd_soc_dai *dai) { int stream; @@ -412,8 +417,7 @@ static const struct snd_soc_dai_ops axg_tdm_iface_ops = { .set_fmt = axg_tdm_iface_set_fmt, .startup = axg_tdm_iface_startup, .hw_params = axg_tdm_iface_hw_params, - .prepare = axg_tdm_iface_prepare, - .hw_free = axg_tdm_iface_hw_free, + .trigger = axg_tdm_iface_trigger, }; /* TDM Backend DAIs */ From a5a89037d080e0870d7517c61f8b2123d58ab33b Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Fri, 26 Apr 2024 17:29:41 +0200 Subject: [PATCH 471/606] ASoC: meson: axg-tdm: add continuous clock support Some devices may need the clocks running, even while paused. Add support for this use case. Signed-off-by: Jerome Brunet Link: https://lore.kernel.org/r/20240426152946.3078805-5-jbrunet@baylibre.com Signed-off-by: Mark Brown --- sound/soc/meson/axg-tdm-formatter.c | 40 +++++++++++++++++++++++++++++ sound/soc/meson/axg-tdm-interface.c | 16 +++++++++++- sound/soc/meson/axg-tdm.h | 5 ++++ 3 files changed, 60 insertions(+), 1 deletion(-) diff --git a/sound/soc/meson/axg-tdm-formatter.c b/sound/soc/meson/axg-tdm-formatter.c index 63333a2b0a9c3..a6579efd37750 100644 --- a/sound/soc/meson/axg-tdm-formatter.c +++ b/sound/soc/meson/axg-tdm-formatter.c @@ -392,6 +392,46 @@ void axg_tdm_stream_free(struct axg_tdm_stream *ts) } EXPORT_SYMBOL_GPL(axg_tdm_stream_free); +int axg_tdm_stream_set_cont_clocks(struct axg_tdm_stream *ts, + unsigned int fmt) +{ + int ret = 0; + + if (fmt & SND_SOC_DAIFMT_CONT) { + /* Clock are already enabled - skipping */ + if (ts->clk_enabled) + return 0; + + ret = clk_prepare_enable(ts->iface->mclk); + if (ret) + return ret; + + ret = clk_prepare_enable(ts->iface->sclk); + if (ret) + goto err_sclk; + + ret = clk_prepare_enable(ts->iface->lrclk); + if (ret) + goto err_lrclk; + + ts->clk_enabled = true; + return 0; + } + + /* Clocks are already disabled - skipping */ + if (!ts->clk_enabled) + return 0; + + clk_disable_unprepare(ts->iface->lrclk); +err_lrclk: + clk_disable_unprepare(ts->iface->sclk); +err_sclk: + clk_disable_unprepare(ts->iface->mclk); + ts->clk_enabled = false; + return ret; +} +EXPORT_SYMBOL_GPL(axg_tdm_stream_set_cont_clocks); + MODULE_DESCRIPTION("Amlogic AXG TDM formatter driver"); MODULE_AUTHOR("Jerome Brunet "); MODULE_LICENSE("GPL v2"); diff --git a/sound/soc/meson/axg-tdm-interface.c b/sound/soc/meson/axg-tdm-interface.c index 8bf3735dedaaf..62057c71f742e 100644 --- a/sound/soc/meson/axg-tdm-interface.c +++ b/sound/soc/meson/axg-tdm-interface.c @@ -309,6 +309,7 @@ static int axg_tdm_iface_hw_params(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { struct axg_tdm_iface *iface = snd_soc_dai_get_drvdata(dai); + struct axg_tdm_stream *ts = snd_soc_dai_get_dma_data(dai, substream); int ret; switch (iface->fmt & SND_SOC_DAIFMT_FORMAT_MASK) { @@ -346,7 +347,19 @@ static int axg_tdm_iface_hw_params(struct snd_pcm_substream *substream, return ret; } - return 0; + ret = axg_tdm_stream_set_cont_clocks(ts, iface->fmt); + if (ret) + dev_err(dai->dev, "failed to apply continuous clock setting\n"); + + return ret; +} + +static int axg_tdm_iface_hw_free(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) +{ + struct axg_tdm_stream *ts = snd_soc_dai_get_dma_data(dai, substream); + + return axg_tdm_stream_set_cont_clocks(ts, 0); } static int axg_tdm_iface_trigger(struct snd_pcm_substream *substream, @@ -417,6 +430,7 @@ static const struct snd_soc_dai_ops axg_tdm_iface_ops = { .set_fmt = axg_tdm_iface_set_fmt, .startup = axg_tdm_iface_startup, .hw_params = axg_tdm_iface_hw_params, + .hw_free = axg_tdm_iface_hw_free, .trigger = axg_tdm_iface_trigger, }; diff --git a/sound/soc/meson/axg-tdm.h b/sound/soc/meson/axg-tdm.h index 42f7470b9a7f4..daaca10fec9e2 100644 --- a/sound/soc/meson/axg-tdm.h +++ b/sound/soc/meson/axg-tdm.h @@ -58,12 +58,17 @@ struct axg_tdm_stream { unsigned int physical_width; u32 *mask; bool ready; + + /* For continuous clock tracking */ + bool clk_enabled; }; struct axg_tdm_stream *axg_tdm_stream_alloc(struct axg_tdm_iface *iface); void axg_tdm_stream_free(struct axg_tdm_stream *ts); int axg_tdm_stream_start(struct axg_tdm_stream *ts); void axg_tdm_stream_stop(struct axg_tdm_stream *ts); +int axg_tdm_stream_set_cont_clocks(struct axg_tdm_stream *ts, + unsigned int fmt); static inline int axg_tdm_stream_reset(struct axg_tdm_stream *ts) { From 6db26f9ea4edd8a17d39ab3c20111e3ccd704aef Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Fri, 26 Apr 2024 15:41:47 +0200 Subject: [PATCH 472/606] ASoC: meson: cards: select SND_DYNAMIC_MINORS Amlogic sound cards do create a lot of pcm interfaces, possibly more than 8. Some pcm interfaces are internal (like DPCM backends and c2c) and not exposed to userspace. Those interfaces still increase the number passed to snd_find_free_minor(), which eventually exceeds 8 causing -EBUSY error on card registration if CONFIG_SND_DYNAMIC_MINORS=n and the interface is exposed to userspace. select CONFIG_SND_DYNAMIC_MINORS for Amlogic cards to avoid the problem. Fixes: 7864a79f37b5 ("ASoC: meson: add axg sound card support") Signed-off-by: Jerome Brunet Link: https://lore.kernel.org/r/20240426134150.3053741-1-jbrunet@baylibre.com Signed-off-by: Mark Brown --- sound/soc/meson/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/meson/Kconfig b/sound/soc/meson/Kconfig index b93ea33739f29..6458d5dc4902f 100644 --- a/sound/soc/meson/Kconfig +++ b/sound/soc/meson/Kconfig @@ -99,6 +99,7 @@ config SND_MESON_AXG_PDM config SND_MESON_CARD_UTILS tristate + select SND_DYNAMIC_MINORS config SND_MESON_CODEC_GLUE tristate From e8a6a5ad73acbafd98e8fd3f0cbf6e379771bb76 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Fri, 26 Apr 2024 10:30:33 -0500 Subject: [PATCH 473/606] ASoC: da7219-aad: fix usage of device_get_named_child_node() The documentation for device_get_named_child_node() mentions this important point: " The caller is responsible for calling fwnode_handle_put() on the returned fwnode pointer. " Add fwnode_handle_put() to avoid a leaked reference. Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20240426153033.38500-1-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/codecs/da7219-aad.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/da7219-aad.c b/sound/soc/codecs/da7219-aad.c index 6bc068cdcbe2a..15e5e3eb592b3 100644 --- a/sound/soc/codecs/da7219-aad.c +++ b/sound/soc/codecs/da7219-aad.c @@ -671,8 +671,10 @@ static struct da7219_aad_pdata *da7219_aad_fw_to_pdata(struct device *dev) return NULL; aad_pdata = devm_kzalloc(dev, sizeof(*aad_pdata), GFP_KERNEL); - if (!aad_pdata) + if (!aad_pdata) { + fwnode_handle_put(aad_np); return NULL; + } aad_pdata->irq = i2c->irq; @@ -753,6 +755,8 @@ static struct da7219_aad_pdata *da7219_aad_fw_to_pdata(struct device *dev) else aad_pdata->adc_1bit_rpt = DA7219_AAD_ADC_1BIT_RPT_1; + fwnode_handle_put(aad_np); + return aad_pdata; } From fbd741f0993203d07b2b6562d68d1e5e4745b59b Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Fri, 26 Apr 2024 10:29:39 -0500 Subject: [PATCH 474/606] ASoC: cs35l56: fix usages of device_get_named_child_node() The documentation for device_get_named_child_node() mentions this important point: " The caller is responsible for calling fwnode_handle_put() on the returned fwnode pointer. " Add fwnode_handle_put() to avoid leaked references. Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20240426152939.38471-1-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l56.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index 6331b8c6136ea..4986e78105daf 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -1360,6 +1360,7 @@ static int cs35l56_try_get_broken_sdca_spkid_gpio(struct cs35l56_private *cs35l5 "spk-id-gpios", ACPI_TYPE_PACKAGE, &obj); if (ret) { dev_dbg(cs35l56->base.dev, "Could not get spk-id-gpios package: %d\n", ret); + fwnode_handle_put(af01_fwnode); return -ENOENT; } @@ -1367,6 +1368,7 @@ static int cs35l56_try_get_broken_sdca_spkid_gpio(struct cs35l56_private *cs35l5 if (obj->package.count != 4) { dev_warn(cs35l56->base.dev, "Unexpected spk-id element count %d\n", obj->package.count); + fwnode_handle_put(af01_fwnode); return -ENOENT; } @@ -1381,6 +1383,7 @@ static int cs35l56_try_get_broken_sdca_spkid_gpio(struct cs35l56_private *cs35l5 */ ret = acpi_dev_add_driver_gpios(adev, cs35l56_af01_spkid_gpios_mapping); if (ret) { + fwnode_handle_put(af01_fwnode); return dev_err_probe(cs35l56->base.dev, ret, "Failed to add gpio mapping to AF01\n"); } @@ -1388,14 +1391,17 @@ static int cs35l56_try_get_broken_sdca_spkid_gpio(struct cs35l56_private *cs35l5 ret = devm_add_action_or_reset(cs35l56->base.dev, cs35l56_acpi_dev_release_driver_gpios, adev); - if (ret) + if (ret) { + fwnode_handle_put(af01_fwnode); return ret; + } dev_dbg(cs35l56->base.dev, "Added spk-id-gpios mapping to AF01\n"); } desc = fwnode_gpiod_get_index(af01_fwnode, "spk-id", 0, GPIOD_IN, NULL); if (IS_ERR(desc)) { + fwnode_handle_put(af01_fwnode); ret = PTR_ERR(desc); return dev_err_probe(cs35l56->base.dev, ret, "Get GPIO from AF01 failed\n"); } @@ -1404,9 +1410,12 @@ static int cs35l56_try_get_broken_sdca_spkid_gpio(struct cs35l56_private *cs35l5 gpiod_put(desc); if (ret < 0) { + fwnode_handle_put(af01_fwnode); dev_err_probe(cs35l56->base.dev, ret, "Error reading spk-id GPIO\n"); return ret; - } + } + + fwnode_handle_put(af01_fwnode); dev_info(cs35l56->base.dev, "Got spk-id from AF01\n"); From 79ac4c1443eaec0d09355307043a9149287f23c1 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Fri, 26 Apr 2024 10:28:18 -0500 Subject: [PATCH 475/606] ALSA: hda: intel-dsp-config: harden I2C/I2S codec detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The SOF driver is selected whenever specific I2C/I2S HIDs are reported as 'present' in the ACPI DSDT. In some cases, an HID is reported but the hardware does not actually rely on I2C/I2S. This false positive leads to an invalid selection of the SOF driver and as a result an invalid topology is loaded. This patch hardens the detection with a check that the NHLT table is consistent with the report of an I2S-based codec in DSDT. This table should expose at least one SSP endpoint configured for an I2S-codec connection. Tested on Huawei Matebook D14 (NBLB-WAX9N) using an HDaudio codec with an invalid ES8336 ACPI HID reported: [ 7.858249] snd_hda_intel 0000:00:1f.3: DSP detected with PCI class/subclass/prog-if info 0x040380 [ 7.858312] snd_hda_intel 0000:00:1f.3: snd_intel_dsp_find_config: no valid SSP found for HID ESSX8336, skipped Reported-by: Mauro Carvalho Chehab Tested-by: Mauro Carvalho Chehab Closes: https://github.com/thesofproject/linux/issues/4934 Signed-off-by: Pierre-Louis Bossart Reviewed-by: Bard Liao Reviewed-by: Péter Ujfalusi Message-ID: <20240426152818.38443-1-pierre-louis.bossart@linux.intel.com> Signed-off-by: Takashi Iwai --- sound/hda/intel-dsp-config.c | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/sound/hda/intel-dsp-config.c b/sound/hda/intel-dsp-config.c index 6a384b922e4fa..d1f6cdcf1866e 100644 --- a/sound/hda/intel-dsp-config.c +++ b/sound/hda/intel-dsp-config.c @@ -557,9 +557,32 @@ static const struct config_entry *snd_intel_dsp_find_config if (table->codec_hid) { int i; - for (i = 0; i < table->codec_hid->num_codecs; i++) - if (acpi_dev_present(table->codec_hid->codecs[i], NULL, -1)) + for (i = 0; i < table->codec_hid->num_codecs; i++) { + struct nhlt_acpi_table *nhlt; + bool ssp_found = false; + + if (!acpi_dev_present(table->codec_hid->codecs[i], NULL, -1)) + continue; + + nhlt = intel_nhlt_init(&pci->dev); + if (!nhlt) { + dev_warn(&pci->dev, "%s: NHLT table not found, skipped HID %s\n", + __func__, table->codec_hid->codecs[i]); + continue; + } + + if (intel_nhlt_has_endpoint_type(nhlt, NHLT_LINK_SSP) && + intel_nhlt_ssp_endpoint_mask(nhlt, NHLT_DEVICE_I2S)) + ssp_found = true; + + intel_nhlt_free(nhlt); + + if (ssp_found) break; + + dev_warn(&pci->dev, "%s: no valid SSP found for HID %s, skipped\n", + __func__, table->codec_hid->codecs[i]); + } if (i == table->codec_hid->num_codecs) continue; } From 5af385f5f4cddf908f663974847a4083b2ff2c79 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 29 Apr 2024 15:47:51 +0100 Subject: [PATCH 476/606] bounds: Use the right number of bits for power-of-two CONFIG_NR_CPUS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit bits_per() rounds up to the next power of two when passed a power of two. This causes crashes on some machines and configurations. Reported-by: Михаил Новоселов Tested-by: Ильфат Гаптрахманов Link: https://gitlab.freedesktop.org/drm/amd/-/issues/3347 Link: https://lore.kernel.org/all/1c978cf1-2934-4e66-e4b3-e81b04cb3571@rosalinux.ru/ Fixes: f2d5dcb48f7b (bounds: support non-power-of-two CONFIG_NR_CPUS) Cc: Signed-off-by: Matthew Wilcox (Oracle) Cc: Rik van Riel Cc: Mel Gorman Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/bounds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bounds.c b/kernel/bounds.c index c5a9fcd2d6228..29b2cd00df2cc 100644 --- a/kernel/bounds.c +++ b/kernel/bounds.c @@ -19,7 +19,7 @@ int main(void) DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS); DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES); #ifdef CONFIG_SMP - DEFINE(NR_CPUS_BITS, bits_per(CONFIG_NR_CPUS)); + DEFINE(NR_CPUS_BITS, order_base_2(CONFIG_NR_CPUS)); #endif DEFINE(SPINLOCK_SIZE, sizeof(spinlock_t)); #ifdef CONFIG_LRU_GEN From c158cf914713efc3bcdc25680c7156c48c12ef6a Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Fri, 26 Apr 2024 10:27:31 -0500 Subject: [PATCH 477/606] ALSA: hda: intel-sdw-acpi: fix usage of device_get_named_child_node() The documentation for device_get_named_child_node() mentions this important point: " The caller is responsible for calling fwnode_handle_put() on the returned fwnode pointer. " Add fwnode_handle_put() to avoid a leaked reference. Signed-off-by: Pierre-Louis Bossart Fixes: 08c2a4bc9f2a ("ALSA: hda: move Intel SoundWire ACPI scan to dedicated module") Message-ID: <20240426152731.38420-1-pierre-louis.bossart@linux.intel.com> Signed-off-by: Takashi Iwai --- sound/hda/intel-sdw-acpi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/hda/intel-sdw-acpi.c b/sound/hda/intel-sdw-acpi.c index 5f60658c6051c..d7417a40392b2 100644 --- a/sound/hda/intel-sdw-acpi.c +++ b/sound/hda/intel-sdw-acpi.c @@ -45,6 +45,8 @@ static bool is_link_enabled(struct fwnode_handle *fw_node, u8 idx) "intel-quirk-mask", &quirk_mask); + fwnode_handle_put(link); + if (quirk_mask & SDW_INTEL_QUIRK_MASK_BUS_DISABLE) return false; From 5d211c7090590033581175d6405ae40917ca3a06 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 26 Apr 2024 15:47:56 -0700 Subject: [PATCH 478/606] cxl: Fix cxl_endpoint_get_perf_coordinate() support for RCH Robert reported the following when booting a CXL host with Restricted CXL Host (RCH) topology: [ 39.815379] cxl_acpi ACPI0017:00: not a cxl_port device [ 39.827123] WARNING: CPU: 46 PID: 1754 at drivers/cxl/core/port.c:592 to_cxl_port+0x56/0x70 [cxl_core] ... plus some related subsequent NULL pointer dereference: [ 40.718708] BUG: kernel NULL pointer dereference, address: 00000000000002d8 The iterator to walk the PCIe path did not account for RCH topology. However RCH does not support hotplug and the memory exported by the Restricted CXL Device (RCD) should be covered by HMAT and therefore no access_coordinate is needed. Add check to see if the endpoint device is RCD and skip calculation. Also add a call to cxl_endpoint_get_perf_coordinates() in cxl_test in order to exercise the topology iterator. The dev_is_pci() check added is to help with this test and should be harmless for normal operation. Reported-by: Robert Richter Closes: https://lore.kernel.org/all/Ziv8GfSMSbvlBB0h@rric.localdomain/ Fixes: 592780b8391f ("cxl: Fix retrieving of access_coordinates in PCIe path") Reviewed-by: Dan Williams Tested-by: Robert Richter Reviewed-by: Robert Richter Link: https://lore.kernel.org/r/20240426224913.1027420-1-dave.jiang@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/port.c | 15 ++++++++++++++- tools/testing/cxl/test/cxl.c | 7 +++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 762783bb091af..887ed6e358fb9 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -2184,6 +2184,7 @@ static bool parent_port_is_cxl_root(struct cxl_port *port) int cxl_endpoint_get_perf_coordinates(struct cxl_port *port, struct access_coordinate *coord) { + struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev); struct access_coordinate c[] = { { .read_bandwidth = UINT_MAX, @@ -2197,12 +2198,20 @@ int cxl_endpoint_get_perf_coordinates(struct cxl_port *port, struct cxl_port *iter = port; struct cxl_dport *dport; struct pci_dev *pdev; + struct device *dev; unsigned int bw; bool is_cxl_root; if (!is_cxl_endpoint(port)) return -EINVAL; + /* + * Skip calculation for RCD. Expectation is HMAT already covers RCD case + * since RCH does not support hotplug. + */ + if (cxlmd->cxlds->rcd) + return 0; + /* * Exit the loop when the parent port of the current iter port is cxl * root. The iterative loop starts at the endpoint and gathers the @@ -2232,8 +2241,12 @@ int cxl_endpoint_get_perf_coordinates(struct cxl_port *port, return -EINVAL; cxl_coordinates_combine(c, c, dport->coord); + dev = port->uport_dev->parent; + if (!dev_is_pci(dev)) + return -ENODEV; + /* Get the calculated PCI paths bandwidth */ - pdev = to_pci_dev(port->uport_dev->parent); + pdev = to_pci_dev(dev); bw = pcie_bandwidth_available(pdev, NULL, NULL, NULL); if (bw == 0) return -ENXIO; diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index 61c69297e7978..3482248aa3442 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -1001,6 +1001,7 @@ static void mock_cxl_endpoint_parse_cdat(struct cxl_port *port) struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev); struct cxl_dev_state *cxlds = cxlmd->cxlds; struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); + struct access_coordinate ep_c[ACCESS_COORDINATE_MAX]; struct range pmem_range = { .start = cxlds->pmem_res.start, .end = cxlds->pmem_res.end, @@ -1020,6 +1021,12 @@ static void mock_cxl_endpoint_parse_cdat(struct cxl_port *port) dpa_perf_setup(port, &pmem_range, &mds->pmem_perf); cxl_memdev_update_perf(cxlmd); + + /* + * This function is here to only test the topology iterator. It serves + * no other purpose. + */ + cxl_endpoint_get_perf_coordinates(port, ep_c); } static struct cxl_mock_ops cxl_mock_ops = { From da7c622cddd4fe36be69ca61e8c42e43cde94784 Mon Sep 17 00:00:00 2001 From: Bui Quang Minh Date: Wed, 24 Apr 2024 21:44:22 +0700 Subject: [PATCH 479/606] s390/cio: Ensure the copied buf is NUL terminated Currently, we allocate a lbuf-sized kernel buffer and copy lbuf from userspace to that buffer. Later, we use scanf on this buffer but we don't ensure that the string is terminated inside the buffer, this can lead to OOB read when using scanf. Fix this issue by using memdup_user_nul instead. Fixes: a4f17cc72671 ("s390/cio: add CRW inject functionality") Signed-off-by: Bui Quang Minh Reviewed-by: Heiko Carstens Link: https://lore.kernel.org/r/20240424-fix-oob-read-v2-5-f1f1b53a10f4@gmail.com Signed-off-by: Alexander Gordeev --- drivers/s390/cio/cio_inject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/cio/cio_inject.c b/drivers/s390/cio/cio_inject.c index 8613fa937237b..a2e771ebae8eb 100644 --- a/drivers/s390/cio/cio_inject.c +++ b/drivers/s390/cio/cio_inject.c @@ -95,7 +95,7 @@ static ssize_t crw_inject_write(struct file *file, const char __user *buf, return -EINVAL; } - buffer = vmemdup_user(buf, lbuf); + buffer = memdup_user_nul(buf, lbuf); if (IS_ERR(buffer)) return -ENOMEM; From 720a22fd6c1cdadf691281909950c0cbc5cdf17e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 26 Apr 2024 00:30:36 +0200 Subject: [PATCH 480/606] x86/apic: Don't access the APIC when disabling x2APIC With 'iommu=off' on the kernel command line and x2APIC enabled by the BIOS the code which disables the x2APIC triggers an unchecked MSR access error: RDMSR from 0x802 at rIP: 0xffffffff94079992 (native_apic_msr_read+0x12/0x50) This is happens because default_acpi_madt_oem_check() selects an x2APIC driver before the x2APIC is disabled. When the x2APIC is disabled because interrupt remapping cannot be enabled due to 'iommu=off' on the command line, x2apic_disable() invokes apic_set_fixmap() which in turn tries to read the APIC ID. This triggers the MSR warning because x2APIC is disabled, but the APIC driver is still x2APIC based. Prevent that by adding an argument to apic_set_fixmap() which makes the APIC ID read out conditional and set it to false from the x2APIC disable path. That's correct as the APIC ID has already been read out during early discovery. Fixes: d10a904435fa ("x86/apic: Consolidate boot_cpu_physical_apicid initialization sites") Reported-by: Adrian Huang Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov (AMD) Signed-off-by: Ingo Molnar Tested-by: Adrian Huang Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/875xw5t6r7.ffs@tglx --- arch/x86/kernel/apic/apic.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index c342c4aa9c684..803dcfb0e3469 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1771,7 +1771,7 @@ void x2apic_setup(void) __x2apic_enable(); } -static __init void apic_set_fixmap(void); +static __init void apic_set_fixmap(bool read_apic); static __init void x2apic_disable(void) { @@ -1793,7 +1793,12 @@ static __init void x2apic_disable(void) } __x2apic_disable(); - apic_set_fixmap(); + /* + * Don't reread the APIC ID as it was already done from + * check_x2apic() and the APIC driver still is a x2APIC variant, + * which fails to do the read after x2APIC was disabled. + */ + apic_set_fixmap(false); } static __init void x2apic_enable(void) @@ -2057,13 +2062,14 @@ void __init init_apic_mappings(void) } } -static __init void apic_set_fixmap(void) +static __init void apic_set_fixmap(bool read_apic) { set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr); apic_mmio_base = APIC_BASE; apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", apic_mmio_base, mp_lapic_addr); - apic_read_boot_cpu_id(false); + if (read_apic) + apic_read_boot_cpu_id(false); } void __init register_lapic_address(unsigned long address) @@ -2073,7 +2079,7 @@ void __init register_lapic_address(unsigned long address) mp_lapic_addr = address; if (!x2apic_mode) - apic_set_fixmap(); + apic_set_fixmap(true); } /* From 00e7d3bea2ce7dac7bee1cf501fb071fd0ea8f6c Mon Sep 17 00:00:00 2001 From: Jim Cromie Date: Mon, 29 Apr 2024 13:31:11 -0600 Subject: [PATCH 481/606] dyndbg: fix old BUG_ON in >control parser Fix a BUG_ON from 2009. Even if it looks "unreachable" (I didn't really look), lets make sure by removing it, doing pr_err and return -EINVAL instead. Cc: stable Signed-off-by: Jim Cromie Link: https://lore.kernel.org/r/20240429193145.66543-2-jim.cromie@gmail.com Signed-off-by: Greg Kroah-Hartman --- lib/dynamic_debug.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index c78f335fa9813..f2c5e7910bb19 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -302,7 +302,11 @@ static int ddebug_tokenize(char *buf, char *words[], int maxwords) } else { for (end = buf; *end && !isspace(*end); end++) ; - BUG_ON(end == buf); + if (end == buf) { + pr_err("parse err after word:%d=%s\n", nwords, + nwords ? words[nwords - 1] : ""); + return -EINVAL; + } } /* `buf' is start of word, `end' is one past its end */ From 63a6ce5a1a6261e4c70bad2b55c4e0de8da4762e Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 16 Apr 2024 08:07:00 +0930 Subject: [PATCH 482/606] btrfs: set correct ram_bytes when splitting ordered extent [BUG] When running generic/287, the following file extent items can be generated: item 16 key (258 EXTENT_DATA 2682880) itemoff 15305 itemsize 53 generation 9 type 1 (regular) extent data disk byte 1378414592 nr 462848 extent data offset 0 nr 462848 ram 2097152 extent compression 0 (none) Note that file extent item is not a compressed one, but its ram_bytes is way larger than its disk_num_bytes. According to btrfs on-disk scheme, ram_bytes should match disk_num_bytes if it's not a compressed one. [CAUSE] Since commit b73a6fd1b1ef ("btrfs: split partial dio bios before submit"), for partial dio writes, we would split the ordered extent. However the function btrfs_split_ordered_extent() doesn't update the ram_bytes even it has already shrunk the disk_num_bytes. Originally the function btrfs_split_ordered_extent() is only introduced for zoned devices in commit d22002fd37bd ("btrfs: zoned: split ordered extent when bio is sent"), but later commit b73a6fd1b1ef ("btrfs: split partial dio bios before submit") makes non-zoned btrfs affected. Thankfully for un-compressed file extent, we do not really utilize the ram_bytes member, thus it won't cause any real problem. [FIX] Also update btrfs_ordered_extent::ram_bytes inside btrfs_split_ordered_extent(). Fixes: d22002fd37bd ("btrfs: zoned: split ordered extent when bio is sent") CC: stable@vger.kernel.org # 5.15+ Reviewed-by: Filipe Manana Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ordered-data.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index b749ba45da2ba..c2a42bcde98e0 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -1188,6 +1188,7 @@ struct btrfs_ordered_extent *btrfs_split_ordered_extent( ordered->disk_bytenr += len; ordered->num_bytes -= len; ordered->disk_num_bytes -= len; + ordered->ram_bytes -= len; if (test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags)) { ASSERT(ordered->bytes_left == 0); From fe81f354841641c7f71163b84912b25c169ed8ec Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 29 Apr 2024 08:40:10 -0700 Subject: [PATCH 483/606] usb: ohci: Prevent missed ohci interrupts Testing ohci functionality with qemu's pci-ohci emulation often results in ohci interface stalls, resulting in hung task timeouts. The problem is caused by lost interrupts between the emulation and the Linux kernel code. Additional interrupts raised while the ohci interrupt handler in Linux is running and before the handler clears the interrupt status are not handled. The fix for a similar problem in ehci suggests that the problem is likely caused by edge-triggered MSI interrupts. See commit 0b60557230ad ("usb: ehci: Prevent missed ehci interrupts with edge-triggered MSI") for details. Ensure that the ohci interrupt code handles all pending interrupts before returning to solve the problem. Cc: Gerd Hoffmann Cc: David Laight Cc: stable@vger.kernel.org Fixes: 306c54d0edb6 ("usb: hcd: Try MSI interrupts on PCI devices") Signed-off-by: Guenter Roeck Reviewed-by: Alan Stern Reviewed-by: Gerd Hoffmann Link: https://lore.kernel.org/r/20240429154010.1507366-1-linux@roeck-us.net Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ohci-hcd.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/usb/host/ohci-hcd.c b/drivers/usb/host/ohci-hcd.c index 4f9982ecfb583..5cec7640e913c 100644 --- a/drivers/usb/host/ohci-hcd.c +++ b/drivers/usb/host/ohci-hcd.c @@ -888,6 +888,7 @@ static irqreturn_t ohci_irq (struct usb_hcd *hcd) /* Check for an all 1's result which is a typical consequence * of dead, unclocked, or unplugged (CardBus...) devices */ +again: if (ints == ~(u32)0) { ohci->rh_state = OHCI_RH_HALTED; ohci_dbg (ohci, "device removed!\n"); @@ -982,6 +983,13 @@ static irqreturn_t ohci_irq (struct usb_hcd *hcd) } spin_unlock(&ohci->lock); + /* repeat until all enabled interrupts are handled */ + if (ohci->rh_state != OHCI_RH_HALTED) { + ints = ohci_readl(ohci, ®s->intrstatus); + if (ints && (ints & ohci_readl(ohci, ®s->intrenable))) + goto again; + } + return IRQ_HANDLED; } From c78c3644b772e356ca452ae733a3c4de0fb11dc8 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Tue, 30 Apr 2024 10:33:48 -0400 Subject: [PATCH 484/606] usb: Fix regression caused by invalid ep0 maxpacket in virtual SuperSpeed device A virtual SuperSpeed device in the FreeBSD BVCP package (https://bhyve.npulse.net/) presents an invalid ep0 maxpacket size of 256. It stopped working with Linux following a recent commit because now we check these sizes more carefully than before. Fix this regression by using the bMaxpacketSize0 value in the device descriptor for SuperSpeed or faster devices, even if it is invalid. This is a very simple-minded change; we might want to check more carefully for values that actually make some sense (for instance, no smaller than 64). Signed-off-by: Alan Stern Reported-and-tested-by: Roger Whittaker Closes: https://bugzilla.suse.com/show_bug.cgi?id=1220569 Link: https://lore.kernel.org/linux-usb/9efbd569-7059-4575-983f-0ea30df41871@suse.com/ Fixes: 59cf44575456 ("USB: core: Fix oversight in SuperSpeed initialization") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/4058ac05-237c-4db4-9ecc-5af42bdb4501@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 9446660e231bb..008053039875a 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -5110,9 +5110,10 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1, } if (usb_endpoint_maxp(&udev->ep0.desc) == i) { ; /* Initial ep0 maxpacket guess is right */ - } else if ((udev->speed == USB_SPEED_FULL || + } else if (((udev->speed == USB_SPEED_FULL || udev->speed == USB_SPEED_HIGH) && - (i == 8 || i == 16 || i == 32 || i == 64)) { + (i == 8 || i == 16 || i == 32 || i == 64)) || + (udev->speed >= USB_SPEED_SUPER && i > 0)) { /* Initial guess is wrong; use the descriptor's value */ if (udev->speed == USB_SPEED_FULL) dev_dbg(&udev->dev, "ep0 maxpacket = %d\n", i); From 1e707769df072757bdcafab158bb159ead73daa4 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Tue, 30 Apr 2024 17:15:53 +0800 Subject: [PATCH 485/606] ALSA: hda/realtek - Set GPIO3 to default at S4 state for Thinkpad with ALC1318 There is a chance of damaging the IC when S4 resume. Add safe mode for no stream to disable GPIO3. Thinkpad with ALC1318 platform need to add this workaround. Signed-off-by: Kailang Yang Link: https://lore.kernel.org/r/a853dc4f0a4e412381d5f60565181247@realtek.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 51 ++++++++++++++++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index e770133097954..b96c6863f2724 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -920,6 +920,8 @@ static void alc_pre_init(struct hda_codec *codec) ((codec)->core.dev.power.power_state.event == PM_EVENT_RESUME) #define is_s4_resume(codec) \ ((codec)->core.dev.power.power_state.event == PM_EVENT_RESTORE) +#define is_s4_suspend(codec) \ + ((codec)->core.dev.power.power_state.event == PM_EVENT_FREEZE) static int alc_init(struct hda_codec *codec) { @@ -7183,6 +7185,44 @@ static void alc245_fixup_hp_spectre_x360_eu0xxx(struct hda_codec *codec, alc245_fixup_hp_gpio_led(codec, fix, action); } +/* + * ALC287 PCM hooks + */ +static void alc287_alc1318_playback_pcm_hook(struct hda_pcm_stream *hinfo, + struct hda_codec *codec, + struct snd_pcm_substream *substream, + int action) +{ + alc_write_coef_idx(codec, 0x10, 0x8806); /* Change MLK to GPIO3 */ + switch (action) { + case HDA_GEN_PCM_ACT_OPEN: + alc_write_coefex_idx(codec, 0x5a, 0x00, 0x954f); /* write gpio3 to high */ + break; + case HDA_GEN_PCM_ACT_CLOSE: + alc_write_coefex_idx(codec, 0x5a, 0x00, 0x554f); /* write gpio3 as default value */ + break; + } +} + +static void alc287_s4_power_gpio3_default(struct hda_codec *codec) +{ + if (is_s4_suspend(codec)) { + alc_write_coef_idx(codec, 0x10, 0x8806); /* Change MLK to GPIO3 */ + alc_write_coefex_idx(codec, 0x5a, 0x00, 0x554f); /* write gpio3 as default value */ + } +} + +static void alc287_fixup_lenovo_thinkpad_with_alc1318(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + struct alc_spec *spec = codec->spec; + + if (action != HDA_FIXUP_ACT_PRE_PROBE) + return; + spec->power_hook = alc287_s4_power_gpio3_default; + spec->gen.pcm_playback_hook = alc287_alc1318_playback_pcm_hook; +} + enum { ALC269_FIXUP_GPIO2, @@ -7470,7 +7510,8 @@ enum { ALC285_FIXUP_ASUS_GA403U_HEADSET_MIC, ALC285_FIXUP_ASUS_GA403U_I2C_SPEAKER2_TO_DAC1, ALC285_FIXUP_ASUS_GU605_SPI_2_HEADSET_MIC, - ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1 + ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1, + ALC287_FIXUP_LENOVO_THKPAD_WH_ALC1318, }; /* A special fixup for Lenovo C940 and Yoga Duet 7; @@ -9726,6 +9767,12 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC285_FIXUP_ASUS_GA403U, }, + [ALC287_FIXUP_LENOVO_THKPAD_WH_ALC1318] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc287_fixup_lenovo_thinkpad_with_alc1318, + .chained = true, + .chain_id = ALC269_FIXUP_THINKPAD_ACPI + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -10394,6 +10441,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x2318, "Thinkpad Z13 Gen2", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD), SND_PCI_QUIRK(0x17aa, 0x2319, "Thinkpad Z16 Gen2", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD), SND_PCI_QUIRK(0x17aa, 0x231a, "Thinkpad Z16 Gen2", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD), + SND_PCI_QUIRK(0x17aa, 0x231e, "Thinkpad", ALC287_FIXUP_LENOVO_THKPAD_WH_ALC1318), + SND_PCI_QUIRK(0x17aa, 0x231f, "Thinkpad", ALC287_FIXUP_LENOVO_THKPAD_WH_ALC1318), SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), From 39815cdfc8d46ce2c72cbf2aa3d991c4bfb0024f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 30 Apr 2024 18:32:04 +0200 Subject: [PATCH 486/606] ALSA: hda/realtek: Fix conflicting PCI SSID 17aa:386f for Lenovo Legion models Unfortunately both Lenovo Legion Pro 7 16ARX8H and Legion 7i 16IAX7 got the very same PCI SSID while the hardware implementations are completely different (the former is with TI TAS2781 codec while the latter is with Cirrus CS35L41 codec). The former model got broken by the recent fix for the latter model. For addressing the regression, check the codec SSID and apply the proper quirk for each model now. Fixes: 24b6332c2d4f ("ALSA: hda: Add Lenovo Legion 7i gen7 sound quirk") Cc: Link: https://bugzilla.suse.com/show_bug.cgi?id=1223462 Message-ID: <20240430163206.5200-1-tiwai@suse.de> Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index b96c6863f2724..e704425788ebf 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7466,6 +7466,7 @@ enum { ALC287_FIXUP_YOGA7_14ITL_SPEAKERS, ALC298_FIXUP_LENOVO_C940_DUET7, ALC287_FIXUP_LENOVO_14IRP8_DUETITL, + ALC287_FIXUP_LENOVO_LEGION_7, ALC287_FIXUP_13S_GEN2_SPEAKERS, ALC256_FIXUP_SET_COEF_DEFAULTS, ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE, @@ -7551,6 +7552,23 @@ static void alc287_fixup_lenovo_14irp8_duetitl(struct hda_codec *codec, __snd_hda_apply_fixup(codec, id, action, 0); } +/* Another hilarious PCI SSID conflict with Lenovo Legion Pro 7 16ARX8H (with + * TAS2781 codec) and Legion 7i 16IAX7 (with CS35L41 codec); + * we apply a corresponding fixup depending on the codec SSID instead + */ +static void alc287_fixup_lenovo_legion_7(struct hda_codec *codec, + const struct hda_fixup *fix, + int action) +{ + int id; + + if (codec->core.subsystem_id == 0x17aa38a8) + id = ALC287_FIXUP_TAS2781_I2C; /* Legion Pro 7 16ARX8H */ + else + id = ALC287_FIXUP_CS35L41_I2C_2; /* Legion 7i 16IAX7 */ + __snd_hda_apply_fixup(codec, id, action, 0); +} + static const struct hda_fixup alc269_fixups[] = { [ALC269_FIXUP_GPIO2] = { .type = HDA_FIXUP_FUNC, @@ -9445,6 +9463,10 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc287_fixup_lenovo_14irp8_duetitl, }, + [ALC287_FIXUP_LENOVO_LEGION_7] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc287_fixup_lenovo_legion_7, + }, [ALC287_FIXUP_13S_GEN2_SPEAKERS] = { .type = HDA_FIXUP_VERBS, .v.verbs = (const struct hda_verb[]) { @@ -10472,7 +10494,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x3853, "Lenovo Yoga 7 15ITL5", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3855, "Legion 7 16ITHG6", ALC287_FIXUP_LEGION_16ITHG6), SND_PCI_QUIRK(0x17aa, 0x3869, "Lenovo Yoga7 14IAL7", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN), - SND_PCI_QUIRK(0x17aa, 0x386f, "Legion 7i 16IAX7", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x17aa, 0x386f, "Legion Pro 7/7i", ALC287_FIXUP_LENOVO_LEGION_7), SND_PCI_QUIRK(0x17aa, 0x3870, "Lenovo Yoga 7 14ARB7", ALC287_FIXUP_YOGA7_14ARB7_I2C), SND_PCI_QUIRK(0x17aa, 0x3877, "Lenovo Legion 7 Slim 16ARHA7", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x17aa, 0x3878, "Lenovo Legion 7 Slim 16ARHA7", ALC287_FIXUP_CS35L41_I2C_2), From 52a6947bf576b97ff8e14bb0a31c5eaf2d0d96e2 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Mon, 29 Apr 2024 14:23:08 -0400 Subject: [PATCH 487/606] drm/nouveau/firmware: Fix SG_DEBUG error with nvkm_firmware_ctor() Currently, enabling SG_DEBUG in the kernel will cause nouveau to hit a BUG() on startup: kernel BUG at include/linux/scatterlist.h:187! invalid opcode: 0000 [#1] PREEMPT SMP NOPTI CPU: 7 PID: 930 Comm: (udev-worker) Not tainted 6.9.0-rc3Lyude-Test+ #30 Hardware name: MSI MS-7A39/A320M GAMING PRO (MS-7A39), BIOS 1.I0 01/22/2019 RIP: 0010:sg_init_one+0x85/0xa0 Code: 69 88 32 01 83 e1 03 f6 c3 03 75 20 a8 01 75 1e 48 09 cb 41 89 54 24 08 49 89 1c 24 41 89 6c 24 0c 5b 5d 41 5c e9 7b b9 88 00 <0f> 0b 0f 0b 0f 0b 48 8b 05 5e 46 9a 01 eb b2 66 66 2e 0f 1f 84 00 RSP: 0018:ffffa776017bf6a0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffffa77600d87000 RCX: 000000000000002b RDX: 0000000000000001 RSI: 0000000000000000 RDI: ffffa77680d87000 RBP: 000000000000e000 R08: 0000000000000000 R09: 0000000000000000 R10: ffff98f4c46aa508 R11: 0000000000000000 R12: ffff98f4c46aa508 R13: ffff98f4c46aa008 R14: ffffa77600d4a000 R15: ffffa77600d4a018 FS: 00007feeb5aae980(0000) GS:ffff98f5c4dc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f22cb9a4520 CR3: 00000001043ba000 CR4: 00000000003506f0 Call Trace: ? die+0x36/0x90 ? do_trap+0xdd/0x100 ? sg_init_one+0x85/0xa0 ? do_error_trap+0x65/0x80 ? sg_init_one+0x85/0xa0 ? exc_invalid_op+0x50/0x70 ? sg_init_one+0x85/0xa0 ? asm_exc_invalid_op+0x1a/0x20 ? sg_init_one+0x85/0xa0 nvkm_firmware_ctor+0x14a/0x250 [nouveau] nvkm_falcon_fw_ctor+0x42/0x70 [nouveau] ga102_gsp_booter_ctor+0xb4/0x1a0 [nouveau] r535_gsp_oneinit+0xb3/0x15f0 [nouveau] ? srso_return_thunk+0x5/0x5f ? srso_return_thunk+0x5/0x5f ? nvkm_udevice_new+0x95/0x140 [nouveau] ? srso_return_thunk+0x5/0x5f ? srso_return_thunk+0x5/0x5f ? ktime_get+0x47/0xb0 ? srso_return_thunk+0x5/0x5f nvkm_subdev_oneinit_+0x4f/0x120 [nouveau] nvkm_subdev_init_+0x39/0x140 [nouveau] ? srso_return_thunk+0x5/0x5f nvkm_subdev_init+0x44/0x90 [nouveau] nvkm_device_init+0x166/0x2e0 [nouveau] nvkm_udevice_init+0x47/0x70 [nouveau] nvkm_object_init+0x41/0x1c0 [nouveau] nvkm_ioctl_new+0x16a/0x290 [nouveau] ? __pfx_nvkm_client_child_new+0x10/0x10 [nouveau] ? __pfx_nvkm_udevice_new+0x10/0x10 [nouveau] nvkm_ioctl+0x126/0x290 [nouveau] nvif_object_ctor+0x112/0x190 [nouveau] nvif_device_ctor+0x23/0x60 [nouveau] nouveau_cli_init+0x164/0x640 [nouveau] nouveau_drm_device_init+0x97/0x9e0 [nouveau] ? srso_return_thunk+0x5/0x5f ? pci_update_current_state+0x72/0xb0 ? srso_return_thunk+0x5/0x5f nouveau_drm_probe+0x12c/0x280 [nouveau] ? srso_return_thunk+0x5/0x5f local_pci_probe+0x45/0xa0 pci_device_probe+0xc7/0x270 really_probe+0xe6/0x3a0 __driver_probe_device+0x87/0x160 driver_probe_device+0x1f/0xc0 __driver_attach+0xec/0x1f0 ? __pfx___driver_attach+0x10/0x10 bus_for_each_dev+0x88/0xd0 bus_add_driver+0x116/0x220 driver_register+0x59/0x100 ? __pfx_nouveau_drm_init+0x10/0x10 [nouveau] do_one_initcall+0x5b/0x320 do_init_module+0x60/0x250 init_module_from_file+0x86/0xc0 idempotent_init_module+0x120/0x2b0 __x64_sys_finit_module+0x5e/0xb0 do_syscall_64+0x83/0x160 ? srso_return_thunk+0x5/0x5f entry_SYSCALL_64_after_hwframe+0x71/0x79 RIP: 0033:0x7feeb5cc20cd Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 1b cd 0c 00 f7 d8 64 89 01 48 RSP: 002b:00007ffcf220b2c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 RAX: ffffffffffffffda RBX: 000055fdd2916aa0 RCX: 00007feeb5cc20cd RDX: 0000000000000000 RSI: 000055fdd29161e0 RDI: 0000000000000035 RBP: 00007ffcf220b380 R08: 00007feeb5d8fb20 R09: 00007ffcf220b310 R10: 000055fdd2909dc0 R11: 0000000000000246 R12: 000055fdd29161e0 R13: 0000000000020000 R14: 000055fdd29203e0 R15: 000055fdd2909d80 We hit this when trying to initialize firmware of type NVKM_FIRMWARE_IMG_DMA because we allocate our memory with dma_alloc_coherent, and DMA allocations can't be turned back into memory pages - which a scatterlist needs in order to map them. So, fix this by allocating the memory with vmalloc instead(). V2: * Fixup explanation as the prior one was bogus Signed-off-by: Lyude Paul Reviewed-by: Dave Airlie Cc: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20240429182318.189668-1-lyude@redhat.com --- drivers/gpu/drm/nouveau/nvkm/core/firmware.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/core/firmware.c b/drivers/gpu/drm/nouveau/nvkm/core/firmware.c index adc60b25f8e6c..141b0a513bf52 100644 --- a/drivers/gpu/drm/nouveau/nvkm/core/firmware.c +++ b/drivers/gpu/drm/nouveau/nvkm/core/firmware.c @@ -205,7 +205,9 @@ nvkm_firmware_dtor(struct nvkm_firmware *fw) break; case NVKM_FIRMWARE_IMG_DMA: nvkm_memory_unref(&memory); - dma_free_coherent(fw->device->dev, sg_dma_len(&fw->mem.sgl), fw->img, fw->phys); + dma_unmap_single(fw->device->dev, fw->phys, sg_dma_len(&fw->mem.sgl), + DMA_TO_DEVICE); + kfree(fw->img); break; case NVKM_FIRMWARE_IMG_SGT: nvkm_memory_unref(&memory); @@ -235,14 +237,17 @@ nvkm_firmware_ctor(const struct nvkm_firmware_func *func, const char *name, fw->img = kmemdup(src, fw->len, GFP_KERNEL); break; case NVKM_FIRMWARE_IMG_DMA: { - dma_addr_t addr; - len = ALIGN(fw->len, PAGE_SIZE); - fw->img = dma_alloc_coherent(fw->device->dev, len, &addr, GFP_KERNEL); - if (fw->img) { - memcpy(fw->img, src, fw->len); - fw->phys = addr; + fw->img = kmalloc(len, GFP_KERNEL); + if (!fw->img) + return -ENOMEM; + + memcpy(fw->img, src, fw->len); + fw->phys = dma_map_single(fw->device->dev, fw->img, len, DMA_TO_DEVICE); + if (dma_mapping_error(fw->device->dev, fw->phys)) { + kfree(fw->img); + return -EFAULT; } sg_init_one(&fw->mem.sgl, fw->img, len); From 6f572a80545773833f00c9a65e9242ab6fedb192 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Mon, 29 Apr 2024 14:23:09 -0400 Subject: [PATCH 488/606] drm/nouveau/gsp: Use the sg allocator for level 2 of radix3 Currently we allocate all 3 levels of radix3 page tables using nvkm_gsp_mem_ctor(), which uses dma_alloc_coherent() for allocating all of the relevant memory. This can end up failing in scenarios where the system has very high memory fragmentation, and we can't find enough contiguous memory to allocate level 2 of the page table. Currently, this can result in runtime PM issues on systems where memory fragmentation is high - as we'll fail to allocate the page table for our suspend/resume buffer: kworker/10:2: page allocation failure: order:7, mode:0xcc0(GFP_KERNEL), nodemask=(null),cpuset=/,mems_allowed=0 CPU: 10 PID: 479809 Comm: kworker/10:2 Not tainted 6.8.6-201.ChopperV6.fc39.x86_64 #1 Hardware name: SLIMBOOK Executive/Executive, BIOS N.1.10GRU06 02/02/2024 Workqueue: pm pm_runtime_work Call Trace: dump_stack_lvl+0x64/0x80 warn_alloc+0x165/0x1e0 ? __alloc_pages_direct_compact+0xb3/0x2b0 __alloc_pages_slowpath.constprop.0+0xd7d/0xde0 __alloc_pages+0x32d/0x350 __dma_direct_alloc_pages.isra.0+0x16a/0x2b0 dma_direct_alloc+0x70/0x270 nvkm_gsp_radix3_sg+0x5e/0x130 [nouveau] r535_gsp_fini+0x1d4/0x350 [nouveau] nvkm_subdev_fini+0x67/0x150 [nouveau] nvkm_device_fini+0x95/0x1e0 [nouveau] nvkm_udevice_fini+0x53/0x70 [nouveau] nvkm_object_fini+0xb9/0x240 [nouveau] nvkm_object_fini+0x75/0x240 [nouveau] nouveau_do_suspend+0xf5/0x280 [nouveau] nouveau_pmops_runtime_suspend+0x3e/0xb0 [nouveau] pci_pm_runtime_suspend+0x67/0x1e0 ? __pfx_pci_pm_runtime_suspend+0x10/0x10 __rpm_callback+0x41/0x170 ? __pfx_pci_pm_runtime_suspend+0x10/0x10 rpm_callback+0x5d/0x70 ? __pfx_pci_pm_runtime_suspend+0x10/0x10 rpm_suspend+0x120/0x6a0 pm_runtime_work+0x98/0xb0 process_one_work+0x171/0x340 worker_thread+0x27b/0x3a0 ? __pfx_worker_thread+0x10/0x10 kthread+0xe5/0x120 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x31/0x50 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1b/0x30 Luckily, we don't actually need to allocate coherent memory for the page table thanks to being able to pass the GPU a radix3 page table for suspend/resume data. So, let's rewrite nvkm_gsp_radix3_sg() to use the sg allocator for level 2. We continue using coherent allocations for lvl0 and 1, since they only take a single page. V2: * Don't forget to actually jump to the next scatterlist when we reach the end of the scatterlist we're currently on when writing out the page table for level 2 Signed-off-by: Lyude Paul Cc: stable@vger.kernel.org Reviewed-by: Ben Skeggs Link: https://patchwork.freedesktop.org/patch/msgid/20240429182318.189668-2-lyude@redhat.com --- .../gpu/drm/nouveau/include/nvkm/subdev/gsp.h | 4 +- .../gpu/drm/nouveau/nvkm/subdev/gsp/r535.c | 77 ++++++++++++------- 2 files changed, 54 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h index 6f5d376d8fcc1..a11d16a16c3b2 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h @@ -15,7 +15,9 @@ struct nvkm_gsp_mem { }; struct nvkm_gsp_radix3 { - struct nvkm_gsp_mem mem[3]; + struct nvkm_gsp_mem lvl0; + struct nvkm_gsp_mem lvl1; + struct sg_table lvl2; }; int nvkm_gsp_sg(struct nvkm_device *, u64 size, struct sg_table *); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c index 9858c1438aa7f..abe41f7a34045 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c @@ -1624,7 +1624,7 @@ r535_gsp_wpr_meta_init(struct nvkm_gsp *gsp) meta->magic = GSP_FW_WPR_META_MAGIC; meta->revision = GSP_FW_WPR_META_REVISION; - meta->sysmemAddrOfRadix3Elf = gsp->radix3.mem[0].addr; + meta->sysmemAddrOfRadix3Elf = gsp->radix3.lvl0.addr; meta->sizeOfRadix3Elf = gsp->fb.wpr2.elf.size; meta->sysmemAddrOfBootloader = gsp->boot.fw.addr; @@ -1919,8 +1919,9 @@ nvkm_gsp_sg(struct nvkm_device *device, u64 size, struct sg_table *sgt) static void nvkm_gsp_radix3_dtor(struct nvkm_gsp *gsp, struct nvkm_gsp_radix3 *rx3) { - for (int i = ARRAY_SIZE(rx3->mem) - 1; i >= 0; i--) - nvkm_gsp_mem_dtor(gsp, &rx3->mem[i]); + nvkm_gsp_sg_free(gsp->subdev.device, &rx3->lvl2); + nvkm_gsp_mem_dtor(gsp, &rx3->lvl1); + nvkm_gsp_mem_dtor(gsp, &rx3->lvl0); } /** @@ -1960,36 +1961,60 @@ static int nvkm_gsp_radix3_sg(struct nvkm_gsp *gsp, struct sg_table *sgt, u64 size, struct nvkm_gsp_radix3 *rx3) { - u64 addr; + struct sg_dma_page_iter sg_dma_iter; + struct scatterlist *sg; + size_t bufsize; + u64 *pte; + int ret, i, page_idx = 0; - for (int i = ARRAY_SIZE(rx3->mem) - 1; i >= 0; i--) { - u64 *ptes; - size_t bufsize; - int ret, idx; + ret = nvkm_gsp_mem_ctor(gsp, GSP_PAGE_SIZE, &rx3->lvl0); + if (ret) + return ret; - bufsize = ALIGN((size / GSP_PAGE_SIZE) * sizeof(u64), GSP_PAGE_SIZE); - ret = nvkm_gsp_mem_ctor(gsp, bufsize, &rx3->mem[i]); - if (ret) - return ret; + ret = nvkm_gsp_mem_ctor(gsp, GSP_PAGE_SIZE, &rx3->lvl1); + if (ret) + goto lvl1_fail; - ptes = rx3->mem[i].data; - if (i == 2) { - struct scatterlist *sgl; + // Allocate level 2 + bufsize = ALIGN((size / GSP_PAGE_SIZE) * sizeof(u64), GSP_PAGE_SIZE); + ret = nvkm_gsp_sg(gsp->subdev.device, bufsize, &rx3->lvl2); + if (ret) + goto lvl2_fail; - for_each_sgtable_dma_sg(sgt, sgl, idx) { - for (int j = 0; j < sg_dma_len(sgl) / GSP_PAGE_SIZE; j++) - *ptes++ = sg_dma_address(sgl) + (GSP_PAGE_SIZE * j); - } - } else { - for (int j = 0; j < size / GSP_PAGE_SIZE; j++) - *ptes++ = addr + GSP_PAGE_SIZE * j; + // Write the bus address of level 1 to level 0 + pte = rx3->lvl0.data; + *pte = rx3->lvl1.addr; + + // Write the bus address of each page in level 2 to level 1 + pte = rx3->lvl1.data; + for_each_sgtable_dma_page(&rx3->lvl2, &sg_dma_iter, 0) + *pte++ = sg_page_iter_dma_address(&sg_dma_iter); + + // Finally, write the bus address of each page in sgt to level 2 + for_each_sgtable_sg(&rx3->lvl2, sg, i) { + void *sgl_end; + + pte = sg_virt(sg); + sgl_end = (void *)pte + sg->length; + + for_each_sgtable_dma_page(sgt, &sg_dma_iter, page_idx) { + *pte++ = sg_page_iter_dma_address(&sg_dma_iter); + page_idx++; + + // Go to the next scatterlist for level 2 if we've reached the end + if ((void *)pte >= sgl_end) + break; } + } - size = rx3->mem[i].size; - addr = rx3->mem[i].addr; + if (ret) { +lvl2_fail: + nvkm_gsp_mem_dtor(gsp, &rx3->lvl1); +lvl1_fail: + nvkm_gsp_mem_dtor(gsp, &rx3->lvl0); } - return 0; + return ret; } int @@ -2021,7 +2046,7 @@ r535_gsp_fini(struct nvkm_gsp *gsp, bool suspend) sr = gsp->sr.meta.data; sr->magic = GSP_FW_SR_META_MAGIC; sr->revision = GSP_FW_SR_META_REVISION; - sr->sysmemAddrOfSuspendResumeData = gsp->sr.radix3.mem[0].addr; + sr->sysmemAddrOfSuspendResumeData = gsp->sr.radix3.lvl0.addr; sr->sizeOfSuspendResumeData = len; mbox0 = lower_32_bits(gsp->sr.meta.addr); From a37ef7613c00f2d72c8fc08bd83fb6cc76926c8c Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Thu, 25 Apr 2024 15:27:48 -0400 Subject: [PATCH 489/606] drm/vmwgfx: Fix invalid reads in fence signaled events Correctly set the length of the drm_event to the size of the structure that's actually used. The length of the drm_event was set to the parent structure instead of to the drm_vmw_event_fence which is supposed to be read. drm_read uses the length parameter to copy the event to the user space thus resuling in oob reads. Signed-off-by: Zack Rusin Fixes: 8b7de6aa8468 ("vmwgfx: Rework fence event action") Reported-by: zdi-disclosures@trendmicro.com # ZDI-CAN-23566 Cc: David Airlie CC: Daniel Vetter Cc: Zack Rusin Cc: Broadcom internal kernel review list Cc: dri-devel@lists.freedesktop.org Cc: linux-kernel@vger.kernel.org Cc: # v3.4+ Reviewed-by: Maaz Mombasawala Reviewed-by: Martin Krastev Link: https://patchwork.freedesktop.org/patch/msgid/20240425192748.1761522-1-zack.rusin@broadcom.com --- drivers/gpu/drm/vmwgfx/vmwgfx_fence.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c index 2a0cda3247031..5efc6a766f64e 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c @@ -991,7 +991,7 @@ static int vmw_event_fence_action_create(struct drm_file *file_priv, } event->event.base.type = DRM_VMW_EVENT_FENCE_SIGNALED; - event->event.base.length = sizeof(*event); + event->event.base.length = sizeof(event->event); event->event.user_data = user_data; ret = drm_event_reserve_init(dev, file_priv, &event->base, &event->event.base); From edc5a009ee3bdc2161e2c3e782a37805617790a3 Mon Sep 17 00:00:00 2001 From: RD Babiera Date: Tue, 23 Apr 2024 20:25:46 +0000 Subject: [PATCH 490/606] usb: typec: tcpm: queue correct sop type in tcpm_queue_vdm_unlocked tcpm_queue_vdm_unlocked queues VDMs over SOP regardless of input parameter tx_sop_type. Fix tcpm_queue_vdm() call. Fixes: 7e7877c55eb1 ("usb: typec: tcpm: add alt mode enter/exit/vdm support for sop'") Cc: stable@vger.kernel.org Signed-off-by: RD Babiera Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240423202546.3374218-2-rdbabiera@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index ab6ed6111ed05..99a039374f9cb 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -1565,7 +1565,7 @@ static void tcpm_queue_vdm_unlocked(struct tcpm_port *port, const u32 header, const u32 *data, int cnt, enum tcpm_transmit_type tx_sop_type) { mutex_lock(&port->lock); - tcpm_queue_vdm(port, header, data, cnt, TCPC_TX_SOP); + tcpm_queue_vdm(port, header, data, cnt, tx_sop_type); mutex_unlock(&port->lock); } From bf20c69cf3cf9c6445c4925dd9a8a6ca1b78bfdf Mon Sep 17 00:00:00 2001 From: RD Babiera Date: Tue, 23 Apr 2024 20:27:16 +0000 Subject: [PATCH 491/606] usb: typec: tcpm: clear pd_event queue in PORT_RESET When a Fast Role Swap control message attempt results in a transition to ERROR_RECOVERY, the TCPC can still queue a TCPM_SOURCING_VBUS event. If the event is queued but processed after the tcpm_reset_port() call in the PORT_RESET state, then the following occurs: 1. tcpm_reset_port() calls tcpm_init_vbus() to reset the vbus sourcing and sinking state 2. tcpm_pd_event_handler() turns VBUS on before the port is in the default state. 3. The port resolves as a sink. In the SNK_DISCOVERY state, tcpm_set_charge() cannot set vbus to charge. Clear pd events within PORT_RESET to get rid of non-applicable events. Fixes: b17dd57118fe ("staging: typec: tcpm: Improve role swap with non PD capable partners") Cc: stable@vger.kernel.org Signed-off-by: RD Babiera Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240423202715.3375827-2-rdbabiera@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 99a039374f9cb..a0e0ffd5a64b6 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -5605,6 +5605,7 @@ static void run_state_machine(struct tcpm_port *port) break; case PORT_RESET: tcpm_reset_port(port); + port->pd_events = 0; if (port->self_powered) tcpm_set_cc(port, TYPEC_CC_OPEN); else From 230ecdf71a644c9c73e0e6735b33173074ae3f94 Mon Sep 17 00:00:00 2001 From: Amit Sunil Dhamne Date: Wed, 24 Apr 2024 15:32:16 -0700 Subject: [PATCH 492/606] usb: typec: tcpm: unregister existing source caps before re-registration Check and unregister existing source caps in tcpm_register_source_caps function before registering new ones. This change fixes following warning when port partner resends source caps after negotiating PD contract for the purpose of re-negotiation. [ 343.135030][ T151] sysfs: cannot create duplicate filename '/devices/virtual/usb_power_delivery/pd1/source-capabilities' [ 343.135071][ T151] Call trace: [ 343.135076][ T151] dump_backtrace+0xe8/0x108 [ 343.135099][ T151] show_stack+0x18/0x24 [ 343.135106][ T151] dump_stack_lvl+0x50/0x6c [ 343.135119][ T151] dump_stack+0x18/0x24 [ 343.135126][ T151] sysfs_create_dir_ns+0xe0/0x140 [ 343.135137][ T151] kobject_add_internal+0x228/0x424 [ 343.135146][ T151] kobject_add+0x94/0x10c [ 343.135152][ T151] device_add+0x1b0/0x4c0 [ 343.135187][ T151] device_register+0x20/0x34 [ 343.135195][ T151] usb_power_delivery_register_capabilities+0x90/0x20c [ 343.135209][ T151] tcpm_pd_rx_handler+0x9f0/0x15b8 [ 343.135216][ T151] kthread_worker_fn+0x11c/0x260 [ 343.135227][ T151] kthread+0x114/0x1bc [ 343.135235][ T151] ret_from_fork+0x10/0x20 [ 343.135265][ T151] kobject: kobject_add_internal failed for source-capabilities with -EEXIST, don't try to register things with the same name in the same directory. Fixes: 8203d26905ee ("usb: typec: tcpm: Register USB Power Delivery Capabilities") Cc: linux-usb@vger.kernel.org Cc: stable@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: Mark Brown Signed-off-by: Amit Sunil Dhamne Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240424223227.1807844-1-amitsd@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index a0e0ffd5a64b6..77e632ea68721 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -2996,7 +2996,7 @@ static int tcpm_register_source_caps(struct tcpm_port *port) { struct usb_power_delivery_desc desc = { port->negotiated_rev }; struct usb_power_delivery_capabilities_desc caps = { }; - struct usb_power_delivery_capabilities *cap; + struct usb_power_delivery_capabilities *cap = port->partner_source_caps; if (!port->partner_pd) port->partner_pd = usb_power_delivery_register(NULL, &desc); @@ -3006,6 +3006,9 @@ static int tcpm_register_source_caps(struct tcpm_port *port) memcpy(caps.pdo, port->source_caps, sizeof(u32) * port->nr_source_caps); caps.role = TYPEC_SOURCE; + if (cap) + usb_power_delivery_unregister_capabilities(cap); + cap = usb_power_delivery_register_capabilities(port->partner_pd, &caps); if (IS_ERR(cap)) return PTR_ERR(cap); From cdc9946ea6377e8e214b135ccc308c5e514ba25f Mon Sep 17 00:00:00 2001 From: RD Babiera Date: Tue, 23 Apr 2024 20:23:57 +0000 Subject: [PATCH 493/606] usb: typec: tcpm: enforce ready state when queueing alt mode vdm Before sending Enter Mode for an Alt Mode, there is a gap between Discover Modes and the Alt Mode driver queueing the Enter Mode VDM for the port partner to send a message to the port. If this message results in unregistering Alt Modes such as in a DR_SWAP, then the following deadlock can occur with respect to the DisplayPort Alt Mode driver: 1. The DR_SWAP state holds port->lock. Unregistering the Alt Mode driver results in a cancel_work_sync() that waits for the current dp_altmode_work to finish. 2. dp_altmode_work makes a call to tcpm_altmode_enter. The deadlock occurs because tcpm_queue_vdm_unlock attempts to hold port->lock. Before attempting to grab the lock, ensure that the port is in a state vdm_run_state_machine can run in. Alt Mode unregistration will not occur in these states. Fixes: 03eafcfb60c0 ("usb: typec: tcpm: Add tcpm_queue_vdm_unlocked() helper") Cc: stable@vger.kernel.org Signed-off-by: RD Babiera Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240423202356.3372314-2-rdbabiera@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 77e632ea68721..53c1f308ebd7c 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -1564,6 +1564,10 @@ static void tcpm_queue_vdm(struct tcpm_port *port, const u32 header, static void tcpm_queue_vdm_unlocked(struct tcpm_port *port, const u32 header, const u32 *data, int cnt, enum tcpm_transmit_type tx_sop_type) { + if (port->state != SRC_READY && port->state != SNK_READY && + port->state != SRC_VDM_IDENTITY_REQUEST) + return; + mutex_lock(&port->lock); tcpm_queue_vdm(port, header, data, cnt, tx_sop_type); mutex_unlock(&port->lock); From ae11f04b452b5205536e1c02d31f8045eba249dd Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Sat, 27 Apr 2024 20:28:12 +0000 Subject: [PATCH 494/606] usb: typec: tcpm: Check for port partner validity before consuming it typec_register_partner() does not guarantee partner registration to always succeed. In the event of failure, port->partner is set to the error value or NULL. Given that port->partner validity is not checked, this results in the following crash: Unable to handle kernel NULL pointer dereference at virtual address xx pc : run_state_machine+0x1bc8/0x1c08 lr : run_state_machine+0x1b90/0x1c08 .. Call trace: run_state_machine+0x1bc8/0x1c08 tcpm_state_machine_work+0x94/0xe4 kthread_worker_fn+0x118/0x328 kthread+0x1d0/0x23c ret_from_fork+0x10/0x20 To prevent the crash, check for port->partner validity before derefencing it in all the call sites. Cc: stable@vger.kernel.org Fixes: c97cd0b4b54e ("usb: typec: tcpm: set initial svdm version based on pd revision") Signed-off-by: Badhri Jagan Sridharan Reviewed-by: Heikki Krogerus Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20240427202812.3435268-1-badhri@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 53c1f308ebd7c..8a1af08f71b64 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -1584,7 +1584,8 @@ static void svdm_consume_identity(struct tcpm_port *port, const u32 *p, int cnt) port->partner_ident.cert_stat = p[VDO_INDEX_CSTAT]; port->partner_ident.product = product; - typec_partner_set_identity(port->partner); + if (port->partner) + typec_partner_set_identity(port->partner); tcpm_log(port, "Identity: %04x:%04x.%04x", PD_IDH_VID(vdo), @@ -1746,6 +1747,9 @@ static void tcpm_register_partner_altmodes(struct tcpm_port *port) struct typec_altmode *altmode; int i; + if (!port->partner) + return; + for (i = 0; i < modep->altmodes; i++) { altmode = typec_partner_register_altmode(port->partner, &modep->altmode_desc[i]); @@ -4238,7 +4242,10 @@ static int tcpm_init_vconn(struct tcpm_port *port) static void tcpm_typec_connect(struct tcpm_port *port) { + struct typec_partner *partner; + if (!port->connected) { + port->connected = true; /* Make sure we don't report stale identity information */ memset(&port->partner_ident, 0, sizeof(port->partner_ident)); port->partner_desc.usb_pd = port->pd_capable; @@ -4248,9 +4255,13 @@ static void tcpm_typec_connect(struct tcpm_port *port) port->partner_desc.accessory = TYPEC_ACCESSORY_AUDIO; else port->partner_desc.accessory = TYPEC_ACCESSORY_NONE; - port->partner = typec_register_partner(port->typec_port, - &port->partner_desc); - port->connected = true; + partner = typec_register_partner(port->typec_port, &port->partner_desc); + if (IS_ERR(partner)) { + dev_err(port->dev, "Failed to register partner (%ld)\n", PTR_ERR(partner)); + return; + } + + port->partner = partner; typec_partner_set_usb_power_delivery(port->partner, port->partner_pd); } } @@ -4330,9 +4341,11 @@ static void tcpm_typec_disconnect(struct tcpm_port *port) port->plug_prime = NULL; port->cable = NULL; if (port->connected) { - typec_partner_set_usb_power_delivery(port->partner, NULL); - typec_unregister_partner(port->partner); - port->partner = NULL; + if (port->partner) { + typec_partner_set_usb_power_delivery(port->partner, NULL); + typec_unregister_partner(port->partner); + port->partner = NULL; + } port->connected = false; } } @@ -4556,6 +4569,9 @@ static enum typec_cc_status tcpm_pwr_opmode_to_rp(enum typec_pwr_opmode opmode) static void tcpm_set_initial_svdm_version(struct tcpm_port *port) { + if (!port->partner) + return; + switch (port->negotiated_rev) { case PD_REV30: break; From 0537c8eef4f699aacdeb67c6181c66cccd63c7f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 30 Apr 2024 11:46:32 -0700 Subject: [PATCH 495/606] Input: amimouse - mark driver struct with __refdata to prevent section mismatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As described in the added code comment, a reference to .exit.text is ok for drivers registered via module_platform_driver_probe(). Make this explicit to prevent the following section mismatch warning WARNING: modpost: drivers/input/mouse/amimouse: section mismatch in reference: amimouse_driver+0x8 (section: .data) -> amimouse_remove (section: .exit.text) that triggers on an allmodconfig W=1 build. Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/2e3783106bf6bd9a7bdeb12b706378fb16316471.1711748999.git.u.kleine-koenig@pengutronix.de Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/amimouse.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/input/mouse/amimouse.c b/drivers/input/mouse/amimouse.c index cda0c3ff5a288..2fbbaeb76d708 100644 --- a/drivers/input/mouse/amimouse.c +++ b/drivers/input/mouse/amimouse.c @@ -132,7 +132,13 @@ static void __exit amimouse_remove(struct platform_device *pdev) input_unregister_device(dev); } -static struct platform_driver amimouse_driver = { +/* + * amimouse_remove() lives in .exit.text. For drivers registered via + * module_platform_driver_probe() this is ok because they cannot get unbound at + * runtime. So mark the driver struct with __refdata to prevent modpost + * triggering a section mismatch warning. + */ +static struct platform_driver amimouse_driver __refdata = { .remove_new = __exit_p(amimouse_remove), .driver = { .name = "amiga-mouse", From aacb99de1099346244d488bdf7df489a44278574 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Tue, 30 Apr 2024 20:46:56 +0200 Subject: [PATCH 496/606] clk: samsung: Revert "clk: Use device_get_match_data()" device_get_match_data() function should not be used on the device other than the one matched to the given driver, because it always returns the match_data of the matched driver. In case of exynos-clkout driver, the original code matches the OF IDs on the PARENT device, so replacing it with of_device_get_match_data() broke the driver. This has been already pointed once in commit 2bc5febd05ab ("clk: samsung: Revert "clk: samsung: exynos-clkout: Use of_device_get_match_data()""). To avoid further confusion, add a comment about this special case, which requires direct of_match_device() call to pass custom IDs array. This partially reverts commit 409c39ec92a35e3708f5b5798c78eae78512cd71. Cc: Fixes: 409c39ec92a3 ("clk: Use device_get_match_data()") Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/20240425075628.838497-1-m.szyprowski@samsung.com Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240430184656.357805-1-krzysztof.kozlowski@linaro.org Signed-off-by: Stephen Boyd --- drivers/clk/samsung/clk-exynos-clkout.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/clk/samsung/clk-exynos-clkout.c b/drivers/clk/samsung/clk-exynos-clkout.c index 3484e6cc80ad1..503c6f5b20d5c 100644 --- a/drivers/clk/samsung/clk-exynos-clkout.c +++ b/drivers/clk/samsung/clk-exynos-clkout.c @@ -13,9 +13,9 @@ #include #include #include +#include #include #include -#include #define EXYNOS_CLKOUT_NR_CLKS 1 #define EXYNOS_CLKOUT_PARENTS 32 @@ -84,17 +84,24 @@ MODULE_DEVICE_TABLE(of, exynos_clkout_ids); static int exynos_clkout_match_parent_dev(struct device *dev, u32 *mux_mask) { const struct exynos_clkout_variant *variant; + const struct of_device_id *match; if (!dev->parent) { dev_err(dev, "not instantiated from MFD\n"); return -EINVAL; } - variant = device_get_match_data(dev->parent); - if (!variant) { + /* + * 'exynos_clkout_ids' arrays is not the ids array matched by + * the dev->parent driver, so of_device_get_match_data() or + * device_get_match_data() cannot be used here. + */ + match = of_match_device(exynos_clkout_ids, dev->parent); + if (!match) { dev_err(dev, "cannot match parent device\n"); return -EINVAL; } + variant = match->data; *mux_mask = variant->mux_mask; From eaf4a9b19b9961f8ca294c39c5f8984a4cf42212 Mon Sep 17 00:00:00 2001 From: Uday Shankar Date: Tue, 30 Apr 2024 15:16:24 -0600 Subject: [PATCH 497/606] ublk: remove segment count and size limits ublk_drv currently creates block devices with the default max_segments and max_segment_size limits of BLK_MAX_SEGMENTS (128) and BLK_MAX_SEGMENT_SIZE (65536) respectively. These defaults can artificially constrain the I/O size seen by the ublk server - for example, suppose that the ublk server has configured itself to accept I/Os up to 1M and the application is also issuing 1M sized I/Os. If the I/O buffer used by the application is backed by 4K pages, the buffer could consist of up to 1M / 4K = 256 physically discontiguous segments (even if the buffer is virtually contiguous). As such, the I/O could exceed the default max_segments limit and get split. This can cause unnecessary performance issues if the ublk server is optimized to handle 1M I/Os. The block layer's segment count/size limits exist to model hardware constraints which don't exist in ublk_drv's case, so just remove those limits for the block devices created by ublk_drv. Signed-off-by: Uday Shankar Reviewed-by: Riley Thomasson Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20240430211623.2802036-1-ushankar@purestorage.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index bea3d5cf8a834..374e4efa8759f 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -2177,7 +2177,8 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd) .max_hw_sectors = p->max_sectors, .chunk_sectors = p->chunk_sectors, .virt_boundary_mask = p->virt_boundary_mask, - + .max_segments = USHRT_MAX, + .max_segment_size = UINT_MAX, }; struct gendisk *disk; int ret = -EINVAL; From f06446ef23216090d1ee8ede1a7d7ae430c22dcc Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Tue, 23 Apr 2024 14:40:37 -0400 Subject: [PATCH 498/606] drm/amdgpu: Fix VRAM memory accounting Subtract the VRAM pinned memory when checking for available memory in amdgpu_amdkfd_reserve_mem_limit function since that memory is not available for use. Signed-off-by: Mukul Joshi Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 2131de36e3dac..e4d4e55c08ad5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -220,7 +220,7 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, (kfd_mem_limit.ttm_mem_used + ttm_mem_needed > kfd_mem_limit.max_ttm_mem_limit) || (adev && xcp_id >= 0 && adev->kfd.vram_used[xcp_id] + vram_needed > - vram_size - reserved_for_pt)) { + vram_size - reserved_for_pt - atomic64_read(&adev->vram_pin_size))) { ret = -ENOMEM; goto release; } From 719564737a9ac3d0b49c314450b56cf6f7d71358 Mon Sep 17 00:00:00 2001 From: George Shen Date: Thu, 16 Sep 2021 19:55:39 -0400 Subject: [PATCH 499/606] drm/amd/display: Handle Y carry-over in VCP X.Y calculation Theoretically rare corner case where ceil(Y) results in rounding up to an integer. If this happens, the 1 should be carried over to the X value. CC: stable@vger.kernel.org Reviewed-by: Rodrigo Siqueira Signed-off-by: George Shen Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn31/dcn31_hpo_dp_link_encoder.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_link_encoder.c index 5b7ad38f85e08..65e45a0b4ff34 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_link_encoder.c @@ -395,6 +395,12 @@ void dcn31_hpo_dp_link_enc_set_throttled_vcp_size( x), 25)); + // If y rounds up to integer, carry it over to x. + if (y >> 25) { + x += 1; + y = 0; + } + switch (stream_encoder_inst) { case 0: REG_SET_2(DP_DPHY_SYM32_VC_RATE_CNTL0, 0, From be53bd4f00aa4c7db9f41116224c027b4cfce8e3 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Thu, 11 Apr 2024 17:38:08 -0600 Subject: [PATCH 500/606] drm/amd/display: Ensure that dmcub support flag is set for DCN20 In the DCN20 resource initialization, ensure that DMCUB support starts configured as true. Signed-off-by: Rodrigo Siqueira Acked-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c index a2387cea1af9a..622214b365a25 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c @@ -2449,6 +2449,7 @@ static bool dcn20_resource_construct( dc->caps.post_blend_color_processing = true; dc->caps.force_dp_tps4_for_cp2520 = true; dc->caps.extended_aux_timeout_support = true; + dc->caps.dmcub_support = true; /* Color pipeline capabilities */ dc->caps.color.dpp.dcn_arch = 1; From 9f8eeea1643c213c0e1ad2e546a15536200d216b Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Sun, 28 Apr 2024 19:16:38 +0800 Subject: [PATCH 501/606] rxrpc: Fix using alignmask being zero for __page_frag_alloc_align() rxrpc_alloc_data_txbuf() may be called with data_align being zero in none_alloc_txbuf() and rxkad_alloc_txbuf(), data_align is supposed to be an order-based alignment value, but zero is not a valid order-based alignment value, and '~(data_align - 1)' doesn't result in a valid mask-based alignment value for __page_frag_alloc_align(). Fix it by passing a valid order-based alignment value in none_alloc_txbuf() and rxkad_alloc_txbuf(). Also use page_frag_alloc_align() expecting an order-based alignment value in rxrpc_alloc_data_txbuf() to avoid doing the alignment converting operation and to catch possible invalid alignment value in the future. Remove the 'if (data_align)' checking too, as it is always true for a valid order-based alignment value. Fixes: 6b2536462fd4 ("rxrpc: Fix use of changed alignment param to page_frag_alloc_align()") Fixes: 49489bb03a50 ("rxrpc: Do zerocopy using MSG_SPLICE_PAGES and page frags") CC: Alexander Duyck Signed-off-by: Yunsheng Lin Acked-by: David Howells Link: https://lore.kernel.org/r/20240428111640.27306-1-linyunsheng@huawei.com Signed-off-by: Jakub Kicinski --- net/rxrpc/insecure.c | 2 +- net/rxrpc/rxkad.c | 2 +- net/rxrpc/txbuf.c | 10 +++++----- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c index f2701068ed9e4..6716c021a532e 100644 --- a/net/rxrpc/insecure.c +++ b/net/rxrpc/insecure.c @@ -19,7 +19,7 @@ static int none_init_connection_security(struct rxrpc_connection *conn, */ static struct rxrpc_txbuf *none_alloc_txbuf(struct rxrpc_call *call, size_t remain, gfp_t gfp) { - return rxrpc_alloc_data_txbuf(call, min_t(size_t, remain, RXRPC_JUMBO_DATALEN), 0, gfp); + return rxrpc_alloc_data_txbuf(call, min_t(size_t, remain, RXRPC_JUMBO_DATALEN), 1, gfp); } static int none_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index f1a68270862db..48a1475e6b063 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -155,7 +155,7 @@ static struct rxrpc_txbuf *rxkad_alloc_txbuf(struct rxrpc_call *call, size_t rem switch (call->conn->security_level) { default: space = min_t(size_t, remain, RXRPC_JUMBO_DATALEN); - return rxrpc_alloc_data_txbuf(call, space, 0, gfp); + return rxrpc_alloc_data_txbuf(call, space, 1, gfp); case RXRPC_SECURITY_AUTH: shdr = sizeof(struct rxkad_level1_hdr); break; diff --git a/net/rxrpc/txbuf.c b/net/rxrpc/txbuf.c index e0679658d9de0..c3913d8a50d34 100644 --- a/net/rxrpc/txbuf.c +++ b/net/rxrpc/txbuf.c @@ -21,20 +21,20 @@ struct rxrpc_txbuf *rxrpc_alloc_data_txbuf(struct rxrpc_call *call, size_t data_ { struct rxrpc_wire_header *whdr; struct rxrpc_txbuf *txb; - size_t total, hoff = 0; + size_t total, hoff; void *buf; txb = kmalloc(sizeof(*txb), gfp); if (!txb) return NULL; - if (data_align) - hoff = round_up(sizeof(*whdr), data_align) - sizeof(*whdr); + hoff = round_up(sizeof(*whdr), data_align) - sizeof(*whdr); total = hoff + sizeof(*whdr) + data_size; + data_align = umax(data_align, L1_CACHE_BYTES); mutex_lock(&call->conn->tx_data_alloc_lock); - buf = __page_frag_alloc_align(&call->conn->tx_data_alloc, total, gfp, - ~(data_align - 1) & ~(L1_CACHE_BYTES - 1)); + buf = page_frag_alloc_align(&call->conn->tx_data_alloc, total, gfp, + data_align); mutex_unlock(&call->conn->tx_data_alloc_lock); if (!buf) { kfree(txb); From 9067eccdd7849dd120d5495dbd5a686fa6ed2c1a Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 29 Apr 2024 11:11:47 +0200 Subject: [PATCH 502/606] cxgb4: Properly lock TX queue for the selftest. The selftest for the driver sends a dummy packet and checks if the packet will be received properly as it should be. The regular TX path and the selftest can use the same network queue so locking is required and was missing in the selftest path. This was addressed in the commit cited below. Unfortunately locking the TX queue requires BH to be disabled which is not the case in selftest path which is invoked in process context. Lockdep should be complaining about this. Use __netif_tx_lock_bh() for TX queue locking. Fixes: c650e04898072 ("cxgb4: Fix race between loopback and normal Tx path") Reported-by: "John B. Wyatt IV" Closes: https://lore.kernel.org/all/Zic0ot5aGgR-V4Ks@thinkpad2021/ Signed-off-by: Sebastian Andrzej Siewior Link: https://lore.kernel.org/r/20240429091147.YWAaal4v@linutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/chelsio/cxgb4/sge.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 49d5808b7d11d..de52bcb884c41 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -2670,12 +2670,12 @@ int cxgb4_selftest_lb_pkt(struct net_device *netdev) lb->loopback = 1; q = &adap->sge.ethtxq[pi->first_qset]; - __netif_tx_lock(q->txq, smp_processor_id()); + __netif_tx_lock_bh(q->txq); reclaim_completed_tx(adap, &q->q, -1, true); credits = txq_avail(&q->q) - ndesc; if (unlikely(credits < 0)) { - __netif_tx_unlock(q->txq); + __netif_tx_unlock_bh(q->txq); return -ENOMEM; } @@ -2710,7 +2710,7 @@ int cxgb4_selftest_lb_pkt(struct net_device *netdev) init_completion(&lb->completion); txq_advance(&q->q, ndesc); cxgb4_ring_tx_db(adap, &q->q, ndesc); - __netif_tx_unlock(q->txq); + __netif_tx_unlock_bh(q->txq); /* wait for the pkt to return */ ret = wait_for_completion_timeout(&lb->completion, 10 * HZ); From a01b64f31d65bdc917d1afb4cec9915beb6931be Mon Sep 17 00:00:00 2001 From: Swapnil Patel Date: Tue, 2 Apr 2024 21:07:46 -0400 Subject: [PATCH 503/606] drm/amd/display: Add dtbclk access to dcn315 [Why & How] Currently DCN315 clk manager is missing code to enable/disable dtbclk. Because of this, "optimized_required" flag is constantly set and this prevents FreeSync from engaging for certain high bandwidth display Modes which require DTBCLK. Reviewed-by: Dmytro Laktyushkin Acked-by: Aurabindo Pillai Signed-off-by: Swapnil Patel Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c index 644da46373209..5506cf9b3672f 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c @@ -145,6 +145,10 @@ static void dcn315_update_clocks(struct clk_mgr *clk_mgr_base, */ clk_mgr_base->clks.zstate_support = new_clocks->zstate_support; if (safe_to_lower) { + if (clk_mgr_base->clks.dtbclk_en && !new_clocks->dtbclk_en) { + dcn315_smu_set_dtbclk(clk_mgr, false); + clk_mgr_base->clks.dtbclk_en = new_clocks->dtbclk_en; + } /* check that we're not already in lower */ if (clk_mgr_base->clks.pwr_state != DCN_PWR_STATE_LOW_POWER) { display_count = dcn315_get_active_display_cnt_wa(dc, context); @@ -160,6 +164,10 @@ static void dcn315_update_clocks(struct clk_mgr *clk_mgr_base, } } } else { + if (!clk_mgr_base->clks.dtbclk_en && new_clocks->dtbclk_en) { + dcn315_smu_set_dtbclk(clk_mgr, true); + clk_mgr_base->clks.dtbclk_en = new_clocks->dtbclk_en; + } /* check that we're not already in D0 */ if (clk_mgr_base->clks.pwr_state != DCN_PWR_STATE_MISSION_MODE) { union display_idle_optimization_u idle_info = { 0 }; From b9a61c20179fda7bdfe2c1210aa72451991ab81a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Mon, 29 Apr 2024 15:38:32 +0200 Subject: [PATCH 504/606] net: dsa: mv88e6xxx: Fix number of databases for 88E6141 / 88E6341 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Topaz family (88E6141 and 88E6341) only support 256 Forwarding Information Tables. Fixes: a75961d0ebfd ("net: dsa: mv88e6xxx: Add support for ethernet switch 88E6341") Fixes: 1558727a1c1b ("net: dsa: mv88e6xxx: Add support for ethernet switch 88E6141") Signed-off-by: Marek Behún Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20240429133832.9547-1-kabel@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/chip.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 59b5dd0e2f41d..14daf432f30b5 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -5705,7 +5705,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .prod_num = MV88E6XXX_PORT_SWITCH_ID_PROD_6141, .family = MV88E6XXX_FAMILY_6341, .name = "Marvell 88E6141", - .num_databases = 4096, + .num_databases = 256, .num_macs = 2048, .num_ports = 6, .num_internal_phys = 5, @@ -6164,7 +6164,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .prod_num = MV88E6XXX_PORT_SWITCH_ID_PROD_6341, .family = MV88E6XXX_FAMILY_6341, .name = "Marvell 88E6341", - .num_databases = 4096, + .num_databases = 256, .num_macs = 2048, .num_internal_phys = 5, .num_ports = 6, From 9a35d205f466501dcfe5625ca313d944d0ac2d60 Mon Sep 17 00:00:00 2001 From: Gabe Teeger Date: Tue, 9 Apr 2024 10:38:58 -0400 Subject: [PATCH 505/606] drm/amd/display: Atom Integrated System Info v2_2 for DCN35 New request from KMD/VBIOS in order to support new UMA carveout model. This fixes a null dereference from accessing Ctx->dc_bios->integrated_info while it was NULL. DAL parses through the BIOS and extracts the necessary integrated_info but was missing a case for the new BIOS version 2.3. Reviewed-by: Nicholas Kazlauskas Acked-by: Aurabindo Pillai Signed-off-by: Gabe Teeger Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index 05f392501c0ae..ab31643b10969 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -2948,6 +2948,7 @@ static enum bp_result construct_integrated_info( result = get_integrated_info_v2_1(bp, info); break; case 2: + case 3: result = get_integrated_info_v2_2(bp, info); break; default: From 892b41b16f6163e6556545835abba668fcab4eea Mon Sep 17 00:00:00 2001 From: Hersen Wu Date: Tue, 13 Feb 2024 14:26:06 -0500 Subject: [PATCH 506/606] drm/amd/display: Fix incorrect DSC instance for MST [Why] DSC debugfs, such as dp_dsc_clock_en_read, use aconnector->dc_link to find pipe_ctx for display. Displays connected to MST hub share the same dc_link. DSC instance is from pipe_ctx. This causes incorrect DSC instance for display connected to MST hub. [How] Add aconnector->sink check to find pipe_ctx. CC: stable@vger.kernel.org Reviewed-by: Aurabindo Pillai Signed-off-by: Hersen Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 48 ++++++++++++++----- 1 file changed, 36 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index eee4945653e2d..c7715a17f388b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -1495,7 +1495,9 @@ static ssize_t dp_dsc_clock_en_read(struct file *f, char __user *buf, for (i = 0; i < MAX_PIPES; i++) { pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i]; if (pipe_ctx->stream && - pipe_ctx->stream->link == aconnector->dc_link) + pipe_ctx->stream->link == aconnector->dc_link && + pipe_ctx->stream->sink && + pipe_ctx->stream->sink == aconnector->dc_sink) break; } @@ -1596,7 +1598,9 @@ static ssize_t dp_dsc_clock_en_write(struct file *f, const char __user *buf, for (i = 0; i < MAX_PIPES; i++) { pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i]; if (pipe_ctx->stream && - pipe_ctx->stream->link == aconnector->dc_link) + pipe_ctx->stream->link == aconnector->dc_link && + pipe_ctx->stream->sink && + pipe_ctx->stream->sink == aconnector->dc_sink) break; } @@ -1681,7 +1685,9 @@ static ssize_t dp_dsc_slice_width_read(struct file *f, char __user *buf, for (i = 0; i < MAX_PIPES; i++) { pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i]; if (pipe_ctx->stream && - pipe_ctx->stream->link == aconnector->dc_link) + pipe_ctx->stream->link == aconnector->dc_link && + pipe_ctx->stream->sink && + pipe_ctx->stream->sink == aconnector->dc_sink) break; } @@ -1780,7 +1786,9 @@ static ssize_t dp_dsc_slice_width_write(struct file *f, const char __user *buf, for (i = 0; i < MAX_PIPES; i++) { pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i]; if (pipe_ctx->stream && - pipe_ctx->stream->link == aconnector->dc_link) + pipe_ctx->stream->link == aconnector->dc_link && + pipe_ctx->stream->sink && + pipe_ctx->stream->sink == aconnector->dc_sink) break; } @@ -1865,7 +1873,9 @@ static ssize_t dp_dsc_slice_height_read(struct file *f, char __user *buf, for (i = 0; i < MAX_PIPES; i++) { pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i]; if (pipe_ctx->stream && - pipe_ctx->stream->link == aconnector->dc_link) + pipe_ctx->stream->link == aconnector->dc_link && + pipe_ctx->stream->sink && + pipe_ctx->stream->sink == aconnector->dc_sink) break; } @@ -1964,7 +1974,9 @@ static ssize_t dp_dsc_slice_height_write(struct file *f, const char __user *buf, for (i = 0; i < MAX_PIPES; i++) { pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i]; if (pipe_ctx->stream && - pipe_ctx->stream->link == aconnector->dc_link) + pipe_ctx->stream->link == aconnector->dc_link && + pipe_ctx->stream->sink && + pipe_ctx->stream->sink == aconnector->dc_sink) break; } @@ -2045,7 +2057,9 @@ static ssize_t dp_dsc_bits_per_pixel_read(struct file *f, char __user *buf, for (i = 0; i < MAX_PIPES; i++) { pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i]; if (pipe_ctx->stream && - pipe_ctx->stream->link == aconnector->dc_link) + pipe_ctx->stream->link == aconnector->dc_link && + pipe_ctx->stream->sink && + pipe_ctx->stream->sink == aconnector->dc_sink) break; } @@ -2141,7 +2155,9 @@ static ssize_t dp_dsc_bits_per_pixel_write(struct file *f, const char __user *bu for (i = 0; i < MAX_PIPES; i++) { pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i]; if (pipe_ctx->stream && - pipe_ctx->stream->link == aconnector->dc_link) + pipe_ctx->stream->link == aconnector->dc_link && + pipe_ctx->stream->sink && + pipe_ctx->stream->sink == aconnector->dc_sink) break; } @@ -2220,7 +2236,9 @@ static ssize_t dp_dsc_pic_width_read(struct file *f, char __user *buf, for (i = 0; i < MAX_PIPES; i++) { pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i]; if (pipe_ctx->stream && - pipe_ctx->stream->link == aconnector->dc_link) + pipe_ctx->stream->link == aconnector->dc_link && + pipe_ctx->stream->sink && + pipe_ctx->stream->sink == aconnector->dc_sink) break; } @@ -2276,7 +2294,9 @@ static ssize_t dp_dsc_pic_height_read(struct file *f, char __user *buf, for (i = 0; i < MAX_PIPES; i++) { pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i]; if (pipe_ctx->stream && - pipe_ctx->stream->link == aconnector->dc_link) + pipe_ctx->stream->link == aconnector->dc_link && + pipe_ctx->stream->sink && + pipe_ctx->stream->sink == aconnector->dc_sink) break; } @@ -2347,7 +2367,9 @@ static ssize_t dp_dsc_chunk_size_read(struct file *f, char __user *buf, for (i = 0; i < MAX_PIPES; i++) { pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i]; if (pipe_ctx->stream && - pipe_ctx->stream->link == aconnector->dc_link) + pipe_ctx->stream->link == aconnector->dc_link && + pipe_ctx->stream->sink && + pipe_ctx->stream->sink == aconnector->dc_sink) break; } @@ -2418,7 +2440,9 @@ static ssize_t dp_dsc_slice_bpg_offset_read(struct file *f, char __user *buf, for (i = 0; i < MAX_PIPES; i++) { pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i]; if (pipe_ctx->stream && - pipe_ctx->stream->link == aconnector->dc_link) + pipe_ctx->stream->link == aconnector->dc_link && + pipe_ctx->stream->sink && + pipe_ctx->stream->sink == aconnector->dc_sink) break; } From 46fe9cb1a9e62f4e6229f48ae303ef8e6c1fdc64 Mon Sep 17 00:00:00 2001 From: Meenakshikumar Somasundaram Date: Wed, 10 Apr 2024 10:46:35 -0400 Subject: [PATCH 507/606] drm/amd/display: Allocate zero bw after bw alloc enable [Why] During DP tunnel creation, CM preallocates BW and reduces estimated BW of other DPIA. CM release preallocation only when allocation is complete. Display mode validation logic validates timings based on bw available per host router. In multi display setup, this causes bw allocation failure when allocation greater than estimated bw. [How] Do zero alloc to make the CM to release preallocation and update estimated BW correctly for all DPIAs per host router. Reviewed-by: PeiChen Huang Acked-by: Aurabindo Pillai Signed-off-by: Meenakshikumar Somasundaram Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/link/protocols/link_dp_dpia_bw.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c index 5491b707cec88..5a965c26bf209 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c @@ -270,7 +270,7 @@ static void set_usb4_req_bw_req(struct dc_link *link, int req_bw) /* Error check whether requested and allocated are equal */ req_bw = requested_bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity); - if (req_bw == link->dpia_bw_alloc_config.allocated_bw) { + if (req_bw && (req_bw == link->dpia_bw_alloc_config.allocated_bw)) { DC_LOG_ERROR("%s: Request bw equals to allocated bw for link(%d)\n", __func__, link->link_index); } @@ -341,6 +341,14 @@ bool link_dp_dpia_set_dptx_usb4_bw_alloc_support(struct dc_link *link) ret = true; init_usb4_bw_struct(link); link->dpia_bw_alloc_config.bw_alloc_enabled = true; + + /* + * During DP tunnel creation, CM preallocates BW and reduces estimated BW of other + * DPIA. CM release preallocation only when allocation is complete. Do zero alloc + * to make the CM to release preallocation and update estimated BW correctly for + * all DPIAs per host router + */ + link_dp_dpia_allocate_usb4_bandwidth_for_stream(link, 0); } } From d3a9331a6591e9df64791e076f6591f440af51c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 21 Mar 2024 11:32:02 +0100 Subject: [PATCH 508/606] drm/amdgpu: once more fix the call oder in amdgpu_ttm_move() v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts drm/amdgpu: fix ftrace event amdgpu_bo_move always move on same heap. The basic problem here is that after the move the old location is simply not available any more. Some fixes were suggested, but essentially we should call the move notification before actually moving things because only this way we have the correct order for DMA-buf and VM move notifications as well. Also rework the statistic handling so that we don't update the eviction counter before the move. v2: add missing NULL check Signed-off-by: Christian König Fixes: 94aeb4117343 ("drm/amdgpu: fix ftrace event amdgpu_bo_move always move on same heap") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3171 Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher CC: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 14 ++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 48 ++++++++++++---------- 3 files changed, 38 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index ce733e3cb35d0..f6d503432a9ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1243,14 +1243,18 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer, * amdgpu_bo_move_notify - notification about a memory move * @bo: pointer to a buffer object * @evict: if this move is evicting the buffer from the graphics address space + * @new_mem: new resource for backing the BO * * Marks the corresponding &amdgpu_bo buffer object as invalid, also performs * bookkeeping. * TTM driver callback which is called when ttm moves a buffer. */ -void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict) +void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, + bool evict, + struct ttm_resource *new_mem) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); + struct ttm_resource *old_mem = bo->resource; struct amdgpu_bo *abo; if (!amdgpu_bo_is_amdgpu_bo(bo)) @@ -1262,12 +1266,12 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict) amdgpu_bo_kunmap(abo); if (abo->tbo.base.dma_buf && !abo->tbo.base.import_attach && - bo->resource->mem_type != TTM_PL_SYSTEM) + old_mem && old_mem->mem_type != TTM_PL_SYSTEM) dma_buf_move_notify(abo->tbo.base.dma_buf); - /* remember the eviction */ - if (evict) - atomic64_inc(&adev->num_evictions); + /* move_notify is called before move happens */ + trace_amdgpu_bo_move(abo, new_mem ? new_mem->mem_type : -1, + old_mem ? old_mem->mem_type : -1); } void amdgpu_bo_get_memory(struct amdgpu_bo *bo, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index fa03d9e4874cc..bc42ccbde659a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -328,7 +328,9 @@ int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata, int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer, size_t buffer_size, uint32_t *metadata_size, uint64_t *flags); -void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict); +void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, + bool evict, + struct ttm_resource *new_mem); void amdgpu_bo_release_notify(struct ttm_buffer_object *bo); vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo); void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 1d71729e3f6bc..4ffee55452650 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -481,14 +481,16 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, if (!old_mem || (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL)) { + amdgpu_bo_move_notify(bo, evict, new_mem); ttm_bo_move_null(bo, new_mem); - goto out; + return 0; } if (old_mem->mem_type == TTM_PL_SYSTEM && (new_mem->mem_type == TTM_PL_TT || new_mem->mem_type == AMDGPU_PL_PREEMPT)) { + amdgpu_bo_move_notify(bo, evict, new_mem); ttm_bo_move_null(bo, new_mem); - goto out; + return 0; } if ((old_mem->mem_type == TTM_PL_TT || old_mem->mem_type == AMDGPU_PL_PREEMPT) && @@ -498,9 +500,10 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, return r; amdgpu_ttm_backend_unbind(bo->bdev, bo->ttm); + amdgpu_bo_move_notify(bo, evict, new_mem); ttm_resource_free(bo, &bo->resource); ttm_bo_assign_mem(bo, new_mem); - goto out; + return 0; } if (old_mem->mem_type == AMDGPU_PL_GDS || @@ -512,8 +515,9 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, new_mem->mem_type == AMDGPU_PL_OA || new_mem->mem_type == AMDGPU_PL_DOORBELL) { /* Nothing to save here */ + amdgpu_bo_move_notify(bo, evict, new_mem); ttm_bo_move_null(bo, new_mem); - goto out; + return 0; } if (bo->type == ttm_bo_type_device && @@ -525,22 +529,23 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; } - if (adev->mman.buffer_funcs_enabled) { - if (((old_mem->mem_type == TTM_PL_SYSTEM && - new_mem->mem_type == TTM_PL_VRAM) || - (old_mem->mem_type == TTM_PL_VRAM && - new_mem->mem_type == TTM_PL_SYSTEM))) { - hop->fpfn = 0; - hop->lpfn = 0; - hop->mem_type = TTM_PL_TT; - hop->flags = TTM_PL_FLAG_TEMPORARY; - return -EMULTIHOP; - } + if (adev->mman.buffer_funcs_enabled && + ((old_mem->mem_type == TTM_PL_SYSTEM && + new_mem->mem_type == TTM_PL_VRAM) || + (old_mem->mem_type == TTM_PL_VRAM && + new_mem->mem_type == TTM_PL_SYSTEM))) { + hop->fpfn = 0; + hop->lpfn = 0; + hop->mem_type = TTM_PL_TT; + hop->flags = TTM_PL_FLAG_TEMPORARY; + return -EMULTIHOP; + } + amdgpu_bo_move_notify(bo, evict, new_mem); + if (adev->mman.buffer_funcs_enabled) r = amdgpu_move_blit(bo, evict, new_mem, old_mem); - } else { + else r = -ENODEV; - } if (r) { /* Check that all memory is CPU accessible */ @@ -555,11 +560,10 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, return r; } - trace_amdgpu_bo_move(abo, new_mem->mem_type, old_mem->mem_type); -out: - /* update statistics */ + /* update statistics after the move */ + if (evict) + atomic64_inc(&adev->num_evictions); atomic64_add(bo->base.size, &adev->num_bytes_moved); - amdgpu_bo_move_notify(bo, evict); return 0; } @@ -1559,7 +1563,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, static void amdgpu_bo_delete_mem_notify(struct ttm_buffer_object *bo) { - amdgpu_bo_move_notify(bo, false); + amdgpu_bo_move_notify(bo, false, NULL); } static struct ttm_device_funcs amdgpu_bo_driver = { From 387f295cb2150ed164905b648d76dfcbd3621778 Mon Sep 17 00:00:00 2001 From: Vitaly Lifshits Date: Mon, 29 Apr 2024 10:10:40 -0700 Subject: [PATCH 509/606] e1000e: change usleep_range to udelay in PHY mdic access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a partial revert of commit 6dbdd4de0362 ("e1000e: Workaround for sporadic MDI error on Meteor Lake systems"). The referenced commit used usleep_range inside the PHY access routines, which are sometimes called from an atomic context. This can lead to a kernel panic in some scenarios, such as cable disconnection and reconnection on vPro systems. Solve this by changing the usleep_range calls back to udelay. Fixes: 6dbdd4de0362 ("e1000e: Workaround for sporadic MDI error on Meteor Lake systems") Cc: stable@vger.kernel.org Reported-by: Jérôme Carretero Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218740 Closes: https://lore.kernel.org/lkml/a7eb665c74b5efb5140e6979759ed243072cb24a.camel@zougloub.eu/ Co-developed-by: Sasha Neftin Signed-off-by: Sasha Neftin Signed-off-by: Vitaly Lifshits Tested-by: Dima Ruinskiy Signed-off-by: Tony Nguyen Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240429171040.1152516-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/e1000e/phy.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c index 93544f1cc2a51..f7ae0e0aa4a4f 100644 --- a/drivers/net/ethernet/intel/e1000e/phy.c +++ b/drivers/net/ethernet/intel/e1000e/phy.c @@ -157,7 +157,7 @@ s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data) * the lower time out */ for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) { - usleep_range(50, 60); + udelay(50); mdic = er32(MDIC); if (mdic & E1000_MDIC_READY) break; @@ -181,7 +181,7 @@ s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data) * reading duplicate data in the next MDIC transaction. */ if (hw->mac.type == e1000_pch2lan) - usleep_range(100, 150); + udelay(100); if (success) { *data = (u16)mdic; @@ -237,7 +237,7 @@ s32 e1000e_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data) * the lower time out */ for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) { - usleep_range(50, 60); + udelay(50); mdic = er32(MDIC); if (mdic & E1000_MDIC_READY) break; @@ -261,7 +261,7 @@ s32 e1000e_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data) * reading duplicate data in the next MDIC transaction. */ if (hw->mac.type == e1000_pch2lan) - usleep_range(100, 150); + udelay(100); if (success) return 0; From 0e62103bdcbc88281e16add299a946fb3bd02fbe Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Thu, 18 Apr 2024 11:19:03 -0600 Subject: [PATCH 510/606] drm/amd/display: Add VCO speed parameter for DCN31 FPU Add VCO speed parameters in the bounding box array. Acked-by: Wayne Lin Signed-off-by: Rodrigo Siqueira Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c index deb6d162a2d5c..7307b7b8d8ad7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c @@ -291,6 +291,7 @@ static struct _vcs_dpi_soc_bounding_box_st dcn3_15_soc = { .do_urgent_latency_adjustment = false, .urgent_latency_adjustment_fabric_clock_component_us = 0, .urgent_latency_adjustment_fabric_clock_reference_mhz = 0, + .dispclk_dppclk_vco_speed_mhz = 2400.0, .num_chans = 4, .dummy_pstate_latency_us = 10.0 }; @@ -438,6 +439,7 @@ static struct _vcs_dpi_soc_bounding_box_st dcn3_16_soc = { .do_urgent_latency_adjustment = false, .urgent_latency_adjustment_fabric_clock_component_us = 0, .urgent_latency_adjustment_fabric_clock_reference_mhz = 0, + .dispclk_dppclk_vco_speed_mhz = 2500.0, }; void dcn31_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes, From ce649bd2d834db83ecc2756a362c9a1ec61658a5 Mon Sep 17 00:00:00 2001 From: Leo Ma Date: Thu, 11 Apr 2024 17:17:04 -0400 Subject: [PATCH 511/606] drm/amd/display: Fix DC mode screen flickering on DCN321 [Why && How] Screen flickering saw on 4K@60 eDP with high refresh rate external monitor when booting up in DC mode. DC Mode Capping is disabled which caused wrong UCLK being used. Reviewed-by: Alvin Lee Acked-by: Wayne Lin Signed-off-by: Leo Ma Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c index bec252e1dd27a..e506e4f969ca9 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c @@ -712,8 +712,12 @@ static void dcn32_update_clocks(struct clk_mgr *clk_mgr_base, * since we calculate mode support based on softmax being the max UCLK * frequency. */ - dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, - dc->clk_mgr->bw_params->dc_mode_softmax_memclk); + if (dc->debug.disable_dc_mode_overwrite) { + dcn30_smu_set_hard_max_by_freq(clk_mgr, PPCLK_UCLK, dc->clk_mgr->bw_params->max_memclk_mhz); + dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, dc->clk_mgr->bw_params->max_memclk_mhz); + } else + dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, + dc->clk_mgr->bw_params->dc_mode_softmax_memclk); } else { dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, dc->clk_mgr->bw_params->max_memclk_mhz); } @@ -746,8 +750,13 @@ static void dcn32_update_clocks(struct clk_mgr *clk_mgr_base, /* set UCLK to requested value if P-State switching is supported, or to re-enable P-State switching */ if (clk_mgr_base->clks.p_state_change_support && (update_uclk || !clk_mgr_base->clks.prev_p_state_change_support) && - !dc->work_arounds.clock_update_disable_mask.uclk) + !dc->work_arounds.clock_update_disable_mask.uclk) { + if (dc->clk_mgr->dc_mode_softmax_enabled && dc->debug.disable_dc_mode_overwrite) + dcn30_smu_set_hard_max_by_freq(clk_mgr, PPCLK_UCLK, + max((int)dc->clk_mgr->bw_params->dc_mode_softmax_memclk, khz_to_mhz_ceil(clk_mgr_base->clks.dramclk_khz))); + dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, khz_to_mhz_ceil(clk_mgr_base->clks.dramclk_khz)); + } if (clk_mgr_base->clks.num_ways != new_clocks->num_ways && clk_mgr_base->clks.num_ways > new_clocks->num_ways) { From fb7a0d334894206ae35f023a82cad5a290fd7386 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 29 Apr 2024 20:00:31 +0200 Subject: [PATCH 512/606] mptcp: ensure snd_nxt is properly initialized on connect Christoph reported a splat hinting at a corrupted snd_una: WARNING: CPU: 1 PID: 38 at net/mptcp/protocol.c:1005 __mptcp_clean_una+0x4b3/0x620 net/mptcp/protocol.c:1005 Modules linked in: CPU: 1 PID: 38 Comm: kworker/1:1 Not tainted 6.9.0-rc1-gbbeac67456c9 #59 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014 Workqueue: events mptcp_worker RIP: 0010:__mptcp_clean_una+0x4b3/0x620 net/mptcp/protocol.c:1005 Code: be 06 01 00 00 bf 06 01 00 00 e8 a8 12 e7 fe e9 00 fe ff ff e8 8e 1a e7 fe 0f b7 ab 3e 02 00 00 e9 d3 fd ff ff e8 7d 1a e7 fe <0f> 0b 4c 8b bb e0 05 00 00 e9 74 fc ff ff e8 6a 1a e7 fe 0f 0b e9 RSP: 0018:ffffc9000013fd48 EFLAGS: 00010293 RAX: 0000000000000000 RBX: ffff8881029bd280 RCX: ffffffff82382fe4 RDX: ffff8881003cbd00 RSI: ffffffff823833c3 RDI: 0000000000000001 RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: fefefefefefefeff R12: ffff888138ba8000 R13: 0000000000000106 R14: ffff8881029bd908 R15: ffff888126560000 FS: 0000000000000000(0000) GS:ffff88813bd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f604a5dae38 CR3: 0000000101dac002 CR4: 0000000000170ef0 Call Trace: __mptcp_clean_una_wakeup net/mptcp/protocol.c:1055 [inline] mptcp_clean_una_wakeup net/mptcp/protocol.c:1062 [inline] __mptcp_retrans+0x7f/0x7e0 net/mptcp/protocol.c:2615 mptcp_worker+0x434/0x740 net/mptcp/protocol.c:2767 process_one_work+0x1e0/0x560 kernel/workqueue.c:3254 process_scheduled_works kernel/workqueue.c:3335 [inline] worker_thread+0x3c7/0x640 kernel/workqueue.c:3416 kthread+0x121/0x170 kernel/kthread.c:388 ret_from_fork+0x44/0x50 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:243 When fallback to TCP happens early on a client socket, snd_nxt is not yet initialized and any incoming ack will copy such value into snd_una. If the mptcp worker (dumbly) tries mptcp-level re-injection after such ack, that would unconditionally trigger a send buffer cleanup using 'bad' snd_una values. We could easily disable re-injection for fallback sockets, but such dumb behavior already helped catching a few subtle issues and a very low to zero impact in practice. Instead address the issue always initializing snd_nxt (and write_seq, for consistency) at connect time. Fixes: 8fd738049ac3 ("mptcp: fallback in case of simultaneous connect") Cc: stable@vger.kernel.org Reported-by: Christoph Paasch Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/485 Tested-by: Christoph Paasch Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240429-upstream-net-20240429-mptcp-snd_nxt-init-connect-v1-1-59ceac0a7dcb@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 7e74b812e366a..965eb69dc5de3 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3723,6 +3723,9 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_TOKENFALLBACKINIT); mptcp_subflow_early_fallback(msk, subflow); } + + WRITE_ONCE(msk->write_seq, subflow->idsn); + WRITE_ONCE(msk->snd_nxt, subflow->idsn); if (likely(!__mptcp_check_fallback(msk))) MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVE); From 6f0c228ed9184287031a66b46a79e5a3d2e73a86 Mon Sep 17 00:00:00 2001 From: Sung Joon Kim Date: Thu, 18 Apr 2024 16:59:36 -0400 Subject: [PATCH 513/606] drm/amd/display: Disable seamless boot on 128b/132b encoding [why] preOS will not support display mode programming and link training for UHBR rates. [how] If we detect a sink that's UHBR capable, disable seamless boot Reviewed-by: Anthony Koo Acked-by: Wayne Lin Signed-off-by: Sung Joon Kim Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 03b554e912a20..d68c83e40d4d6 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1801,6 +1801,9 @@ bool dc_validate_boot_timing(const struct dc *dc, return false; } + if (link->dpcd_caps.channel_coding_cap.bits.DP_128b_132b_SUPPORTED) + return false; + if (dc->link_srv->edp_is_ilr_optimization_required(link, crtc_timing)) { DC_LOG_EVENT_LINK_TRAINING("Seamless boot disabled to optimize eDP link rate\n"); return false; From f5b9053398e70a0c10aa9cb4dd5910ab6bc457c5 Mon Sep 17 00:00:00 2001 From: Lancelot SIX Date: Wed, 10 Apr 2024 14:14:13 +0100 Subject: [PATCH 514/606] drm/amdkfd: Flush the process wq before creating a kfd_process There is a race condition when re-creating a kfd_process for a process. This has been observed when a process under the debugger executes exec(3). In this scenario: - The process executes exec. - This will eventually release the process's mm, which will cause the kfd_process object associated with the process to be freed (kfd_process_free_notifier decrements the reference count to the kfd_process to 0). This causes kfd_process_ref_release to enqueue kfd_process_wq_release to the kfd_process_wq. - The debugger receives the PTRACE_EVENT_EXEC notification, and tries to re-enable AMDGPU traps (KFD_IOC_DBG_TRAP_ENABLE). - When handling this request, KFD tries to re-create a kfd_process. This eventually calls kfd_create_process and kobject_init_and_add. At this point the call to kobject_init_and_add can fail because the old kfd_process.kobj has not been freed yet by kfd_process_wq_release. This patch proposes to avoid this race by making sure to drain kfd_process_wq before creating a new kfd_process object. This way, we know that any cleanup task is done executing when we reach kobject_init_and_add. Signed-off-by: Lancelot SIX Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 58c1fe5421934..451bb058cc620 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -829,6 +829,14 @@ struct kfd_process *kfd_create_process(struct task_struct *thread) if (process) { pr_debug("Process already found\n"); } else { + /* If the process just called exec(3), it is possible that the + * cleanup of the kfd_process (following the release of the mm + * of the old process image) is still in the cleanup work queue. + * Make sure to drain any job before trying to recreate any + * resource for this process. + */ + flush_workqueue(kfd_process_wq); + process = create_process(thread); if (IS_ERR(process)) goto out; From 705d0480e6ae5a73ca3a9c04316d0678e19a46ed Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Mon, 29 Apr 2024 14:29:47 +0200 Subject: [PATCH 515/606] drm/amdgpu: fix doorbell regression This patch adds a missed handling of PL domain doorbell while handling VRAM faults. Cc: Christian Koenig Cc: Alex Deucher Fixes: a6ff969fe9cb ("drm/amdgpu: fix visible VRAM handling during faults") Reviewed-by: Christian Koenig Signed-off-by: Shashank Sharma Signed-off-by: Arvind Yadav Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 4ffee55452650..109fe557a02bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -419,7 +419,7 @@ bool amdgpu_res_cpu_visible(struct amdgpu_device *adev, return false; if (res->mem_type == TTM_PL_SYSTEM || res->mem_type == TTM_PL_TT || - res->mem_type == AMDGPU_PL_PREEMPT) + res->mem_type == AMDGPU_PL_PREEMPT || res->mem_type == AMDGPU_PL_DOORBELL) return true; if (res->mem_type != TTM_PL_VRAM) From 4756fa529b2f12b7cb8f21fe229b0f6f47190829 Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Tue, 30 Apr 2024 19:27:05 +0100 Subject: [PATCH 516/606] spi: fix null pointer dereference within spi_sync If spi_sync() is called with the non-empty queue and the same spi_message is then reused, the complete callback for the message remains set while the context is cleared, leading to a null pointer dereference when the callback is invoked from spi_finalize_current_message(). With function inlining disabled, the call stack might look like this: _raw_spin_lock_irqsave from complete_with_flags+0x18/0x58 complete_with_flags from spi_complete+0x8/0xc spi_complete from spi_finalize_current_message+0xec/0x184 spi_finalize_current_message from spi_transfer_one_message+0x2a8/0x474 spi_transfer_one_message from __spi_pump_transfer_message+0x104/0x230 __spi_pump_transfer_message from __spi_transfer_message_noqueue+0x30/0xc4 __spi_transfer_message_noqueue from __spi_sync+0x204/0x248 __spi_sync from spi_sync+0x24/0x3c spi_sync from mcp251xfd_regmap_crc_read+0x124/0x28c [mcp251xfd] mcp251xfd_regmap_crc_read [mcp251xfd] from _regmap_raw_read+0xf8/0x154 _regmap_raw_read from _regmap_bus_read+0x44/0x70 _regmap_bus_read from _regmap_read+0x60/0xd8 _regmap_read from regmap_read+0x3c/0x5c regmap_read from mcp251xfd_alloc_can_err_skb+0x1c/0x54 [mcp251xfd] mcp251xfd_alloc_can_err_skb [mcp251xfd] from mcp251xfd_irq+0x194/0xe70 [mcp251xfd] mcp251xfd_irq [mcp251xfd] from irq_thread_fn+0x1c/0x78 irq_thread_fn from irq_thread+0x118/0x1f4 irq_thread from kthread+0xd8/0xf4 kthread from ret_from_fork+0x14/0x28 Fix this by also setting message->complete to NULL when the transfer is complete. Fixes: ae7d2346dc89 ("spi: Don't use the message queue if possible in spi_sync") Signed-off-by: Mans Rullgard Link: https://lore.kernel.org/r/20240430182705.13019-1-mans@mansr.com Signed-off-by: Mark Brown --- drivers/spi/spi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index ff75838c1b5df..a2c467d9e92f5 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -4523,6 +4523,7 @@ static int __spi_sync(struct spi_device *spi, struct spi_message *message) wait_for_completion(&done); status = message->status; } + message->complete = NULL; message->context = NULL; return status; From 02b670c1f88e78f42a6c5aee155c7b26960ca054 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 29 Apr 2024 10:00:51 +0200 Subject: [PATCH 517/606] x86/mm: Remove broken vsyscall emulation code from the page fault code The syzbot-reported stack trace from hell in this discussion thread actually has three nested page faults: https://lore.kernel.org/r/000000000000d5f4fc0616e816d4@google.com ... and I think that's actually the important thing here: - the first page fault is from user space, and triggers the vsyscall emulation. - the second page fault is from __do_sys_gettimeofday(), and that should just have caused the exception that then sets the return value to -EFAULT - the third nested page fault is due to _raw_spin_unlock_irqrestore() -> preempt_schedule() -> trace_sched_switch(), which then causes a BPF trace program to run, which does that bpf_probe_read_compat(), which causes that page fault under pagefault_disable(). It's quite the nasty backtrace, and there's a lot going on. The problem is literally the vsyscall emulation, which sets current->thread.sig_on_uaccess_err = 1; and that causes the fixup_exception() code to send the signal *despite* the exception being caught. And I think that is in fact completely bogus. It's completely bogus exactly because it sends that signal even when it *shouldn't* be sent - like for the BPF user mode trace gathering. In other words, I think the whole "sig_on_uaccess_err" thing is entirely broken, because it makes any nested page-faults do all the wrong things. Now, arguably, I don't think anybody should enable vsyscall emulation any more, but this test case clearly does. I think we should just make the "send SIGSEGV" be something that the vsyscall emulation does on its own, not this broken per-thread state for something that isn't actually per thread. The x86 page fault code actually tried to deal with the "incorrect nesting" by having that: if (in_interrupt()) return; which ignores the sig_on_uaccess_err case when it happens in interrupts, but as shown by this example, these nested page faults do not need to be about interrupts at all. IOW, I think the only right thing is to remove that horrendously broken code. The attached patch looks like the ObviouslyCorrect(tm) thing to do. NOTE! This broken code goes back to this commit in 2011: 4fc3490114bb ("x86-64: Set siginfo and context on vsyscall emulation faults") ... and back then the reason was to get all the siginfo details right. Honestly, I do not for a moment believe that it's worth getting the siginfo details right here, but part of the commit says: This fixes issues with UML when vsyscall=emulate. ... and so my patch to remove this garbage will probably break UML in this situation. I do not believe that anybody should be running with vsyscall=emulate in 2024 in the first place, much less if you are doing things like UML. But let's see if somebody screams. Reported-and-tested-by: syzbot+83e7f982ca045ab4405c@syzkaller.appspotmail.com Signed-off-by: Linus Torvalds Signed-off-by: Ingo Molnar Tested-by: Jiri Olsa Acked-by: Andy Lutomirski Link: https://lore.kernel.org/r/CAHk-=wh9D6f7HUkDgZHKmDCHUQmp+Co89GP+b8+z+G56BKeyNg@mail.gmail.com --- arch/x86/entry/vsyscall/vsyscall_64.c | 28 ++--------------------- arch/x86/include/asm/processor.h | 1 - arch/x86/mm/fault.c | 33 +-------------------------- 3 files changed, 3 insertions(+), 59 deletions(-) diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index a3c0df11d0e6d..2fb7d53cf3338 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -98,11 +98,6 @@ static int addr_to_vsyscall_nr(unsigned long addr) static bool write_ok_or_segv(unsigned long ptr, size_t size) { - /* - * XXX: if access_ok, get_user, and put_user handled - * sig_on_uaccess_err, this could go away. - */ - if (!access_ok((void __user *)ptr, size)) { struct thread_struct *thread = ¤t->thread; @@ -120,10 +115,8 @@ static bool write_ok_or_segv(unsigned long ptr, size_t size) bool emulate_vsyscall(unsigned long error_code, struct pt_regs *regs, unsigned long address) { - struct task_struct *tsk; unsigned long caller; int vsyscall_nr, syscall_nr, tmp; - int prev_sig_on_uaccess_err; long ret; unsigned long orig_dx; @@ -172,8 +165,6 @@ bool emulate_vsyscall(unsigned long error_code, goto sigsegv; } - tsk = current; - /* * Check for access_ok violations and find the syscall nr. * @@ -234,12 +225,8 @@ bool emulate_vsyscall(unsigned long error_code, goto do_ret; /* skip requested */ /* - * With a real vsyscall, page faults cause SIGSEGV. We want to - * preserve that behavior to make writing exploits harder. + * With a real vsyscall, page faults cause SIGSEGV. */ - prev_sig_on_uaccess_err = current->thread.sig_on_uaccess_err; - current->thread.sig_on_uaccess_err = 1; - ret = -EFAULT; switch (vsyscall_nr) { case 0: @@ -262,23 +249,12 @@ bool emulate_vsyscall(unsigned long error_code, break; } - current->thread.sig_on_uaccess_err = prev_sig_on_uaccess_err; - check_fault: if (ret == -EFAULT) { /* Bad news -- userspace fed a bad pointer to a vsyscall. */ warn_bad_vsyscall(KERN_INFO, regs, "vsyscall fault (exploit attempt?)"); - - /* - * If we failed to generate a signal for any reason, - * generate one here. (This should be impossible.) - */ - if (WARN_ON_ONCE(!sigismember(&tsk->pending.signal, SIGBUS) && - !sigismember(&tsk->pending.signal, SIGSEGV))) - goto sigsegv; - - return true; /* Don't emulate the ret. */ + goto sigsegv; } regs->ax = ret; diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 811548f131f4e..78e51b0d6433d 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -472,7 +472,6 @@ struct thread_struct { unsigned long iopl_emul; unsigned int iopl_warn:1; - unsigned int sig_on_uaccess_err:1; /* * Protection Keys Register for Userspace. Loaded immediately on diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 622d12ec7f085..bba4e020dd646 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -723,39 +723,8 @@ kernelmode_fixup_or_oops(struct pt_regs *regs, unsigned long error_code, WARN_ON_ONCE(user_mode(regs)); /* Are we prepared to handle this kernel fault? */ - if (fixup_exception(regs, X86_TRAP_PF, error_code, address)) { - /* - * Any interrupt that takes a fault gets the fixup. This makes - * the below recursive fault logic only apply to a faults from - * task context. - */ - if (in_interrupt()) - return; - - /* - * Per the above we're !in_interrupt(), aka. task context. - * - * In this case we need to make sure we're not recursively - * faulting through the emulate_vsyscall() logic. - */ - if (current->thread.sig_on_uaccess_err && signal) { - sanitize_error_code(address, &error_code); - - set_signal_archinfo(address, error_code); - - if (si_code == SEGV_PKUERR) { - force_sig_pkuerr((void __user *)address, pkey); - } else { - /* XXX: hwpoison faults will set the wrong code. */ - force_sig_fault(signal, si_code, (void __user *)address); - } - } - - /* - * Barring that, we can do the fixup and be happy. - */ + if (fixup_exception(regs, X86_TRAP_PF, error_code, address)) return; - } /* * AMD erratum #91 manifests as a spurious page fault on a PREFETCH From a4499998c7f4dfa15ddba18b266e187cf29b7c76 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Mon, 25 Mar 2024 09:43:53 +0100 Subject: [PATCH 518/606] s390/zcrypt: Fix wrong format string in debug feature printout Fix wrong format string debug feature: %04x was used to print out a 32 bit value. - changed to %08x. Signed-off-by: Harald Freudenberger Reviewed-by: Ingo Franzki Reviewed-by: Holger Dengler Signed-off-by: Alexander Gordeev --- drivers/s390/crypto/zcrypt_ep11misc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/crypto/zcrypt_ep11misc.c b/drivers/s390/crypto/zcrypt_ep11misc.c index eb7f5489ccf95..a1b55fdf35104 100644 --- a/drivers/s390/crypto/zcrypt_ep11misc.c +++ b/drivers/s390/crypto/zcrypt_ep11misc.c @@ -556,7 +556,7 @@ static int check_reply_pl(const u8 *pl, const char *func) pl += 2; ret = *((u32 *)pl); if (ret != 0) { - ZCRYPT_DBF_ERR("%s return value 0x%04x != 0\n", func, ret); + ZCRYPT_DBF_ERR("%s return value 0x%08x != 0\n", func, ret); return -EIO; } From c0e983b697f45f5e413b00c94037e56b7870cfcd Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Mon, 25 Mar 2024 09:59:19 +0100 Subject: [PATCH 519/606] s390/zcrypt: Handle ep11 cprb return code An EP11 reply cprb contains a field ret_code which may hold an error code different than the error code stored in the payload of the cprb. As of now all the EP11 misc functions do not evaluate this field but focus on the error code in the payload. Before checking the payload error, first the cprb error field should be evaluated which is introduced with this patch. If the return code value 0x000c0003 is seen, this indicates a busy situation which is reflected by -EBUSY in the zcrpyt_ep11misc.c low level function. A higher level caller should consider to retry after waiting a dedicated duration (say 1 second). Fixes: ed6776c96c60 ("s390/crypto: remove retry loop with sleep from PAES pkey invocation") Signed-off-by: Harald Freudenberger Reviewed-by: Ingo Franzki Reviewed-by: Holger Dengler Signed-off-by: Alexander Gordeev --- drivers/s390/crypto/zcrypt_ep11misc.c | 46 +++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/drivers/s390/crypto/zcrypt_ep11misc.c b/drivers/s390/crypto/zcrypt_ep11misc.c index a1b55fdf35104..9bcf8fc69ebe4 100644 --- a/drivers/s390/crypto/zcrypt_ep11misc.c +++ b/drivers/s390/crypto/zcrypt_ep11misc.c @@ -563,6 +563,22 @@ static int check_reply_pl(const u8 *pl, const char *func) return 0; } +/* Check ep11 reply cprb, return 0 or suggested errno value. */ +static int check_reply_cprb(const struct ep11_cprb *rep, const char *func) +{ + /* check ep11 reply return code field */ + if (rep->ret_code) { + ZCRYPT_DBF_ERR("%s ep11 reply ret_code=0x%08x\n", __func__, + rep->ret_code); + if (rep->ret_code == 0x000c0003) + return -EBUSY; + else + return -EIO; + } + + return 0; +} + /* * Helper function which does an ep11 query with given query type. */ @@ -627,6 +643,12 @@ static int ep11_query_info(u16 cardnr, u16 domain, u32 query_type, goto out; } + /* check ep11 reply cprb */ + rc = check_reply_cprb(rep, __func__); + if (rc) + goto out; + + /* check payload */ rc = check_reply_pl((u8 *)rep_pl, __func__); if (rc) goto out; @@ -877,6 +899,12 @@ static int _ep11_genaeskey(u16 card, u16 domain, goto out; } + /* check ep11 reply cprb */ + rc = check_reply_cprb(rep, __func__); + if (rc) + goto out; + + /* check payload */ rc = check_reply_pl((u8 *)rep_pl, __func__); if (rc) goto out; @@ -1028,6 +1056,12 @@ static int ep11_cryptsingle(u16 card, u16 domain, goto out; } + /* check ep11 reply cprb */ + rc = check_reply_cprb(rep, __func__); + if (rc) + goto out; + + /* check payload */ rc = check_reply_pl((u8 *)rep_pl, __func__); if (rc) goto out; @@ -1185,6 +1219,12 @@ static int _ep11_unwrapkey(u16 card, u16 domain, goto out; } + /* check ep11 reply cprb */ + rc = check_reply_cprb(rep, __func__); + if (rc) + goto out; + + /* check payload */ rc = check_reply_pl((u8 *)rep_pl, __func__); if (rc) goto out; @@ -1339,6 +1379,12 @@ static int _ep11_wrapkey(u16 card, u16 domain, goto out; } + /* check ep11 reply cprb */ + rc = check_reply_cprb(rep, __func__); + if (rc) + goto out; + + /* check payload */ rc = check_reply_pl((u8 *)rep_pl, __func__); if (rc) goto out; From da5658320bc962634c36ece6052c5a543493e3cf Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Thu, 25 Apr 2024 16:22:51 +0200 Subject: [PATCH 520/606] s390/zcrypt: Use EBUSY to indicate temp unavailability Use -EBUSY instead of -EAGAIN in zcrypt_ccamisc.c in cases where the CCA card returns 8/2290 to indicate a temporarily unavailability of this function. Fixes: ed6776c96c60 ("s390/crypto: remove retry loop with sleep from PAES pkey invocation") Signed-off-by: Harald Freudenberger Reviewed-by: Ingo Franzki Reviewed-by: Holger Dengler Signed-off-by: Alexander Gordeev --- drivers/s390/crypto/zcrypt_ccamisc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/s390/crypto/zcrypt_ccamisc.c b/drivers/s390/crypto/zcrypt_ccamisc.c index 0a3a678ffc7ee..6087547328ce9 100644 --- a/drivers/s390/crypto/zcrypt_ccamisc.c +++ b/drivers/s390/crypto/zcrypt_ccamisc.c @@ -658,7 +658,7 @@ int cca_sec2protkey(u16 cardnr, u16 domain, (int)prepcblk->ccp_rtcode, (int)prepcblk->ccp_rscode); if (prepcblk->ccp_rtcode == 8 && prepcblk->ccp_rscode == 2290) - rc = -EAGAIN; + rc = -EBUSY; else rc = -EIO; goto out; @@ -1263,7 +1263,7 @@ int cca_cipher2protkey(u16 cardnr, u16 domain, const u8 *ckey, (int)prepcblk->ccp_rtcode, (int)prepcblk->ccp_rscode); if (prepcblk->ccp_rtcode == 8 && prepcblk->ccp_rscode == 2290) - rc = -EAGAIN; + rc = -EBUSY; else rc = -EIO; goto out; @@ -1426,7 +1426,7 @@ int cca_ecc2protkey(u16 cardnr, u16 domain, const u8 *key, (int)prepcblk->ccp_rtcode, (int)prepcblk->ccp_rscode); if (prepcblk->ccp_rtcode == 8 && prepcblk->ccp_rscode == 2290) - rc = -EAGAIN; + rc = -EBUSY; else rc = -EIO; goto out; From 7bbe449d0bdb68892cc67e9f5f1bfa106a3588d5 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Thu, 25 Apr 2024 16:29:48 +0200 Subject: [PATCH 521/606] s390/paes: Reestablish retry loop in paes With commit ed6776c96c60 ("s390/crypto: remove retry loop with sleep from PAES pkey invocation") the retry loop to retry derivation of a protected key from a secure key has been removed. This was based on the assumption that theses retries are not needed any more as proper retries are done in the zcrypt layer. However, tests have revealed that there exist some cases with master key change in the HSM and immediately (< 1 second) attempt to derive a protected key from a secure key with exact this HSM may eventually fail. The low level functions in zcrypt_ccamisc.c and zcrypt_ep11misc.c detect and report this temporary failure and report it to the caller as -EBUSY. The re-established retry loop in the paes implementation catches exactly this -EBUSY and eventually may run some retries. Fixes: ed6776c96c60 ("s390/crypto: remove retry loop with sleep from PAES pkey invocation") Signed-off-by: Harald Freudenberger Reviewed-by: Ingo Franzki Reviewed-by: Holger Dengler Signed-off-by: Alexander Gordeev --- arch/s390/crypto/paes_s390.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c index 99f7e1f2b70af..99ea3f12c5d2a 100644 --- a/arch/s390/crypto/paes_s390.c +++ b/arch/s390/crypto/paes_s390.c @@ -125,8 +125,19 @@ struct s390_pxts_ctx { static inline int __paes_keyblob2pkey(struct key_blob *kb, struct pkey_protkey *pk) { - return pkey_keyblob2pkey(kb->key, kb->keylen, - pk->protkey, &pk->len, &pk->type); + int i, ret = -EIO; + + /* try three times in case of busy card */ + for (i = 0; ret && i < 3; i++) { + if (ret == -EBUSY && in_task()) { + if (msleep_interruptible(1000)) + return -EINTR; + } + ret = pkey_keyblob2pkey(kb->key, kb->keylen, + pk->protkey, &pk->len, &pk->type); + } + + return ret; } static inline int __paes_convert_key(struct s390_paes_ctx *ctx) From 863fe60ed27f2c85172654a63c5b827e72c8b2e6 Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Tue, 16 Apr 2024 13:49:23 +0530 Subject: [PATCH 522/606] nvme: find numa distance only if controller has valid numa id On system where native nvme multipath is configured and iopolicy is set to numa but the nvme controller numa node id is undefined or -1 (NUMA_NO_NODE) then avoid calculating node distance for finding optimal io path. In such case we may access numa distance table with invalid index and that may potentially refer to incorrect memory. So this patch ensures that if the nvme controller numa node id is -1 then instead of calculating node distance for finding optimal io path, we set the numa node distance of such controller to default 10 (LOCAL_DISTANCE). Link: https://lore.kernel.org/all/20240413090614.678353-1-nilay@linux.ibm.com/ Signed-off-by: Nilay Shroff Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Signed-off-by: Keith Busch --- drivers/nvme/host/multipath.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 5397fb428b242..d16e976ae1a47 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -247,7 +247,8 @@ static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node) if (nvme_path_is_disabled(ns)) continue; - if (READ_ONCE(head->subsys->iopolicy) == NVME_IOPOLICY_NUMA) + if (ns->ctrl->numa_node != NUMA_NO_NODE && + READ_ONCE(head->subsys->iopolicy) == NVME_IOPOLICY_NUMA) distance = node_distance(node, ns->ctrl->numa_node); else distance = LOCAL_DISTANCE; From 46b8f9f74f6d500871985e22eb19560b21f3bc81 Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Wed, 10 Apr 2024 11:48:41 +0200 Subject: [PATCH 523/606] nvmet-auth: return the error code to the nvmet_auth_host_hash() callers If the nvmet_auth_host_hash() function fails, the error code should be returned to its callers. Signed-off-by: Maurizio Lombardi Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Signed-off-by: Keith Busch --- drivers/nvme/target/auth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/auth.c b/drivers/nvme/target/auth.c index 3ddbc3880cac8..9e51c064b0728 100644 --- a/drivers/nvme/target/auth.c +++ b/drivers/nvme/target/auth.c @@ -370,7 +370,7 @@ int nvmet_auth_host_hash(struct nvmet_req *req, u8 *response, nvme_auth_free_key(transformed_key); out_free_tfm: crypto_free_shash(shash_tfm); - return 0; + return ret; } int nvmet_auth_ctrl_hash(struct nvmet_req *req, u8 *response, From 445f9119e70368ccc964575c2a6d3176966a9d65 Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Wed, 10 Apr 2024 11:48:42 +0200 Subject: [PATCH 524/606] nvmet-auth: replace pr_debug() with pr_err() to report an error. In nvmet_auth_host_hash(), if a mismatch is detected in the hash length the kernel should print an error. Signed-off-by: Maurizio Lombardi Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Signed-off-by: Keith Busch --- drivers/nvme/target/auth.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/target/auth.c b/drivers/nvme/target/auth.c index 9e51c064b0728..fb518b00f71f6 100644 --- a/drivers/nvme/target/auth.c +++ b/drivers/nvme/target/auth.c @@ -285,9 +285,9 @@ int nvmet_auth_host_hash(struct nvmet_req *req, u8 *response, } if (shash_len != crypto_shash_digestsize(shash_tfm)) { - pr_debug("%s: hash len mismatch (len %d digest %d)\n", - __func__, shash_len, - crypto_shash_digestsize(shash_tfm)); + pr_err("%s: hash len mismatch (len %d digest %d)\n", + __func__, shash_len, + crypto_shash_digestsize(shash_tfm)); ret = -EINVAL; goto out_free_tfm; } From 25bb3534ee21e39eb9301c4edd7182eb83cb0d07 Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Thu, 25 Apr 2024 19:33:00 +0530 Subject: [PATCH 525/606] nvme: cancel pending I/O if nvme controller is in terminal state While I/O is running, if the pci bus error occurs then in-flight I/O can not complete. Worst, if at this time, user (logically) hot-unplug the nvme disk then the nvme_remove() code path can't forward progress until in-flight I/O is cancelled. So these sequence of events may potentially hang hot-unplug code path indefinitely. This patch helps cancel the pending/in-flight I/O from the nvme request timeout handler in case the nvme controller is in the terminal (DEAD/DELETING/DELETING_NOIO) state and that helps nvme_remove() code path forward progress and finish successfully. Link: https://lore.kernel.org/all/199be893-5dfa-41e5-b6f2-40ac90ebccc4@linux.ibm.com/ Signed-off-by: Nilay Shroff Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 21 --------------------- drivers/nvme/host/nvme.h | 21 +++++++++++++++++++++ drivers/nvme/host/pci.c | 8 +++++++- 3 files changed, 28 insertions(+), 22 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index e4bec200ebeb2..095f59e7aa937 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -628,27 +628,6 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, } EXPORT_SYMBOL_GPL(nvme_change_ctrl_state); -/* - * Returns true for sink states that can't ever transition back to live. - */ -static bool nvme_state_terminal(struct nvme_ctrl *ctrl) -{ - switch (nvme_ctrl_state(ctrl)) { - case NVME_CTRL_NEW: - case NVME_CTRL_LIVE: - case NVME_CTRL_RESETTING: - case NVME_CTRL_CONNECTING: - return false; - case NVME_CTRL_DELETING: - case NVME_CTRL_DELETING_NOIO: - case NVME_CTRL_DEAD: - return true; - default: - WARN_ONCE(1, "Unhandled ctrl state:%d", ctrl->state); - return true; - } -} - /* * Waits for the controller state to be resetting, or returns false if it is * not possible to ever transition to that state. diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index d0ed64dc7380e..b564c5f1450c6 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -741,6 +741,27 @@ static inline bool nvme_is_aen_req(u16 qid, __u16 command_id) nvme_tag_from_cid(command_id) >= NVME_AQ_BLK_MQ_DEPTH; } +/* + * Returns true for sink states that can't ever transition back to live. + */ +static inline bool nvme_state_terminal(struct nvme_ctrl *ctrl) +{ + switch (nvme_ctrl_state(ctrl)) { + case NVME_CTRL_NEW: + case NVME_CTRL_LIVE: + case NVME_CTRL_RESETTING: + case NVME_CTRL_CONNECTING: + return false; + case NVME_CTRL_DELETING: + case NVME_CTRL_DELETING_NOIO: + case NVME_CTRL_DEAD: + return true; + default: + WARN_ONCE(1, "Unhandled ctrl state:%d", ctrl->state); + return true; + } +} + void nvme_complete_rq(struct request *req); void nvme_complete_batch_req(struct request *req); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 8e0bb9692685d..e393f6947ce49 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1286,6 +1286,9 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req) u32 csts = readl(dev->bar + NVME_REG_CSTS); u8 opcode; + if (nvme_state_terminal(&dev->ctrl)) + goto disable; + /* If PCI error recovery process is happening, we cannot reset or * the recovery mechanism will surely fail. */ @@ -1390,8 +1393,11 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req) return BLK_EH_RESET_TIMER; disable: - if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING)) + if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING)) { + if (nvme_state_terminal(&dev->ctrl)) + nvme_dev_disable(dev, true); return BLK_EH_DONE; + } nvme_dev_disable(dev, false); if (nvme_try_sched_reset(&dev->ctrl)) From 6825bdde44340c5a9121f6d6fa25cc885bd9e821 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 28 Apr 2024 11:49:49 +0300 Subject: [PATCH 526/606] nvmet-tcp: fix possible memory leak when tearing down a controller When we teardown the controller, we wait for pending I/Os to complete (sq->ref on all queues to drop to zero) and then we go over the commands, and free their command buffers in case they are still fetching data from the host (e.g. processing nvme writes) and have yet to take a reference on the sq. However, we may miss the case where commands have failed before executing and are queued for sending a response, but will never occur because the queue socket is already down. In this case we may miss deallocating command buffers. Solve this by freeing all commands buffers as nvmet_tcp_free_cmd_buffers is idempotent anyways. Reported-by: Yi Zhang Tested-by: Yi Zhang Signed-off-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/target/tcp.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index a5422e2c979ad..380f22ee3ebba 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -348,6 +348,7 @@ static int nvmet_tcp_check_ddgst(struct nvmet_tcp_queue *queue, void *pdu) return 0; } +/* If cmd buffers are NULL, no operation is performed */ static void nvmet_tcp_free_cmd_buffers(struct nvmet_tcp_cmd *cmd) { kfree(cmd->iov); @@ -1581,13 +1582,9 @@ static void nvmet_tcp_free_cmd_data_in_buffers(struct nvmet_tcp_queue *queue) struct nvmet_tcp_cmd *cmd = queue->cmds; int i; - for (i = 0; i < queue->nr_cmds; i++, cmd++) { - if (nvmet_tcp_need_data_in(cmd)) - nvmet_tcp_free_cmd_buffers(cmd); - } - - if (!queue->nr_cmds && nvmet_tcp_need_data_in(&queue->connect)) - nvmet_tcp_free_cmd_buffers(&queue->connect); + for (i = 0; i < queue->nr_cmds; i++, cmd++) + nvmet_tcp_free_cmd_buffers(cmd); + nvmet_tcp_free_cmd_buffers(&queue->connect); } static void nvmet_tcp_release_queue_work(struct work_struct *w) From 505363957fad35f7aed9a2b0d8dad73451a80fb5 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 28 Apr 2024 12:25:40 +0300 Subject: [PATCH 527/606] nvmet: fix nvme status code when namespace is disabled If the user disabled a nvmet namespace, it is removed from the subsystem namespaces list. When nvmet processes a command directed to an nsid that was disabled, it cannot differentiate between a nsid that is disabled vs. a non-existent namespace, and resorts to return NVME_SC_INVALID_NS with the dnr bit set. This translates to a non-retryable status for the host, which translates to a user error. We should expect disabled namespaces to not cause an I/O error in a multipath environment. Address this by searching a configfs item for the namespace nvmet failed to find, and if we found one, conclude that the namespace is disabled (perhaps temporarily). Return NVME_SC_INTERNAL_PATH_ERROR in this case and keep DNR bit cleared. Reported-by: Jirong Feng Tested-by: Jirong Feng Signed-off-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/target/configfs.c | 13 +++++++++++++ drivers/nvme/target/core.c | 5 ++++- drivers/nvme/target/nvmet.h | 1 + 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index a2325330bf221..89a001101a7fc 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -754,6 +754,19 @@ static struct configfs_attribute *nvmet_ns_attrs[] = { NULL, }; +bool nvmet_subsys_nsid_exists(struct nvmet_subsys *subsys, u32 nsid) +{ + struct config_item *ns_item; + char name[4] = {}; + + if (sprintf(name, "%u", nsid) <= 0) + return false; + mutex_lock(&subsys->namespaces_group.cg_subsys->su_mutex); + ns_item = config_group_find_item(&subsys->namespaces_group, name); + mutex_unlock(&subsys->namespaces_group.cg_subsys->su_mutex); + return ns_item != NULL; +} + static void nvmet_ns_release(struct config_item *item) { struct nvmet_ns *ns = to_nvmet_ns(item); diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 8860a3eb71ec8..e06013c5dace9 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -437,10 +437,13 @@ void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl) u16 nvmet_req_find_ns(struct nvmet_req *req) { u32 nsid = le32_to_cpu(req->cmd->common.nsid); + struct nvmet_subsys *subsys = nvmet_req_subsys(req); - req->ns = xa_load(&nvmet_req_subsys(req)->namespaces, nsid); + req->ns = xa_load(&subsys->namespaces, nsid); if (unlikely(!req->ns)) { req->error_loc = offsetof(struct nvme_common_command, nsid); + if (nvmet_subsys_nsid_exists(subsys, nsid)) + return NVME_SC_INTERNAL_PATH_ERROR; return NVME_SC_INVALID_NS | NVME_SC_DNR; } diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index f460728e1df1f..c1306de1f4ddf 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -543,6 +543,7 @@ void nvmet_subsys_disc_changed(struct nvmet_subsys *subsys, struct nvmet_host *host); void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type, u8 event_info, u8 log_page); +bool nvmet_subsys_nsid_exists(struct nvmet_subsys *subsys, u32 nsid); #define NVMET_MIN_QUEUE_SIZE 16 #define NVMET_MAX_QUEUE_SIZE 1024 From 50abcc179e0c9ca667feb223b26ea406d5c4c556 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 18 Apr 2024 12:39:45 +0200 Subject: [PATCH 528/606] nvme-tcp: strict pdu pacing to avoid send stalls on TLS TLS requires a strict pdu pacing via MSG_EOR to signal the end of a record and subsequent encryption. If we do not set MSG_EOR at the end of a sequence the record won't be closed, encryption doesn't start, and we end up with a send stall as the message will never be passed on to the TCP layer. So do not check for the queue status when TLS is enabled but rather make the MSG_MORE setting dependent on the current request only. Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/tcp.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index fdbcdcedcee99..28bc2f373cfa0 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -360,12 +360,18 @@ static inline void nvme_tcp_send_all(struct nvme_tcp_queue *queue) } while (ret > 0); } -static inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue) +static inline bool nvme_tcp_queue_has_pending(struct nvme_tcp_queue *queue) { return !list_empty(&queue->send_list) || !llist_empty(&queue->req_list); } +static inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue) +{ + return !nvme_tcp_tls(&queue->ctrl->ctrl) && + nvme_tcp_queue_has_pending(queue); +} + static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req, bool sync, bool last) { @@ -386,7 +392,7 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req, mutex_unlock(&queue->send_mutex); } - if (last && nvme_tcp_queue_more(queue)) + if (last && nvme_tcp_queue_has_pending(queue)) queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); } From 59c878cbcdd80ed39315573b3511d0acfd3501b5 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 27 Apr 2024 20:24:18 +0200 Subject: [PATCH 529/606] net: bridge: fix multicast-to-unicast with fraglist GSO Calling skb_copy on a SKB_GSO_FRAGLIST skb is not valid, since it returns an invalid linearized skb. This code only needs to change the ethernet header, so pskb_copy is the right function to call here. Fixes: 6db6f0eae605 ("bridge: multicast to unicast") Signed-off-by: Felix Fietkau Acked-by: Paolo Abeni Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_forward.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 7431f89e897b9..d7c35f55bd69f 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -266,7 +266,7 @@ static void maybe_deliver_addr(struct net_bridge_port *p, struct sk_buff *skb, if (skb->dev == p->dev && ether_addr_equal(src, addr)) return; - skb = skb_copy(skb, GFP_ATOMIC); + skb = pskb_copy(skb, GFP_ATOMIC); if (!skb) { DEV_STATS_INC(dev, tx_dropped); return; From d091e579b864fa790dd6a0cd537a22c383126681 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 27 Apr 2024 20:24:19 +0200 Subject: [PATCH 530/606] net: core: reject skb_copy(_expand) for fraglist GSO skbs SKB_GSO_FRAGLIST skbs must not be linearized, otherwise they become invalid. Return NULL if such an skb is passed to skb_copy or skb_copy_expand, in order to prevent a crash on a potential later call to skb_gso_segment. Fixes: 3a1296a38d0c ("net: Support GRO/GSO fraglist chaining.") Signed-off-by: Felix Fietkau Signed-off-by: David S. Miller --- net/core/skbuff.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b99127712e670..4096e679f61c7 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2123,11 +2123,17 @@ static inline int skb_alloc_rx_flag(const struct sk_buff *skb) struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) { - int headerlen = skb_headroom(skb); - unsigned int size = skb_end_offset(skb) + skb->data_len; - struct sk_buff *n = __alloc_skb(size, gfp_mask, - skb_alloc_rx_flag(skb), NUMA_NO_NODE); + struct sk_buff *n; + unsigned int size; + int headerlen; + + if (WARN_ON_ONCE(skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)) + return NULL; + headerlen = skb_headroom(skb); + size = skb_end_offset(skb) + skb->data_len; + n = __alloc_skb(size, gfp_mask, + skb_alloc_rx_flag(skb), NUMA_NO_NODE); if (!n) return NULL; @@ -2455,12 +2461,17 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, /* * Allocate the copy buffer */ - struct sk_buff *n = __alloc_skb(newheadroom + skb->len + newtailroom, - gfp_mask, skb_alloc_rx_flag(skb), - NUMA_NO_NODE); - int oldheadroom = skb_headroom(skb); int head_copy_len, head_copy_off; + struct sk_buff *n; + int oldheadroom; + + if (WARN_ON_ONCE(skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)) + return NULL; + oldheadroom = skb_headroom(skb); + n = __alloc_skb(newheadroom + skb->len + newtailroom, + gfp_mask, skb_alloc_rx_flag(skb), + NUMA_NO_NODE); if (!n) return NULL; From ab72d5945d8190f3b9ae16dafcf67655b458bf0e Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 30 Apr 2024 09:53:23 -0500 Subject: [PATCH 531/606] drm/amd/display: Disable panel replay by default for now Panel replay was enabled by default in commit 5950efe25ee0 ("drm/amd/display: Enable Panel Replay for static screen use case"), but it isn't working properly at least on some BOE and AUO panels. Instead of being static the screen is solid black when active. As it's a new feature that was just introduced that regressed VRR disable it for now so that problem can be properly root caused. Cc: Tom Chung Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3344 Fixes: 5950efe25ee0 ("drm/amd/display: Enable Panel Replay for static screen use case") Signed-off-by: Mario Limonciello Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index f3f94d109726d..d6e71aa808d88 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4537,15 +4537,18 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) /* Determine whether to enable Replay support by default. */ if (!(amdgpu_dc_debug_mask & DC_DISABLE_REPLAY)) { switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) { - case IP_VERSION(3, 1, 4): - case IP_VERSION(3, 1, 5): - case IP_VERSION(3, 1, 6): - case IP_VERSION(3, 2, 0): - case IP_VERSION(3, 2, 1): - case IP_VERSION(3, 5, 0): - case IP_VERSION(3, 5, 1): - replay_feature_enabled = true; - break; +/* + * Disabled by default due to https://gitlab.freedesktop.org/drm/amd/-/issues/3344 + * case IP_VERSION(3, 1, 4): + * case IP_VERSION(3, 1, 5): + * case IP_VERSION(3, 1, 6): + * case IP_VERSION(3, 2, 0): + * case IP_VERSION(3, 2, 1): + * case IP_VERSION(3, 5, 0): + * case IP_VERSION(3, 5, 1): + * replay_feature_enabled = true; + * break; + */ default: replay_feature_enabled = amdgpu_dc_feature_mask & DC_REPLAY_MASK; break; From 98957360563e7ffdc0c2b3a314655eff8bc1cb5a Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Tue, 23 Apr 2024 08:47:23 +0100 Subject: [PATCH 532/606] drm/xe/vm: prevent UAF in rebind_work_func() We flush the rebind worker during the vm close phase, however in places like preempt_fence_work_func() we seem to queue the rebind worker without first checking if the vm has already been closed. The concern here is the vm being closed with the worker flushed, but then being rearmed later, which looks like potential uaf, since there is no actual refcounting to track the queued worker. We can't take the vm->lock here in preempt_rebind_work_func() to first check if the vm is closed since that will deadlock, so instead flush the worker again when the vm refcount reaches zero. v2: - Grabbing vm->lock in the preempt worker creates a deadlock, so checking the closed state is tricky. Instead flush the worker when the refcount reaches zero. It should be impossible to queue the preempt worker without already holding vm ref. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1676 Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1591 Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1364 Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1304 Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1249 Signed-off-by: Matthew Auld Cc: Matthew Brost Cc: # v6.8+ Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240423074721.119633-4-matthew.auld@intel.com (cherry picked from commit 3d44d67c441a9fe6f81a1d705f7de009a32a5b35) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_vm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 3d4c8f342e215..32cd0c978aa28 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1606,6 +1606,9 @@ static void vm_destroy_work_func(struct work_struct *w) /* xe_vm_close_and_put was not called? */ xe_assert(xe, !vm->size); + if (xe_vm_in_preempt_fence_mode(vm)) + flush_work(&vm->preempt.rebind_work); + mutex_destroy(&vm->snap_mutex); if (!(vm->flags & XE_VM_FLAG_MIGRATION)) From 8953285d7bd63c12b007432a9b4587fa2fad49fb Mon Sep 17 00:00:00 2001 From: Jeffrey Altman Date: Fri, 19 Apr 2024 13:30:57 -0300 Subject: [PATCH 533/606] rxrpc: Clients must accept conn from any address The find connection logic of Transarc's Rx was modified in the mid-1990s to support multi-homed servers which might send a response packet from an address other than the destination address in the received packet. The rules for accepting a packet by an Rx initiator (RX_CLIENT_CONNECTION) were altered to permit acceptance of a packet from any address provided that the port number was unchanged and all of the connection identifiers matched (Epoch, CID, SecurityClass, ...). This change applies the same rules to the Linux implementation which makes it consistent with IBM AFS 3.6, Arla, OpenAFS and AuriStorFS. Fixes: 17926a79320a ("[AF_RXRPC]: Provide secure RxRPC sockets for use by userspace and kernel both") Signed-off-by: Jeffrey Altman Acked-by: David Howells Signed-off-by: Marc Dionne Link: https://lore.kernel.org/r/20240419163057.4141728-1-marc.dionne@auristor.com Signed-off-by: Jakub Kicinski --- net/rxrpc/conn_object.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 0af4642aeec4b..1539d315afe74 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -119,18 +119,13 @@ struct rxrpc_connection *rxrpc_find_client_connection_rcu(struct rxrpc_local *lo switch (srx->transport.family) { case AF_INET: if (peer->srx.transport.sin.sin_port != - srx->transport.sin.sin_port || - peer->srx.transport.sin.sin_addr.s_addr != - srx->transport.sin.sin_addr.s_addr) + srx->transport.sin.sin_port) goto not_found; break; #ifdef CONFIG_AF_RXRPC_IPV6 case AF_INET6: if (peer->srx.transport.sin6.sin6_port != - srx->transport.sin6.sin6_port || - memcmp(&peer->srx.transport.sin6.sin6_addr, - &srx->transport.sin6.sin6_addr, - sizeof(struct in6_addr)) != 0) + srx->transport.sin6.sin6_port) goto not_found; break; #endif From 080cbb890286cd794f1ee788bbc5463e2deb7c2b Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 30 Apr 2024 15:53:37 +0200 Subject: [PATCH 534/606] tipc: fix UAF in error path Sam Page (sam4k) working with Trend Micro Zero Day Initiative reported a UAF in the tipc_buf_append() error path: BUG: KASAN: slab-use-after-free in kfree_skb_list_reason+0x47e/0x4c0 linux/net/core/skbuff.c:1183 Read of size 8 at addr ffff88804d2a7c80 by task poc/8034 CPU: 1 PID: 8034 Comm: poc Not tainted 6.8.2 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-debian-1.16.0-5 04/01/2014 Call Trace: __dump_stack linux/lib/dump_stack.c:88 dump_stack_lvl+0xd9/0x1b0 linux/lib/dump_stack.c:106 print_address_description linux/mm/kasan/report.c:377 print_report+0xc4/0x620 linux/mm/kasan/report.c:488 kasan_report+0xda/0x110 linux/mm/kasan/report.c:601 kfree_skb_list_reason+0x47e/0x4c0 linux/net/core/skbuff.c:1183 skb_release_data+0x5af/0x880 linux/net/core/skbuff.c:1026 skb_release_all linux/net/core/skbuff.c:1094 __kfree_skb linux/net/core/skbuff.c:1108 kfree_skb_reason+0x12d/0x210 linux/net/core/skbuff.c:1144 kfree_skb linux/./include/linux/skbuff.h:1244 tipc_buf_append+0x425/0xb50 linux/net/tipc/msg.c:186 tipc_link_input+0x224/0x7c0 linux/net/tipc/link.c:1324 tipc_link_rcv+0x76e/0x2d70 linux/net/tipc/link.c:1824 tipc_rcv+0x45f/0x10f0 linux/net/tipc/node.c:2159 tipc_udp_recv+0x73b/0x8f0 linux/net/tipc/udp_media.c:390 udp_queue_rcv_one_skb+0xad2/0x1850 linux/net/ipv4/udp.c:2108 udp_queue_rcv_skb+0x131/0xb00 linux/net/ipv4/udp.c:2186 udp_unicast_rcv_skb+0x165/0x3b0 linux/net/ipv4/udp.c:2346 __udp4_lib_rcv+0x2594/0x3400 linux/net/ipv4/udp.c:2422 ip_protocol_deliver_rcu+0x30c/0x4e0 linux/net/ipv4/ip_input.c:205 ip_local_deliver_finish+0x2e4/0x520 linux/net/ipv4/ip_input.c:233 NF_HOOK linux/./include/linux/netfilter.h:314 NF_HOOK linux/./include/linux/netfilter.h:308 ip_local_deliver+0x18e/0x1f0 linux/net/ipv4/ip_input.c:254 dst_input linux/./include/net/dst.h:461 ip_rcv_finish linux/net/ipv4/ip_input.c:449 NF_HOOK linux/./include/linux/netfilter.h:314 NF_HOOK linux/./include/linux/netfilter.h:308 ip_rcv+0x2c5/0x5d0 linux/net/ipv4/ip_input.c:569 __netif_receive_skb_one_core+0x199/0x1e0 linux/net/core/dev.c:5534 __netif_receive_skb+0x1f/0x1c0 linux/net/core/dev.c:5648 process_backlog+0x101/0x6b0 linux/net/core/dev.c:5976 __napi_poll.constprop.0+0xba/0x550 linux/net/core/dev.c:6576 napi_poll linux/net/core/dev.c:6645 net_rx_action+0x95a/0xe90 linux/net/core/dev.c:6781 __do_softirq+0x21f/0x8e7 linux/kernel/softirq.c:553 do_softirq linux/kernel/softirq.c:454 do_softirq+0xb2/0xf0 linux/kernel/softirq.c:441 __local_bh_enable_ip+0x100/0x120 linux/kernel/softirq.c:381 local_bh_enable linux/./include/linux/bottom_half.h:33 rcu_read_unlock_bh linux/./include/linux/rcupdate.h:851 __dev_queue_xmit+0x871/0x3ee0 linux/net/core/dev.c:4378 dev_queue_xmit linux/./include/linux/netdevice.h:3169 neigh_hh_output linux/./include/net/neighbour.h:526 neigh_output linux/./include/net/neighbour.h:540 ip_finish_output2+0x169f/0x2550 linux/net/ipv4/ip_output.c:235 __ip_finish_output linux/net/ipv4/ip_output.c:313 __ip_finish_output+0x49e/0x950 linux/net/ipv4/ip_output.c:295 ip_finish_output+0x31/0x310 linux/net/ipv4/ip_output.c:323 NF_HOOK_COND linux/./include/linux/netfilter.h:303 ip_output+0x13b/0x2a0 linux/net/ipv4/ip_output.c:433 dst_output linux/./include/net/dst.h:451 ip_local_out linux/net/ipv4/ip_output.c:129 ip_send_skb+0x3e5/0x560 linux/net/ipv4/ip_output.c:1492 udp_send_skb+0x73f/0x1530 linux/net/ipv4/udp.c:963 udp_sendmsg+0x1a36/0x2b40 linux/net/ipv4/udp.c:1250 inet_sendmsg+0x105/0x140 linux/net/ipv4/af_inet.c:850 sock_sendmsg_nosec linux/net/socket.c:730 __sock_sendmsg linux/net/socket.c:745 __sys_sendto+0x42c/0x4e0 linux/net/socket.c:2191 __do_sys_sendto linux/net/socket.c:2203 __se_sys_sendto linux/net/socket.c:2199 __x64_sys_sendto+0xe0/0x1c0 linux/net/socket.c:2199 do_syscall_x64 linux/arch/x86/entry/common.c:52 do_syscall_64+0xd8/0x270 linux/arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x6f/0x77 linux/arch/x86/entry/entry_64.S:120 RIP: 0033:0x7f3434974f29 Code: 00 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 37 8f 0d 00 f7 d8 64 89 01 48 RSP: 002b:00007fff9154f2b8 EFLAGS: 00000212 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f3434974f29 RDX: 00000000000032c8 RSI: 00007fff9154f300 RDI: 0000000000000003 RBP: 00007fff915532e0 R08: 00007fff91553360 R09: 0000000000000010 R10: 0000000000000000 R11: 0000000000000212 R12: 000055ed86d261d0 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 In the critical scenario, either the relevant skb is freed or its ownership is transferred into a frag_lists. In both cases, the cleanup code must not free it again: we need to clear the skb reference earlier. Fixes: 1149557d64c9 ("tipc: eliminate unnecessary linearization of incoming buffers") Cc: stable@vger.kernel.org Reported-by: zdi-disclosures@trendmicro.com # ZDI-CAN-23852 Acked-by: Xin Long Signed-off-by: Paolo Abeni Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/752f1ccf762223d109845365d07f55414058e5a3.1714484273.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- net/tipc/msg.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 5c9fd4791c4ba..9a6e9bcbf6940 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -156,6 +156,11 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) if (!head) goto err; + /* Either the input skb ownership is transferred to headskb + * or the input skb is freed, clear the reference to avoid + * bad access on error path. + */ + *buf = NULL; if (skb_try_coalesce(head, frag, &headstolen, &delta)) { kfree_skb_partial(frag, headstolen); } else { @@ -179,7 +184,6 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) *headbuf = NULL; return 1; } - *buf = NULL; return 0; err: kfree_skb(*buf); From 97bf6f81b29a8efaf5d0983251a7450e5794370d Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 30 Apr 2024 10:03:38 -0400 Subject: [PATCH 535/606] tipc: fix a possible memleak in tipc_buf_append __skb_linearize() doesn't free the skb when it fails, so move '*buf = NULL' after __skb_linearize(), so that the skb can be freed on the err path. Fixes: b7df21cf1b79 ("tipc: skb_linearize the head skb when reassembling msgs") Reported-by: Paolo Abeni Signed-off-by: Xin Long Reviewed-by: Simon Horman Reviewed-by: Tung Nguyen Link: https://lore.kernel.org/r/90710748c29a1521efac4f75ea01b3b7e61414cf.1714485818.git.lucien.xin@gmail.com Signed-off-by: Jakub Kicinski --- net/tipc/msg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 9a6e9bcbf6940..76284fc538ebd 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -142,9 +142,9 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) if (fragid == FIRST_FRAGMENT) { if (unlikely(head)) goto err; - *buf = NULL; if (skb_has_frag_list(frag) && __skb_linearize(frag)) goto err; + *buf = NULL; frag = skb_unshare(frag, GFP_ATOMIC); if (unlikely(!frag)) goto err; From f7789419137b18e3847d0cc41afd788c3c00663d Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Tue, 30 Apr 2024 18:50:13 +0200 Subject: [PATCH 536/606] vxlan: Pull inner IP header in vxlan_rcv(). Ensure the inner IP header is part of skb's linear data before reading its ECN bits. Otherwise we might read garbage. One symptom is the system erroneously logging errors like "vxlan: non-ECT from xxx.xxx.xxx.xxx with TOS=xxxx". Similar bugs have been fixed in geneve, ip_tunnel and ip6_tunnel (see commit 1ca1ba465e55 ("geneve: make sure to pull inner header in geneve_rx()") for example). So let's reuse the same code structure for consistency. Maybe we'll can add a common helper in the future. Fixes: d342894c5d2f ("vxlan: virtual extensible lan") Signed-off-by: Guillaume Nault Reviewed-by: Ido Schimmel Reviewed-by: Eric Dumazet Reviewed-by: Nikolay Aleksandrov Reviewed-by: Sabrina Dubroca Link: https://lore.kernel.org/r/1239c8db54efec341dd6455c77e0380f58923a3c.1714495737.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/vxlan/vxlan_core.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index c9e4e03ad214f..3a9148fb1422b 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -1674,6 +1674,7 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) bool raw_proto = false; void *oiph; __be32 vni = 0; + int nh; /* Need UDP and VXLAN header to be present */ if (!pskb_may_pull(skb, VXLAN_HLEN)) @@ -1762,9 +1763,25 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) skb->pkt_type = PACKET_HOST; } - oiph = skb_network_header(skb); + /* Save offset of outer header relative to skb->head, + * because we are going to reset the network header to the inner header + * and might change skb->head. + */ + nh = skb_network_header(skb) - skb->head; + skb_reset_network_header(skb); + if (!pskb_inet_may_pull(skb)) { + DEV_STATS_INC(vxlan->dev, rx_length_errors); + DEV_STATS_INC(vxlan->dev, rx_errors); + vxlan_vnifilter_count(vxlan, vni, vninode, + VXLAN_VNI_STATS_RX_ERRORS, 0); + goto drop; + } + + /* Get the outer header. */ + oiph = skb->head + nh; + if (!vxlan_ecn_decapsulate(vs, oiph, skb)) { DEV_STATS_INC(vxlan->dev, rx_frame_errors); DEV_STATS_INC(vxlan->dev, rx_errors); From fdb3f29dfe0d51bdb8e7b3a6d876ea8339d44df8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 2 May 2024 08:24:42 +0200 Subject: [PATCH 537/606] ALSA: hda/realtek: Fix build error without CONFIG_PM The alc_spec.power_hook is defined only with CONFIG_PM, and the recent fix overlooked it, resulting in a build error without CONFIG_PM. Fix it with the simple ifdef and set __maybe_unused for the function. We may drop the whole CONFIG_PM dependency there, but it should be done in a separate cleanup patch later. Fixes: 1e707769df07 ("ALSA: hda/realtek - Set GPIO3 to default at S4 state for Thinkpad with ALC1318") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202405012104.Dr7h318W-lkp@intel.com/ Message-ID: <20240502062442.30545-1-tiwai@suse.de> Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index e704425788ebf..b29739bd330b1 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7204,7 +7204,7 @@ static void alc287_alc1318_playback_pcm_hook(struct hda_pcm_stream *hinfo, } } -static void alc287_s4_power_gpio3_default(struct hda_codec *codec) +static void __maybe_unused alc287_s4_power_gpio3_default(struct hda_codec *codec) { if (is_s4_suspend(codec)) { alc_write_coef_idx(codec, 0x10, 0x8806); /* Change MLK to GPIO3 */ @@ -7219,7 +7219,9 @@ static void alc287_fixup_lenovo_thinkpad_with_alc1318(struct hda_codec *codec, if (action != HDA_FIXUP_ACT_PRE_PROBE) return; +#ifdef CONFIG_PM spec->power_hook = alc287_s4_power_gpio3_default; +#endif spec->gen.pcm_playback_hook = alc287_alc1318_playback_pcm_hook; } From 8a2e4d37afb8500b276e5ee903dee06f50ab0494 Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Tue, 30 Apr 2024 11:10:04 +0200 Subject: [PATCH 538/606] s390/qeth: Fix kernel panic after setting hsuid Symptom: When the hsuid attribute is set for the first time on an IQD Layer3 device while the corresponding network interface is already UP, the kernel will try to execute a napi function pointer that is NULL. Example: --------------------------------------------------------------------------- [ 2057.572696] illegal operation: 0001 ilc:1 [#1] SMP [ 2057.572702] Modules linked in: af_iucv qeth_l3 zfcp scsi_transport_fc sunrpc nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nf_tables_set nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip_set nf_tables libcrc32c nfnetlink ghash_s390 prng xts aes_s390 des_s390 de s_generic sha3_512_s390 sha3_256_s390 sha512_s390 vfio_ccw vfio_mdev mdev vfio_iommu_type1 eadm_sch vfio ext4 mbcache jbd2 qeth_l2 bridge stp llc dasd_eckd_mod qeth dasd_mod qdio ccwgroup pkey zcrypt [ 2057.572739] CPU: 6 PID: 60182 Comm: stress_client Kdump: loaded Not tainted 4.18.0-541.el8.s390x #1 [ 2057.572742] Hardware name: IBM 3931 A01 704 (LPAR) [ 2057.572744] Krnl PSW : 0704f00180000000 0000000000000002 (0x2) [ 2057.572748] R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:3 PM:0 RI:0 EA:3 [ 2057.572751] Krnl GPRS: 0000000000000004 0000000000000000 00000000a3b008d8 0000000000000000 [ 2057.572754] 00000000a3b008d8 cb923a29c779abc5 0000000000000000 00000000814cfd80 [ 2057.572756] 000000000000012c 0000000000000000 00000000a3b008d8 00000000a3b008d8 [ 2057.572758] 00000000bab6d500 00000000814cfd80 0000000091317e46 00000000814cfc68 [ 2057.572762] Krnl Code:#0000000000000000: 0000 illegal >0000000000000002: 0000 illegal 0000000000000004: 0000 illegal 0000000000000006: 0000 illegal 0000000000000008: 0000 illegal 000000000000000a: 0000 illegal 000000000000000c: 0000 illegal 000000000000000e: 0000 illegal [ 2057.572800] Call Trace: [ 2057.572801] ([<00000000ec639700>] 0xec639700) [ 2057.572803] [<00000000913183e2>] net_rx_action+0x2ba/0x398 [ 2057.572809] [<0000000091515f76>] __do_softirq+0x11e/0x3a0 [ 2057.572813] [<0000000090ce160c>] do_softirq_own_stack+0x3c/0x58 [ 2057.572817] ([<0000000090d2cbd6>] do_softirq.part.1+0x56/0x60) [ 2057.572822] [<0000000090d2cc60>] __local_bh_enable_ip+0x80/0x98 [ 2057.572825] [<0000000091314706>] __dev_queue_xmit+0x2be/0xd70 [ 2057.572827] [<000003ff803dd6d6>] afiucv_hs_send+0x24e/0x300 [af_iucv] [ 2057.572830] [<000003ff803dd88a>] iucv_send_ctrl+0x102/0x138 [af_iucv] [ 2057.572833] [<000003ff803de72a>] iucv_sock_connect+0x37a/0x468 [af_iucv] [ 2057.572835] [<00000000912e7e90>] __sys_connect+0xa0/0xd8 [ 2057.572839] [<00000000912e9580>] sys_socketcall+0x228/0x348 [ 2057.572841] [<0000000091514e1a>] system_call+0x2a6/0x2c8 [ 2057.572843] Last Breaking-Event-Address: [ 2057.572844] [<0000000091317e44>] __napi_poll+0x4c/0x1d8 [ 2057.572846] [ 2057.572847] Kernel panic - not syncing: Fatal exception in interrupt ------------------------------------------------------------------------------------------- Analysis: There is one napi structure per out_q: card->qdio.out_qs[i].napi The napi.poll functions are set during qeth_open(). Since commit 1cfef80d4c2b ("s390/qeth: Don't call dev_close/dev_open (DOWN/UP)") qeth_set_offline()/qeth_set_online() no longer call dev_close()/ dev_open(). So if qeth_free_qdio_queues() cleared card->qdio.out_qs[i].napi.poll while the network interface was UP and the card was offline, they are not set again. Reproduction: chzdev -e $devno layer2=0 ip link set dev $network_interface up echo 0 > /sys/bus/ccwgroup/devices/0.0.$devno/online echo foo > /sys/bus/ccwgroup/devices/0.0.$devno/hsuid echo 1 > /sys/bus/ccwgroup/devices/0.0.$devno/online -> Crash (can be enforced e.g. by af_iucv connect(), ip link down/up, ...) Note that a Completion Queue (CQ) is only enabled or disabled, when hsuid is set for the first time or when it is removed. Workarounds: - Set hsuid before setting the device online for the first time or - Use chzdev -d $devno; chzdev $devno hsuid=xxx; chzdev -e $devno; to set hsuid on an existing device. (this will remove and recreate the network interface) Fix: There is no need to free the output queues when a completion queue is added or removed. card->qdio.state now indicates whether the inbound buffer pool and the outbound queues are allocated. card->qdio.c_q indicates whether a CQ is allocated. Fixes: 1cfef80d4c2b ("s390/qeth: Don't call dev_close/dev_open (DOWN/UP)") Signed-off-by: Alexandra Winter Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240430091004.2265683-1-wintera@linux.ibm.com Signed-off-by: Paolo Abeni --- drivers/s390/net/qeth_core_main.c | 61 ++++++++++++++----------------- 1 file changed, 27 insertions(+), 34 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index f0b8b709649f2..a3adaec5504e4 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -364,30 +364,33 @@ static int qeth_cq_init(struct qeth_card *card) return rc; } +static void qeth_free_cq(struct qeth_card *card) +{ + if (card->qdio.c_q) { + qeth_free_qdio_queue(card->qdio.c_q); + card->qdio.c_q = NULL; + } +} + static int qeth_alloc_cq(struct qeth_card *card) { if (card->options.cq == QETH_CQ_ENABLED) { QETH_CARD_TEXT(card, 2, "cqon"); - card->qdio.c_q = qeth_alloc_qdio_queue(); if (!card->qdio.c_q) { - dev_err(&card->gdev->dev, "Failed to create completion queue\n"); - return -ENOMEM; + card->qdio.c_q = qeth_alloc_qdio_queue(); + if (!card->qdio.c_q) { + dev_err(&card->gdev->dev, + "Failed to create completion queue\n"); + return -ENOMEM; + } } } else { QETH_CARD_TEXT(card, 2, "nocq"); - card->qdio.c_q = NULL; + qeth_free_cq(card); } return 0; } -static void qeth_free_cq(struct qeth_card *card) -{ - if (card->qdio.c_q) { - qeth_free_qdio_queue(card->qdio.c_q); - card->qdio.c_q = NULL; - } -} - static enum iucv_tx_notify qeth_compute_cq_notification(int sbalf15, int delayed) { @@ -2628,6 +2631,10 @@ static int qeth_alloc_qdio_queues(struct qeth_card *card) QETH_CARD_TEXT(card, 2, "allcqdbf"); + /* completion */ + if (qeth_alloc_cq(card)) + goto out_err; + if (atomic_cmpxchg(&card->qdio.state, QETH_QDIO_UNINITIALIZED, QETH_QDIO_ALLOCATED) != QETH_QDIO_UNINITIALIZED) return 0; @@ -2663,10 +2670,6 @@ static int qeth_alloc_qdio_queues(struct qeth_card *card) queue->priority = QETH_QIB_PQUE_PRIO_DEFAULT; } - /* completion */ - if (qeth_alloc_cq(card)) - goto out_freeoutq; - return 0; out_freeoutq: @@ -2677,6 +2680,8 @@ static int qeth_alloc_qdio_queues(struct qeth_card *card) qeth_free_buffer_pool(card); out_buffer_pool: atomic_set(&card->qdio.state, QETH_QDIO_UNINITIALIZED); + qeth_free_cq(card); +out_err: return -ENOMEM; } @@ -2684,11 +2689,12 @@ static void qeth_free_qdio_queues(struct qeth_card *card) { int i, j; + qeth_free_cq(card); + if (atomic_xchg(&card->qdio.state, QETH_QDIO_UNINITIALIZED) == QETH_QDIO_UNINITIALIZED) return; - qeth_free_cq(card); for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; ++j) { if (card->qdio.in_q->bufs[j].rx_skb) { consume_skb(card->qdio.in_q->bufs[j].rx_skb); @@ -3742,24 +3748,11 @@ static void qeth_qdio_poll(struct ccw_device *cdev, unsigned long card_ptr) int qeth_configure_cq(struct qeth_card *card, enum qeth_cq cq) { - int rc; - - if (card->options.cq == QETH_CQ_NOTAVAILABLE) { - rc = -1; - goto out; - } else { - if (card->options.cq == cq) { - rc = 0; - goto out; - } - - qeth_free_qdio_queues(card); - card->options.cq = cq; - rc = 0; - } -out: - return rc; + if (card->options.cq == QETH_CQ_NOTAVAILABLE) + return -1; + card->options.cq = cq; + return 0; } EXPORT_SYMBOL_GPL(qeth_configure_cq); From d43cd48ef1791801c61a54fade4a88d294dedf77 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 25 Apr 2024 17:26:17 +0300 Subject: [PATCH 539/606] drm/panel: ili9341: Correct use of device property APIs It seems driver missed the point of proper use of device property APIs. Correct this by updating headers and calls respectively. Fixes: 5a04227326b0 ("drm/panel: Add ilitek ili9341 panel driver") Signed-off-by: Andy Shevchenko Reviewed-by: Dmitry Baryshkov Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20240425142706.2440113-2-andriy.shevchenko@linux.intel.com Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20240425142706.2440113-2-andriy.shevchenko@linux.intel.com --- drivers/gpu/drm/panel/Kconfig | 2 +- drivers/gpu/drm/panel/panel-ilitek-ili9341.c | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig index d037b3b8b9993..5b15d02948367 100644 --- a/drivers/gpu/drm/panel/Kconfig +++ b/drivers/gpu/drm/panel/Kconfig @@ -177,7 +177,7 @@ config DRM_PANEL_ILITEK_IL9322 config DRM_PANEL_ILITEK_ILI9341 tristate "Ilitek ILI9341 240x320 QVGA panels" - depends on OF && SPI + depends on SPI select DRM_KMS_HELPER select DRM_GEM_DMA_HELPER depends on BACKLIGHT_CLASS_DEVICE diff --git a/drivers/gpu/drm/panel/panel-ilitek-ili9341.c b/drivers/gpu/drm/panel/panel-ilitek-ili9341.c index 3574681891e81..7584ddb0e4416 100644 --- a/drivers/gpu/drm/panel/panel-ilitek-ili9341.c +++ b/drivers/gpu/drm/panel/panel-ilitek-ili9341.c @@ -22,8 +22,9 @@ #include #include #include +#include #include -#include +#include #include #include @@ -691,7 +692,7 @@ static int ili9341_dpi_probe(struct spi_device *spi, struct gpio_desc *dc, * Every new incarnation of this display must have a unique * data entry for the system in this driver. */ - ili->conf = of_device_get_match_data(dev); + ili->conf = device_get_match_data(dev); if (!ili->conf) { dev_err(dev, "missing device configuration\n"); return -ENODEV; From 740fc1e0509be3f7e2207e89125b06119ed62943 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 25 Apr 2024 17:26:18 +0300 Subject: [PATCH 540/606] drm/panel: ili9341: Respect deferred probe GPIO controller might not be available when driver is being probed. There are plenty of reasons why, one of which is deferred probe. Since GPIOs are optional, return any error code we got to the upper layer, including deferred probe. With that in mind, use dev_err_probe() in order to avoid spamming the logs. Fixes: 5a04227326b0 ("drm/panel: Add ilitek ili9341 panel driver") Signed-off-by: Andy Shevchenko Reviewed-by: Dmitry Baryshkov Reviewed-by: Neil Armstrong Reviewed-by: Sui Jingfeng Link: https://lore.kernel.org/r/20240425142706.2440113-3-andriy.shevchenko@linux.intel.com Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20240425142706.2440113-3-andriy.shevchenko@linux.intel.com --- drivers/gpu/drm/panel/panel-ilitek-ili9341.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-ilitek-ili9341.c b/drivers/gpu/drm/panel/panel-ilitek-ili9341.c index 7584ddb0e4416..24c74c56e5648 100644 --- a/drivers/gpu/drm/panel/panel-ilitek-ili9341.c +++ b/drivers/gpu/drm/panel/panel-ilitek-ili9341.c @@ -715,11 +715,11 @@ static int ili9341_probe(struct spi_device *spi) reset = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH); if (IS_ERR(reset)) - dev_err(dev, "Failed to get gpio 'reset'\n"); + return dev_err_probe(dev, PTR_ERR(reset), "Failed to get gpio 'reset'\n"); dc = devm_gpiod_get_optional(dev, "dc", GPIOD_OUT_LOW); if (IS_ERR(dc)) - dev_err(dev, "Failed to get gpio 'dc'\n"); + return dev_err_probe(dev, PTR_ERR(dc), "Failed to get gpio 'dc'\n"); if (!strcmp(id->name, "sf-tc240t-9370-t")) return ili9341_dpi_probe(spi, dc, reset); From da85f0aaa9f21999753b01d45c0343f885a8f905 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 25 Apr 2024 17:26:19 +0300 Subject: [PATCH 541/606] drm/panel: ili9341: Use predefined error codes In one case the -1 is returned which is quite confusing code for the wrong device ID, in another the ret is returning instead of plain 0 that also confusing as readed may ask the possible meaning of positive codes, which are never the case there. Convert both to use explicit predefined error codes to make it clear what's going on there. Fixes: 5a04227326b0 ("drm/panel: Add ilitek ili9341 panel driver") Signed-off-by: Andy Shevchenko Reviewed-by: Neil Armstrong Reviewed-by: Sui Jingfeng Link: https://lore.kernel.org/r/20240425142706.2440113-4-andriy.shevchenko@linux.intel.com Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20240425142706.2440113-4-andriy.shevchenko@linux.intel.com --- drivers/gpu/drm/panel/panel-ilitek-ili9341.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-ilitek-ili9341.c b/drivers/gpu/drm/panel/panel-ilitek-ili9341.c index 24c74c56e5648..b933380b7eb78 100644 --- a/drivers/gpu/drm/panel/panel-ilitek-ili9341.c +++ b/drivers/gpu/drm/panel/panel-ilitek-ili9341.c @@ -422,7 +422,7 @@ static int ili9341_dpi_prepare(struct drm_panel *panel) ili9341_dpi_init(ili); - return ret; + return 0; } static int ili9341_dpi_enable(struct drm_panel *panel) @@ -726,7 +726,7 @@ static int ili9341_probe(struct spi_device *spi) else if (!strcmp(id->name, "yx240qv29")) return ili9341_dbi_probe(spi, dc, reset); - return -1; + return -ENODEV; } static void ili9341_remove(struct spi_device *spi) From fc1092f51567277509563800a3c56732070b6aa4 Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Tue, 30 Apr 2024 21:39:45 +0900 Subject: [PATCH 542/606] ipv4: Fix uninit-value access in __ip_make_skb() KMSAN reported uninit-value access in __ip_make_skb() [1]. __ip_make_skb() tests HDRINCL to know if the skb has icmphdr. However, HDRINCL can cause a race condition. If calling setsockopt(2) with IP_HDRINCL changes HDRINCL while __ip_make_skb() is running, the function will access icmphdr in the skb even if it is not included. This causes the issue reported by KMSAN. Check FLOWI_FLAG_KNOWN_NH on fl4->flowi4_flags instead of testing HDRINCL on the socket. Also, fl4->fl4_icmp_type and fl4->fl4_icmp_code are not initialized. These are union in struct flowi4 and are implicitly initialized by flowi4_init_output(), but we should not rely on specific union layout. Initialize these explicitly in raw_sendmsg(). [1] BUG: KMSAN: uninit-value in __ip_make_skb+0x2b74/0x2d20 net/ipv4/ip_output.c:1481 __ip_make_skb+0x2b74/0x2d20 net/ipv4/ip_output.c:1481 ip_finish_skb include/net/ip.h:243 [inline] ip_push_pending_frames+0x4c/0x5c0 net/ipv4/ip_output.c:1508 raw_sendmsg+0x2381/0x2690 net/ipv4/raw.c:654 inet_sendmsg+0x27b/0x2a0 net/ipv4/af_inet.c:851 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x274/0x3c0 net/socket.c:745 __sys_sendto+0x62c/0x7b0 net/socket.c:2191 __do_sys_sendto net/socket.c:2203 [inline] __se_sys_sendto net/socket.c:2199 [inline] __x64_sys_sendto+0x130/0x200 net/socket.c:2199 do_syscall_64+0xd8/0x1f0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x6d/0x75 Uninit was created at: slab_post_alloc_hook mm/slub.c:3804 [inline] slab_alloc_node mm/slub.c:3845 [inline] kmem_cache_alloc_node+0x5f6/0xc50 mm/slub.c:3888 kmalloc_reserve+0x13c/0x4a0 net/core/skbuff.c:577 __alloc_skb+0x35a/0x7c0 net/core/skbuff.c:668 alloc_skb include/linux/skbuff.h:1318 [inline] __ip_append_data+0x49ab/0x68c0 net/ipv4/ip_output.c:1128 ip_append_data+0x1e7/0x260 net/ipv4/ip_output.c:1365 raw_sendmsg+0x22b1/0x2690 net/ipv4/raw.c:648 inet_sendmsg+0x27b/0x2a0 net/ipv4/af_inet.c:851 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x274/0x3c0 net/socket.c:745 __sys_sendto+0x62c/0x7b0 net/socket.c:2191 __do_sys_sendto net/socket.c:2203 [inline] __se_sys_sendto net/socket.c:2199 [inline] __x64_sys_sendto+0x130/0x200 net/socket.c:2199 do_syscall_64+0xd8/0x1f0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x6d/0x75 CPU: 1 PID: 15709 Comm: syz-executor.7 Not tainted 6.8.0-11567-gb3603fcb79b1 #25 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-1.fc39 04/01/2014 Fixes: 99e5acae193e ("ipv4: Fix potential uninit variable access bug in __ip_make_skb()") Reported-by: syzkaller Signed-off-by: Shigeru Yoshida Link: https://lore.kernel.org/r/20240430123945.2057348-1-syoshida@redhat.com Signed-off-by: Paolo Abeni --- net/ipv4/ip_output.c | 2 +- net/ipv4/raw.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 1fe794967211e..39229fd0601a1 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1473,7 +1473,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk, * by icmp_hdr(skb)->type. */ if (sk->sk_type == SOCK_RAW && - !inet_test_bit(HDRINCL, sk)) + !(fl4->flowi4_flags & FLOWI_FLAG_KNOWN_NH)) icmp_type = fl4->fl4_icmp_type; else icmp_type = icmp_hdr(skb)->type; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index dcb11f22cbf2b..4cb43401e0e06 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -612,6 +612,9 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) (hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), daddr, saddr, 0, 0, sk->sk_uid); + fl4.fl4_icmp_type = 0; + fl4.fl4_icmp_code = 0; + if (!hdrincl) { rfv.msg = msg; rfv.hlen = 0; From 5ef31ea5d053a8f493a772ebad3f3ce82c35d845 Mon Sep 17 00:00:00 2001 From: Richard Gobert Date: Tue, 30 Apr 2024 16:35:54 +0200 Subject: [PATCH 543/606] net: gro: fix udp bad offset in socket lookup by adding {inner_}network_offset to napi_gro_cb Commits a602456 ("udp: Add GRO functions to UDP socket") and 57c67ff ("udp: additional GRO support") introduce incorrect usage of {ip,ipv6}_hdr in the complete phase of gro. The functions always return skb->network_header, which in the case of encapsulated packets at the gro complete phase, is always set to the innermost L3 of the packet. That means that calling {ip,ipv6}_hdr for skbs which completed the GRO receive phase (both in gro_list and *_gro_complete) when parsing an encapsulated packet's _outer_ L3/L4 may return an unexpected value. This incorrect usage leads to a bug in GRO's UDP socket lookup. udp{4,6}_lib_lookup_skb functions use ip_hdr/ipv6_hdr respectively. These *_hdr functions return network_header which will point to the innermost L3, resulting in the wrong offset being used in __udp{4,6}_lib_lookup with encapsulated packets. This patch adds network_offset and inner_network_offset to napi_gro_cb, and makes sure both are set correctly. To fix the issue, network_offsets union is used inside napi_gro_cb, in which both the outer and the inner network offsets are saved. Reproduction example: Endpoint configuration example (fou + local address bind) # ip fou add port 6666 ipproto 4 # ip link add name tun1 type ipip remote 2.2.2.1 local 2.2.2.2 encap fou encap-dport 5555 encap-sport 6666 mode ipip # ip link set tun1 up # ip a add 1.1.1.2/24 dev tun1 Netperf TCP_STREAM result on net-next before patch is applied: net-next main, GRO enabled: $ netperf -H 1.1.1.2 -t TCP_STREAM -l 5 Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. 10^6bits/sec 131072 16384 16384 5.28 2.37 net-next main, GRO disabled: $ netperf -H 1.1.1.2 -t TCP_STREAM -l 5 Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. 10^6bits/sec 131072 16384 16384 5.01 2745.06 patch applied, GRO enabled: $ netperf -H 1.1.1.2 -t TCP_STREAM -l 5 Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. 10^6bits/sec 131072 16384 16384 5.01 2877.38 Fixes: a6024562ffd7 ("udp: Add GRO functions to UDP socket") Signed-off-by: Richard Gobert Reviewed-by: Eric Dumazet Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- include/net/gro.h | 9 +++++++++ net/8021q/vlan_core.c | 2 ++ net/core/gro.c | 1 + net/ipv4/af_inet.c | 1 + net/ipv4/udp.c | 3 ++- net/ipv4/udp_offload.c | 3 ++- net/ipv6/ip6_offload.c | 1 + net/ipv6/udp.c | 3 ++- net/ipv6/udp_offload.c | 3 ++- 9 files changed, 22 insertions(+), 4 deletions(-) diff --git a/include/net/gro.h b/include/net/gro.h index 50f1e403dbbb3..c1d4ca0463a17 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -87,6 +87,15 @@ struct napi_gro_cb { /* used to support CHECKSUM_COMPLETE for tunneling protocols */ __wsum csum; + + /* L3 offsets */ + union { + struct { + u16 network_offset; + u16 inner_network_offset; + }; + u16 network_offsets[2]; + }; }; #define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb) diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index f001582345052..9404dd551dfd2 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -478,6 +478,8 @@ static struct sk_buff *vlan_gro_receive(struct list_head *head, if (unlikely(!vhdr)) goto out; + NAPI_GRO_CB(skb)->network_offsets[NAPI_GRO_CB(skb)->encap_mark] = hlen; + type = vhdr->h_vlan_encapsulated_proto; ptype = gro_find_receive_by_type(type); diff --git a/net/core/gro.c b/net/core/gro.c index 83f35d99a682c..c7901253a1a8f 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -371,6 +371,7 @@ static inline void skb_gro_reset_offset(struct sk_buff *skb, u32 nhoff) const skb_frag_t *frag0; unsigned int headlen; + NAPI_GRO_CB(skb)->network_offset = 0; NAPI_GRO_CB(skb)->data_offset = 0; headlen = skb_headlen(skb); NAPI_GRO_CB(skb)->frag0 = skb->data; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 55bd72997b310..fafb123f798be 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1572,6 +1572,7 @@ struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb) /* The above will be needed by the transport layer if there is one * immediately following this IP hdr. */ + NAPI_GRO_CB(skb)->inner_network_offset = off; /* Note : No need to call skb_gro_postpull_rcsum() here, * as we already checked checksum over ipv4 header was 0 diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 420905be5f30c..b32cf2eeeb41d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -532,7 +532,8 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, struct sock *udp4_lib_lookup_skb(const struct sk_buff *skb, __be16 sport, __be16 dport) { - const struct iphdr *iph = ip_hdr(skb); + const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation]; + const struct iphdr *iph = (struct iphdr *)(skb->data + offset); struct net *net = dev_net(skb->dev); int iif, sdif; diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 3498dd1d0694d..fd29d21d579c1 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -718,7 +718,8 @@ EXPORT_SYMBOL(udp_gro_complete); INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff) { - const struct iphdr *iph = ip_hdr(skb); + const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation]; + const struct iphdr *iph = (struct iphdr *)(skb->data + offset); struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); /* do fraglist only if there is no outer UDP encap (or we already processed it) */ diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index b41e35af69ea2..c8b909a9904f3 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -237,6 +237,7 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head, goto out; skb_set_network_header(skb, off); + NAPI_GRO_CB(skb)->inner_network_offset = off; flush += ntohs(iph->payload_len) != skb->len - hlen; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 1a4cccdd40c9c..8f7aa8bac1e7b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -272,7 +272,8 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb, struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb, __be16 sport, __be16 dport) { - const struct ipv6hdr *iph = ipv6_hdr(skb); + const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation]; + const struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + offset); struct net *net = dev_net(skb->dev); int iif, sdif; diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index bbd347de00b45..b41152dd42469 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -164,7 +164,8 @@ struct sk_buff *udp6_gro_receive(struct list_head *head, struct sk_buff *skb) INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff) { - const struct ipv6hdr *ipv6h = ipv6_hdr(skb); + const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation]; + const struct ipv6hdr *ipv6h = (struct ipv6hdr *)(skb->data + offset); struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); /* do fraglist only if there is no outer UDP encap (or we already processed it) */ From 5babae777c61aa8a8679d59d3cdc54165ad96d42 Mon Sep 17 00:00:00 2001 From: Richard Gobert Date: Tue, 30 Apr 2024 16:35:55 +0200 Subject: [PATCH 544/606] net: gro: add flush check in udp_gro_receive_segment GRO-GSO path is supposed to be transparent and as such L3 flush checks are relevant to all UDP flows merging in GRO. This patch uses the same logic and code from tcp_gro_receive, terminating merge if flush is non zero. Fixes: e20cf8d3f1f7 ("udp: implement GRO for plain UDP sockets.") Signed-off-by: Richard Gobert Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- net/ipv4/udp_offload.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index fd29d21d579c1..8721fe5beca2b 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -471,6 +471,7 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head, struct sk_buff *p; unsigned int ulen; int ret = 0; + int flush; /* requires non zero csum, for symmetry with GSO */ if (!uh->check) { @@ -504,13 +505,22 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head, return p; } + flush = NAPI_GRO_CB(p)->flush; + + if (NAPI_GRO_CB(p)->flush_id != 1 || + NAPI_GRO_CB(p)->count != 1 || + !NAPI_GRO_CB(p)->is_atomic) + flush |= NAPI_GRO_CB(p)->flush_id; + else + NAPI_GRO_CB(p)->is_atomic = false; + /* Terminate the flow on len mismatch or if it grow "too much". * Under small packet flood GRO count could elsewhere grow a lot * leading to excessive truesize values. * On len mismatch merge the first packet shorter than gso_size, * otherwise complete the GRO packet. */ - if (ulen > ntohs(uh2->len)) { + if (ulen > ntohs(uh2->len) || flush) { pp = p; } else { if (NAPI_GRO_CB(skb)->is_flist) { From c9ccbcd9f1995e6aa1578220f86c96f57be529d7 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 30 Apr 2024 16:33:05 -0700 Subject: [PATCH 545/606] MAINTAINERS: remove Ariel Elior aelior@marvell.com bounces, we haven't seen Ariel on lore since March 2022. Signed-off-by: Jakub Kicinski Link: https://lore.kernel.org/r/20240430233305.1356105-1-kuba@kernel.org Signed-off-by: Paolo Abeni --- MAINTAINERS | 3 --- 1 file changed, 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 4a2e864fce516..0261a80772742 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4193,7 +4193,6 @@ S: Supported F: drivers/scsi/bnx2i/ BROADCOM BNX2X 10 GIGABIT ETHERNET DRIVER -M: Ariel Elior M: Sudarsana Kalluru M: Manish Chopra L: netdev@vger.kernel.org @@ -17998,7 +17997,6 @@ S: Supported F: drivers/scsi/qedi/ QLOGIC QL4xxx ETHERNET DRIVER -M: Ariel Elior M: Manish Chopra L: netdev@vger.kernel.org S: Supported @@ -18008,7 +18006,6 @@ F: include/linux/qed/ QLOGIC QL4xxx RDMA DRIVER M: Michal Kalderon -M: Ariel Elior L: linux-rdma@vger.kernel.org S: Supported F: drivers/infiniband/hw/qedr/ From 78cfe547607a83de60cd25304fa2422777634712 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 30 Apr 2024 16:35:32 -0700 Subject: [PATCH 546/606] MAINTAINERS: mark MYRICOM MYRI-10G as Orphan Chris's email address bounces and lore hasn't seen an email from anyone with his name for almost a decade. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240430233532.1356982-1-kuba@kernel.org Signed-off-by: Paolo Abeni --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 0261a80772742..157135b470fa4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15161,9 +15161,8 @@ F: drivers/scsi/myrb.* F: drivers/scsi/myrs.* MYRICOM MYRI-10G 10GbE DRIVER (MYRI10GE) -M: Chris Lee L: netdev@vger.kernel.org -S: Supported +S: Orphan W: https://www.cspi.com/ethernet-products/support/downloads/ F: drivers/net/ethernet/myricom/myri10ge/ From 75961ffb5cb3e5196f19cae7683f35cc88b50800 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 2 May 2024 10:37:23 +0100 Subject: [PATCH 547/606] swiotlb: initialise restricted pool list_head when SWIOTLB_DYNAMIC=y Using restricted DMA pools (CONFIG_DMA_RESTRICTED_POOL=y) in conjunction with dynamic SWIOTLB (CONFIG_SWIOTLB_DYNAMIC=y) leads to the following crash when initialising the restricted pools at boot-time: | Unable to handle kernel NULL pointer dereference at virtual address 0000000000000008 | Internal error: Oops: 0000000096000005 [#1] PREEMPT SMP | pc : rmem_swiotlb_device_init+0xfc/0x1ec | lr : rmem_swiotlb_device_init+0xf0/0x1ec | Call trace: | rmem_swiotlb_device_init+0xfc/0x1ec | of_reserved_mem_device_init_by_idx+0x18c/0x238 | of_dma_configure_id+0x31c/0x33c | platform_dma_configure+0x34/0x80 faddr2line reveals that the crash is in the list validation code: include/linux/list.h:83 include/linux/rculist.h:79 include/linux/rculist.h:106 kernel/dma/swiotlb.c:306 kernel/dma/swiotlb.c:1695 because add_mem_pool() is trying to list_add_rcu() to a NULL 'mem->pools'. Fix the crash by initialising the 'mem->pools' list_head in rmem_swiotlb_device_init() before calling add_mem_pool(). Reported-by: Nikita Ioffe Tested-by: Nikita Ioffe Fixes: 1aaa736815eb ("swiotlb: allocate a new memory pool when existing pools are full") Signed-off-by: Will Deacon Signed-off-by: Christoph Hellwig --- kernel/dma/swiotlb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index a5e0dfc44d24e..0de66f0ff43ab 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -1798,6 +1798,7 @@ static int rmem_swiotlb_device_init(struct reserved_mem *rmem, mem->for_alloc = true; #ifdef CONFIG_SWIOTLB_DYNAMIC spin_lock_init(&mem->lock); + INIT_LIST_HEAD_RCU(&mem->pools); #endif add_mem_pool(mem, pool); From 8a95db3bf8a32186fe448b1a934afbd5f1da0263 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 2 May 2024 16:39:47 +0200 Subject: [PATCH 548/606] x86/xen/smp_pv: Register the boot CPU APIC properly The topology core expects the boot APIC to be registered from earhy APIC detection first and then again when the firmware tables are evaluated. This is used for detecting the real BSP CPU on a kexec kernel. The recent conversion of XEN/PV to register fake APIC IDs failed to register the boot CPU APIC correctly as it only registers it once. This causes the BSP detection mechanism to trigger wrongly: CPU topo: Boot CPU APIC ID not the first enumerated APIC ID: 0 > 1 Additionally this results in one CPU being ignored. Register the boot CPU APIC twice so that the XEN/PV fake enumeration behaves like real firmware. Reported-by: Juergen Gross Fixes: e75307023466 ("x86/xen/smp_pv: Register fake APICs") Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/87a5l8s2fg.ffs@tglx Signed-off-by: Juergen Gross --- arch/x86/xen/smp_pv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 27d1a5b7f571a..ac41d83b38d3c 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -154,9 +154,9 @@ static void __init xen_pv_smp_config(void) u32 apicid = 0; int i; - topology_register_boot_apic(apicid++); + topology_register_boot_apic(apicid); - for (i = 1; i < nr_cpu_ids; i++) + for (i = 0; i < nr_cpu_ids; i++) topology_register_apic(apicid++, CPU_ACPIID_INVALID, true); /* Pretend to be a proper enumerated system */ From df04b152fca2d46e75fbb74ed79299bc420bc9e6 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 25 Apr 2024 11:16:09 -0700 Subject: [PATCH 549/606] drm/xe/display: Fix ADL-N detection Contrary to i915, in xe ADL-N is kept as a different platform, not a subplatform of ADL-P. Since the display side doesn't need to differentiate between P and N, i.e. IS_ALDERLAKE_P_N() is never called, just fixup the compat header to check for both P and N. Moving ADL-N to be a subplatform would be more complex as the firmware loading in xe only handles platforms, not subplatforms, as going forward the direction is to check on IP version rather than platforms/subplatforms. Fix warning when initializing display: xe 0000:00:02.0: [drm:intel_pch_type [xe]] Found Alder Lake PCH ------------[ cut here ]------------ xe 0000:00:02.0: drm_WARN_ON(!((dev_priv)->info.platform == XE_ALDERLAKE_S) && !((dev_priv)->info.platform == XE_ALDERLAKE_P)) And wrong paths being taken on the display side. Reviewed-by: Matt Roper Acked-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20240425181610.2704633-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 6a2a90cba12b42eb96c2af3426b77ceb4be31df2) Fixes: 44e694958b95 ("drm/xe/display: Implement display support") Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h index 420eba0e4be00..854a7bb535674 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h @@ -84,7 +84,8 @@ static inline struct drm_i915_private *kdev_to_i915(struct device *kdev) #define IS_ROCKETLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_ROCKETLAKE) #define IS_DG1(dev_priv) IS_PLATFORM(dev_priv, XE_DG1) #define IS_ALDERLAKE_S(dev_priv) IS_PLATFORM(dev_priv, XE_ALDERLAKE_S) -#define IS_ALDERLAKE_P(dev_priv) IS_PLATFORM(dev_priv, XE_ALDERLAKE_P) +#define IS_ALDERLAKE_P(dev_priv) (IS_PLATFORM(dev_priv, XE_ALDERLAKE_P) || \ + IS_PLATFORM(dev_priv, XE_ALDERLAKE_N)) #define IS_XEHPSDV(dev_priv) (dev_priv && 0) #define IS_DG2(dev_priv) IS_PLATFORM(dev_priv, XE_DG2) #define IS_PONTEVECCHIO(dev_priv) IS_PLATFORM(dev_priv, XE_PVC) From 802600ebdf23371b893a51a4ad046213f112ea3b Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 5 Apr 2024 14:15:47 +0200 Subject: [PATCH 550/606] x86/xen: return a sane initial apic id when running as PV guest With recent sanity checks for topology information added, there are now warnings issued for APs when running as a Xen PV guest: [Firmware Bug]: CPU 1: APIC ID mismatch. CPUID: 0x0000 APIC: 0x0001 This is due to the initial APIC ID obtained via CPUID for PV guests is always 0. Avoid the warnings by synthesizing the CPUID data to contain the same initial APIC ID as xen_pv_smp_config() is using for registering the APIC IDs of all CPUs. Fixes: 52128a7a21f7 ("86/cpu/topology: Make the APIC mismatch warnings complete") Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten_pv.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index ace2eb054053f..9ba53814ed6a9 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -219,13 +219,21 @@ static __read_mostly unsigned int cpuid_leaf5_edx_val; static void xen_cpuid(unsigned int *ax, unsigned int *bx, unsigned int *cx, unsigned int *dx) { - unsigned maskebx = ~0; + unsigned int maskebx = ~0; + unsigned int or_ebx = 0; /* * Mask out inconvenient features, to try and disable as many * unsupported kernel subsystems as possible. */ switch (*ax) { + case 0x1: + /* Replace initial APIC ID in bits 24-31 of EBX. */ + /* See xen_pv_smp_config() for related topology preparations. */ + maskebx = 0x00ffffff; + or_ebx = smp_processor_id() << 24; + break; + case CPUID_MWAIT_LEAF: /* Synthesize the values.. */ *ax = 0; @@ -248,6 +256,7 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, : "0" (*ax), "2" (*cx)); *bx &= maskebx; + *bx |= or_ebx; } static bool __init xen_check_mwait(void) From 98241a774db49988f25b7b3657026ce51ccec293 Mon Sep 17 00:00:00 2001 From: Viken Dadhaniya Date: Tue, 30 Apr 2024 10:12:38 +0100 Subject: [PATCH 551/606] slimbus: qcom-ngd-ctrl: Add timeout for wait operation In current driver qcom_slim_ngd_up_worker() indefinitely waiting for ctrl->qmi_up completion object. This is resulting in workqueue lockup on Kthread. Added wait_for_completion_interruptible_timeout to allow the thread to wait for specific timeout period and bail out instead waiting infinitely. Fixes: a899d324863a ("slimbus: qcom-ngd-ctrl: add Sub System Restart support") Cc: stable@vger.kernel.org Reviewed-by: Konrad Dybcio Signed-off-by: Viken Dadhaniya Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20240430091238.35209-2-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/slimbus/qcom-ngd-ctrl.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/slimbus/qcom-ngd-ctrl.c b/drivers/slimbus/qcom-ngd-ctrl.c index efeba8275a669..a09a26bf4988f 100644 --- a/drivers/slimbus/qcom-ngd-ctrl.c +++ b/drivers/slimbus/qcom-ngd-ctrl.c @@ -1451,7 +1451,11 @@ static void qcom_slim_ngd_up_worker(struct work_struct *work) ctrl = container_of(work, struct qcom_slim_ngd_ctrl, ngd_up_work); /* Make sure qmi service is up before continuing */ - wait_for_completion_interruptible(&ctrl->qmi_up); + if (!wait_for_completion_interruptible_timeout(&ctrl->qmi_up, + msecs_to_jiffies(MSEC_PER_SEC))) { + dev_err(ctrl->dev, "QMI wait timeout\n"); + return; + } mutex_lock(&ctrl->ssr_lock); qcom_slim_ngd_enable(ctrl, true); From b63db58e2fa5d6963db9c45df88e60060f0ff35f Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 2 May 2024 09:03:15 -0400 Subject: [PATCH 552/606] eventfs/tracing: Add callback for release of an eventfs_inode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Synthetic events create and destroy tracefs files when they are created and removed. The tracing subsystem has its own file descriptor representing the state of the events attached to the tracefs files. There's a race between the eventfs files and this file descriptor of the tracing system where the following can cause an issue: With two scripts 'A' and 'B' doing: Script 'A': echo "hello int aaa" > /sys/kernel/tracing/synthetic_events while : do echo 0 > /sys/kernel/tracing/events/synthetic/hello/enable done Script 'B': echo > /sys/kernel/tracing/synthetic_events Script 'A' creates a synthetic event "hello" and then just writes zero into its enable file. Script 'B' removes all synthetic events (including the newly created "hello" event). What happens is that the opening of the "enable" file has: { struct trace_event_file *file = inode->i_private; int ret; ret = tracing_check_open_get_tr(file->tr); [..] But deleting the events frees the "file" descriptor, and a "use after free" happens with the dereference at "file->tr". The file descriptor does have a reference counter, but there needs to be a way to decrement it from the eventfs when the eventfs_inode is removed that represents this file descriptor. Add an optional "release" callback to the eventfs_entry array structure, that gets called when the eventfs file is about to be removed. This allows for the creating on the eventfs file to increment the tracing file descriptor ref counter. When the eventfs file is deleted, it can call the release function that will call the put function for the tracing file descriptor. This will protect the tracing file from being freed while a eventfs file that references it is being opened. Link: https://lore.kernel.org/linux-trace-kernel/20240426073410.17154-1-Tze-nan.Wu@mediatek.com/ Link: https://lore.kernel.org/linux-trace-kernel/20240502090315.448cba46@gandalf.local.home Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Fixes: 5790b1fb3d672 ("eventfs: Remove eventfs_file and just use eventfs_inode") Reported-by: Tze-nan wu Tested-by: Tze-nan Wu (吳澤南) Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/event_inode.c | 23 +++++++++++++++++++++-- include/linux/tracefs.h | 3 +++ kernel/trace/trace_events.c | 12 ++++++++++++ 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index 894c6ca1e5002..f5510e26f0f6e 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -84,10 +84,17 @@ enum { static void release_ei(struct kref *ref) { struct eventfs_inode *ei = container_of(ref, struct eventfs_inode, kref); + const struct eventfs_entry *entry; struct eventfs_root_inode *rei; WARN_ON_ONCE(!ei->is_freed); + for (int i = 0; i < ei->nr_entries; i++) { + entry = &ei->entries[i]; + if (entry->release) + entry->release(entry->name, ei->data); + } + kfree(ei->entry_attrs); kfree_const(ei->name); if (ei->is_events) { @@ -112,6 +119,18 @@ static inline void free_ei(struct eventfs_inode *ei) } } +/* + * Called when creation of an ei fails, do not call release() functions. + */ +static inline void cleanup_ei(struct eventfs_inode *ei) +{ + if (ei) { + /* Set nr_entries to 0 to prevent release() function being called */ + ei->nr_entries = 0; + free_ei(ei); + } +} + static inline struct eventfs_inode *get_ei(struct eventfs_inode *ei) { if (ei) @@ -734,7 +753,7 @@ struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode /* Was the parent freed? */ if (list_empty(&ei->list)) { - free_ei(ei); + cleanup_ei(ei); ei = NULL; } return ei; @@ -835,7 +854,7 @@ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry return ei; fail: - free_ei(ei); + cleanup_ei(ei); tracefs_failed_creating(dentry); return ERR_PTR(-ENOMEM); } diff --git a/include/linux/tracefs.h b/include/linux/tracefs.h index 7a5fe17b6bf9c..d03f746587167 100644 --- a/include/linux/tracefs.h +++ b/include/linux/tracefs.h @@ -62,6 +62,8 @@ struct eventfs_file; typedef int (*eventfs_callback)(const char *name, umode_t *mode, void **data, const struct file_operations **fops); +typedef void (*eventfs_release)(const char *name, void *data); + /** * struct eventfs_entry - dynamically created eventfs file call back handler * @name: Then name of the dynamic file in an eventfs directory @@ -72,6 +74,7 @@ typedef int (*eventfs_callback)(const char *name, umode_t *mode, void **data, struct eventfs_entry { const char *name; eventfs_callback callback; + eventfs_release release; }; struct eventfs_inode; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 52f75c36bbca4..6ef29eba90ceb 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -2552,6 +2552,14 @@ static int event_callback(const char *name, umode_t *mode, void **data, return 0; } +/* The file is incremented on creation and freeing the enable file decrements it */ +static void event_release(const char *name, void *data) +{ + struct trace_event_file *file = data; + + event_file_put(file); +} + static int event_create_dir(struct eventfs_inode *parent, struct trace_event_file *file) { @@ -2566,6 +2574,7 @@ event_create_dir(struct eventfs_inode *parent, struct trace_event_file *file) { .name = "enable", .callback = event_callback, + .release = event_release, }, { .name = "filter", @@ -2634,6 +2643,9 @@ event_create_dir(struct eventfs_inode *parent, struct trace_event_file *file) return ret; } + /* Gets decremented on freeing of the "enable" file */ + event_file_get(file); + return 0; } From ee4e0379475e4fe723986ae96293e465014fa8d9 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 2 May 2024 16:08:22 -0400 Subject: [PATCH 553/606] eventfs: Free all of the eventfs_inode after RCU The freeing of eventfs_inode via a kfree_rcu() callback. But the content of the eventfs_inode was being freed after the last kref. This is dangerous, as changes are being made that can access the content of an eventfs_inode from an RCU loop. Instead of using kfree_rcu() use call_rcu() that calls a function to do all the freeing of the eventfs_inode after a RCU grace period has expired. Link: https://lore.kernel.org/linux-trace-kernel/20240502200905.370261163@goodmis.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Fixes: 43aa6f97c2d03 ("eventfs: Get rid of dentry pointers without refcounts") Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/event_inode.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index f5510e26f0f6e..cc8b838bbe626 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -73,6 +73,21 @@ enum { #define EVENTFS_MODE_MASK (EVENTFS_SAVE_MODE - 1) +static void free_ei_rcu(struct rcu_head *rcu) +{ + struct eventfs_inode *ei = container_of(rcu, struct eventfs_inode, rcu); + struct eventfs_root_inode *rei; + + kfree(ei->entry_attrs); + kfree_const(ei->name); + if (ei->is_events) { + rei = get_root_inode(ei); + kfree(rei); + } else { + kfree(ei); + } +} + /* * eventfs_inode reference count management. * @@ -85,7 +100,6 @@ static void release_ei(struct kref *ref) { struct eventfs_inode *ei = container_of(ref, struct eventfs_inode, kref); const struct eventfs_entry *entry; - struct eventfs_root_inode *rei; WARN_ON_ONCE(!ei->is_freed); @@ -95,14 +109,7 @@ static void release_ei(struct kref *ref) entry->release(entry->name, ei->data); } - kfree(ei->entry_attrs); - kfree_const(ei->name); - if (ei->is_events) { - rei = get_root_inode(ei); - kfree_rcu(rei, ei.rcu); - } else { - kfree_rcu(ei, rcu); - } + call_rcu(&ei->rcu, free_ei_rcu); } static inline void put_ei(struct eventfs_inode *ei) From baa23a8d4360d981a49913841a726edede5cdd54 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 2 May 2024 16:08:23 -0400 Subject: [PATCH 554/606] tracefs: Reset permissions on remount if permissions are options There's an inconsistency with the way permissions are handled in tracefs. Because the permissions are generated when accessed, they default to the root inode's permission if they were never set by the user. If the user sets the permissions, then a flag is set and the permissions are saved via the inode (for tracefs files) or an internal attribute field (for eventfs). But if a remount happens that specify the permissions, all the files that were not changed by the user gets updated, but the ones that were are not. If the user were to remount the file system with a given permission, then all files and directories within that file system should be updated. This can cause security issues if a file's permission was updated but the admin forgot about it. They could incorrectly think that remounting with permissions set would update all files, but miss some. For example: # cd /sys/kernel/tracing # chgrp 1002 current_tracer # ls -l [..] -rw-r----- 1 root root 0 May 1 21:25 buffer_size_kb -rw-r----- 1 root root 0 May 1 21:25 buffer_subbuf_size_kb -r--r----- 1 root root 0 May 1 21:25 buffer_total_size_kb -rw-r----- 1 root lkp 0 May 1 21:25 current_tracer -rw-r----- 1 root root 0 May 1 21:25 dynamic_events -r--r----- 1 root root 0 May 1 21:25 dyn_ftrace_total_info -r--r----- 1 root root 0 May 1 21:25 enabled_functions Where current_tracer now has group "lkp". # mount -o remount,gid=1001 . # ls -l -rw-r----- 1 root tracing 0 May 1 21:25 buffer_size_kb -rw-r----- 1 root tracing 0 May 1 21:25 buffer_subbuf_size_kb -r--r----- 1 root tracing 0 May 1 21:25 buffer_total_size_kb -rw-r----- 1 root lkp 0 May 1 21:25 current_tracer -rw-r----- 1 root tracing 0 May 1 21:25 dynamic_events -r--r----- 1 root tracing 0 May 1 21:25 dyn_ftrace_total_info -r--r----- 1 root tracing 0 May 1 21:25 enabled_functions Everything changed but the "current_tracer". Add a new link list that keeps track of all the tracefs_inodes which has the permission flags that tell if the file/dir should use the root inode's permission or not. Then on remount, clear all the flags so that the default behavior of using the root inode's permission is done for all files and directories. Link: https://lore.kernel.org/linux-trace-kernel/20240502200905.529542160@goodmis.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Fixes: 8186fff7ab649 ("tracefs/eventfs: Use root and instance inodes as default ownership") Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/event_inode.c | 29 ++++++++++++++++++ fs/tracefs/inode.c | 65 +++++++++++++++++++++++++++++++++++++++- fs/tracefs/internal.h | 7 ++++- 3 files changed, 99 insertions(+), 2 deletions(-) diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index cc8b838bbe626..15a2a9c3c62b0 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -308,6 +308,35 @@ static const struct file_operations eventfs_file_operations = { .llseek = generic_file_llseek, }; +/* + * On a remount of tracefs, if UID or GID options are set, then + * the mount point inode permissions should be used. + * Reset the saved permission flags appropriately. + */ +void eventfs_remount(struct tracefs_inode *ti, bool update_uid, bool update_gid) +{ + struct eventfs_inode *ei = ti->private; + + if (!ei) + return; + + if (update_uid) + ei->attr.mode &= ~EVENTFS_SAVE_UID; + + if (update_gid) + ei->attr.mode &= ~EVENTFS_SAVE_GID; + + if (!ei->entry_attrs) + return; + + for (int i = 0; i < ei->nr_entries; i++) { + if (update_uid) + ei->entry_attrs[i].mode &= ~EVENTFS_SAVE_UID; + if (update_gid) + ei->entry_attrs[i].mode &= ~EVENTFS_SAVE_GID; + } +} + /* Return the evenfs_inode of the "events" directory */ static struct eventfs_inode *eventfs_find_events(struct dentry *dentry) { diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index 5545e6bf7d26c..52aa14bd29946 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -30,20 +30,47 @@ static struct vfsmount *tracefs_mount; static int tracefs_mount_count; static bool tracefs_registered; +/* + * Keep track of all tracefs_inodes in order to update their + * flags if necessary on a remount. + */ +static DEFINE_SPINLOCK(tracefs_inode_lock); +static LIST_HEAD(tracefs_inodes); + static struct inode *tracefs_alloc_inode(struct super_block *sb) { struct tracefs_inode *ti; + unsigned long flags; ti = kmem_cache_alloc(tracefs_inode_cachep, GFP_KERNEL); if (!ti) return NULL; + spin_lock_irqsave(&tracefs_inode_lock, flags); + list_add_rcu(&ti->list, &tracefs_inodes); + spin_unlock_irqrestore(&tracefs_inode_lock, flags); + return &ti->vfs_inode; } +static void tracefs_free_inode_rcu(struct rcu_head *rcu) +{ + struct tracefs_inode *ti; + + ti = container_of(rcu, struct tracefs_inode, rcu); + kmem_cache_free(tracefs_inode_cachep, ti); +} + static void tracefs_free_inode(struct inode *inode) { - kmem_cache_free(tracefs_inode_cachep, get_tracefs(inode)); + struct tracefs_inode *ti = get_tracefs(inode); + unsigned long flags; + + spin_lock_irqsave(&tracefs_inode_lock, flags); + list_del_rcu(&ti->list); + spin_unlock_irqrestore(&tracefs_inode_lock, flags); + + call_rcu(&ti->rcu, tracefs_free_inode_rcu); } static ssize_t default_read_file(struct file *file, char __user *buf, @@ -313,6 +340,8 @@ static int tracefs_apply_options(struct super_block *sb, bool remount) struct tracefs_fs_info *fsi = sb->s_fs_info; struct inode *inode = d_inode(sb->s_root); struct tracefs_mount_opts *opts = &fsi->mount_opts; + struct tracefs_inode *ti; + bool update_uid, update_gid; umode_t tmp_mode; /* @@ -332,6 +361,25 @@ static int tracefs_apply_options(struct super_block *sb, bool remount) if (!remount || opts->opts & BIT(Opt_gid)) inode->i_gid = opts->gid; + if (remount && (opts->opts & BIT(Opt_uid) || opts->opts & BIT(Opt_gid))) { + + update_uid = opts->opts & BIT(Opt_uid); + update_gid = opts->opts & BIT(Opt_gid); + + rcu_read_lock(); + list_for_each_entry_rcu(ti, &tracefs_inodes, list) { + if (update_uid) + ti->flags &= ~TRACEFS_UID_PERM_SET; + + if (update_gid) + ti->flags &= ~TRACEFS_GID_PERM_SET; + + if (ti->flags & TRACEFS_EVENT_INODE) + eventfs_remount(ti, update_uid, update_gid); + } + rcu_read_unlock(); + } + return 0; } @@ -398,7 +446,22 @@ static int tracefs_d_revalidate(struct dentry *dentry, unsigned int flags) return !(ei && ei->is_freed); } +static void tracefs_d_iput(struct dentry *dentry, struct inode *inode) +{ + struct tracefs_inode *ti = get_tracefs(inode); + + /* + * This inode is being freed and cannot be used for + * eventfs. Clear the flag so that it doesn't call into + * eventfs during the remount flag updates. The eventfs_inode + * gets freed after an RCU cycle, so the content will still + * be safe if the iteration is going on now. + */ + ti->flags &= ~TRACEFS_EVENT_INODE; +} + static const struct dentry_operations tracefs_dentry_operations = { + .d_iput = tracefs_d_iput, .d_revalidate = tracefs_d_revalidate, .d_release = tracefs_d_release, }; diff --git a/fs/tracefs/internal.h b/fs/tracefs/internal.h index 15c26f9aaad4d..29f0c999975b6 100644 --- a/fs/tracefs/internal.h +++ b/fs/tracefs/internal.h @@ -11,8 +11,12 @@ enum { }; struct tracefs_inode { - struct inode vfs_inode; + union { + struct inode vfs_inode; + struct rcu_head rcu; + }; /* The below gets initialized with memset_after(ti, 0, vfs_inode) */ + struct list_head list; unsigned long flags; void *private; }; @@ -73,6 +77,7 @@ struct dentry *tracefs_end_creating(struct dentry *dentry); struct dentry *tracefs_failed_creating(struct dentry *dentry); struct inode *tracefs_get_inode(struct super_block *sb); +void eventfs_remount(struct tracefs_inode *ti, bool update_uid, bool update_gid); void eventfs_d_release(struct dentry *dentry); #endif /* _TRACEFS_INTERNAL_H */ From 6599bd5517be66c8344f869f3ca3a91bc10f2b9e Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 2 May 2024 16:08:24 -0400 Subject: [PATCH 555/606] tracefs: Still use mount point as default permissions for instances If the instances directory's permissions were never change, then have it and its children use the mount point permissions as the default. Currently, the permissions of instance directories are determined by the instance directory's permissions itself. But if the tracefs file system is remounted and changes the permissions, the instance directory and its children should use the new permission. But because both the instance directory and its children use the instance directory's inode for permissions, it misses the update. To demonstrate this: # cd /sys/kernel/tracing/ # mkdir instances/foo # ls -ld instances/foo drwxr-x--- 5 root root 0 May 1 19:07 instances/foo # ls -ld instances drwxr-x--- 3 root root 0 May 1 18:57 instances # ls -ld current_tracer -rw-r----- 1 root root 0 May 1 18:57 current_tracer # mount -o remount,gid=1002 . # ls -ld instances drwxr-x--- 3 root root 0 May 1 18:57 instances # ls -ld instances/foo/ drwxr-x--- 5 root root 0 May 1 19:07 instances/foo/ # ls -ld current_tracer -rw-r----- 1 root lkp 0 May 1 18:57 current_tracer Notice that changing the group id to that of "lkp" did not affect the instances directory nor its children. It should have been: # ls -ld current_tracer -rw-r----- 1 root root 0 May 1 19:19 current_tracer # ls -ld instances/foo/ drwxr-x--- 5 root root 0 May 1 19:25 instances/foo/ # ls -ld instances drwxr-x--- 3 root root 0 May 1 19:19 instances # mount -o remount,gid=1002 . # ls -ld current_tracer -rw-r----- 1 root lkp 0 May 1 19:19 current_tracer # ls -ld instances drwxr-x--- 3 root lkp 0 May 1 19:19 instances # ls -ld instances/foo/ drwxr-x--- 5 root lkp 0 May 1 19:25 instances/foo/ Where all files were updated by the remount gid update. Link: https://lore.kernel.org/linux-trace-kernel/20240502200905.686838327@goodmis.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Fixes: 8186fff7ab649 ("tracefs/eventfs: Use root and instance inodes as default ownership") Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/inode.c | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index 52aa14bd29946..417c840e64033 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -180,16 +180,39 @@ static void set_tracefs_inode_owner(struct inode *inode) { struct tracefs_inode *ti = get_tracefs(inode); struct inode *root_inode = ti->private; + kuid_t uid; + kgid_t gid; + + uid = root_inode->i_uid; + gid = root_inode->i_gid; + + /* + * If the root is not the mount point, then check the root's + * permissions. If it was never set, then default to the + * mount point. + */ + if (root_inode != d_inode(root_inode->i_sb->s_root)) { + struct tracefs_inode *rti; + + rti = get_tracefs(root_inode); + root_inode = d_inode(root_inode->i_sb->s_root); + + if (!(rti->flags & TRACEFS_UID_PERM_SET)) + uid = root_inode->i_uid; + + if (!(rti->flags & TRACEFS_GID_PERM_SET)) + gid = root_inode->i_gid; + } /* * If this inode has never been referenced, then update * the permissions to the superblock. */ if (!(ti->flags & TRACEFS_UID_PERM_SET)) - inode->i_uid = root_inode->i_uid; + inode->i_uid = uid; if (!(ti->flags & TRACEFS_GID_PERM_SET)) - inode->i_gid = root_inode->i_gid; + inode->i_gid = gid; } static int tracefs_permission(struct mnt_idmap *idmap, From d53891d348ac3eceaf48f4732a1f4f5c0e0a55ce Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 2 May 2024 16:08:25 -0400 Subject: [PATCH 556/606] eventfs: Do not differentiate the toplevel events directory The toplevel events directory is really no different than the events directory of instances. Having the two be different caused inconsistencies and made it harder to fix the permissions bugs. Make all events directories act the same. Link: https://lore.kernel.org/linux-trace-kernel/20240502200905.846448710@goodmis.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Fixes: 8186fff7ab649 ("tracefs/eventfs: Use root and instance inodes as default ownership") Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/event_inode.c | 29 ++++++++--------------------- fs/tracefs/internal.h | 7 +++---- 2 files changed, 11 insertions(+), 25 deletions(-) diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index 15a2a9c3c62b0..9dacf65c0b6ee 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -68,7 +68,6 @@ enum { EVENTFS_SAVE_MODE = BIT(16), EVENTFS_SAVE_UID = BIT(17), EVENTFS_SAVE_GID = BIT(18), - EVENTFS_TOPLEVEL = BIT(19), }; #define EVENTFS_MODE_MASK (EVENTFS_SAVE_MODE - 1) @@ -239,14 +238,10 @@ static int eventfs_set_attr(struct mnt_idmap *idmap, struct dentry *dentry, return ret; } -static void update_top_events_attr(struct eventfs_inode *ei, struct super_block *sb) +static void update_events_attr(struct eventfs_inode *ei, struct super_block *sb) { struct inode *root; - /* Only update if the "events" was on the top level */ - if (!ei || !(ei->attr.mode & EVENTFS_TOPLEVEL)) - return; - /* Get the tracefs root inode. */ root = d_inode(sb->s_root); ei->attr.uid = root->i_uid; @@ -259,10 +254,10 @@ static void set_top_events_ownership(struct inode *inode) struct eventfs_inode *ei = ti->private; /* The top events directory doesn't get automatically updated */ - if (!ei || !ei->is_events || !(ei->attr.mode & EVENTFS_TOPLEVEL)) + if (!ei || !ei->is_events) return; - update_top_events_attr(ei, inode->i_sb); + update_events_attr(ei, inode->i_sb); if (!(ei->attr.mode & EVENTFS_SAVE_UID)) inode->i_uid = ei->attr.uid; @@ -291,7 +286,7 @@ static int eventfs_permission(struct mnt_idmap *idmap, return generic_permission(idmap, inode, mask); } -static const struct inode_operations eventfs_root_dir_inode_operations = { +static const struct inode_operations eventfs_dir_inode_operations = { .lookup = eventfs_root_lookup, .setattr = eventfs_set_attr, .getattr = eventfs_get_attr, @@ -359,7 +354,7 @@ static struct eventfs_inode *eventfs_find_events(struct dentry *dentry) // Walk upwards until you find the events inode } while (!ei->is_events); - update_top_events_attr(ei, dentry->d_sb); + update_events_attr(ei, dentry->d_sb); return ei; } @@ -465,7 +460,7 @@ static struct dentry *lookup_dir_entry(struct dentry *dentry, update_inode_attr(dentry, inode, &ei->attr, S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO); - inode->i_op = &eventfs_root_dir_inode_operations; + inode->i_op = &eventfs_dir_inode_operations; inode->i_fop = &eventfs_file_operations; /* All directories will have the same inode number */ @@ -845,14 +840,6 @@ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry uid = d_inode(dentry->d_parent)->i_uid; gid = d_inode(dentry->d_parent)->i_gid; - /* - * If the events directory is of the top instance, then parent - * is NULL. Set the attr.mode to reflect this and its permissions will - * default to the tracefs root dentry. - */ - if (!parent) - ei->attr.mode = EVENTFS_TOPLEVEL; - /* This is used as the default ownership of the files and directories */ ei->attr.uid = uid; ei->attr.gid = gid; @@ -861,13 +848,13 @@ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry INIT_LIST_HEAD(&ei->list); ti = get_tracefs(inode); - ti->flags |= TRACEFS_EVENT_INODE | TRACEFS_EVENT_TOP_INODE; + ti->flags |= TRACEFS_EVENT_INODE; ti->private = ei; inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; inode->i_uid = uid; inode->i_gid = gid; - inode->i_op = &eventfs_root_dir_inode_operations; + inode->i_op = &eventfs_dir_inode_operations; inode->i_fop = &eventfs_file_operations; dentry->d_fsdata = get_ei(ei); diff --git a/fs/tracefs/internal.h b/fs/tracefs/internal.h index 29f0c999975b6..f704d8348357e 100644 --- a/fs/tracefs/internal.h +++ b/fs/tracefs/internal.h @@ -4,10 +4,9 @@ enum { TRACEFS_EVENT_INODE = BIT(1), - TRACEFS_EVENT_TOP_INODE = BIT(2), - TRACEFS_GID_PERM_SET = BIT(3), - TRACEFS_UID_PERM_SET = BIT(4), - TRACEFS_INSTANCE_INODE = BIT(5), + TRACEFS_GID_PERM_SET = BIT(2), + TRACEFS_UID_PERM_SET = BIT(3), + TRACEFS_INSTANCE_INODE = BIT(4), }; struct tracefs_inode { From 22e61e15af731dbe46704c775d2335e56fcef4e9 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 2 May 2024 16:08:26 -0400 Subject: [PATCH 557/606] eventfs: Do not treat events directory different than other directories Treat the events directory the same as other directories when it comes to permissions. The events directory was considered different because it's dentry is persistent, whereas the other directory dentries are created when accessed. But the way tracefs now does its ownership by using the root dentry's permissions as the default permissions, the events directory can get out of sync when a remount is performed setting the group and user permissions. Remove the special case for the events directory on setting the attributes. This allows the updates caused by remount to work properly as well as simplifies the code. Link: https://lore.kernel.org/linux-trace-kernel/20240502200906.002923579@goodmis.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Fixes: 8186fff7ab649 ("tracefs/eventfs: Use root and instance inodes as default ownership") Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/event_inode.c | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index 9dacf65c0b6ee..6e08405892ae8 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -206,21 +206,7 @@ static int eventfs_set_attr(struct mnt_idmap *idmap, struct dentry *dentry, * determined by the parent directory. */ if (dentry->d_inode->i_mode & S_IFDIR) { - /* - * The events directory dentry is never freed, unless its - * part of an instance that is deleted. It's attr is the - * default for its child files and directories. - * Do not update it. It's not used for its own mode or ownership. - */ - if (ei->is_events) { - /* But it still needs to know if it was modified */ - if (iattr->ia_valid & ATTR_UID) - ei->attr.mode |= EVENTFS_SAVE_UID; - if (iattr->ia_valid & ATTR_GID) - ei->attr.mode |= EVENTFS_SAVE_GID; - } else { - update_attr(&ei->attr, iattr); - } + update_attr(&ei->attr, iattr); } else { name = dentry->d_name.name; From d57cf30c4c07837799edec949102b0adf58bae79 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 2 May 2024 16:08:27 -0400 Subject: [PATCH 558/606] eventfs: Have "events" directory get permissions from its parent The events directory gets its permissions from the root inode. But this can cause an inconsistency if the instances directory changes its permissions, as the permissions of the created directories under it should inherit the permissions of the instances directory when directories under it are created. Currently the behavior is: # cd /sys/kernel/tracing # chgrp 1002 instances # mkdir instances/foo # ls -l instances/foo [..] -r--r----- 1 root lkp 0 May 1 18:55 buffer_total_size_kb -rw-r----- 1 root lkp 0 May 1 18:55 current_tracer -rw-r----- 1 root lkp 0 May 1 18:55 error_log drwxr-xr-x 1 root root 0 May 1 18:55 events --w------- 1 root lkp 0 May 1 18:55 free_buffer drwxr-x--- 2 root lkp 0 May 1 18:55 options drwxr-x--- 10 root lkp 0 May 1 18:55 per_cpu -rw-r----- 1 root lkp 0 May 1 18:55 set_event All the files and directories under "foo" has the "lkp" group except the "events" directory. That's because its getting its default value from the mount point instead of its parent. Have the "events" directory make its default value based on its parent's permissions. That now gives: # ls -l instances/foo [..] -rw-r----- 1 root lkp 0 May 1 21:16 buffer_subbuf_size_kb -r--r----- 1 root lkp 0 May 1 21:16 buffer_total_size_kb -rw-r----- 1 root lkp 0 May 1 21:16 current_tracer -rw-r----- 1 root lkp 0 May 1 21:16 error_log drwxr-xr-x 1 root lkp 0 May 1 21:16 events --w------- 1 root lkp 0 May 1 21:16 free_buffer drwxr-x--- 2 root lkp 0 May 1 21:16 options drwxr-x--- 10 root lkp 0 May 1 21:16 per_cpu -rw-r----- 1 root lkp 0 May 1 21:16 set_event Link: https://lore.kernel.org/linux-trace-kernel/20240502200906.161887248@goodmis.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Fixes: 8186fff7ab649 ("tracefs/eventfs: Use root and instance inodes as default ownership") Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/event_inode.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index 6e08405892ae8..a878cea70f4c9 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -37,6 +37,7 @@ static DEFINE_MUTEX(eventfs_mutex); struct eventfs_root_inode { struct eventfs_inode ei; + struct inode *parent_inode; struct dentry *events_dir; }; @@ -226,12 +227,23 @@ static int eventfs_set_attr(struct mnt_idmap *idmap, struct dentry *dentry, static void update_events_attr(struct eventfs_inode *ei, struct super_block *sb) { - struct inode *root; + struct eventfs_root_inode *rei; + struct inode *parent; + + rei = get_root_inode(ei); + + /* Use the parent inode permissions unless root set its permissions */ + parent = rei->parent_inode; - /* Get the tracefs root inode. */ - root = d_inode(sb->s_root); - ei->attr.uid = root->i_uid; - ei->attr.gid = root->i_gid; + if (rei->ei.attr.mode & EVENTFS_SAVE_UID) + ei->attr.uid = rei->ei.attr.uid; + else + ei->attr.uid = parent->i_uid; + + if (rei->ei.attr.mode & EVENTFS_SAVE_GID) + ei->attr.gid = rei->ei.attr.gid; + else + ei->attr.gid = parent->i_gid; } static void set_top_events_ownership(struct inode *inode) @@ -817,6 +829,7 @@ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry // Note: we have a ref to the dentry from tracefs_start_creating() rei = get_root_inode(ei); rei->events_dir = dentry; + rei->parent_inode = d_inode(dentry->d_sb->s_root); ei->entries = entries; ei->nr_entries = size; @@ -826,10 +839,15 @@ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry uid = d_inode(dentry->d_parent)->i_uid; gid = d_inode(dentry->d_parent)->i_gid; - /* This is used as the default ownership of the files and directories */ ei->attr.uid = uid; ei->attr.gid = gid; + /* + * When the "events" directory is created, it takes on the + * permissions of its parent. But can be reset on remount. + */ + ei->attr.mode |= EVENTFS_SAVE_UID | EVENTFS_SAVE_GID; + INIT_LIST_HEAD(&ei->children); INIT_LIST_HEAD(&ei->list); From 4efaa5acf0a1d2b5947f98abb3acf8bfd966422b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 3 May 2024 13:36:09 -0700 Subject: [PATCH 559/606] epoll: be better about file lifetimes epoll can call out to vfs_poll() with a file pointer that may race with the last 'fput()'. That would make f_count go down to zero, and while the ep->mtx locking means that the resulting file pointer tear-down will be blocked until the poll returns, it means that f_count is already dead, and any use of it won't actually get a reference to the file any more: it's dead regardless. Make sure we have a valid ref on the file pointer before we call down to vfs_poll() from the epoll routines. Link: https://lore.kernel.org/lkml/0000000000002d631f0615918f1e@google.com/ Reported-by: syzbot+045b454ab35fd82a35fb@syzkaller.appspotmail.com Reviewed-by: Jens Axboe Signed-off-by: Linus Torvalds --- fs/eventpoll.c | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 882b89edc52ad..f53ca4f7fcedd 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -979,6 +979,34 @@ static __poll_t __ep_eventpoll_poll(struct file *file, poll_table *wait, int dep return res; } +/* + * The ffd.file pointer may be in the process of being torn down due to + * being closed, but we may not have finished eventpoll_release() yet. + * + * Normally, even with the atomic_long_inc_not_zero, the file may have + * been free'd and then gotten re-allocated to something else (since + * files are not RCU-delayed, they are SLAB_TYPESAFE_BY_RCU). + * + * But for epoll, users hold the ep->mtx mutex, and as such any file in + * the process of being free'd will block in eventpoll_release_file() + * and thus the underlying file allocation will not be free'd, and the + * file re-use cannot happen. + * + * For the same reason we can avoid a rcu_read_lock() around the + * operation - 'ffd.file' cannot go away even if the refcount has + * reached zero (but we must still not call out to ->poll() functions + * etc). + */ +static struct file *epi_fget(const struct epitem *epi) +{ + struct file *file; + + file = epi->ffd.file; + if (!atomic_long_inc_not_zero(&file->f_count)) + file = NULL; + return file; +} + /* * Differs from ep_eventpoll_poll() in that internal callers already have * the ep->mtx so we need to start from depth=1, such that mutex_lock_nested() @@ -987,14 +1015,22 @@ static __poll_t __ep_eventpoll_poll(struct file *file, poll_table *wait, int dep static __poll_t ep_item_poll(const struct epitem *epi, poll_table *pt, int depth) { - struct file *file = epi->ffd.file; + struct file *file = epi_fget(epi); __poll_t res; + /* + * We could return EPOLLERR | EPOLLHUP or something, but let's + * treat this more as "file doesn't exist, poll didn't happen". + */ + if (!file) + return 0; + pt->_key = epi->event.events; if (!is_file_epoll(file)) res = vfs_poll(file, pt); else res = __ep_eventpoll_poll(file, pt, depth); + fput(file); return res & epi->event.events; } From dd5a440a31fae6e459c0d6271dddd62825505361 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 5 May 2024 14:06:01 -0700 Subject: [PATCH 560/606] Linux 6.9-rc7 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 40fb2ca6fe4c0..d51d411d44a82 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 9 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* From ad6c4b11f82e5b9416797c650b33eaf1d54c138a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 18 Apr 2024 21:27:42 -1000 Subject: [PATCH 561/606] scx: Fix build when !CONFIG_SCHED_CLASS_EXT The dummy scx_tick() was declared incorrectly causing build errors when !CONFIG_SCHED_CLASS_EXT. Fix it. (cherry picked from commit 90359c05e7e9daabba17fa89a13a2c17cb6b3e9b) --- kernel/sched/ext.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/ext.h b/kernel/sched/ext.h index d572be7a43243..b443be80a9815 100644 --- a/kernel/sched/ext.h +++ b/kernel/sched/ext.h @@ -95,7 +95,7 @@ static inline void scx_cancel_fork(struct task_struct *p) {} static inline int scx_check_setscheduler(struct task_struct *p, int policy) { return 0; } static inline bool scx_can_stop_tick(struct rq *rq) { return true; } -static inline void scx_tick(void) {} +static inline void scx_tick(struct rq *rq) {} static inline void scx_next_task_picked(struct rq *rq, struct task_struct *p, const struct sched_class *active) {} static inline void init_sched_ext_class(void) {} From 1e1e73c7b98f6722c77577e787e01b8162171b41 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 23 Apr 2024 10:36:07 -1000 Subject: [PATCH 562/606] scx: Add barrier() to scx_kf_[dis]allow() kf_mask manipulations shouldn't be reordered when seen from CPU itself. Tell compiler about it. (cherry picked from commit b04eb3e804bc07a10f47b114a432041412d0c984) --- kernel/sched/ext.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index fc695b13efb05..7884688222e18 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -974,10 +974,12 @@ static __always_inline void scx_kf_allow(u32 mask) "invalid nesting current->scx.kf_mask=0x%x mask=0x%x\n", current->scx.kf_mask, mask); current->scx.kf_mask |= mask; + barrier(); } static void scx_kf_disallow(u32 mask) { + barrier(); current->scx.kf_mask &= ~mask; } From d3d59b4239c1fcaf6237d37175fb3992660d66bc Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 23 Apr 2024 10:36:07 -1000 Subject: [PATCH 563/606] scx: Add init_scx_entity() and initialize scx->tasks_node too Factor out sched_ext_entity initialization into init_scx_entity() which memsets it to zero at the beginning to avoid needing 0 inits. (cherry picked from commit 9400d6ebd1f71a01e8b3f47ffeb585f9bcbc03c8) --- include/linux/sched/ext.h | 3 ++- kernel/sched/core.c | 17 +---------------- kernel/sched/ext.c | 18 ++++++++++++++++++ kernel/sched/ext.h | 2 ++ 4 files changed, 23 insertions(+), 17 deletions(-) diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index dd113d4a95ada..798745cd0f3b2 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -183,10 +183,11 @@ struct sched_ext_entity { bool disallow; /* reject switching into SCX */ /* cold fields */ - struct list_head tasks_node; #ifdef CONFIG_EXT_GROUP_SCHED struct cgroup *cgrp_moving_from; #endif + /* must be the last field, see init_scx_entity() */ + struct list_head tasks_node; }; void sched_ext_free(struct task_struct *p); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index ae7039676d313..68d32504a2dfe 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4584,22 +4584,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) p->rt.on_rq = 0; p->rt.on_list = 0; -#ifdef CONFIG_SCHED_CLASS_EXT - p->scx.dsq = NULL; - INIT_LIST_HEAD(&p->scx.dsq_node.fifo); - RB_CLEAR_NODE(&p->scx.dsq_node.priq); - p->scx.flags = 0; - p->scx.weight = 0; - p->scx.sticky_cpu = -1; - p->scx.holding_cpu = -1; - p->scx.kf_mask = 0; - atomic_long_set(&p->scx.ops_state, 0); - INIT_LIST_HEAD(&p->scx.runnable_node); - p->scx.runnable_at = jiffies; - p->scx.ddsp_dsq_id = SCX_DSQ_INVALID; - p->scx.ddsp_enq_flags = 0; - p->scx.slice = SCX_SLICE_DFL; -#endif + init_scx_entity(&p->scx); #ifdef CONFIG_PREEMPT_NOTIFIERS INIT_HLIST_HEAD(&p->preempt_notifiers); diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 7884688222e18..d114884c3bba2 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -3286,6 +3286,24 @@ static void scx_ops_exit_task(struct task_struct *p) scx_set_task_state(p, SCX_TASK_NONE); } +void init_scx_entity(struct sched_ext_entity *scx) +{ + /* + * init_idle() calls this function again after fork sequence is + * complete. Don't touch ->tasks_node as it's already linked. + */ + memset(scx, 0, offsetof(struct sched_ext_entity, tasks_node)); + + INIT_LIST_HEAD(&scx->dsq_node.fifo); + RB_CLEAR_NODE(&scx->dsq_node.priq); + scx->sticky_cpu = -1; + scx->holding_cpu = -1; + INIT_LIST_HEAD(&scx->runnable_node); + scx->runnable_at = jiffies; + scx->ddsp_dsq_id = SCX_DSQ_INVALID; + scx->slice = SCX_SLICE_DFL; +} + void scx_pre_fork(struct task_struct *p) { /* diff --git a/kernel/sched/ext.h b/kernel/sched/ext.h index b443be80a9815..7e47723cd6704 100644 --- a/kernel/sched/ext.h +++ b/kernel/sched/ext.h @@ -34,6 +34,7 @@ static inline bool task_on_scx(const struct task_struct *p) } bool task_should_scx(struct task_struct *p); +void init_scx_entity(struct sched_ext_entity *scx); void scx_pre_fork(struct task_struct *p); int scx_fork(struct task_struct *p); void scx_post_fork(struct task_struct *p); @@ -88,6 +89,7 @@ bool scx_prio_less(const struct task_struct *a, const struct task_struct *b, #define scx_switched_all() false static inline bool task_on_scx(const struct task_struct *p) { return false; } +static inline void init_scx_entity(struct sched_ext_entity *scx) {} static inline void scx_pre_fork(struct task_struct *p) {} static inline int scx_fork(struct task_struct *p) { return 0; } static inline void scx_post_fork(struct task_struct *p) {} From 3065432fe1606987cb9a3eb98c8dab0b14569431 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 23 Apr 2024 10:36:07 -1000 Subject: [PATCH 564/606] scx: Cosmetic / trivial changes - Relocate scx_task_iter definition so that it's together with iter functions. - Add find_user_dsq() instead of open-coding rhashtable_lookup_fast(). - Relocate scx_bpf_nr_cpu_ids() close to its cpumask friends. - Reorder kfunc decls in scx/common.bpf.h to match kernel def order. - Remove unnecessary 0 inits from init_task initializer. While init_task initializers have other 0 inits, it's far from being consistent and e.g. rb_node doesn't even have usable initializer. Let's just initialize what's necessary. No functional changes. (cherry picked from commit a7df32bf1d8f9aebd16493b91653cf4afeca52ee) --- init/init_task.c | 3 -- kernel/sched/ext.c | 54 +++++++++++--------- tools/sched_ext/include/scx/common.bpf.h | 65 ++++++++++++------------ 3 files changed, 62 insertions(+), 60 deletions(-) diff --git a/init/init_task.c b/init/init_task.c index a32c606cc81ba..377b51e9c7ed9 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -102,14 +102,11 @@ struct task_struct init_task __aligned(L1_CACHE_BYTES) = { #ifdef CONFIG_SCHED_CLASS_EXT .scx = { .dsq_node.fifo = LIST_HEAD_INIT(init_task.scx.dsq_node.fifo), - .flags = 0, .sticky_cpu = -1, .holding_cpu = -1, - .ops_state = ATOMIC_INIT(0), .runnable_node = LIST_HEAD_INIT(init_task.scx.runnable_node), .runnable_at = INITIAL_JIFFIES, .ddsp_dsq_id = SCX_DSQ_INVALID, - .ddsp_enq_flags = 0, .slice = SCX_SLICE_DFL, }, #endif diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index d114884c3bba2..c57610d1583bc 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -929,13 +929,6 @@ static __printf(3, 4) void scx_ops_exit_kind(enum scx_exit_kind kind, #define scx_ops_error(fmt, args...) \ scx_ops_error_kind(SCX_EXIT_ERROR, fmt, ##args) -struct scx_task_iter { - struct sched_ext_entity cursor; - struct task_struct *locked; - struct rq *rq; - struct rq_flags rf; -}; - #define SCX_HAS_OP(op) static_branch_likely(&scx_has_op[SCX_OP_IDX(op)]) static long jiffies_delta_msecs(unsigned long at, unsigned long now) @@ -1099,6 +1092,13 @@ static __always_inline bool scx_kf_allowed_on_arg_tasks(u32 mask, return true; } +struct scx_task_iter { + struct sched_ext_entity cursor; + struct task_struct *locked; + struct rq *rq; + struct rq_flags rf; +}; + /** * scx_task_iter_init - Initialize a task iterator * @iter: iterator to init @@ -1533,6 +1533,11 @@ static void dispatch_dequeue(struct scx_rq *scx_rq, struct task_struct *p) raw_spin_unlock(&dsq->lock); } +static struct scx_dispatch_q *find_user_dsq(u64 dsq_id) +{ + return rhashtable_lookup_fast(&dsq_hash, &dsq_id, dsq_hash_params); +} + static struct scx_dispatch_q *find_non_local_dsq(u64 dsq_id) { lockdep_assert(rcu_read_lock_any_held()); @@ -1540,8 +1545,7 @@ static struct scx_dispatch_q *find_non_local_dsq(u64 dsq_id) if (dsq_id == SCX_DSQ_GLOBAL) return &scx_dsq_global; else - return rhashtable_lookup_fast(&dsq_hash, &dsq_id, - dsq_hash_params); + return find_user_dsq(dsq_id); } static struct scx_dispatch_q *find_dsq_for_dispatch(struct rq *rq, u64 dsq_id, @@ -2089,7 +2093,7 @@ static bool consume_dispatch_q(struct rq *rq, struct rq_flags *rf, moved = move_task_to_local_dsq(rq, p, 0); double_unlock_balance(rq, task_rq); -#endif /* CONFIG_SMP */ +#endif /* CONFIG_SMP */ if (likely(moved)) return true; goto retry; @@ -2206,7 +2210,7 @@ dispatch_to_local_dsq(struct rq *rq, struct rq_flags *rf, u64 dsq_id, return dsp ? DTL_DISPATCHED : DTL_LOST; } -#endif /* CONFIG_SMP */ +#endif /* CONFIG_SMP */ scx_ops_error("SCX_DSQ_LOCAL[_ON] verdict target cpu %d not allowed for %s[%d]", cpu_of(dst_rq), p->comm, p->pid); @@ -3029,13 +3033,13 @@ static void rq_offline_scx(struct rq *rq, enum rq_onoff_reason reason) handle_hotplug(rq, false); } -#else /* !CONFIG_SMP */ +#else /* CONFIG_SMP */ static bool test_and_clear_cpu_idle(int cpu) { return false; } static s32 scx_pick_idle_cpu(const struct cpumask *cpus_allowed, u64 flags) { return -EBUSY; } static void reset_idle_masks(void) {} -#endif /* CONFIG_SMP */ +#endif /* CONFIG_SMP */ static bool check_rq_for_timeouts(struct rq *rq) { @@ -3734,7 +3738,7 @@ static void destroy_dsq(u64 dsq_id) rcu_read_lock(); - dsq = rhashtable_lookup_fast(&dsq_hash, &dsq_id, dsq_hash_params); + dsq = find_user_dsq(dsq_id); if (!dsq) goto out_unlock_rcu; @@ -5990,16 +5994,6 @@ __bpf_kfunc void scx_bpf_error_bstr(char *fmt, unsigned long long *data, data__sz); } -/** - * scx_bpf_nr_cpu_ids - Return the number of possible CPU IDs - * - * All valid CPU IDs in the system are smaller than the returned value. - */ -__bpf_kfunc u32 scx_bpf_nr_cpu_ids(void) -{ - return nr_cpu_ids; -} - /** * scx_bpf_cpuperf_cap - Query the maximum relative capacity of a CPU * @cpu: CPU of interest @@ -6071,6 +6065,16 @@ __bpf_kfunc void scx_bpf_cpuperf_set(u32 cpu, u32 perf) } } +/** + * scx_bpf_nr_cpu_ids - Return the number of possible CPU IDs + * + * All valid CPU IDs in the system are smaller than the returned value. + */ +__bpf_kfunc u32 scx_bpf_nr_cpu_ids(void) +{ + return nr_cpu_ids; +} + /** * scx_bpf_get_possible_cpumask - Get a referenced kptr to cpu_possible_mask */ @@ -6218,10 +6222,10 @@ __bpf_kfunc_end_defs(); BTF_KFUNCS_START(scx_kfunc_ids_any) BTF_ID_FLAGS(func, scx_bpf_exit_bstr, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, scx_bpf_error_bstr, KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, scx_bpf_nr_cpu_ids) BTF_ID_FLAGS(func, scx_bpf_cpuperf_cap) BTF_ID_FLAGS(func, scx_bpf_cpuperf_cur) BTF_ID_FLAGS(func, scx_bpf_cpuperf_set) +BTF_ID_FLAGS(func, scx_bpf_nr_cpu_ids) BTF_ID_FLAGS(func, scx_bpf_get_possible_cpumask, KF_ACQUIRE) BTF_ID_FLAGS(func, scx_bpf_get_online_cpumask, KF_ACQUIRE) BTF_ID_FLAGS(func, scx_bpf_put_cpumask, KF_RELEASE) diff --git a/tools/sched_ext/include/scx/common.bpf.h b/tools/sched_ext/include/scx/common.bpf.h index 44eb4267d3ac2..a309be2c910b9 100644 --- a/tools/sched_ext/include/scx/common.bpf.h +++ b/tools/sched_ext/include/scx/common.bpf.h @@ -29,6 +29,7 @@ static inline void ___vmlinux_h_sanity_check___(void) } s32 scx_bpf_create_dsq(u64 dsq_id, s32 node) __ksym; +s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, bool *is_idle) __ksym; void scx_bpf_dispatch(struct task_struct *p, u64 dsq_id, u64 slice, u64 enq_flags) __ksym; void scx_bpf_dispatch_vtime(struct task_struct *p, u64 dsq_id, u64 slice, u64 vtime, u64 enq_flags) __ksym; u32 scx_bpf_dispatch_nr_slots(void) __ksym; @@ -37,24 +38,22 @@ bool scx_bpf_consume(u64 dsq_id) __ksym; u32 scx_bpf_reenqueue_local(void) __ksym; void scx_bpf_kick_cpu(s32 cpu, u64 flags) __ksym; s32 scx_bpf_dsq_nr_queued(u64 dsq_id) __ksym; -bool scx_bpf_test_and_clear_cpu_idle(s32 cpu) __ksym; -s32 scx_bpf_pick_idle_cpu(const cpumask_t *cpus_allowed, u64 flags) __ksym; -s32 scx_bpf_pick_any_cpu(const cpumask_t *cpus_allowed, u64 flags) __ksym; void scx_bpf_destroy_dsq(u64 dsq_id) __ksym; -s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, bool *is_idle) __ksym; -void scx_bpf_exit_bstr(s64 exit_code, char *fmt, - unsigned long long *data, u32 data__sz) __ksym; +void scx_bpf_exit_bstr(s64 exit_code, char *fmt, unsigned long long *data, u32 data__sz) __ksym; void scx_bpf_error_bstr(char *fmt, unsigned long long *data, u32 data_len) __ksym; -u32 scx_bpf_nr_cpu_ids(void) __ksym; u32 scx_bpf_cpuperf_cap(s32 cpu) __ksym; u32 scx_bpf_cpuperf_cur(s32 cpu) __ksym; void scx_bpf_cpuperf_set(s32 cpu, u32 perf) __ksym; +u32 scx_bpf_nr_cpu_ids(void) __ksym; const struct cpumask *scx_bpf_get_possible_cpumask(void) __ksym; const struct cpumask *scx_bpf_get_online_cpumask(void) __ksym; void scx_bpf_put_cpumask(const struct cpumask *cpumask) __ksym; const struct cpumask *scx_bpf_get_idle_cpumask(void) __ksym; const struct cpumask *scx_bpf_get_idle_smtmask(void) __ksym; void scx_bpf_put_idle_cpumask(const struct cpumask *cpumask) __ksym; +bool scx_bpf_test_and_clear_cpu_idle(s32 cpu) __ksym; +s32 scx_bpf_pick_idle_cpu(const cpumask_t *cpus_allowed, u64 flags) __ksym; +s32 scx_bpf_pick_any_cpu(const cpumask_t *cpus_allowed, u64 flags) __ksym; bool scx_bpf_task_running(const struct task_struct *p) __ksym; s32 scx_bpf_task_cpu(const struct task_struct *p) __ksym; struct cgroup *scx_bpf_task_cgroup(struct task_struct *p) __ksym; @@ -67,18 +66,18 @@ void ___scx_bpf_exit_format_checker(const char *fmt, ...) {} * bstr exit kfuncs. Callers to this function should use ___fmt and ___param to * refer to the initialized list of inputs to the bstr kfunc. */ -#define scx_bpf_exit_preamble(fmt, args...) \ - static char ___fmt[] = fmt; \ - /* \ - * Note that __param[] must have at least one \ - * element to keep the verifier happy. \ - */ \ - unsigned long long ___param[___bpf_narg(args) ?: 1] = {}; \ - \ - _Pragma("GCC diagnostic push") \ - _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ - ___bpf_fill(___param, args); \ - _Pragma("GCC diagnostic pop") \ +#define scx_bpf_exit_preamble(fmt, args...) \ + static char ___fmt[] = fmt; \ + /* \ + * Note that __param[] must have at least one \ + * element to keep the verifier happy. \ + */ \ + unsigned long long ___param[___bpf_narg(args) ?: 1] = {}; \ + \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ + ___bpf_fill(___param, args); \ + _Pragma("GCC diagnostic pop") \ /* * scx_bpf_exit() wraps the scx_bpf_exit_bstr() kfunc with variadic arguments @@ -159,7 +158,8 @@ BPF_PROG(name, ##args) * be a pointer to the area. Use `MEMBER_VPTR(*ptr, .member)` instead of * `MEMBER_VPTR(ptr, ->member)`. */ -#define MEMBER_VPTR(base, member) (typeof((base) member) *)({ \ +#define MEMBER_VPTR(base, member) (typeof((base) member) *) \ +({ \ u64 __base = (u64)&(base); \ u64 __addr = (u64)&((base) member) - __base; \ _Static_assert(sizeof(base) >= sizeof((base) member), \ @@ -189,18 +189,19 @@ BPF_PROG(name, ##args) * size of the array to compute the max, which will result in rejection by * the verifier. */ -#define ARRAY_ELEM_PTR(arr, i, n) (typeof(arr[i]) *)({ \ - u64 __base = (u64)arr; \ - u64 __addr = (u64)&(arr[i]) - __base; \ - asm volatile ( \ - "if %0 <= %[max] goto +2\n" \ - "%0 = 0\n" \ - "goto +1\n" \ - "%0 += %1\n" \ - : "+r"(__addr) \ - : "r"(__base), \ - [max]"r"(sizeof(arr[0]) * ((n) - 1))); \ - __addr; \ +#define ARRAY_ELEM_PTR(arr, i, n) (typeof(arr[i]) *) \ +({ \ + u64 __base = (u64)arr; \ + u64 __addr = (u64)&(arr[i]) - __base; \ + asm volatile ( \ + "if %0 <= %[max] goto +2\n" \ + "%0 = 0\n" \ + "goto +1\n" \ + "%0 += %1\n" \ + : "+r"(__addr) \ + : "r"(__base), \ + [max]"r"(sizeof(arr[0]) * ((n) - 1))); \ + __addr; \ }) /* From efbb192066762ba2040630aac2c7e1350ffae05a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 23 Apr 2024 10:36:07 -1000 Subject: [PATCH 565/606] scx: Make scx_kf_allowed_on_arg_tasks() actually use @mask The function takes @mask but ignores it and uses hard-coded __SCX_KF_RQ_LOCKED instead. Fix it. The only user was calling with __SCX_KF_RQ_LOCKED, so this doesn't cause any behavior changes. (cherry picked from commit 8a5b978ab69c62d508d646ebb03b413629fe0365) --- kernel/sched/ext.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index c57610d1583bc..4f356dd28bafb 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -1080,7 +1080,7 @@ static __always_inline bool scx_kf_allowed(u32 mask) static __always_inline bool scx_kf_allowed_on_arg_tasks(u32 mask, struct task_struct *p) { - if (!scx_kf_allowed(__SCX_KF_RQ_LOCKED)) + if (!scx_kf_allowed(mask)) return false; if (unlikely((p != current->scx.kf_tasks[0] && From f713c9c0ad1ab533c657ac4d1aa350068c3e3c58 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 23 Apr 2024 10:36:07 -1000 Subject: [PATCH 566/606] scx: Make scx_bpf_dsq_nr_queued() safe to call from any context It's currently marked ops-only but can actually be called from any context. Let's make it safe. (cherry picked from commit d6d0db6aa0e2d3fc0d950157bccea1addc976c0e) --- kernel/sched/ext.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 4f356dd28bafb..12ec19dace635 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -5738,28 +5738,36 @@ __bpf_kfunc void scx_bpf_kick_cpu(s32 cpu, u64 flags) * @dsq_id: id of the DSQ * * Return the number of tasks in the DSQ matching @dsq_id. If not found, - * -%ENOENT is returned. Can be called from any non-sleepable online scx_ops - * operations. + * -%ENOENT is returned. */ __bpf_kfunc s32 scx_bpf_dsq_nr_queued(u64 dsq_id) { struct scx_dispatch_q *dsq; + s32 ret; - lockdep_assert(rcu_read_lock_any_held()); + preempt_disable(); if (dsq_id == SCX_DSQ_LOCAL) { - return this_rq()->scx.local_dsq.nr; + ret = this_rq()->scx.local_dsq.nr; + goto out; } else if ((dsq_id & SCX_DSQ_LOCAL_ON) == SCX_DSQ_LOCAL_ON) { s32 cpu = dsq_id & SCX_DSQ_LOCAL_CPU_MASK; - if (ops_cpu_valid(cpu, NULL)) - return cpu_rq(cpu)->scx.local_dsq.nr; + if (ops_cpu_valid(cpu, NULL)) { + ret = cpu_rq(cpu)->scx.local_dsq.nr; + goto out; + } } else { dsq = find_non_local_dsq(dsq_id); - if (dsq) - return dsq->nr; + if (dsq) { + ret = dsq->nr; + goto out; + } } - return -ENOENT; + ret = -ENOENT; +out: + preempt_enable(); + return ret; } /** From 0b4c6f04e872d1c7b2c25cc448957bc554b8e669 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 23 Apr 2024 10:36:08 -1000 Subject: [PATCH 567/606] scx: Remove scx_kfunc_set_ops_only None of the functions in scx_kfunc_set_ops_only set are ops-only. scx_bpf_select_cpu_dfl() is select_cpu-only and the rest can be called from anywhere. Remove the set and move the kfuncs to scx_kfunc_set_any and the new scx_kfunc_set_select_cpu. No functional changes intended. (cherry picked from commit c636cf8b92b91f26d36df0388b7dba6adf9c7fe0) --- kernel/sched/ext.c | 258 +++++++++++++++++++++++---------------------- 1 file changed, 131 insertions(+), 127 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 12ec19dace635..6e2f4564d2e7b 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -5378,6 +5378,44 @@ static const struct btf_kfunc_id_set scx_kfunc_set_sleepable = { .set = &scx_kfunc_ids_sleepable, }; +__bpf_kfunc_start_defs(); + +/** + * scx_bpf_select_cpu_dfl - The default implementation of ops.select_cpu() + * @p: task_struct to select a CPU for + * @prev_cpu: CPU @p was on previously + * @wake_flags: %SCX_WAKE_* flags + * @is_idle: out parameter indicating whether the returned CPU is idle + * + * Can only be called from ops.select_cpu() if the built-in CPU selection is + * enabled - ops.update_idle() is missing or %SCX_OPS_KEEP_BUILTIN_IDLE is set. + * @p, @prev_cpu and @wake_flags match ops.select_cpu(). + * + * Returns the picked CPU with *@is_idle indicating whether the picked CPU is + * currently idle and thus a good candidate for direct dispatching. + */ +__bpf_kfunc s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, + u64 wake_flags, bool *is_idle) +{ + if (!scx_kf_allowed(SCX_KF_SELECT_CPU)) { + *is_idle = false; + return prev_cpu; + } + + return scx_select_cpu_dfl(p, prev_cpu, wake_flags, is_idle); +} + +__bpf_kfunc_end_defs(); + +BTF_KFUNCS_START(scx_kfunc_ids_select_cpu) +BTF_ID_FLAGS(func, scx_bpf_select_cpu_dfl, KF_RCU) +BTF_KFUNCS_END(scx_kfunc_ids_select_cpu) + +static const struct btf_kfunc_id_set scx_kfunc_set_select_cpu = { + .owner = THIS_MODULE, + .set = &scx_kfunc_ids_select_cpu, +}; + static bool scx_dispatch_preamble(struct task_struct *p, u64 enq_flags) { if (!scx_kf_allowed(SCX_KF_ENQUEUE | SCX_KF_DISPATCH)) @@ -5770,91 +5808,6 @@ __bpf_kfunc s32 scx_bpf_dsq_nr_queued(u64 dsq_id) return ret; } -/** - * scx_bpf_test_and_clear_cpu_idle - Test and clear @cpu's idle state - * @cpu: cpu to test and clear idle for - * - * Returns %true if @cpu was idle and its idle state was successfully cleared. - * %false otherwise. - * - * Unavailable if ops.update_idle() is implemented and - * %SCX_OPS_KEEP_BUILTIN_IDLE is not set. - */ -__bpf_kfunc bool scx_bpf_test_and_clear_cpu_idle(s32 cpu) -{ - if (!static_branch_likely(&scx_builtin_idle_enabled)) { - scx_ops_error("built-in idle tracking is disabled"); - return false; - } - - if (ops_cpu_valid(cpu, NULL)) - return test_and_clear_cpu_idle(cpu); - else - return false; -} - -/** - * scx_bpf_pick_idle_cpu - Pick and claim an idle cpu - * @cpus_allowed: Allowed cpumask - * @flags: %SCX_PICK_IDLE_CPU_* flags - * - * Pick and claim an idle cpu in @cpus_allowed. Returns the picked idle cpu - * number on success. -%EBUSY if no matching cpu was found. - * - * Idle CPU tracking may race against CPU scheduling state transitions. For - * example, this function may return -%EBUSY as CPUs are transitioning into the - * idle state. If the caller then assumes that there will be dispatch events on - * the CPUs as they were all busy, the scheduler may end up stalling with CPUs - * idling while there are pending tasks. Use scx_bpf_pick_any_cpu() and - * scx_bpf_kick_cpu() to guarantee that there will be at least one dispatch - * event in the near future. - * - * Unavailable if ops.update_idle() is implemented and - * %SCX_OPS_KEEP_BUILTIN_IDLE is not set. - */ -__bpf_kfunc s32 scx_bpf_pick_idle_cpu(const struct cpumask *cpus_allowed, - u64 flags) -{ - if (!static_branch_likely(&scx_builtin_idle_enabled)) { - scx_ops_error("built-in idle tracking is disabled"); - return -EBUSY; - } - - return scx_pick_idle_cpu(cpus_allowed, flags); -} - -/** - * scx_bpf_pick_any_cpu - Pick and claim an idle cpu if available or pick any CPU - * @cpus_allowed: Allowed cpumask - * @flags: %SCX_PICK_IDLE_CPU_* flags - * - * Pick and claim an idle cpu in @cpus_allowed. If none is available, pick any - * CPU in @cpus_allowed. Guaranteed to succeed and returns the picked idle cpu - * number if @cpus_allowed is not empty. -%EBUSY is returned if @cpus_allowed is - * empty. - * - * If ops.update_idle() is implemented and %SCX_OPS_KEEP_BUILTIN_IDLE is not - * set, this function can't tell which CPUs are idle and will always pick any - * CPU. - */ -__bpf_kfunc s32 scx_bpf_pick_any_cpu(const struct cpumask *cpus_allowed, - u64 flags) -{ - s32 cpu; - - if (static_branch_likely(&scx_builtin_idle_enabled)) { - cpu = scx_pick_idle_cpu(cpus_allowed, flags); - if (cpu >= 0) - return cpu; - } - - cpu = cpumask_any_distribute(cpus_allowed); - if (cpu < nr_cpu_ids) - return cpu; - else - return -EBUSY; -} - /** * scx_bpf_destroy_dsq - Destroy a custom DSQ * @dsq_id: DSQ to destroy @@ -5869,48 +5822,8 @@ __bpf_kfunc void scx_bpf_destroy_dsq(u64 dsq_id) destroy_dsq(dsq_id); } -/** - * scx_bpf_select_cpu_dfl - The default implementation of ops.select_cpu() - * @p: task_struct to select a CPU for - * @prev_cpu: CPU @p was on previously - * @wake_flags: %SCX_WAKE_* flags - * @is_idle: out parameter indicating whether the returned CPU is idle - * - * Can only be called from ops.select_cpu() if the built-in CPU selection is - * enabled - ops.update_idle() is missing or %SCX_OPS_KEEP_BUILTIN_IDLE is set. - * @p, @prev_cpu and @wake_flags match ops.select_cpu(). - * - * Returns the picked CPU with *@is_idle indicating whether the picked CPU is - * currently idle and thus a good candidate for direct dispatching. - */ -__bpf_kfunc s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, - u64 wake_flags, bool *is_idle) -{ - if (!scx_kf_allowed(SCX_KF_SELECT_CPU)) { - *is_idle = false; - return prev_cpu; - } - - return scx_select_cpu_dfl(p, prev_cpu, wake_flags, is_idle); -} - __bpf_kfunc_end_defs(); -BTF_KFUNCS_START(scx_kfunc_ids_ops_only) -BTF_ID_FLAGS(func, scx_bpf_kick_cpu) -BTF_ID_FLAGS(func, scx_bpf_dsq_nr_queued) -BTF_ID_FLAGS(func, scx_bpf_test_and_clear_cpu_idle) -BTF_ID_FLAGS(func, scx_bpf_pick_idle_cpu, KF_RCU) -BTF_ID_FLAGS(func, scx_bpf_pick_any_cpu, KF_RCU) -BTF_ID_FLAGS(func, scx_bpf_destroy_dsq) -BTF_ID_FLAGS(func, scx_bpf_select_cpu_dfl, KF_RCU) -BTF_KFUNCS_END(scx_kfunc_ids_ops_only) - -static const struct btf_kfunc_id_set scx_kfunc_set_ops_only = { - .owner = THIS_MODULE, - .set = &scx_kfunc_ids_ops_only, -}; - struct scx_bpf_error_bstr_bufs { u64 data[MAX_BPRINTF_VARARGS]; char msg[SCX_EXIT_MSG_LEN]; @@ -6172,6 +6085,91 @@ __bpf_kfunc void scx_bpf_put_idle_cpumask(const struct cpumask *idle_mask) */ } +/** + * scx_bpf_test_and_clear_cpu_idle - Test and clear @cpu's idle state + * @cpu: cpu to test and clear idle for + * + * Returns %true if @cpu was idle and its idle state was successfully cleared. + * %false otherwise. + * + * Unavailable if ops.update_idle() is implemented and + * %SCX_OPS_KEEP_BUILTIN_IDLE is not set. + */ +__bpf_kfunc bool scx_bpf_test_and_clear_cpu_idle(s32 cpu) +{ + if (!static_branch_likely(&scx_builtin_idle_enabled)) { + scx_ops_error("built-in idle tracking is disabled"); + return false; + } + + if (ops_cpu_valid(cpu, NULL)) + return test_and_clear_cpu_idle(cpu); + else + return false; +} + +/** + * scx_bpf_pick_idle_cpu - Pick and claim an idle cpu + * @cpus_allowed: Allowed cpumask + * @flags: %SCX_PICK_IDLE_CPU_* flags + * + * Pick and claim an idle cpu in @cpus_allowed. Returns the picked idle cpu + * number on success. -%EBUSY if no matching cpu was found. + * + * Idle CPU tracking may race against CPU scheduling state transitions. For + * example, this function may return -%EBUSY as CPUs are transitioning into the + * idle state. If the caller then assumes that there will be dispatch events on + * the CPUs as they were all busy, the scheduler may end up stalling with CPUs + * idling while there are pending tasks. Use scx_bpf_pick_any_cpu() and + * scx_bpf_kick_cpu() to guarantee that there will be at least one dispatch + * event in the near future. + * + * Unavailable if ops.update_idle() is implemented and + * %SCX_OPS_KEEP_BUILTIN_IDLE is not set. + */ +__bpf_kfunc s32 scx_bpf_pick_idle_cpu(const struct cpumask *cpus_allowed, + u64 flags) +{ + if (!static_branch_likely(&scx_builtin_idle_enabled)) { + scx_ops_error("built-in idle tracking is disabled"); + return -EBUSY; + } + + return scx_pick_idle_cpu(cpus_allowed, flags); +} + +/** + * scx_bpf_pick_any_cpu - Pick and claim an idle cpu if available or pick any CPU + * @cpus_allowed: Allowed cpumask + * @flags: %SCX_PICK_IDLE_CPU_* flags + * + * Pick and claim an idle cpu in @cpus_allowed. If none is available, pick any + * CPU in @cpus_allowed. Guaranteed to succeed and returns the picked idle cpu + * number if @cpus_allowed is not empty. -%EBUSY is returned if @cpus_allowed is + * empty. + * + * If ops.update_idle() is implemented and %SCX_OPS_KEEP_BUILTIN_IDLE is not + * set, this function can't tell which CPUs are idle and will always pick any + * CPU. + */ +__bpf_kfunc s32 scx_bpf_pick_any_cpu(const struct cpumask *cpus_allowed, + u64 flags) +{ + s32 cpu; + + if (static_branch_likely(&scx_builtin_idle_enabled)) { + cpu = scx_pick_idle_cpu(cpus_allowed, flags); + if (cpu >= 0) + return cpu; + } + + cpu = cpumask_any_distribute(cpus_allowed); + if (cpu < nr_cpu_ids) + return cpu; + else + return -EBUSY; +} + /** * scx_bpf_task_running - Is task currently running? * @p: task of interest @@ -6228,6 +6226,9 @@ __bpf_kfunc struct cgroup *scx_bpf_task_cgroup(struct task_struct *p) __bpf_kfunc_end_defs(); BTF_KFUNCS_START(scx_kfunc_ids_any) +BTF_ID_FLAGS(func, scx_bpf_kick_cpu) +BTF_ID_FLAGS(func, scx_bpf_dsq_nr_queued) +BTF_ID_FLAGS(func, scx_bpf_destroy_dsq) BTF_ID_FLAGS(func, scx_bpf_exit_bstr, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, scx_bpf_error_bstr, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, scx_bpf_cpuperf_cap) @@ -6240,6 +6241,9 @@ BTF_ID_FLAGS(func, scx_bpf_put_cpumask, KF_RELEASE) BTF_ID_FLAGS(func, scx_bpf_get_idle_cpumask, KF_ACQUIRE) BTF_ID_FLAGS(func, scx_bpf_get_idle_smtmask, KF_ACQUIRE) BTF_ID_FLAGS(func, scx_bpf_put_idle_cpumask, KF_RELEASE) +BTF_ID_FLAGS(func, scx_bpf_test_and_clear_cpu_idle) +BTF_ID_FLAGS(func, scx_bpf_pick_idle_cpu, KF_RCU) +BTF_ID_FLAGS(func, scx_bpf_pick_any_cpu, KF_RCU) BTF_ID_FLAGS(func, scx_bpf_task_running, KF_RCU) BTF_ID_FLAGS(func, scx_bpf_task_cpu, KF_RCU) #ifdef CONFIG_CGROUP_SCHED @@ -6269,14 +6273,14 @@ static int __init scx_init(void) */ if ((ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &scx_kfunc_set_sleepable)) || + (ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, + &scx_kfunc_set_select_cpu)) || (ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &scx_kfunc_set_enqueue_dispatch)) || (ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &scx_kfunc_set_dispatch)) || (ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &scx_kfunc_set_cpu_release)) || - (ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, - &scx_kfunc_set_ops_only)) || (ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &scx_kfunc_set_any)) || (ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, From c18f827ee775e3d937318254eca2ae6de51d6375 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 23 Apr 2024 10:36:08 -1000 Subject: [PATCH 568/606] scx: Use WRITE_ONCE/READ_ONCE() on dsq->nr dsq->nr is read without locking from scx_bpf_dsq_nr_queued(). Use WRITE_ONCE() to update and READ_ONCE() from scx_bpf_dsq_nr_queued(). (cherry picked from commit 996286fbaa6d9924c677e78bd198a9257a7433eb) --- kernel/sched/ext.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 6e2f4564d2e7b..942a5d6db6d24 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -1381,6 +1381,12 @@ static bool scx_dsq_priq_less(struct rb_node *node_a, return time_before64(a->scx.dsq_vtime, b->scx.dsq_vtime); } +static void dsq_mod_nr(struct scx_dispatch_q *dsq, s32 delta) +{ + /* scx_bpf_dsq_nr_queued() reads ->nr without locking, use WRITE_ONCE() */ + WRITE_ONCE(dsq->nr, dsq->nr + delta); +} + static void dispatch_enqueue(struct scx_dispatch_q *dsq, struct task_struct *p, u64 enq_flags) { @@ -1432,7 +1438,7 @@ static void dispatch_enqueue(struct scx_dispatch_q *dsq, struct task_struct *p, scx_ops_error("DSQ ID 0x%016llx already had PRIQ-enqueued tasks", dsq->id); } - dsq->nr++; + dsq_mod_nr(dsq, 1); p->scx.dsq = dsq; /* @@ -1516,7 +1522,7 @@ static void dispatch_dequeue(struct scx_rq *scx_rq, struct task_struct *p) /* @p must still be on @dsq, dequeue */ WARN_ON_ONCE(!task_linked_on_dsq(p)); task_unlink_from_dsq(p, dsq); - dsq->nr--; + dsq_mod_nr(dsq, -1); } else { /* * We're racing against dispatch_to_local_dsq() which already @@ -2065,8 +2071,8 @@ static bool consume_dispatch_q(struct rq *rq, struct rq_flags *rf, WARN_ON_ONCE(p->scx.holding_cpu >= 0); task_unlink_from_dsq(p, dsq); list_add_tail(&p->scx.dsq_node.fifo, &scx_rq->local_dsq.fifo); - dsq->nr--; - scx_rq->local_dsq.nr++; + dsq_mod_nr(dsq, -1); + dsq_mod_nr(&scx_rq->local_dsq, 1); p->scx.dsq = &scx_rq->local_dsq; raw_spin_unlock(&dsq->lock); return true; @@ -2082,7 +2088,7 @@ static bool consume_dispatch_q(struct rq *rq, struct rq_flags *rf, */ WARN_ON_ONCE(p->scx.holding_cpu >= 0); task_unlink_from_dsq(p, dsq); - dsq->nr--; + dsq_mod_nr(dsq, -1); p->scx.holding_cpu = raw_smp_processor_id(); raw_spin_unlock(&dsq->lock); @@ -5786,19 +5792,19 @@ __bpf_kfunc s32 scx_bpf_dsq_nr_queued(u64 dsq_id) preempt_disable(); if (dsq_id == SCX_DSQ_LOCAL) { - ret = this_rq()->scx.local_dsq.nr; + ret = READ_ONCE(this_rq()->scx.local_dsq.nr); goto out; } else if ((dsq_id & SCX_DSQ_LOCAL_ON) == SCX_DSQ_LOCAL_ON) { s32 cpu = dsq_id & SCX_DSQ_LOCAL_CPU_MASK; if (ops_cpu_valid(cpu, NULL)) { - ret = cpu_rq(cpu)->scx.local_dsq.nr; + ret = READ_ONCE(cpu_rq(cpu)->scx.local_dsq.nr); goto out; } } else { dsq = find_non_local_dsq(dsq_id); if (dsq) { - ret = dsq->nr; + ret = READ_ONCE(dsq->nr); goto out; } } From 0bcf1c6afb329012b8ecdb2616709e09357045f2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 23 Apr 2024 10:36:08 -1000 Subject: [PATCH 569/606] scx: Rename scx_dispatch_q node field names We'll use the list_head for both fifo and priq. Let's rename it to list. No functional changes intended. (cherry picked from commit a5e996c33a617941dfef7347c27d7d46b98d6b6f) --- include/linux/sched/ext.h | 12 +++++++----- init/init_task.c | 2 +- kernel/sched/ext.c | 28 ++++++++++++++-------------- 3 files changed, 22 insertions(+), 20 deletions(-) diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index 798745cd0f3b2..b5c5706068b8d 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -51,13 +51,15 @@ enum scx_dsq_id_flags { }; /* - * Dispatch queue (dsq) is a simple FIFO which is used to buffer between the - * scheduler core and the BPF scheduler. See the documentation for more details. + * A dispatch queue (DSQ) can be either a FIFO or p->scx.dsq_vtime ordered + * queue. A built-in DSQ is always a FIFO. The built-in local DSQs are used to + * buffer between the scheduler core and the BPF scheduler. See the + * documentation for more details. */ struct scx_dispatch_q { raw_spinlock_t lock; - struct list_head fifo; /* processed in dispatching order */ - struct rb_root_cached priq; /* processed in p->scx.dsq_vtime order */ + struct list_head list; /* tasks in dispatch order */ + struct rb_root_cached priq; /* used to order by p->scx.dsq_vtime */ u32 nr; u64 id; struct rhash_head hash_node; @@ -126,7 +128,7 @@ enum scx_kf_mask { struct sched_ext_entity { struct scx_dispatch_q *dsq; struct { - struct list_head fifo; /* dispatch order */ + struct list_head list; /* dispatch order */ struct rb_node priq; /* p->scx.dsq_vtime order */ } dsq_node; u32 flags; /* protected by rq lock */ diff --git a/init/init_task.c b/init/init_task.c index 377b51e9c7ed9..6bac934219d8a 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -101,7 +101,7 @@ struct task_struct init_task __aligned(L1_CACHE_BYTES) = { #endif #ifdef CONFIG_SCHED_CLASS_EXT .scx = { - .dsq_node.fifo = LIST_HEAD_INIT(init_task.scx.dsq_node.fifo), + .dsq_node.list = LIST_HEAD_INIT(init_task.scx.dsq_node.list), .sticky_cpu = -1, .holding_cpu = -1, .runnable_node = LIST_HEAD_INIT(init_task.scx.runnable_node), diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 942a5d6db6d24..e3a65525976d9 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -1392,7 +1392,7 @@ static void dispatch_enqueue(struct scx_dispatch_q *dsq, struct task_struct *p, { bool is_local = dsq->id == SCX_DSQ_LOCAL; - WARN_ON_ONCE(p->scx.dsq || !list_empty(&p->scx.dsq_node.fifo)); + WARN_ON_ONCE(p->scx.dsq || !list_empty(&p->scx.dsq_node.list)); WARN_ON_ONCE((p->scx.dsq_flags & SCX_TASK_DSQ_ON_PRIQ) || !RB_EMPTY_NODE(&p->scx.dsq_node.priq)); @@ -1425,14 +1425,14 @@ static void dispatch_enqueue(struct scx_dispatch_q *dsq, struct task_struct *p, rb_add_cached(&p->scx.dsq_node.priq, &dsq->priq, scx_dsq_priq_less); /* A DSQ should only be using either FIFO or PRIQ enqueuing. */ - if (unlikely(!list_empty(&dsq->fifo))) + if (unlikely(!list_empty(&dsq->list))) scx_ops_error("DSQ ID 0x%016llx already had FIFO-enqueued tasks", dsq->id); } else { if (enq_flags & (SCX_ENQ_HEAD | SCX_ENQ_PREEMPT)) - list_add(&p->scx.dsq_node.fifo, &dsq->fifo); + list_add(&p->scx.dsq_node.list, &dsq->list); else - list_add_tail(&p->scx.dsq_node.fifo, &dsq->fifo); + list_add_tail(&p->scx.dsq_node.list, &dsq->list); /* A DSQ should only be using either FIFO or PRIQ enqueuing. */ if (unlikely(rb_first_cached(&dsq->priq))) scx_ops_error("DSQ ID 0x%016llx already had PRIQ-enqueued tasks", @@ -1483,13 +1483,13 @@ static void task_unlink_from_dsq(struct task_struct *p, RB_CLEAR_NODE(&p->scx.dsq_node.priq); p->scx.dsq_flags &= ~SCX_TASK_DSQ_ON_PRIQ; } else { - list_del_init(&p->scx.dsq_node.fifo); + list_del_init(&p->scx.dsq_node.list); } } static bool task_linked_on_dsq(struct task_struct *p) { - return !list_empty(&p->scx.dsq_node.fifo) || + return !list_empty(&p->scx.dsq_node.list) || !RB_EMPTY_NODE(&p->scx.dsq_node.priq); } @@ -2040,12 +2040,12 @@ static bool consume_dispatch_q(struct rq *rq, struct rq_flags *rf, struct rq *task_rq; bool moved = false; retry: - if (list_empty(&dsq->fifo) && !rb_first_cached(&dsq->priq)) + if (list_empty(&dsq->list) && !rb_first_cached(&dsq->priq)) return false; raw_spin_lock(&dsq->lock); - list_for_each_entry(p, &dsq->fifo, scx.dsq_node.fifo) { + list_for_each_entry(p, &dsq->list, scx.dsq_node.list) { task_rq = task_rq(p); if (rq == task_rq) goto this_rq; @@ -2070,7 +2070,7 @@ static bool consume_dispatch_q(struct rq *rq, struct rq_flags *rf, /* @dsq is locked and @p is on this rq */ WARN_ON_ONCE(p->scx.holding_cpu >= 0); task_unlink_from_dsq(p, dsq); - list_add_tail(&p->scx.dsq_node.fifo, &scx_rq->local_dsq.fifo); + list_add_tail(&p->scx.dsq_node.list, &scx_rq->local_dsq.list); dsq_mod_nr(dsq, -1); dsq_mod_nr(&scx_rq->local_dsq, 1); p->scx.dsq = &scx_rq->local_dsq; @@ -2596,7 +2596,7 @@ static void put_prev_task_scx(struct rq *rq, struct task_struct *p) * can find the task unless it wants to trigger a separate * follow-up scheduling event. */ - if (list_empty(&rq->scx.local_dsq.fifo)) + if (list_empty(&rq->scx.local_dsq.list)) do_enqueue_task(rq, p, SCX_ENQ_LAST, -1); else do_enqueue_task(rq, p, 0, -1); @@ -2606,8 +2606,8 @@ static void put_prev_task_scx(struct rq *rq, struct task_struct *p) static struct task_struct *first_local_task(struct rq *rq) { WARN_ON_ONCE(rb_first_cached(&rq->scx.local_dsq.priq)); - return list_first_entry_or_null(&rq->scx.local_dsq.fifo, - struct task_struct, scx.dsq_node.fifo); + return list_first_entry_or_null(&rq->scx.local_dsq.list, + struct task_struct, scx.dsq_node.list); } static struct task_struct *pick_next_task_scx(struct rq *rq) @@ -3304,7 +3304,7 @@ void init_scx_entity(struct sched_ext_entity *scx) */ memset(scx, 0, offsetof(struct sched_ext_entity, tasks_node)); - INIT_LIST_HEAD(&scx->dsq_node.fifo); + INIT_LIST_HEAD(&scx->dsq_node.list); RB_CLEAR_NODE(&scx->dsq_node.priq); scx->sticky_cpu = -1; scx->holding_cpu = -1; @@ -3699,7 +3699,7 @@ static void init_dsq(struct scx_dispatch_q *dsq, u64 dsq_id) memset(dsq, 0, sizeof(*dsq)); raw_spin_lock_init(&dsq->lock); - INIT_LIST_HEAD(&dsq->fifo); + INIT_LIST_HEAD(&dsq->list); dsq->id = dsq_id; } From 229cad6c7a05493b169042e0bf04c30e94e8c6ec Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 23 Apr 2024 10:36:08 -1000 Subject: [PATCH 570/606] scx: Use uncached rbtree for dsq priq priq will updated so that the tasks are added to the list in vtime order too. Thus, caching the first node won't be necessary. Let's switch to uncached variant. (cherry picked from commit 8adc78c59a05a924aa24e0a46eed6e5686e46066) --- include/linux/sched/ext.h | 2 +- kernel/sched/ext.c | 13 ++++++------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index b5c5706068b8d..790347a8b4a14 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -59,7 +59,7 @@ enum scx_dsq_id_flags { struct scx_dispatch_q { raw_spinlock_t lock; struct list_head list; /* tasks in dispatch order */ - struct rb_root_cached priq; /* used to order by p->scx.dsq_vtime */ + struct rb_root priq; /* used to order by p->scx.dsq_vtime */ u32 nr; u64 id; struct rhash_head hash_node; diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index e3a65525976d9..95692d51b6c37 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -1422,8 +1422,7 @@ static void dispatch_enqueue(struct scx_dispatch_q *dsq, struct task_struct *p, if (enq_flags & SCX_ENQ_DSQ_PRIQ) { p->scx.dsq_flags |= SCX_TASK_DSQ_ON_PRIQ; - rb_add_cached(&p->scx.dsq_node.priq, &dsq->priq, - scx_dsq_priq_less); + rb_add(&p->scx.dsq_node.priq, &dsq->priq, scx_dsq_priq_less); /* A DSQ should only be using either FIFO or PRIQ enqueuing. */ if (unlikely(!list_empty(&dsq->list))) scx_ops_error("DSQ ID 0x%016llx already had FIFO-enqueued tasks", @@ -1434,7 +1433,7 @@ static void dispatch_enqueue(struct scx_dispatch_q *dsq, struct task_struct *p, else list_add_tail(&p->scx.dsq_node.list, &dsq->list); /* A DSQ should only be using either FIFO or PRIQ enqueuing. */ - if (unlikely(rb_first_cached(&dsq->priq))) + if (unlikely(rb_first(&dsq->priq))) scx_ops_error("DSQ ID 0x%016llx already had PRIQ-enqueued tasks", dsq->id); } @@ -1479,7 +1478,7 @@ static void task_unlink_from_dsq(struct task_struct *p, struct scx_dispatch_q *dsq) { if (p->scx.dsq_flags & SCX_TASK_DSQ_ON_PRIQ) { - rb_erase_cached(&p->scx.dsq_node.priq, &dsq->priq); + rb_erase(&p->scx.dsq_node.priq, &dsq->priq); RB_CLEAR_NODE(&p->scx.dsq_node.priq); p->scx.dsq_flags &= ~SCX_TASK_DSQ_ON_PRIQ; } else { @@ -2040,7 +2039,7 @@ static bool consume_dispatch_q(struct rq *rq, struct rq_flags *rf, struct rq *task_rq; bool moved = false; retry: - if (list_empty(&dsq->list) && !rb_first_cached(&dsq->priq)) + if (list_empty(&dsq->list) && !rb_first(&dsq->priq)) return false; raw_spin_lock(&dsq->lock); @@ -2053,7 +2052,7 @@ static bool consume_dispatch_q(struct rq *rq, struct rq_flags *rf, goto remote_rq; } - for (rb_node = rb_first_cached(&dsq->priq); rb_node; + for (rb_node = rb_first(&dsq->priq); rb_node; rb_node = rb_next(rb_node)) { p = container_of(rb_node, struct task_struct, scx.dsq_node.priq); task_rq = task_rq(p); @@ -2605,7 +2604,7 @@ static void put_prev_task_scx(struct rq *rq, struct task_struct *p) static struct task_struct *first_local_task(struct rq *rq) { - WARN_ON_ONCE(rb_first_cached(&rq->scx.local_dsq.priq)); + WARN_ON_ONCE(rb_first(&rq->scx.local_dsq.priq)); return list_first_entry_or_null(&rq->scx.local_dsq.list, struct task_struct, scx.dsq_node.list); } From 0e45de524924659d090b9485abf78a27d31bdff8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 23 Apr 2024 10:36:08 -1000 Subject: [PATCH 571/606] scx: Always keep tasks on dsq->list When a DSQ is used as a PRIQ, tasks were only kept on dsq->priq. This requires all other paths to check both dsq->priq and dsq->list. A task's scx.dsq_node.list is available when the task is on a PRIQ DSQ anyway, let's always put tasks on dsq->list whether it's used as a PRIQ or FIFO by inserting into dsq->list in vtime order on PRIQs. This means that dsq->list is always in dispatch order whether the DSQ is FIFO or PRIQ and the only paths that need to worry about the DSQ type are dispatch_enqueue() and task_unlink_from_dsq(). (cherry picked from commit f25afc8806f486735b1d07c68fc22602fc5299ca) --- kernel/sched/ext.c | 60 +++++++++++++++++++++++++++------------------- 1 file changed, 35 insertions(+), 25 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 95692d51b6c37..30da25890e0ca 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -1421,21 +1421,44 @@ static void dispatch_enqueue(struct scx_dispatch_q *dsq, struct task_struct *p, } if (enq_flags & SCX_ENQ_DSQ_PRIQ) { - p->scx.dsq_flags |= SCX_TASK_DSQ_ON_PRIQ; - rb_add(&p->scx.dsq_node.priq, &dsq->priq, scx_dsq_priq_less); - /* A DSQ should only be using either FIFO or PRIQ enqueuing. */ - if (unlikely(!list_empty(&dsq->list))) + struct rb_node *rbp; + + /* + * A PRIQ DSQ shouldn't be using FIFO enqueueing. As tasks are + * linked to both the rbtree and list on PRIQs, this can only be + * tested easily when adding the first task. + */ + if (unlikely(RB_EMPTY_ROOT(&dsq->priq) && + !list_empty(&dsq->list))) scx_ops_error("DSQ ID 0x%016llx already had FIFO-enqueued tasks", dsq->id); + + p->scx.dsq_flags |= SCX_TASK_DSQ_ON_PRIQ; + rb_add(&p->scx.dsq_node.priq, &dsq->priq, scx_dsq_priq_less); + + /* + * Find the previous task and insert after it on the list so + * that @dsq->list is vtime ordered. + */ + rbp = rb_prev(&p->scx.dsq_node.priq); + if (rbp) { + struct task_struct *prev = + container_of(rbp, struct task_struct, + scx.dsq_node.priq); + list_add(&p->scx.dsq_node.list, &prev->scx.dsq_node.list); + } else { + list_add(&p->scx.dsq_node.list, &dsq->list); + } } else { + /* a FIFO DSQ shouldn't be using PRIQ enqueuing */ + if (unlikely(!RB_EMPTY_ROOT(&dsq->priq))) + scx_ops_error("DSQ ID 0x%016llx already had PRIQ-enqueued tasks", + dsq->id); + if (enq_flags & (SCX_ENQ_HEAD | SCX_ENQ_PREEMPT)) list_add(&p->scx.dsq_node.list, &dsq->list); else list_add_tail(&p->scx.dsq_node.list, &dsq->list); - /* A DSQ should only be using either FIFO or PRIQ enqueuing. */ - if (unlikely(rb_first(&dsq->priq))) - scx_ops_error("DSQ ID 0x%016llx already had PRIQ-enqueued tasks", - dsq->id); } dsq_mod_nr(dsq, 1); p->scx.dsq = dsq; @@ -1481,15 +1504,14 @@ static void task_unlink_from_dsq(struct task_struct *p, rb_erase(&p->scx.dsq_node.priq, &dsq->priq); RB_CLEAR_NODE(&p->scx.dsq_node.priq); p->scx.dsq_flags &= ~SCX_TASK_DSQ_ON_PRIQ; - } else { - list_del_init(&p->scx.dsq_node.list); } + + list_del_init(&p->scx.dsq_node.list); } static bool task_linked_on_dsq(struct task_struct *p) { - return !list_empty(&p->scx.dsq_node.list) || - !RB_EMPTY_NODE(&p->scx.dsq_node.priq); + return !list_empty(&p->scx.dsq_node.list); } static void dispatch_dequeue(struct scx_rq *scx_rq, struct task_struct *p) @@ -2035,11 +2057,10 @@ static bool consume_dispatch_q(struct rq *rq, struct rq_flags *rf, { struct scx_rq *scx_rq = &rq->scx; struct task_struct *p; - struct rb_node *rb_node; struct rq *task_rq; bool moved = false; retry: - if (list_empty(&dsq->list) && !rb_first(&dsq->priq)) + if (list_empty(&dsq->list)) return false; raw_spin_lock(&dsq->lock); @@ -2052,16 +2073,6 @@ static bool consume_dispatch_q(struct rq *rq, struct rq_flags *rf, goto remote_rq; } - for (rb_node = rb_first(&dsq->priq); rb_node; - rb_node = rb_next(rb_node)) { - p = container_of(rb_node, struct task_struct, scx.dsq_node.priq); - task_rq = task_rq(p); - if (rq == task_rq) - goto this_rq; - if (task_can_run_on_rq(p, rq)) - goto remote_rq; - } - raw_spin_unlock(&dsq->lock); return false; @@ -2604,7 +2615,6 @@ static void put_prev_task_scx(struct rq *rq, struct task_struct *p) static struct task_struct *first_local_task(struct rq *rq) { - WARN_ON_ONCE(rb_first(&rq->scx.local_dsq.priq)); return list_first_entry_or_null(&rq->scx.local_dsq.list, struct task_struct, scx.dsq_node.list); } From 9ec87d389a7e4dcc25c935fcb685ecc504171e7c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 23 Apr 2024 10:36:09 -1000 Subject: [PATCH 572/606] scx: Move p->scx.dsq_flags into p->scx.dsq_node and give p->scx.dsq_node's type the name of scx_dsq_node. This will be used to implement DSQ iterator. This wastes 8 bytes on 64bit. Oh well. (cherry picked from commit be87f284a20fae1adf9853af5fadc450d2683537) --- include/linux/sched/ext.h | 12 +++++++----- kernel/sched/ext.c | 10 +++++----- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index 790347a8b4a14..27252b2b3ceef 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -121,18 +121,20 @@ enum scx_kf_mask { __SCX_KF_TERMINAL = SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU | SCX_KF_REST, }; +struct scx_dsq_node { + struct list_head list; /* dispatch order */ + struct rb_node priq; /* p->scx.dsq_vtime order */ + u32 flags; /* SCX_TASK_DSQ_* flags */ +}; + /* * The following is embedded in task_struct and contains all fields necessary * for a task to be scheduled by SCX. */ struct sched_ext_entity { struct scx_dispatch_q *dsq; - struct { - struct list_head list; /* dispatch order */ - struct rb_node priq; /* p->scx.dsq_vtime order */ - } dsq_node; + struct scx_dsq_node dsq_node; /* protected by dsq lock */ u32 flags; /* protected by rq lock */ - u32 dsq_flags; /* protected by dsq lock */ u32 weight; s32 sticky_cpu; s32 holding_cpu; diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 30da25890e0ca..f7734e4cff667 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -1393,7 +1393,7 @@ static void dispatch_enqueue(struct scx_dispatch_q *dsq, struct task_struct *p, bool is_local = dsq->id == SCX_DSQ_LOCAL; WARN_ON_ONCE(p->scx.dsq || !list_empty(&p->scx.dsq_node.list)); - WARN_ON_ONCE((p->scx.dsq_flags & SCX_TASK_DSQ_ON_PRIQ) || + WARN_ON_ONCE((p->scx.dsq_node.flags & SCX_TASK_DSQ_ON_PRIQ) || !RB_EMPTY_NODE(&p->scx.dsq_node.priq)); if (!is_local) { @@ -1433,7 +1433,7 @@ static void dispatch_enqueue(struct scx_dispatch_q *dsq, struct task_struct *p, scx_ops_error("DSQ ID 0x%016llx already had FIFO-enqueued tasks", dsq->id); - p->scx.dsq_flags |= SCX_TASK_DSQ_ON_PRIQ; + p->scx.dsq_node.flags |= SCX_TASK_DSQ_ON_PRIQ; rb_add(&p->scx.dsq_node.priq, &dsq->priq, scx_dsq_priq_less); /* @@ -1500,10 +1500,10 @@ static void dispatch_enqueue(struct scx_dispatch_q *dsq, struct task_struct *p, static void task_unlink_from_dsq(struct task_struct *p, struct scx_dispatch_q *dsq) { - if (p->scx.dsq_flags & SCX_TASK_DSQ_ON_PRIQ) { + if (p->scx.dsq_node.flags & SCX_TASK_DSQ_ON_PRIQ) { rb_erase(&p->scx.dsq_node.priq, &dsq->priq); RB_CLEAR_NODE(&p->scx.dsq_node.priq); - p->scx.dsq_flags &= ~SCX_TASK_DSQ_ON_PRIQ; + p->scx.dsq_node.flags &= ~SCX_TASK_DSQ_ON_PRIQ; } list_del_init(&p->scx.dsq_node.list); @@ -4344,7 +4344,7 @@ static void scx_dump_task(struct seq_buf *s, struct task_struct *p, char marker, seq_buf_printf(s, " scx_state/flags=%u/0x%x dsq_flags=0x%x ops_state/qseq=%lu/%lu\n", scx_get_task_state(p), p->scx.flags & ~SCX_TASK_STATE_MASK, - p->scx.dsq_flags, + p->scx.dsq_node.flags, ops_state & SCX_OPSS_STATE_MASK, ops_state >> SCX_OPSS_QSEQ_SHIFT); seq_buf_printf(s, " sticky/holding_cpu=%d/%d dsq_id=%s\n", From 59ef2092a9aec287bc3547b7bde88085147bcb1d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 23 Apr 2024 10:36:09 -1000 Subject: [PATCH 573/606] scx: Introduce nldsq_next_task() and nldsq_for_each_task() Implement dsq->list iteration helper nldsq_next_task() and nldsq_for_each_task(). As the name suggests, these are to be used only with non-local DSQs. Currently, these don't have any functional differences to raw list iteration. Future BPF DSQ iteration addition will modify the iteration behavior. (cherry picked from commit d1e6915686bc2c76f124d9711346f63b2e8e3fc8) --- kernel/sched/ext.c | 42 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index f7734e4cff667..a2d7167857c87 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -1092,6 +1092,44 @@ static __always_inline bool scx_kf_allowed_on_arg_tasks(u32 mask, return true; } +/** + * nldsq_next_task - Iterate to the next task in a non-local DSQ + * @dsq: user dsq being interated + * @cur: current position, %NULL to start iteration + * @rev: walk backwards + * + * Returns %NULL when iteration is finished. + */ +static struct task_struct *nldsq_next_task(struct scx_dispatch_q *dsq, + struct task_struct *cur, bool rev) +{ + struct list_head *list_node; + struct scx_dsq_node *dsq_node; + + lockdep_assert_held(&dsq->lock); + + if (cur) + list_node = &cur->scx.dsq_node.list; + else + list_node = &dsq->list; + + if (rev) + list_node = list_node->prev; + else + list_node = list_node->next; + + if (list_node == &dsq->list) + return NULL; + + dsq_node = container_of(list_node, struct scx_dsq_node, list); + + return container_of(dsq_node, struct task_struct, scx.dsq_node); +} + +#define nldsq_for_each_task(p, dsq) \ + for ((p) = nldsq_next_task((dsq), NULL, false); (p); \ + (p) = nldsq_next_task((dsq), (p), false)) + struct scx_task_iter { struct sched_ext_entity cursor; struct task_struct *locked; @@ -1429,7 +1467,7 @@ static void dispatch_enqueue(struct scx_dispatch_q *dsq, struct task_struct *p, * tested easily when adding the first task. */ if (unlikely(RB_EMPTY_ROOT(&dsq->priq) && - !list_empty(&dsq->list))) + nldsq_next_task(dsq, NULL, false))) scx_ops_error("DSQ ID 0x%016llx already had FIFO-enqueued tasks", dsq->id); @@ -2065,7 +2103,7 @@ static bool consume_dispatch_q(struct rq *rq, struct rq_flags *rf, raw_spin_lock(&dsq->lock); - list_for_each_entry(p, &dsq->list, scx.dsq_node.list) { + nldsq_for_each_task(p, dsq) { task_rq = task_rq(p); if (rq == task_rq) goto this_rq; From 2dd84c0bdaca539df78551306377c45020ace006 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 23 Apr 2024 10:36:09 -1000 Subject: [PATCH 574/606] scx: Implement BPF DSQ iterator - Can iterate non-local DSQs. - Can be used from any context. - Only tasks which are already queued at the time of iterator init are visited. - Can iterate in either dispatch or reverse dispatch order. For PRIQs, vtime order is guaranteed to monotonically increase or decrease depending on the walk direction. - scx_qmap updated to demonstrate the usage. (cherry picked from commit 3c1c97b0fc1b835c9c80966063cedb2ea27db0e5) --- include/linux/sched/ext.h | 4 + kernel/sched/ext.c | 159 ++++++++++++++++++++++- tools/sched_ext/include/scx/common.bpf.h | 3 + tools/sched_ext/scx_qmap.bpf.c | 71 +++++++--- tools/sched_ext/scx_qmap.c | 10 +- 5 files changed, 222 insertions(+), 25 deletions(-) diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index 27252b2b3ceef..364ca827a45e7 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -61,6 +61,7 @@ struct scx_dispatch_q { struct list_head list; /* tasks in dispatch order */ struct rb_root priq; /* used to order by p->scx.dsq_vtime */ u32 nr; + u64 seq; /* used by BPF iter */ u64 id; struct rhash_head hash_node; struct llist_node free_node; @@ -94,6 +95,8 @@ enum scx_task_state { /* scx_entity.dsq_flags */ enum scx_ent_dsq_flags { SCX_TASK_DSQ_ON_PRIQ = 1 << 0, /* task is queued on the priority queue of a dsq */ + + SCX_TASK_DSQ_CURSOR = 1 << 31, /* iteration cursor, not a task */ }; /* @@ -134,6 +137,7 @@ struct scx_dsq_node { struct sched_ext_entity { struct scx_dispatch_q *dsq; struct scx_dsq_node dsq_node; /* protected by dsq lock */ + u64 dsq_seq; u32 flags; /* protected by rq lock */ u32 weight; s32 sticky_cpu; diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index a2d7167857c87..3c43bd9392d80 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -1113,15 +1113,18 @@ static struct task_struct *nldsq_next_task(struct scx_dispatch_q *dsq, else list_node = &dsq->list; - if (rev) - list_node = list_node->prev; - else - list_node = list_node->next; + /* find the next task, need to skip BPF iteration cursors */ + do { + if (rev) + list_node = list_node->prev; + else + list_node = list_node->next; - if (list_node == &dsq->list) - return NULL; + if (list_node == &dsq->list) + return NULL; - dsq_node = container_of(list_node, struct scx_dsq_node, list); + dsq_node = container_of(list_node, struct scx_dsq_node, list); + } while (dsq_node->flags & SCX_TASK_DSQ_CURSOR); return container_of(dsq_node, struct task_struct, scx.dsq_node); } @@ -1130,6 +1133,35 @@ static struct task_struct *nldsq_next_task(struct scx_dispatch_q *dsq, for ((p) = nldsq_next_task((dsq), NULL, false); (p); \ (p) = nldsq_next_task((dsq), (p), false)) + +/* + * BPF DSQ iterator. Tasks in a non-local DSQ can be iterated in [reverse] + * dispatch order. BPF-visible iterator is opaque and larger to allow future + * changes without breaking backward compatibility. Can be used with + * bpf_for_each(). See bpf_iter_scx_dsq_*(). + */ +enum scx_dsq_iter_flags { + /* iterate in the reverse dispatch order */ + SCX_DSQ_ITER_REV = 1LLU << 0, + + __SCX_DSQ_ITER_ALL_FLAGS = SCX_DSQ_ITER_REV, +}; + +struct bpf_iter_scx_dsq_kern { + struct scx_dsq_node cursor; + struct scx_dispatch_q *dsq; + u64 dsq_seq; + u64 flags; +} __attribute__((aligned(8))); + +struct bpf_iter_scx_dsq { + u64 __opaque[12]; +} __attribute__((aligned(8))); + + +/* + * SCX task iterator. + */ struct scx_task_iter { struct sched_ext_entity cursor; struct task_struct *locked; @@ -1498,6 +1530,11 @@ static void dispatch_enqueue(struct scx_dispatch_q *dsq, struct task_struct *p, else list_add_tail(&p->scx.dsq_node.list, &dsq->list); } + + /* seq records the order tasks are queued, used by BPF iterations */ + dsq->seq++; + p->scx.dsq_seq = dsq->seq; + dsq_mod_nr(dsq, 1); p->scx.dsq = dsq; @@ -5875,6 +5912,111 @@ __bpf_kfunc void scx_bpf_destroy_dsq(u64 dsq_id) destroy_dsq(dsq_id); } +/** + * bpf_iter_scx_dsq_new - Create a DSQ iterator + * @it: iterator to initialize + * @dsq_id: DSQ to iterate + * @flags: %SCX_DSQ_ITER_* + * + * Initialize BPF iterator @it which can be used with bpf_for_each() to walk + * tasks in the DSQ specified by @dsq_id. Iteration using @it only includes + * tasks which are already queued when this function is invoked. + */ +__bpf_kfunc int bpf_iter_scx_dsq_new(struct bpf_iter_scx_dsq *it, u64 dsq_id, + u64 flags) +{ + struct bpf_iter_scx_dsq_kern *kit = (void *)it; + + BUILD_BUG_ON(sizeof(struct bpf_iter_scx_dsq_kern) > + sizeof(struct bpf_iter_scx_dsq)); + BUILD_BUG_ON(__alignof__(struct bpf_iter_scx_dsq_kern) != + __alignof__(struct bpf_iter_scx_dsq)); + + if (flags & ~__SCX_DSQ_ITER_ALL_FLAGS) + return -EINVAL; + + kit->dsq = find_non_local_dsq(dsq_id); + if (!kit->dsq) + return -ENOENT; + + INIT_LIST_HEAD(&kit->cursor.list); + RB_CLEAR_NODE(&kit->cursor.priq); + kit->cursor.flags = SCX_TASK_DSQ_CURSOR; + kit->dsq_seq = READ_ONCE(kit->dsq->seq); + kit->flags = flags; + + return 0; +} + +/** + * bpf_iter_scx_dsq_next - Progress a DSQ iterator + * @it: iterator to progress + * + * Return the next task. See bpf_iter_scx_dsq_new(). + */ +__bpf_kfunc struct task_struct *bpf_iter_scx_dsq_next(struct bpf_iter_scx_dsq *it) +{ + struct bpf_iter_scx_dsq_kern *kit = (void *)it; + bool rev = kit->flags & SCX_DSQ_ITER_REV; + struct task_struct *p; + unsigned long flags; + + if (!kit->dsq) + return NULL; + + raw_spin_lock_irqsave(&kit->dsq->lock, flags); + + if (list_empty(&kit->cursor.list)) + p = NULL; + else + p = container_of(&kit->cursor, struct task_struct, scx.dsq_node); + + /* + * Only tasks which were queued before the iteration started are + * visible. This bounds BPF iterations and guarantees that vtime never + * jumps in the other direction while iterating. + */ + do { + p = nldsq_next_task(kit->dsq, p, rev); + } while (p && unlikely(time_after64(p->scx.dsq_seq, kit->dsq_seq))); + + if (p) { + if (rev) + list_move_tail(&kit->cursor.list, &p->scx.dsq_node.list); + else + list_move(&kit->cursor.list, &p->scx.dsq_node.list); + } else { + list_del_init(&kit->cursor.list); + } + + raw_spin_unlock_irqrestore(&kit->dsq->lock, flags); + + return p; +} + +/** + * bpf_iter_scx_dsq_destroy - Destroy a DSQ iterator + * @it: iterator to destroy + * + * Undo scx_iter_scx_dsq_new(). + */ +__bpf_kfunc void bpf_iter_scx_dsq_destroy(struct bpf_iter_scx_dsq *it) +{ + struct bpf_iter_scx_dsq_kern *kit = (void *)it; + + if (!kit->dsq) + return; + + if (!list_empty(&kit->cursor.list)) { + unsigned long flags; + + raw_spin_lock_irqsave(&kit->dsq->lock, flags); + list_del_init(&kit->cursor.list); + raw_spin_unlock_irqrestore(&kit->dsq->lock, flags); + } + kit->dsq = NULL; +} + __bpf_kfunc_end_defs(); struct scx_bpf_error_bstr_bufs { @@ -6282,6 +6424,9 @@ BTF_KFUNCS_START(scx_kfunc_ids_any) BTF_ID_FLAGS(func, scx_bpf_kick_cpu) BTF_ID_FLAGS(func, scx_bpf_dsq_nr_queued) BTF_ID_FLAGS(func, scx_bpf_destroy_dsq) +BTF_ID_FLAGS(func, bpf_iter_scx_dsq_new, KF_ITER_NEW | KF_RCU_PROTECTED) +BTF_ID_FLAGS(func, bpf_iter_scx_dsq_next, KF_ITER_NEXT | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_iter_scx_dsq_destroy, KF_ITER_DESTROY) BTF_ID_FLAGS(func, scx_bpf_exit_bstr, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, scx_bpf_error_bstr, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, scx_bpf_cpuperf_cap) diff --git a/tools/sched_ext/include/scx/common.bpf.h b/tools/sched_ext/include/scx/common.bpf.h index a309be2c910b9..567d8761a7ca6 100644 --- a/tools/sched_ext/include/scx/common.bpf.h +++ b/tools/sched_ext/include/scx/common.bpf.h @@ -39,6 +39,9 @@ u32 scx_bpf_reenqueue_local(void) __ksym; void scx_bpf_kick_cpu(s32 cpu, u64 flags) __ksym; s32 scx_bpf_dsq_nr_queued(u64 dsq_id) __ksym; void scx_bpf_destroy_dsq(u64 dsq_id) __ksym; +int bpf_iter_scx_dsq_new(struct bpf_iter_scx_dsq *it, u64 dsq_id, bool rev) __ksym; +struct task_struct *bpf_iter_scx_dsq_next(struct bpf_iter_scx_dsq *it) __ksym; +void bpf_iter_scx_dsq_destroy(struct bpf_iter_scx_dsq *it) __ksym; void scx_bpf_exit_bstr(s64 exit_code, char *fmt, unsigned long long *data, u32 data__sz) __ksym; void scx_bpf_error_bstr(char *fmt, unsigned long long *data, u32 data_len) __ksym; u32 scx_bpf_cpuperf_cap(s32 cpu) __ksym; diff --git a/tools/sched_ext/scx_qmap.bpf.c b/tools/sched_ext/scx_qmap.bpf.c index dbca79a4d4237..a97b20025bc34 100644 --- a/tools/sched_ext/scx_qmap.bpf.c +++ b/tools/sched_ext/scx_qmap.bpf.c @@ -32,6 +32,8 @@ const volatile u64 slice_ns = SCX_SLICE_DFL; const volatile u32 stall_user_nth; const volatile u32 stall_kernel_nth; const volatile u32 dsp_inf_loop_after; +const volatile u32 dsp_batch; +const volatile bool print_shared_dsq; const volatile s32 disallow_tgid; const volatile bool switch_partial; @@ -258,7 +260,7 @@ static void update_core_sched_head_seq(struct task_struct *p) void BPF_STRUCT_OPS(qmap_dispatch, s32 cpu, struct task_struct *prev) { struct cpu_ctx *cpuc; - u32 zero = 0; + u32 zero = 0, batch = dsp_batch ?: 1; void *fifo; s32 i, pid; @@ -289,7 +291,6 @@ void BPF_STRUCT_OPS(qmap_dispatch, s32 cpu, struct task_struct *prev) cpuc->dsp_idx = (cpuc->dsp_idx + 1) % 5; cpuc->dsp_cnt = 1 << cpuc->dsp_idx; } - cpuc->dsp_cnt--; fifo = bpf_map_lookup_elem(&queue_arr, &cpuc->dsp_idx); if (!fifo) { @@ -298,17 +299,26 @@ void BPF_STRUCT_OPS(qmap_dispatch, s32 cpu, struct task_struct *prev) } /* Dispatch or advance. */ - if (!bpf_map_pop_elem(fifo, &pid)) { + bpf_repeat(BPF_MAX_LOOPS) { struct task_struct *p; + if (bpf_map_pop_elem(fifo, &pid)) + break; + p = bpf_task_from_pid(pid); - if (p) { - update_core_sched_head_seq(p); - __sync_fetch_and_add(&nr_dispatched, 1); - scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, slice_ns, 0); - bpf_task_release(p); + if (!p) + continue; + + update_core_sched_head_seq(p); + __sync_fetch_and_add(&nr_dispatched, 1); + scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, slice_ns, 0); + bpf_task_release(p); + batch--; + cpuc->dsp_cnt--; + if (!batch || !scx_bpf_dispatch_nr_slots()) return; - } + if (!cpuc->dsp_cnt) + break; } cpuc->dsp_cnt = 0; @@ -467,7 +477,7 @@ void BPF_STRUCT_OPS(qmap_cpu_offline, s32 cpu) print_cpus(); } -struct cpu_mon_timer { +struct monitor_timer { struct bpf_timer timer; }; @@ -475,14 +485,14 @@ struct { __uint(type, BPF_MAP_TYPE_ARRAY); __uint(max_entries, 1); __type(key, u32); - __type(value, struct cpu_mon_timer); + __type(value, struct monitor_timer); } central_timer SEC(".maps"); /* * Print out the min, avg and max performance levels of CPUs every second to * demonstrate the cpuperf interface. */ -static int cpu_mon_timerfn(void *map, int *key, struct bpf_timer *timer) +static void monitor_cpuperf(void) { u32 zero = 0; u32 nr_cpu_ids = scx_bpf_nr_cpu_ids(); @@ -492,8 +502,6 @@ static int cpu_mon_timerfn(void *map, int *key, struct bpf_timer *timer) int i, nr_online_cpus = 0; online = scx_bpf_get_online_cpumask(); - if (!online) - return -ENOMEM; bpf_for(i, 0, nr_cpu_ids) { struct cpu_ctx *cpuc; @@ -537,10 +545,39 @@ static int cpu_mon_timerfn(void *map, int *key, struct bpf_timer *timer) cpuperf_target_min = target_min; cpuperf_target_avg = target_sum / nr_online_cpus; cpuperf_target_max = target_max; - - bpf_timer_start(timer, ONE_SEC_IN_NS, 0); out: scx_bpf_put_cpumask(online); +} + +/* + * Dump the currently queued tasks in the global DSQ to demonstrate the usage of + * scx_bpf_dsq_nr_queued() and DSQ iterator. Raise the dispatch batch count to + * see meaningful dumps in the trace pipe. + */ +static void dump_dsq_global(void) +{ + struct task_struct *p; + s32 nr; + + if (!(nr = scx_bpf_dsq_nr_queued(SCX_DSQ_GLOBAL))) + return; + + bpf_printk("Dumping %d tasks in SCX_DSQ_GLOBAL in reverse order", nr); + + bpf_rcu_read_lock(); + bpf_for_each(scx_dsq, p, SCX_DSQ_GLOBAL, SCX_DSQ_ITER_REV) + bpf_printk("%s[%d]", p->comm, p->pid); + bpf_rcu_read_unlock(); +} + +static int monitor_timerfn(void *map, int *key, struct bpf_timer *timer) +{ + monitor_cpuperf(); + + if (print_shared_dsq) + dump_dsq_global(); + + bpf_timer_start(timer, ONE_SEC_IN_NS, 0); return 0; } @@ -559,7 +596,7 @@ s32 BPF_STRUCT_OPS(qmap_init) return -ESRCH; bpf_timer_init(timer, ¢ral_timer, CLOCK_MONOTONIC); - bpf_timer_set_callback(timer, cpu_mon_timerfn); + bpf_timer_set_callback(timer, monitor_timerfn); return bpf_timer_start(timer, ONE_SEC_IN_NS, 0); } diff --git a/tools/sched_ext/scx_qmap.c b/tools/sched_ext/scx_qmap.c index 5cadc2e0b32db..a2bd98b7283ac 100644 --- a/tools/sched_ext/scx_qmap.c +++ b/tools/sched_ext/scx_qmap.c @@ -27,6 +27,8 @@ const char help_fmt[] = " -t COUNT Stall every COUNT'th user thread\n" " -T COUNT Stall every COUNT'th kernel thread\n" " -l COUNT Trigger dispatch infinite looping after COUNT dispatches\n" +" -b COUNT Dispatch upto COUNT tasks together\n" +" -P Print out DSQ content to trace_pipe every second, use with -b\n" " -d PID Disallow a process from switching into SCHED_EXT (-1 for self)\n" " -D LEN Set scx_exit_info.dump buffer length\n" " -p Switch only tasks on SCHED_EXT policy intead of all\n" @@ -53,7 +55,7 @@ int main(int argc, char **argv) skel = scx_qmap__open(); SCX_BUG_ON(!skel, "Failed to open skel"); - while ((opt = getopt(argc, argv, "s:e:t:T:l:d:D:ph")) != -1) { + while ((opt = getopt(argc, argv, "s:e:t:T:l:b:Pd:D:ph")) != -1) { switch (opt) { case 's': skel->rodata->slice_ns = strtoull(optarg, NULL, 0) * 1000; @@ -70,6 +72,12 @@ int main(int argc, char **argv) case 'l': skel->rodata->dsp_inf_loop_after = strtoul(optarg, NULL, 0); break; + case 'b': + skel->rodata->dsp_batch = strtoul(optarg, NULL, 0); + break; + case 'P': + skel->rodata->print_shared_dsq = true; + break; case 'd': skel->rodata->disallow_tgid = strtol(optarg, NULL, 0); if (skel->rodata->disallow_tgid < 0) From 4b773f39ffdba9b640ab626fd820f4b70569ac40 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 23 Apr 2024 10:36:09 -1000 Subject: [PATCH 575/606] scx: Refactor consume_dispatch_q() - Factor out consume_local_task() and consume_remote_task() - s/task_can_run_on_rq/task_can_run_on_remote_rq()/ and improve UP handling. This will be used to implement targeted task consumption while iterating DSQs. (cherry picked from commit fd4df11bfaa3f46a9ee6dfd6ba56d9f0112d0dd7) --- kernel/sched/ext.c | 87 ++++++++++++++++++++++++++++------------------ 1 file changed, 54 insertions(+), 33 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 3c43bd9392d80..afa7558b5bcdc 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -2120,38 +2120,13 @@ static void dispatch_to_local_dsq_unlock(struct rq *rq, struct rq_flags *rf, } #endif /* CONFIG_SMP */ - -static bool task_can_run_on_rq(struct task_struct *p, struct rq *rq) -{ - return likely(test_rq_online(rq)) && !is_migration_disabled(p) && - cpumask_test_cpu(cpu_of(rq), p->cpus_ptr); -} - -static bool consume_dispatch_q(struct rq *rq, struct rq_flags *rf, - struct scx_dispatch_q *dsq) +static void consume_local_task(struct rq *rq, struct scx_dispatch_q *dsq, + struct task_struct *p) { struct scx_rq *scx_rq = &rq->scx; - struct task_struct *p; - struct rq *task_rq; - bool moved = false; -retry: - if (list_empty(&dsq->list)) - return false; - raw_spin_lock(&dsq->lock); - - nldsq_for_each_task(p, dsq) { - task_rq = task_rq(p); - if (rq == task_rq) - goto this_rq; - if (task_can_run_on_rq(p, rq)) - goto remote_rq; - } - - raw_spin_unlock(&dsq->lock); - return false; + lockdep_assert_held(&dsq->lock); /* released on return */ -this_rq: /* @dsq is locked and @p is on this rq */ WARN_ON_ONCE(p->scx.holding_cpu >= 0); task_unlink_from_dsq(p, dsq); @@ -2160,10 +2135,23 @@ static bool consume_dispatch_q(struct rq *rq, struct rq_flags *rf, dsq_mod_nr(&scx_rq->local_dsq, 1); p->scx.dsq = &scx_rq->local_dsq; raw_spin_unlock(&dsq->lock); - return true; +} -remote_rq: #ifdef CONFIG_SMP +static bool task_can_run_on_remote_rq(struct task_struct *p, struct rq *rq) +{ + return likely(test_rq_online(rq)) && !is_migration_disabled(p) && + cpumask_test_cpu(cpu_of(rq), p->cpus_ptr); +} + +static bool consume_remote_task(struct rq *rq, struct rq_flags *rf, + struct scx_dispatch_q *dsq, + struct task_struct *p, struct rq *task_rq) +{ + bool moved = false; + + lockdep_assert_held(&dsq->lock); /* released on return */ + /* * @dsq is locked and @p is on a remote rq. @p is currently protected by * @dsq->lock. We want to pull @p to @rq but may deadlock if we grab @@ -2184,10 +2172,43 @@ static bool consume_dispatch_q(struct rq *rq, struct rq_flags *rf, moved = move_task_to_local_dsq(rq, p, 0); double_unlock_balance(rq, task_rq); + + return moved; +} +#else /* CONFIG_SMP */ +static bool task_can_run_on_remote_rq(struct task_struct *p, struct rq *rq) { return false; } +static bool consume_remote_task(struct rq *rq, struct rq_flags *rf, + struct scx_dispatch_q *dsq, + struct task_struct *p, struct rq *task_rq) { return false; } #endif /* CONFIG_SMP */ - if (likely(moved)) - return true; - goto retry; + +static bool consume_dispatch_q(struct rq *rq, struct rq_flags *rf, + struct scx_dispatch_q *dsq) +{ + struct task_struct *p; +retry: + if (list_empty(&dsq->list)) + return false; + + raw_spin_lock(&dsq->lock); + + nldsq_for_each_task(p, dsq) { + struct rq *task_rq = task_rq(p); + + if (rq == task_rq) { + consume_local_task(rq, dsq, p); + return true; + } + + if (task_can_run_on_remote_rq(p, rq)) { + if (likely(consume_remote_task(rq, rf, dsq, p, task_rq))) + return true; + goto retry; + } + } + + raw_spin_unlock(&dsq->lock); + return false; } enum dispatch_to_local_dsq_ret { From c792a3d28c82a16ab66c827bf2ba2feccc817439 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 23 Apr 2024 10:36:09 -1000 Subject: [PATCH 576/606] scx: Better task_can_run_on_remote_rq() - Unfurl the conditions for readability. - Add missing task_cpu_possible() test. - Comments. (cherry picked from commit ee83d52ab76f67f27c445ef3dc5a4550a152860b) --- kernel/sched/ext.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index afa7558b5bcdc..bba18afb643ac 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -2138,10 +2138,23 @@ static void consume_local_task(struct rq *rq, struct scx_dispatch_q *dsq, } #ifdef CONFIG_SMP +/* + * Similar to kernel/sched/core.c::is_cpu_allowed() but we're testing whether @p + * can be pulled to @rq. + */ static bool task_can_run_on_remote_rq(struct task_struct *p, struct rq *rq) { - return likely(test_rq_online(rq)) && !is_migration_disabled(p) && - cpumask_test_cpu(cpu_of(rq), p->cpus_ptr); + int cpu = cpu_of(rq); + + if (!cpumask_test_cpu(cpu, p->cpus_ptr)) + return false; + if (unlikely(is_migration_disabled(p))) + return false; + if (!(p->flags & PF_KTHREAD) && unlikely(!task_cpu_possible(cpu, p))) + return false; + if (unlikely(!test_rq_online(rq))) + return false; + return true; } static bool consume_remote_task(struct rq *rq, struct rq_flags *rf, From e9f4cbc21ca1fb6b88f6ea4275ba4cd55fbbc202 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 23 Apr 2024 10:36:10 -1000 Subject: [PATCH 577/606] scx_qmap: Use a user DSQ instead of DSQ_GLOBAL This should behave the same. This is to prepare for the next patch which will add a usage example of targeted consumption. (cherry picked from commit 221f6dcc34933e688616013c6d06728e2d87bdf8) --- tools/sched_ext/scx_qmap.bpf.c | 44 +++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/tools/sched_ext/scx_qmap.bpf.c b/tools/sched_ext/scx_qmap.bpf.c index a97b20025bc34..1a85991fe13f8 100644 --- a/tools/sched_ext/scx_qmap.bpf.c +++ b/tools/sched_ext/scx_qmap.bpf.c @@ -24,7 +24,10 @@ */ #include -#define ONE_SEC_IN_NS 1000000000 +enum consts { + ONE_SEC_IN_NS = 1000000000, + SHARED_DSQ = 0, +}; char _license[] SEC("license") = "GPL"; @@ -213,7 +216,7 @@ void BPF_STRUCT_OPS(qmap_enqueue, struct task_struct *p, u64 enq_flags) if (enq_flags & SCX_ENQ_REENQ) { s32 cpu; - scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, 0, enq_flags); + scx_bpf_dispatch(p, SHARED_DSQ, 0, enq_flags); cpu = scx_bpf_pick_idle_cpu(p->cpus_ptr, 0); if (cpu >= 0) scx_bpf_kick_cpu(cpu, __COMPAT_SCX_KICK_IDLE); @@ -228,7 +231,7 @@ void BPF_STRUCT_OPS(qmap_enqueue, struct task_struct *p, u64 enq_flags) /* Queue on the selected FIFO. If the FIFO overflows, punt to global. */ if (bpf_map_push_elem(ring, &pid, 0)) { - scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, slice_ns, enq_flags); + scx_bpf_dispatch(p, SHARED_DSQ, slice_ns, enq_flags); return; } @@ -259,14 +262,16 @@ static void update_core_sched_head_seq(struct task_struct *p) void BPF_STRUCT_OPS(qmap_dispatch, s32 cpu, struct task_struct *prev) { + struct task_struct *p; struct cpu_ctx *cpuc; u32 zero = 0, batch = dsp_batch ?: 1; void *fifo; s32 i, pid; - if (dsp_inf_loop_after && nr_dispatched > dsp_inf_loop_after) { - struct task_struct *p; + if (scx_bpf_consume(SHARED_DSQ)) + return; + if (dsp_inf_loop_after && nr_dispatched > dsp_inf_loop_after) { /* * PID 2 should be kthreadd which should mostly be idle and off * the scheduler. Let's keep dispatching it to force the kernel @@ -274,7 +279,7 @@ void BPF_STRUCT_OPS(qmap_dispatch, s32 cpu, struct task_struct *prev) */ p = bpf_task_from_pid(2); if (p) { - scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, slice_ns, 0); + scx_bpf_dispatch(p, SCX_DSQ_LOCAL, slice_ns, 0); bpf_task_release(p); return; } @@ -300,8 +305,6 @@ void BPF_STRUCT_OPS(qmap_dispatch, s32 cpu, struct task_struct *prev) /* Dispatch or advance. */ bpf_repeat(BPF_MAX_LOOPS) { - struct task_struct *p; - if (bpf_map_pop_elem(fifo, &pid)) break; @@ -311,12 +314,14 @@ void BPF_STRUCT_OPS(qmap_dispatch, s32 cpu, struct task_struct *prev) update_core_sched_head_seq(p); __sync_fetch_and_add(&nr_dispatched, 1); - scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, slice_ns, 0); + scx_bpf_dispatch(p, SHARED_DSQ, slice_ns, 0); bpf_task_release(p); batch--; cpuc->dsp_cnt--; - if (!batch || !scx_bpf_dispatch_nr_slots()) + if (!batch || !scx_bpf_dispatch_nr_slots()) { + scx_bpf_consume(SHARED_DSQ); return; + } if (!cpuc->dsp_cnt) break; } @@ -550,22 +555,22 @@ static void monitor_cpuperf(void) } /* - * Dump the currently queued tasks in the global DSQ to demonstrate the usage of + * Dump the currently queued tasks in the shared DSQ to demonstrate the usage of * scx_bpf_dsq_nr_queued() and DSQ iterator. Raise the dispatch batch count to * see meaningful dumps in the trace pipe. */ -static void dump_dsq_global(void) +static void dump_shared_dsq(void) { struct task_struct *p; s32 nr; - if (!(nr = scx_bpf_dsq_nr_queued(SCX_DSQ_GLOBAL))) + if (!(nr = scx_bpf_dsq_nr_queued(SHARED_DSQ))) return; - bpf_printk("Dumping %d tasks in SCX_DSQ_GLOBAL in reverse order", nr); + bpf_printk("Dumping %d tasks in SHARED_DSQ in reverse order", nr); bpf_rcu_read_lock(); - bpf_for_each(scx_dsq, p, SCX_DSQ_GLOBAL, SCX_DSQ_ITER_REV) + bpf_for_each(scx_dsq, p, SHARED_DSQ, SCX_DSQ_ITER_REV) bpf_printk("%s[%d]", p->comm, p->pid); bpf_rcu_read_unlock(); } @@ -575,22 +580,27 @@ static int monitor_timerfn(void *map, int *key, struct bpf_timer *timer) monitor_cpuperf(); if (print_shared_dsq) - dump_dsq_global(); + dump_shared_dsq(); bpf_timer_start(timer, ONE_SEC_IN_NS, 0); return 0; } -s32 BPF_STRUCT_OPS(qmap_init) +s32 BPF_STRUCT_OPS_SLEEPABLE(qmap_init) { u32 key = 0; struct bpf_timer *timer; + s32 ret; if (!switch_partial) __COMPAT_scx_bpf_switch_all(); print_cpus(); + ret = scx_bpf_create_dsq(SHARED_DSQ, -1); + if (ret) + return ret; + timer = bpf_map_lookup_elem(¢ral_timer, &key); if (!timer) return -ESRCH; From 53ba8fd0d04afb10dfe2a62cc2f9ef5d315c24ae Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 23 Apr 2024 10:36:10 -1000 Subject: [PATCH 578/606] scx: Implement scx_bpf_consume_task() This allows consuming a specific task while iterating a DSQ using the BPF iterator significantly increasing DSQ's flexibility. It has to jump through some hoops to work around BPF restrictions which hopefully can be removed in the future. scx_qmap is updated with a silly priority boost mechanism to demonstrate the usage. (cherry picked from commit f7be5ef6a606e2161a9727c0c549bb1f6b61a758) --- kernel/sched/ext.c | 89 +++++++++++++++++++++++- tools/sched_ext/include/scx/common.bpf.h | 16 +++++ tools/sched_ext/scx_qmap.bpf.c | 39 ++++++++++- tools/sched_ext/scx_qmap.c | 12 +++- 4 files changed, 147 insertions(+), 9 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index bba18afb643ac..7ea6be379812b 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -1148,6 +1148,12 @@ enum scx_dsq_iter_flags { }; struct bpf_iter_scx_dsq_kern { + /* + * Must be the first field. Used to work around BPF restriction and pass + * in the iterator pointer to scx_bpf_consume_task(). + */ + struct bpf_iter_scx_dsq_kern *self; + struct scx_dsq_node cursor; struct scx_dispatch_q *dsq; u64 dsq_seq; @@ -1536,7 +1542,7 @@ static void dispatch_enqueue(struct scx_dispatch_q *dsq, struct task_struct *p, p->scx.dsq_seq = dsq->seq; dsq_mod_nr(dsq, 1); - p->scx.dsq = dsq; + WRITE_ONCE(p->scx.dsq, dsq); /* * scx.ddsp_dsq_id and scx.ddsp_enq_flags are only relevant on the @@ -1629,7 +1635,7 @@ static void dispatch_dequeue(struct scx_rq *scx_rq, struct task_struct *p) WARN_ON_ONCE(task_linked_on_dsq(p)); p->scx.holding_cpu = -1; } - p->scx.dsq = NULL; + WRITE_ONCE(p->scx.dsq, NULL); if (!is_local) raw_spin_unlock(&dsq->lock); @@ -2133,7 +2139,7 @@ static void consume_local_task(struct rq *rq, struct scx_dispatch_q *dsq, list_add_tail(&p->scx.dsq_node.list, &scx_rq->local_dsq.list); dsq_mod_nr(dsq, -1); dsq_mod_nr(&scx_rq->local_dsq, 1); - p->scx.dsq = &scx_rq->local_dsq; + WRITE_ONCE(p->scx.dsq, &scx_rq->local_dsq); raw_spin_unlock(&dsq->lock); } @@ -5762,12 +5768,88 @@ __bpf_kfunc bool scx_bpf_consume(u64 dsq_id) } } +/** + * __scx_bpf_consume_task - Transfer a task from DSQ iteration to the local DSQ + * @it: DSQ iterator in progress + * @p: task to consume + * + * Transfer @p which is on the DSQ currently iterated by @it to the current + * CPU's local DSQ. For the transfer to be successful, @p must still be on the + * DSQ and have been queued before the DSQ iteration started. This function + * doesn't care whether @p was obtained from the DSQ iteration. @p just has to + * be on the DSQ and have been queued before the iteration started. + * + * Returns %true if @p has been consumed, %false if @p had already been consumed + * or dequeued. + */ +__bpf_kfunc bool __scx_bpf_consume_task(unsigned long it, struct task_struct *p) +{ + struct bpf_iter_scx_dsq_kern *kit = (void *)it; + struct scx_dispatch_q *dsq, *kit_dsq; + struct scx_dsp_ctx *dspc = this_cpu_ptr(&scx_dsp_ctx); + struct rq *task_rq; + u64 kit_dsq_seq; + + /* can't trust @kit, carefully fetch the values we need */ + if (get_kernel_nofault(kit_dsq, &kit->dsq) || + get_kernel_nofault(kit_dsq_seq, &kit->dsq_seq)) { + scx_ops_error("invalid @it 0x%lx", it); + return false; + } + + /* + * @kit can't be trusted and we can only get the DSQ from @p. As we + * don't know @p's rq is locked, use READ_ONCE() to access the field. + * Derefing is safe as DSQs are RCU protected. + */ + dsq = READ_ONCE(p->scx.dsq); + + if (unlikely(dsq->id == SCX_DSQ_LOCAL)) { + scx_ops_error("local DSQ not allowed"); + return false; + } + + if (unlikely(!dsq || dsq != kit_dsq)) + return false; + + if (!scx_kf_allowed(SCX_KF_DISPATCH)) + return false; + + flush_dispatch_buf(dspc->rq, dspc->rf); + + raw_spin_lock(&dsq->lock); + + /* + * Did someone else get to it? @p could have already left $dsq, got + * re-enqueud, or be in the process of being consumed by someone else. + */ + if (unlikely(p->scx.dsq != dsq || + time_after64(p->scx.dsq_seq, kit_dsq_seq) || + p->scx.holding_cpu >= 0)) + goto out_unlock; + + task_rq = task_rq(p); + + if (dspc->rq == task_rq) { + consume_local_task(dspc->rq, dsq, p); + return true; + } + + if (task_can_run_on_remote_rq(p, dspc->rq)) + return consume_remote_task(dspc->rq, dspc->rf, dsq, p, task_rq); + +out_unlock: + raw_spin_unlock(&dsq->lock); + return false; +} + __bpf_kfunc_end_defs(); BTF_KFUNCS_START(scx_kfunc_ids_dispatch) BTF_ID_FLAGS(func, scx_bpf_dispatch_nr_slots) BTF_ID_FLAGS(func, scx_bpf_dispatch_cancel) BTF_ID_FLAGS(func, scx_bpf_consume) +BTF_ID_FLAGS(func, __scx_bpf_consume_task) BTF_KFUNCS_END(scx_kfunc_ids_dispatch) static const struct btf_kfunc_id_set scx_kfunc_set_dispatch = { @@ -5976,6 +6058,7 @@ __bpf_kfunc int bpf_iter_scx_dsq_new(struct bpf_iter_scx_dsq *it, u64 dsq_id, INIT_LIST_HEAD(&kit->cursor.list); RB_CLEAR_NODE(&kit->cursor.priq); kit->cursor.flags = SCX_TASK_DSQ_CURSOR; + kit->self = kit; kit->dsq_seq = READ_ONCE(kit->dsq->seq); kit->flags = flags; diff --git a/tools/sched_ext/include/scx/common.bpf.h b/tools/sched_ext/include/scx/common.bpf.h index 567d8761a7ca6..e255c6725d46f 100644 --- a/tools/sched_ext/include/scx/common.bpf.h +++ b/tools/sched_ext/include/scx/common.bpf.h @@ -35,6 +35,7 @@ void scx_bpf_dispatch_vtime(struct task_struct *p, u64 dsq_id, u64 slice, u64 vt u32 scx_bpf_dispatch_nr_slots(void) __ksym; void scx_bpf_dispatch_cancel(void) __ksym; bool scx_bpf_consume(u64 dsq_id) __ksym; +bool __scx_bpf_consume_task(unsigned long it, struct task_struct *p) __ksym; u32 scx_bpf_reenqueue_local(void) __ksym; void scx_bpf_kick_cpu(s32 cpu, u64 flags) __ksym; s32 scx_bpf_dsq_nr_queued(u64 dsq_id) __ksym; @@ -64,6 +65,21 @@ struct cgroup *scx_bpf_task_cgroup(struct task_struct *p) __ksym; static inline __attribute__((format(printf, 1, 2))) void ___scx_bpf_exit_format_checker(const char *fmt, ...) {} +/* hopefully temporary wrapper to work around BPF restriction */ +static inline bool scx_bpf_consume_task(struct bpf_iter_scx_dsq *it, + struct task_struct *p) +{ + unsigned long ptr; + bpf_probe_read_kernel(&ptr, sizeof(ptr), it); + return __scx_bpf_consume_task(ptr, p); +} + +/* + * Use the following as @it when calling scx_bpf_consume_task() from whitin + * bpf_for_each() loops. + */ +#define BPF_FOR_EACH_ITER (&___it) + /* * Helper macro for initializing the fmt and variadic argument inputs to both * bstr exit kfuncs. Callers to this function should use ___fmt and ___param to diff --git a/tools/sched_ext/scx_qmap.bpf.c b/tools/sched_ext/scx_qmap.bpf.c index 1a85991fe13f8..76ff91386c99d 100644 --- a/tools/sched_ext/scx_qmap.bpf.c +++ b/tools/sched_ext/scx_qmap.bpf.c @@ -23,6 +23,7 @@ * Copyright (c) 2022 David Vernet */ #include +#include enum consts { ONE_SEC_IN_NS = 1000000000, @@ -37,6 +38,7 @@ const volatile u32 stall_kernel_nth; const volatile u32 dsp_inf_loop_after; const volatile u32 dsp_batch; const volatile bool print_shared_dsq; +const volatile char exp_prefix[17]; const volatile s32 disallow_tgid; const volatile bool switch_partial; @@ -121,7 +123,7 @@ struct { /* Statistics */ u64 nr_enqueued, nr_dispatched, nr_reenqueued, nr_dequeued; -u64 nr_core_sched_execed; +u64 nr_core_sched_execed, nr_expedited; u32 cpuperf_min, cpuperf_avg, cpuperf_max; u32 cpuperf_target_min, cpuperf_target_avg, cpuperf_target_max; @@ -260,6 +262,37 @@ static void update_core_sched_head_seq(struct task_struct *p) scx_bpf_error("task_ctx lookup failed"); } +static bool consume_shared_dsq(void) +{ + struct task_struct *p; + bool consumed; + s32 i; + + if (exp_prefix[0] == '\0') + return scx_bpf_consume(SHARED_DSQ); + + /* + * To demonstrate the use of scx_bpf_consume_task(), implement silly + * selective priority boosting mechanism by scanning SHARED_DSQ looking + * for matching comms and consume them first. This makes difference only + * when dsp_batch is larger than 1. + */ + consumed = false; + bpf_for_each(scx_dsq, p, SHARED_DSQ, 0) { + char comm[sizeof(exp_prefix)]; + + memcpy(comm, p->comm, sizeof(exp_prefix) - 1); + + if (!bpf_strncmp(comm, sizeof(exp_prefix), exp_prefix) && + scx_bpf_consume_task(BPF_FOR_EACH_ITER, p)) { + consumed = true; + __sync_fetch_and_add(&nr_expedited, 1); + } + } + + return consumed || scx_bpf_consume(SHARED_DSQ); +} + void BPF_STRUCT_OPS(qmap_dispatch, s32 cpu, struct task_struct *prev) { struct task_struct *p; @@ -268,7 +301,7 @@ void BPF_STRUCT_OPS(qmap_dispatch, s32 cpu, struct task_struct *prev) void *fifo; s32 i, pid; - if (scx_bpf_consume(SHARED_DSQ)) + if (consume_shared_dsq()) return; if (dsp_inf_loop_after && nr_dispatched > dsp_inf_loop_after) { @@ -319,7 +352,7 @@ void BPF_STRUCT_OPS(qmap_dispatch, s32 cpu, struct task_struct *prev) batch--; cpuc->dsp_cnt--; if (!batch || !scx_bpf_dispatch_nr_slots()) { - scx_bpf_consume(SHARED_DSQ); + consume_shared_dsq(); return; } if (!cpuc->dsp_cnt) diff --git a/tools/sched_ext/scx_qmap.c b/tools/sched_ext/scx_qmap.c index a2bd98b7283ac..4c5957417d102 100644 --- a/tools/sched_ext/scx_qmap.c +++ b/tools/sched_ext/scx_qmap.c @@ -29,6 +29,8 @@ const char help_fmt[] = " -l COUNT Trigger dispatch infinite looping after COUNT dispatches\n" " -b COUNT Dispatch upto COUNT tasks together\n" " -P Print out DSQ content to trace_pipe every second, use with -b\n" +" -E PREFIX Expedite consumption of threads w/ matching comm, use with -b\n" +" (e.g. match shell on a loaded system)\n" " -d PID Disallow a process from switching into SCHED_EXT (-1 for self)\n" " -D LEN Set scx_exit_info.dump buffer length\n" " -p Switch only tasks on SCHED_EXT policy intead of all\n" @@ -55,7 +57,7 @@ int main(int argc, char **argv) skel = scx_qmap__open(); SCX_BUG_ON(!skel, "Failed to open skel"); - while ((opt = getopt(argc, argv, "s:e:t:T:l:b:Pd:D:ph")) != -1) { + while ((opt = getopt(argc, argv, "s:e:t:T:l:b:PE:d:D:ph")) != -1) { switch (opt) { case 's': skel->rodata->slice_ns = strtoull(optarg, NULL, 0) * 1000; @@ -78,6 +80,10 @@ int main(int argc, char **argv) case 'P': skel->rodata->print_shared_dsq = true; break; + case 'E': + strncpy(skel->rodata->exp_prefix, optarg, + sizeof(skel->rodata->exp_prefix) - 1); + break; case 'd': skel->rodata->disallow_tgid = strtol(optarg, NULL, 0); if (skel->rodata->disallow_tgid < 0) @@ -103,10 +109,10 @@ int main(int argc, char **argv) long nr_enqueued = skel->bss->nr_enqueued; long nr_dispatched = skel->bss->nr_dispatched; - printf("stats : enq=%lu dsp=%lu delta=%ld reenq=%" PRIu64 " deq=%" PRIu64 " core=%" PRIu64 "\n", + printf("stats : enq=%lu dsp=%lu delta=%ld reenq=%"PRIu64" deq=%"PRIu64" core=%"PRIu64" exp=%"PRIu64"\n", nr_enqueued, nr_dispatched, nr_enqueued - nr_dispatched, skel->bss->nr_reenqueued, skel->bss->nr_dequeued, - skel->bss->nr_core_sched_execed); + skel->bss->nr_core_sched_execed, skel->bss->nr_expedited); printf("cpuperf: cur min/avg/max=%u/%u/%u target min/avg/max=%u/%u/%u\n", skel->bss->cpuperf_min, skel->bss->cpuperf_avg, From 516bcaabde50153733c63583dadcf8ff1bc80ab9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Apr 2024 15:27:03 -1000 Subject: [PATCH 579/606] scx: Cosmetic changes from patch split Reordering for consistency, indentation cleanup, minor arg renames, drop a BUILD_BUG_ON(), use SCX_OPS_OPEN consistently and so on. No functional changes intended. (cherry picked from commit a4bc772699752cf68334834781a1df15255661d7) --- kernel/sched/ext.c | 36 +++++++++++------------- kernel/sched/ext.h | 19 ++++++------- tools/sched_ext/include/scx/common.bpf.h | 16 +++++------ tools/sched_ext/include/scx/compat.h | 4 +-- tools/sched_ext/scx_central.c | 3 +- tools/sched_ext/scx_flatcg.c | 3 +- tools/sched_ext/scx_qmap.bpf.c | 4 +-- tools/sched_ext/scx_qmap.c | 7 ++--- tools/sched_ext/scx_simple.c | 3 +- 9 files changed, 44 insertions(+), 51 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 7ea6be379812b..ab6d0c7a8b8ea 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -121,10 +121,10 @@ enum scx_ops_flags { */ SCX_OPS_CGROUP_KNOB_WEIGHT = 1LLU << 16, /* cpu.weight */ - SCX_OPS_ALL_FLAGS = SCX_OPS_SWITCH_PARTIAL | - SCX_OPS_KEEP_BUILTIN_IDLE | + SCX_OPS_ALL_FLAGS = SCX_OPS_KEEP_BUILTIN_IDLE | SCX_OPS_ENQ_LAST | SCX_OPS_ENQ_EXITING | + SCX_OPS_SWITCH_PARTIAL | SCX_OPS_CGROUP_KNOB_WEIGHT, }; @@ -155,13 +155,13 @@ struct scx_cgroup_init_args { enum scx_cpu_preempt_reason { /* next task is being scheduled by &sched_class_rt */ - SCX_CPU_PREEMPT_RT, + SCX_CPU_PREEMPT_RT, /* next task is being scheduled by &sched_class_dl */ - SCX_CPU_PREEMPT_DL, + SCX_CPU_PREEMPT_DL, /* next task is being scheduled by &sched_class_stop */ - SCX_CPU_PREEMPT_STOP, + SCX_CPU_PREEMPT_STOP, /* unknown reason for SCX being preempted */ - SCX_CPU_PREEMPT_UNKNOWN, + SCX_CPU_PREEMPT_UNKNOWN, }; /* @@ -920,11 +920,11 @@ static __printf(3, 4) void scx_ops_exit_kind(enum scx_exit_kind kind, s64 exit_code, const char *fmt, ...); -#define scx_ops_error_kind(__err, fmt, args...) \ - scx_ops_exit_kind(__err, 0, fmt, ##args) +#define scx_ops_error_kind(err, fmt, args...) \ + scx_ops_exit_kind((err), 0, fmt, ##args) -#define scx_ops_exit(__code, fmt, args...) \ - scx_ops_exit_kind(SCX_EXIT_UNREG_KERN, __code, fmt, ##args) +#define scx_ops_exit(code, fmt, args...) \ + scx_ops_exit_kind(SCX_EXIT_UNREG_KERN, (code), fmt, ##args) #define scx_ops_error(fmt, args...) \ scx_ops_error_kind(SCX_EXIT_ERROR, fmt, ##args) @@ -1537,7 +1537,7 @@ static void dispatch_enqueue(struct scx_dispatch_q *dsq, struct task_struct *p, list_add_tail(&p->scx.dsq_node.list, &dsq->list); } - /* seq records the order tasks are queued, used by BPF iterations */ + /* seq records the order tasks are queued, used by BPF DSQ iterator */ dsq->seq++; p->scx.dsq_seq = dsq->seq; @@ -4561,6 +4561,8 @@ static __printf(3, 4) void scx_ops_exit_kind(enum scx_exit_kind kind, if (!atomic_try_cmpxchg(&scx_exit_kind, &none, kind)) return; + ei->exit_code = exit_code; + if (kind >= SCX_EXIT_ERROR) ei->bt_len = stack_trace_save(ei->bt, SCX_EXIT_BT_LEN, 1); @@ -4568,8 +4570,6 @@ static __printf(3, 4) void scx_ops_exit_kind(enum scx_exit_kind kind, vscnprintf(ei->msg, SCX_EXIT_MSG_LEN, fmt, args); va_end(args); - ei->exit_code = exit_code; - /* * Set ei->kind and ->reason for scx_dump_state(). They'll be set again * in scx_ops_disable_workfn(). @@ -6152,7 +6152,6 @@ static void bpf_exit_bstr_common(enum scx_exit_kind kind, s64 exit_code, unsigned long flags; int ret; - BUILD_BUG_ON(sizeof(enum scx_exit_code) != sizeof_field(struct scx_exit_info, exit_code)); local_irq_save(flags); bufs = this_cpu_ptr(&scx_bpf_error_bstr_bufs); @@ -6389,11 +6388,10 @@ __bpf_kfunc const struct cpumask *scx_bpf_get_idle_smtmask(void) __bpf_kfunc void scx_bpf_put_idle_cpumask(const struct cpumask *idle_mask) { /* - * Empty function body because we aren't actually acquiring or - * releasing a reference to a global idle cpumask, which is read-only - * in the caller and is never released. The acquire / release semantics - * here are just used to make the cpumask a trusted pointer in the - * caller. + * Empty function body because we aren't actually acquiring or releasing + * a reference to a global idle cpumask, which is read-only in the + * caller and is never released. The acquire / release semantics here + * are just used to make the cpumask a trusted pointer in the caller. */ } diff --git a/kernel/sched/ext.h b/kernel/sched/ext.h index 7e47723cd6704..83a48fda6317f 100644 --- a/kernel/sched/ext.h +++ b/kernel/sched/ext.h @@ -33,7 +33,9 @@ static inline bool task_on_scx(const struct task_struct *p) return scx_enabled() && p->sched_class == &ext_sched_class; } -bool task_should_scx(struct task_struct *p); +void scx_next_task_picked(struct rq *rq, struct task_struct *p, + const struct sched_class *active); +void scx_tick(struct rq *rq); void init_scx_entity(struct sched_ext_entity *scx); void scx_pre_fork(struct task_struct *p); int scx_fork(struct task_struct *p); @@ -41,9 +43,7 @@ void scx_post_fork(struct task_struct *p); void scx_cancel_fork(struct task_struct *p); int scx_check_setscheduler(struct task_struct *p, int policy); bool scx_can_stop_tick(struct rq *rq); -void scx_tick(struct rq *rq); -void scx_next_task_picked(struct rq *rq, struct task_struct *p, - const struct sched_class *active); +bool task_should_scx(struct task_struct *p); void init_sched_ext_class(void); static inline u32 scx_cpuperf_target(s32 cpu) @@ -88,18 +88,17 @@ bool scx_prio_less(const struct task_struct *a, const struct task_struct *b, #define scx_enabled() false #define scx_switched_all() false -static inline bool task_on_scx(const struct task_struct *p) { return false; } +static inline void scx_next_task_picked(struct rq *rq, struct task_struct *p, + const struct sched_class *active) {} +static inline void scx_tick(struct rq *rq) {} static inline void init_scx_entity(struct sched_ext_entity *scx) {} static inline void scx_pre_fork(struct task_struct *p) {} static inline int scx_fork(struct task_struct *p) { return 0; } static inline void scx_post_fork(struct task_struct *p) {} static inline void scx_cancel_fork(struct task_struct *p) {} -static inline int scx_check_setscheduler(struct task_struct *p, - int policy) { return 0; } +static inline int scx_check_setscheduler(struct task_struct *p, int policy) { return 0; } static inline bool scx_can_stop_tick(struct rq *rq) { return true; } -static inline void scx_tick(struct rq *rq) {} -static inline void scx_next_task_picked(struct rq *rq, struct task_struct *p, - const struct sched_class *active) {} +static inline bool task_on_scx(const struct task_struct *p) { return false; } static inline void init_sched_ext_class(void) {} static inline u32 scx_cpuperf_target(s32 cpu) { return 0; } diff --git a/tools/sched_ext/include/scx/common.bpf.h b/tools/sched_ext/include/scx/common.bpf.h index e255c6725d46f..f1a24f87ecd23 100644 --- a/tools/sched_ext/include/scx/common.bpf.h +++ b/tools/sched_ext/include/scx/common.bpf.h @@ -62,8 +62,11 @@ bool scx_bpf_task_running(const struct task_struct *p) __ksym; s32 scx_bpf_task_cpu(const struct task_struct *p) __ksym; struct cgroup *scx_bpf_task_cgroup(struct task_struct *p) __ksym; -static inline __attribute__((format(printf, 1, 2))) -void ___scx_bpf_exit_format_checker(const char *fmt, ...) {} +/* + * Use the following as @it when calling scx_bpf_consume_task() from whitin + * bpf_for_each() loops. + */ +#define BPF_FOR_EACH_ITER (&___it) /* hopefully temporary wrapper to work around BPF restriction */ static inline bool scx_bpf_consume_task(struct bpf_iter_scx_dsq *it, @@ -74,11 +77,8 @@ static inline bool scx_bpf_consume_task(struct bpf_iter_scx_dsq *it, return __scx_bpf_consume_task(ptr, p); } -/* - * Use the following as @it when calling scx_bpf_consume_task() from whitin - * bpf_for_each() loops. - */ -#define BPF_FOR_EACH_ITER (&___it) +static inline __attribute__((format(printf, 1, 2))) +void ___scx_bpf_exit_format_checker(const char *fmt, ...) {} /* * Helper macro for initializing the fmt and variadic argument inputs to both @@ -250,7 +250,7 @@ int bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *node, struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root) __ksym; -extern void *bpf_refcount_acquire_impl(void *kptr, void *meta) __ksym; +void *bpf_refcount_acquire_impl(void *kptr, void *meta) __ksym; #define bpf_refcount_acquire(kptr) bpf_refcount_acquire_impl(kptr, NULL) /* task */ diff --git a/tools/sched_ext/include/scx/compat.h b/tools/sched_ext/include/scx/compat.h index e2608ad6ec5cf..5b5b56391ee0c 100644 --- a/tools/sched_ext/include/scx/compat.h +++ b/tools/sched_ext/include/scx/compat.h @@ -143,7 +143,7 @@ static inline long scx_hotplug_seq(void) * o If nonzero and running on an older kernel, the value is set to zero * and a warning is emitted * - * - sched_ext_ops.hotplug_sqn + * - sched_ext_ops.hotplug_seq * o If nonzero and running on an older kernel, the scheduler will fail to * load */ @@ -163,7 +163,7 @@ static inline long scx_hotplug_seq(void) if (__COMPAT_struct_has_field("sched_ext_ops", "exit_dump_len") && \ (__skel)->struct_ops.__ops_name->exit_dump_len) { \ fprintf(stderr, "WARNING: kernel doesn't support setting exit dump len\n"); \ - (__skel)->struct_ops.__ops_name->exit_dump_len = 0; \ + (__skel)->struct_ops.__ops_name->exit_dump_len = 0; \ } \ SCX_BUG_ON(__scx_name##__load((__skel)), "Failed to load skel"); \ }) diff --git a/tools/sched_ext/scx_central.c b/tools/sched_ext/scx_central.c index bc209ad6a6235..d3bd950bb92ee 100644 --- a/tools/sched_ext/scx_central.c +++ b/tools/sched_ext/scx_central.c @@ -46,8 +46,7 @@ int main(int argc, char **argv) libbpf_set_strict_mode(LIBBPF_STRICT_ALL); - skel = scx_central__open(); - SCX_BUG_ON(!skel, "Failed to open skel"); + skel = SCX_OPS_OPEN(central_ops, scx_central); skel->rodata->central_cpu = 0; skel->rodata->nr_cpu_ids = libbpf_num_possible_cpus(); diff --git a/tools/sched_ext/scx_flatcg.c b/tools/sched_ext/scx_flatcg.c index b6f0184f106d2..604996fec37ad 100644 --- a/tools/sched_ext/scx_flatcg.c +++ b/tools/sched_ext/scx_flatcg.c @@ -125,8 +125,7 @@ int main(int argc, char **argv) libbpf_set_strict_mode(LIBBPF_STRICT_ALL); - skel = scx_flatcg__open(); - SCX_BUG_ON(!skel, "Failed to open skel"); + skel = SCX_OPS_OPEN(flatcg_ops, scx_flatcg); skel->rodata->nr_cpus = libbpf_num_possible_cpus(); diff --git a/tools/sched_ext/scx_qmap.bpf.c b/tools/sched_ext/scx_qmap.bpf.c index 76ff91386c99d..1c7fe58f94094 100644 --- a/tools/sched_ext/scx_qmap.bpf.c +++ b/tools/sched_ext/scx_qmap.bpf.c @@ -266,7 +266,6 @@ static bool consume_shared_dsq(void) { struct task_struct *p; bool consumed; - s32 i; if (exp_prefix[0] == '\0') return scx_bpf_consume(SHARED_DSQ); @@ -283,7 +282,8 @@ static bool consume_shared_dsq(void) memcpy(comm, p->comm, sizeof(exp_prefix) - 1); - if (!bpf_strncmp(comm, sizeof(exp_prefix), exp_prefix) && + if (!bpf_strncmp(comm, sizeof(exp_prefix), + (const char *)exp_prefix) && scx_bpf_consume_task(BPF_FOR_EACH_ITER, p)) { consumed = true; __sync_fetch_and_add(&nr_expedited, 1); diff --git a/tools/sched_ext/scx_qmap.c b/tools/sched_ext/scx_qmap.c index 4c5957417d102..a5bc6e05862b5 100644 --- a/tools/sched_ext/scx_qmap.c +++ b/tools/sched_ext/scx_qmap.c @@ -19,8 +19,8 @@ const char help_fmt[] = "\n" "See the top-level comment in .bpf.c for more details.\n" "\n" -"Usage: %s [-s SLICE_US] [-e COUNT] [-t COUNT] [-T COUNT] [-l COUNT] [-d PID]\n" -" [-D LEN] [-p]\n" +"Usage: %s [-s SLICE_US] [-e COUNT] [-t COUNT] [-T COUNT] [-l COUNT] [-b COUNT]\n" +" [-P] [-E PREFIX] [-d PID] [-D LEN] [-p]\n" "\n" " -s SLICE_US Override slice duration\n" " -e COUNT Trigger scx_bpf_error() after COUNT enqueues\n" @@ -54,8 +54,7 @@ int main(int argc, char **argv) libbpf_set_strict_mode(LIBBPF_STRICT_ALL); - skel = scx_qmap__open(); - SCX_BUG_ON(!skel, "Failed to open skel"); + skel = SCX_OPS_OPEN(qmap_ops, scx_qmap); while ((opt = getopt(argc, argv, "s:e:t:T:l:b:PE:d:D:ph")) != -1) { switch (opt) { diff --git a/tools/sched_ext/scx_simple.c b/tools/sched_ext/scx_simple.c index 006a2a5f35cc6..53e3534e6d69d 100644 --- a/tools/sched_ext/scx_simple.c +++ b/tools/sched_ext/scx_simple.c @@ -60,8 +60,7 @@ int main(int argc, char **argv) libbpf_set_strict_mode(LIBBPF_STRICT_ALL); - skel = scx_simple__open(); - SCX_BUG_ON(!skel, "Failed to open skel"); + skel = SCX_OPS_OPEN(simple_ops, scx_simple); while ((opt = getopt(argc, argv, "fh")) != -1) { switch (opt) { From d73ddabf007ade3ff95c374023317ee243663a34 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 26 Apr 2024 08:42:10 -1000 Subject: [PATCH 580/606] scx: Don't do debug dump on non-error exits (cherry picked from commit dc24d4e95fc06d16fb9dd5912fd5b4da087c2ef2) --- kernel/sched/ext.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index ab6d0c7a8b8ea..5cf5e21543e20 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -4544,7 +4544,10 @@ static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len) static void scx_ops_error_irq_workfn(struct irq_work *irq_work) { - scx_dump_state(scx_exit_info, scx_ops.exit_dump_len); + struct scx_exit_info *ei = scx_exit_info; + + if (ei->kind >= SCX_EXIT_ERROR) + scx_dump_state(ei, scx_ops.exit_dump_len); schedule_scx_ops_disable_work(); } From 2d8eac69bea639664a2a24d1329fe2a94be3ebad Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 26 Apr 2024 08:42:53 -1000 Subject: [PATCH 581/606] user_exit_info: Fix and clean up ECODE handling and apply to the example schedulers - ECODE_SYS_ACT_MASK and ECODE_SYS_RSN_MASK were swapped. - UEI_ECODE*() macros are not very useable in that they take skel as an argument but whether to branch for restart or not has to be decided after the skel is destroyed. - SCX_ECODE_* aren't defined for user C code. Let's fix and simplify it so that: - Fix the masks and copy SCX_ECODE_* definitions. Hopefully, we can figure out a way to do this better in the future. - UEI_REPORT() returns the ecode. - UEI_ECODE_*() macros take ecode instead of skel & uei. UEI_ECODE_RESTART() added. scx_simple, scx_flatcg and scx_central are updated to implement restart. scx_qmap is excluded as it implements CPU hotplug ops. (cherry picked from commit 36a08e0eddd54e0432d4d3d04195818a2fc22a71) --- tools/sched_ext/include/scx/user_exit_info.h | 46 ++++++++++++-------- tools/sched_ext/scx_central.c | 9 ++-- tools/sched_ext/scx_flatcg.c | 8 +++- tools/sched_ext/scx_simple.c | 8 +++- 4 files changed, 45 insertions(+), 26 deletions(-) diff --git a/tools/sched_ext/include/scx/user_exit_info.h b/tools/sched_ext/include/scx/user_exit_info.h index 2705b81f8d2ed..2d86d01a95752 100644 --- a/tools/sched_ext/include/scx/user_exit_info.h +++ b/tools/sched_ext/include/scx/user_exit_info.h @@ -64,17 +64,6 @@ struct user_exit_info { __sync_val_compare_and_swap(&(__skel)->data->__uei_name.kind, -1, -1); \ }) -#define UEI_KIND(__skel, __uei_name) ((__skel)->data->__uei_name.kind) - -#define ECODE_USER_MASK ((1LLU << 32) - 1) -#define ECODE_SYS_ACT_MASK (((1LLU << 48) - 1) ^ ECODE_USER_MASK) -#define ECODE_SYS_RSN_MASK (~0LLU ^ (ECODE_SYS_ACT_MASK | ECODE_USER_MASK)) - -#define UEI_ECODE(__skel, __uei_name) (__skel)->data->__uei_name.exit_code -#define UEI_ECODE_SYS_ACT(__skel, __uei_name) (UEI_ECODE(__skel, __uei_name) & ECODE_SYS_ACT_MASK) -#define UEI_ECODE_SYS_RSN(__skel, __uei_name) (UEI_ECODE(__skel, __uei_name) & ECODE_SYS_RSN_MASK) -#define UEI_ECODE_USER(__skel, __uei_name) (UEI_ECODE(__skel, __uei_name) & ECODE_USER_MASK) - #define UEI_REPORT(__skel, __uei_name) ({ \ struct user_exit_info *__uei = &(__skel)->data->__uei_name; \ char *__uei_dump = (__skel)->data_##__uei_name##_dump->__uei_name##_dump; \ @@ -88,16 +77,35 @@ struct user_exit_info { if (__uei->msg[0] != '\0') \ fprintf(stderr, " (%s)", __uei->msg); \ fputs("\n", stderr); \ + __uei->exit_code; \ }) -#define UEI_RESET(__skel, __uei_name) ({ \ - struct user_exit_info *__uei = &(__skel)->data->__uei_name; \ - char *__uei_dump = (__skel)->data_##__uei_name##_dump->__uei_name##_dump; \ - size_t __uei_dump_len = (__skel)->rodata->__uei_name##_dump_len; \ - \ - memset(__uei, 0, sizeof(struct user_exit_info)); \ - memset(__uei_dump, 0, __uei_dump_len); \ -}) +/* + * We can't import vmlinux.h while compiling user C code. Let's duplicate + * scx_exit_code definition. + */ +enum scx_exit_code { + /* Reasons */ + SCX_ECODE_RSN_HOTPLUG = 1LLU << 32, + + /* Actions */ + SCX_ECODE_ACT_RESTART = 1LLU << 48, +}; + +enum uei_ecode_mask { + UEI_ECODE_USER_MASK = ((1LLU << 32) - 1), + UEI_ECODE_SYS_RSN_MASK = ((1LLU << 16) - 1) << 32, + UEI_ECODE_SYS_ACT_MASK = ((1LLU << 16) - 1) << 48, +}; + +/* + * These macro interpret the ecode returned from UEI_REPORT(). + */ +#define UEI_ECODE_USER(__ecode) ((__ecode) & UEI_ECODE_USER_MASK) +#define UEI_ECODE_SYS_RSN(__ecode) ((__ecode) & UEI_ECODE_SYS_RSN_MASK) +#define UEI_ECODE_SYS_ACT(__ecode) ((__ecode) & UEI_ECODE_SYS_ACT_MASK) + +#define UEI_ECODE_RESTART(__ecode) (UEI_ECODE_SYS_ACT((__ecode)) == SCX_ECODE_ACT_RESTART) #endif /* __bpf__ */ #endif /* __USER_EXIT_INFO_H */ diff --git a/tools/sched_ext/scx_central.c b/tools/sched_ext/scx_central.c index d3bd950bb92ee..8639b56097a9d 100644 --- a/tools/sched_ext/scx_central.c +++ b/tools/sched_ext/scx_central.c @@ -37,7 +37,7 @@ int main(int argc, char **argv) { struct scx_central *skel; struct bpf_link *link; - __u64 seq = 0; + __u64 seq = 0, ecode; __s32 opt; cpu_set_t *cpuset; @@ -45,7 +45,7 @@ int main(int argc, char **argv) signal(SIGTERM, sigint_handler); libbpf_set_strict_mode(LIBBPF_STRICT_ALL); - +restart: skel = SCX_OPS_OPEN(central_ops, scx_central); skel->rodata->central_cpu = 0; @@ -115,7 +115,10 @@ int main(int argc, char **argv) } bpf_link__destroy(link); - UEI_REPORT(skel, uei); + ecode = UEI_REPORT(skel, uei); scx_central__destroy(skel); + + if (UEI_ECODE_RESTART(ecode)) + goto restart; return 0; } diff --git a/tools/sched_ext/scx_flatcg.c b/tools/sched_ext/scx_flatcg.c index 604996fec37ad..8557f429bb6a2 100644 --- a/tools/sched_ext/scx_flatcg.c +++ b/tools/sched_ext/scx_flatcg.c @@ -119,12 +119,13 @@ int main(int argc, char **argv) __u64 last_stats[FCG_NR_STATS] = {}; unsigned long seq = 0; __s32 opt; + __u64 ecode; signal(SIGINT, sigint_handler); signal(SIGTERM, sigint_handler); libbpf_set_strict_mode(LIBBPF_STRICT_ALL); - +restart: skel = SCX_OPS_OPEN(flatcg_ops, scx_flatcg); skel->rodata->nr_cpus = libbpf_num_possible_cpus(); @@ -212,7 +213,10 @@ int main(int argc, char **argv) } bpf_link__destroy(link); - UEI_REPORT(skel, uei); + ecode = UEI_REPORT(skel, uei); scx_flatcg__destroy(skel); + + if (UEI_ECODE_RESTART(ecode)) + goto restart; return 0; } diff --git a/tools/sched_ext/scx_simple.c b/tools/sched_ext/scx_simple.c index 53e3534e6d69d..1974fa57735e3 100644 --- a/tools/sched_ext/scx_simple.c +++ b/tools/sched_ext/scx_simple.c @@ -54,12 +54,13 @@ int main(int argc, char **argv) struct scx_simple *skel; struct bpf_link *link; __u32 opt; + __u64 ecode; signal(SIGINT, sigint_handler); signal(SIGTERM, sigint_handler); libbpf_set_strict_mode(LIBBPF_STRICT_ALL); - +restart: skel = SCX_OPS_OPEN(simple_ops, scx_simple); while ((opt = getopt(argc, argv, "fh")) != -1) { @@ -86,7 +87,10 @@ int main(int argc, char **argv) } bpf_link__destroy(link); - UEI_REPORT(skel, uei); + ecode = UEI_REPORT(skel, uei); scx_simple__destroy(skel); + + if (UEI_ECODE_RESTART(ecode)) + goto restart; return 0; } From 2190a2ea562e6a1fb7e8efa3dd2d8c0a905977e3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 26 Apr 2024 10:38:32 -1000 Subject: [PATCH 582/606] scx/compat.h: Fix inversed struct_has_field test in SCX_OPS_LOAD() (cherry picked from commit ebbd3d8291d6463cf104ffacc751a19db41125a2) --- tools/sched_ext/include/scx/compat.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/sched_ext/include/scx/compat.h b/tools/sched_ext/include/scx/compat.h index 5b5b56391ee0c..d331070fc8c8c 100644 --- a/tools/sched_ext/include/scx/compat.h +++ b/tools/sched_ext/include/scx/compat.h @@ -160,7 +160,7 @@ static inline long scx_hotplug_seq(void) #define SCX_OPS_LOAD(__skel, __ops_name, __scx_name, __uei_name) ({ \ UEI_SET_SIZE(__skel, __ops_name, __uei_name); \ - if (__COMPAT_struct_has_field("sched_ext_ops", "exit_dump_len") && \ + if (!__COMPAT_struct_has_field("sched_ext_ops", "exit_dump_len") && \ (__skel)->struct_ops.__ops_name->exit_dump_len) { \ fprintf(stderr, "WARNING: kernel doesn't support setting exit dump len\n"); \ (__skel)->struct_ops.__ops_name->exit_dump_len = 0; \ From 32e4a18ef8e2ec2fae2d25a9dfdf747ab839ce96 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 26 Apr 2024 10:56:31 -1000 Subject: [PATCH 583/606] tools/sched_ext: Add compat helpers for the new features ops.tick() isn't handled yet. (cherry picked from commit d2d4e8a3c2903a38602d072c7d44ed0e9163a1ae) --- tools/sched_ext/include/scx/common.bpf.h | 22 +++++----- tools/sched_ext/include/scx/compat.bpf.h | 53 +++++++++++++++++++++++- tools/sched_ext/include/scx/compat.h | 22 +++++++++- tools/sched_ext/scx_qmap.bpf.c | 9 ++-- tools/sched_ext/scx_qmap.c | 19 +++++---- 5 files changed, 102 insertions(+), 23 deletions(-) diff --git a/tools/sched_ext/include/scx/common.bpf.h b/tools/sched_ext/include/scx/common.bpf.h index f1a24f87ecd23..fa163bd3ac03d 100644 --- a/tools/sched_ext/include/scx/common.bpf.h +++ b/tools/sched_ext/include/scx/common.bpf.h @@ -35,23 +35,23 @@ void scx_bpf_dispatch_vtime(struct task_struct *p, u64 dsq_id, u64 slice, u64 vt u32 scx_bpf_dispatch_nr_slots(void) __ksym; void scx_bpf_dispatch_cancel(void) __ksym; bool scx_bpf_consume(u64 dsq_id) __ksym; -bool __scx_bpf_consume_task(unsigned long it, struct task_struct *p) __ksym; +bool __scx_bpf_consume_task(unsigned long it, struct task_struct *p) __ksym __weak; u32 scx_bpf_reenqueue_local(void) __ksym; void scx_bpf_kick_cpu(s32 cpu, u64 flags) __ksym; s32 scx_bpf_dsq_nr_queued(u64 dsq_id) __ksym; void scx_bpf_destroy_dsq(u64 dsq_id) __ksym; -int bpf_iter_scx_dsq_new(struct bpf_iter_scx_dsq *it, u64 dsq_id, bool rev) __ksym; -struct task_struct *bpf_iter_scx_dsq_next(struct bpf_iter_scx_dsq *it) __ksym; -void bpf_iter_scx_dsq_destroy(struct bpf_iter_scx_dsq *it) __ksym; +int bpf_iter_scx_dsq_new(struct bpf_iter_scx_dsq *it, u64 dsq_id, bool rev) __ksym __weak; +struct task_struct *bpf_iter_scx_dsq_next(struct bpf_iter_scx_dsq *it) __ksym __weak; +void bpf_iter_scx_dsq_destroy(struct bpf_iter_scx_dsq *it) __ksym __weak; void scx_bpf_exit_bstr(s64 exit_code, char *fmt, unsigned long long *data, u32 data__sz) __ksym; void scx_bpf_error_bstr(char *fmt, unsigned long long *data, u32 data_len) __ksym; -u32 scx_bpf_cpuperf_cap(s32 cpu) __ksym; -u32 scx_bpf_cpuperf_cur(s32 cpu) __ksym; -void scx_bpf_cpuperf_set(s32 cpu, u32 perf) __ksym; -u32 scx_bpf_nr_cpu_ids(void) __ksym; -const struct cpumask *scx_bpf_get_possible_cpumask(void) __ksym; -const struct cpumask *scx_bpf_get_online_cpumask(void) __ksym; -void scx_bpf_put_cpumask(const struct cpumask *cpumask) __ksym; +u32 scx_bpf_cpuperf_cap(s32 cpu) __ksym __weak; +u32 scx_bpf_cpuperf_cur(s32 cpu) __ksym __weak; +void scx_bpf_cpuperf_set(s32 cpu, u32 perf) __ksym __weak; +u32 scx_bpf_nr_cpu_ids(void) __ksym __weak; +const struct cpumask *scx_bpf_get_possible_cpumask(void) __ksym __weak; +const struct cpumask *scx_bpf_get_online_cpumask(void) __ksym __weak; +void scx_bpf_put_cpumask(const struct cpumask *cpumask) __ksym __weak; const struct cpumask *scx_bpf_get_idle_cpumask(void) __ksym; const struct cpumask *scx_bpf_get_idle_smtmask(void) __ksym; void scx_bpf_put_idle_cpumask(const struct cpumask *cpumask) __ksym; diff --git a/tools/sched_ext/include/scx/compat.bpf.h b/tools/sched_ext/include/scx/compat.bpf.h index 5da12e378be34..dc5c5f46fa209 100644 --- a/tools/sched_ext/include/scx/compat.bpf.h +++ b/tools/sched_ext/include/scx/compat.bpf.h @@ -18,13 +18,15 @@ /* * %SCX_KICK_IDLE is a later addition. To support both before and after, use * %__COMPAT_SCX_KICK_IDLE which becomes 0 on kernels which don't support it. + * Users can use %SCX_KICK_IDLE directly in the future. */ #define __COMPAT_SCX_KICK_IDLE \ __COMPAT_ENUM_OR_ZERO(enum scx_kick_flags, SCX_KICK_IDLE) /* * scx_switch_all() was replaced by %SCX_OPS_SWITCH_PARTIAL. See - * %__COMPAT_SCX_OPS_SWITCH_PARTIAL in compat.h. + * %__COMPAT_SCX_OPS_SWITCH_PARTIAL in compat.h. This can be dropped in the + * future. */ void scx_bpf_switch_all(void) __ksym __weak; @@ -34,6 +36,55 @@ static inline void __COMPAT_scx_bpf_switch_all(void) scx_bpf_switch_all(); } +/* + * scx_bpf_nr_cpu_ids(), scx_bpf_get_possible/online_cpumask() are new. No good + * way to noop these kfuncs. Provide a test macro. Users will be able to assume + * existence in the future. + */ +#define __COMPAT_HAS_CPUMASKS \ + bpf_ksym_exists(scx_bpf_nr_cpu_ids) + +/* + * cpuperf is new. The followings become noop on older kernels. Callers can be + * updated to call cpuperf kfuncs directly in the future. + */ +static inline u32 __COMPAT_scx_bpf_cpuperf_cap(s32 cpu) +{ + if (bpf_ksym_exists(scx_bpf_cpuperf_cap)) + return scx_bpf_cpuperf_cap(cpu); + else + return 1024; +} + +static inline u32 __COMPAT_scx_bpf_cpuperf_cur(s32 cpu) +{ + if (bpf_ksym_exists(scx_bpf_cpuperf_cur)) + return scx_bpf_cpuperf_cur(cpu); + else + return 1024; +} + +static inline void __COMPAT_scx_bpf_cpuperf_set(s32 cpu, u32 perf) +{ + if (bpf_ksym_exists(scx_bpf_cpuperf_set)) + return scx_bpf_cpuperf_set(cpu, perf); +} + +/* + * Iteration and scx_bpf_consume_task() are new. The following become noop on + * older kernels. The users can switch to bpf_for_each(scx_dsq) and directly + * call scx_bpf_consume_task() in the future. + */ +#define __COMPAT_DSQ_FOR_EACH(p, dsq_id, flags) \ + if (bpf_ksym_exists(bpf_iter_scx_dsq_new)) \ + bpf_for_each(scx_dsq, (p), (dsq_id), (flags)) + +static inline bool __COMPAT_scx_bpf_consume_task(struct bpf_iter_scx_dsq *it, + struct task_struct *p) +{ + return false; +} + /* * Define sched_ext_ops. This may be expanded to define multiple variants for * backward compatibility. See compat.h::SCX_OPS_LOAD/ATTACH(). diff --git a/tools/sched_ext/include/scx/compat.h b/tools/sched_ext/include/scx/compat.h index d331070fc8c8c..0230ca0968676 100644 --- a/tools/sched_ext/include/scx/compat.h +++ b/tools/sched_ext/include/scx/compat.h @@ -72,6 +72,12 @@ static inline bool __COMPAT_read_enum(const char *type, const char *name, u64 *v __val; \ }) +static inline bool __COMPAT_ksym_exists(const char *ksym) +{ + __COMPAT_load_vmlinux_btf(); + return btf__find_by_name(__COMPAT_vmlinux_btf, ksym) >= 0; +} + static inline bool __COMPAT_struct_has_field(const char *type, const char *field) { const struct btf_type *t; @@ -104,11 +110,25 @@ static inline bool __COMPAT_struct_has_field(const char *type, const char *field * An ops flag, %SCX_OPS_SWITCH_PARTIAL, replaced scx_bpf_switch_all() which had * to be called from ops.init(). To support both before and after, use both * %__COMPAT_SCX_OPS_SWITCH_PARTIAL and %__COMPAT_scx_bpf_switch_all() defined - * in compat.bpf.h. + * in compat.bpf.h. Users can switch to directly using %SCX_OPS_SWITCH_PARTIAL + * in the future. */ #define __COMPAT_SCX_OPS_SWITCH_PARTIAL \ __COMPAT_ENUM_OR_ZERO("scx_ops_flags", "SCX_OPS_SWITCH_PARTIAL") +/* + * scx_bpf_nr_cpu_ids(), scx_bpf_get_possible/online_cpumask() are new. Users + * will be able to assume existence in the future. + */ +#define __COMPAT_HAS_CPUMASKS \ + __COMPAT_ksym_exists("scx_bpf_nr_cpu_ids") + +/* + * DSQ iterator is new. Users will be able to assume existence in the future. + */ +#define __COMPAT_HAS_DSQ_ITER \ + __COMPAT_ksym_exists("bpf_iter_scx_dsq_new") + static inline long scx_hotplug_seq(void) { int fd; diff --git a/tools/sched_ext/scx_qmap.bpf.c b/tools/sched_ext/scx_qmap.bpf.c index 1c7fe58f94094..832fa87da6403 100644 --- a/tools/sched_ext/scx_qmap.bpf.c +++ b/tools/sched_ext/scx_qmap.bpf.c @@ -277,14 +277,14 @@ static bool consume_shared_dsq(void) * when dsp_batch is larger than 1. */ consumed = false; - bpf_for_each(scx_dsq, p, SHARED_DSQ, 0) { + __COMPAT_DSQ_FOR_EACH(p, SHARED_DSQ, 0) { char comm[sizeof(exp_prefix)]; memcpy(comm, p->comm, sizeof(exp_prefix) - 1); if (!bpf_strncmp(comm, sizeof(exp_prefix), (const char *)exp_prefix) && - scx_bpf_consume_task(BPF_FOR_EACH_ITER, p)) { + __COMPAT_scx_bpf_consume_task(BPF_FOR_EACH_ITER, p)) { consumed = true; __sync_fetch_and_add(&nr_expedited, 1); } @@ -473,6 +473,9 @@ static void print_cpus(void) char buf[128] = "", *p; int idx; + if (!__COMPAT_HAS_CPUMASKS) + return; + possible = scx_bpf_get_possible_cpumask(); online = scx_bpf_get_online_cpumask(); @@ -603,7 +606,7 @@ static void dump_shared_dsq(void) bpf_printk("Dumping %d tasks in SHARED_DSQ in reverse order", nr); bpf_rcu_read_lock(); - bpf_for_each(scx_dsq, p, SHARED_DSQ, SCX_DSQ_ITER_REV) + __COMPAT_DSQ_FOR_EACH(p, SHARED_DSQ, SCX_DSQ_ITER_REV) bpf_printk("%s[%d]", p->comm, p->pid); bpf_rcu_read_unlock(); } diff --git a/tools/sched_ext/scx_qmap.c b/tools/sched_ext/scx_qmap.c index a5bc6e05862b5..53f5dc2e250ef 100644 --- a/tools/sched_ext/scx_qmap.c +++ b/tools/sched_ext/scx_qmap.c @@ -101,6 +101,10 @@ int main(int argc, char **argv) } } + if (!__COMPAT_HAS_DSQ_ITER && + (skel->rodata->print_shared_dsq || strlen(skel->rodata->exp_prefix))) + fprintf(stderr, "kernel doesn't support DSQ iteration\n"); + SCX_OPS_LOAD(skel, qmap_ops, scx_qmap, uei); link = SCX_OPS_ATTACH(skel, qmap_ops); @@ -112,13 +116,14 @@ int main(int argc, char **argv) nr_enqueued, nr_dispatched, nr_enqueued - nr_dispatched, skel->bss->nr_reenqueued, skel->bss->nr_dequeued, skel->bss->nr_core_sched_execed, skel->bss->nr_expedited); - printf("cpuperf: cur min/avg/max=%u/%u/%u target min/avg/max=%u/%u/%u\n", - skel->bss->cpuperf_min, - skel->bss->cpuperf_avg, - skel->bss->cpuperf_max, - skel->bss->cpuperf_target_min, - skel->bss->cpuperf_target_avg, - skel->bss->cpuperf_target_max); + if (__COMPAT_ksym_exists("scx_bpf_cpuperf_cur")) + printf("cpuperf: cur min/avg/max=%u/%u/%u target min/avg/max=%u/%u/%u\n", + skel->bss->cpuperf_min, + skel->bss->cpuperf_avg, + skel->bss->cpuperf_max, + skel->bss->cpuperf_target_min, + skel->bss->cpuperf_target_avg, + skel->bss->cpuperf_target_max); fflush(stdout); sleep(1); } From c698bc61cf262b741462f3bc01056fa39c281026 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 26 Apr 2024 11:25:38 -1000 Subject: [PATCH 584/606] scx/compat.h: Add backward compat support for missing ops.tick() Doesn't work yet. (cherry picked from commit 1d59f25acb28fabbbfdf4d25369ed77f54d627d6) --- tools/sched_ext/include/scx/compat.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/sched_ext/include/scx/compat.h b/tools/sched_ext/include/scx/compat.h index 0230ca0968676..5649e003f741d 100644 --- a/tools/sched_ext/include/scx/compat.h +++ b/tools/sched_ext/include/scx/compat.h @@ -185,6 +185,11 @@ static inline long scx_hotplug_seq(void) fprintf(stderr, "WARNING: kernel doesn't support setting exit dump len\n"); \ (__skel)->struct_ops.__ops_name->exit_dump_len = 0; \ } \ + if (!__COMPAT_struct_has_field("sched_ext_ops", "tick") && \ + (__skel)->struct_ops.__ops_name->tick) { \ + fprintf(stderr, "WARNING: kernel doesn't support ops.tick()\n"); \ + (__skel)->struct_ops.__ops_name->tick = NULL; \ + } \ SCX_BUG_ON(__scx_name##__load((__skel)), "Failed to load skel"); \ }) From 1b85297f28e95a00bee41921f013beafdbed9724 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 26 Apr 2024 13:16:02 -1000 Subject: [PATCH 585/606] scx: Cosmetic changes (cherry picked from commit 4598fefd3812eaeeca8095f082b4273bafb0a4a0) --- kernel/sched/ext.c | 1 + tools/sched_ext/include/scx/compat.h | 18 ++++++------------ tools/sched_ext/scx_qmap.c | 6 +++++- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 5cf5e21543e20..9c1e0cc14c93e 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -4548,6 +4548,7 @@ static void scx_ops_error_irq_workfn(struct irq_work *irq_work) if (ei->kind >= SCX_EXIT_ERROR) scx_dump_state(ei, scx_ops.exit_dump_len); + schedule_scx_ops_disable_work(); } diff --git a/tools/sched_ext/include/scx/compat.h b/tools/sched_ext/include/scx/compat.h index 5649e003f741d..7783c82a8a182 100644 --- a/tools/sched_ext/include/scx/compat.h +++ b/tools/sched_ext/include/scx/compat.h @@ -72,7 +72,7 @@ static inline bool __COMPAT_read_enum(const char *type, const char *name, u64 *v __val; \ }) -static inline bool __COMPAT_ksym_exists(const char *ksym) +static inline bool __COMPAT_has_ksym(const char *ksym) { __COMPAT_load_vmlinux_btf(); return btf__find_by_name(__COMPAT_vmlinux_btf, ksym) >= 0; @@ -121,13 +121,13 @@ static inline bool __COMPAT_struct_has_field(const char *type, const char *field * will be able to assume existence in the future. */ #define __COMPAT_HAS_CPUMASKS \ - __COMPAT_ksym_exists("scx_bpf_nr_cpu_ids") + __COMPAT_has_ksym("scx_bpf_nr_cpu_ids") /* * DSQ iterator is new. Users will be able to assume existence in the future. */ #define __COMPAT_HAS_DSQ_ITER \ - __COMPAT_ksym_exists("bpf_iter_scx_dsq_new") + __COMPAT_has_ksym("bpf_iter_scx_dsq_new") static inline long scx_hotplug_seq(void) { @@ -157,15 +157,9 @@ static inline long scx_hotplug_seq(void) * and attach it, backward compatibility is automatically maintained where * reasonable. * - * The following values were added in newer kernels: - * - * - sched_ext_ops.exit_dump_len - * o If nonzero and running on an older kernel, the value is set to zero - * and a warning is emitted - * - * - sched_ext_ops.hotplug_seq - * o If nonzero and running on an older kernel, the scheduler will fail to - * load + * - ops.tick(): Ignored on older kernels with a warning. + * - ops.exit_dump_len: Cleared to zero on older kernels with a warning. + * - ops.hotplug_seq: Ignored on older kernels. */ #define SCX_OPS_OPEN(__ops_name, __scx_name) ({ \ struct __scx_name *__skel; \ diff --git a/tools/sched_ext/scx_qmap.c b/tools/sched_ext/scx_qmap.c index 53f5dc2e250ef..f8a3e934e4dad 100644 --- a/tools/sched_ext/scx_qmap.c +++ b/tools/sched_ext/scx_qmap.c @@ -116,7 +116,7 @@ int main(int argc, char **argv) nr_enqueued, nr_dispatched, nr_enqueued - nr_dispatched, skel->bss->nr_reenqueued, skel->bss->nr_dequeued, skel->bss->nr_core_sched_execed, skel->bss->nr_expedited); - if (__COMPAT_ksym_exists("scx_bpf_cpuperf_cur")) + if (__COMPAT_has_ksym("scx_bpf_cpuperf_cur")) printf("cpuperf: cur min/avg/max=%u/%u/%u target min/avg/max=%u/%u/%u\n", skel->bss->cpuperf_min, skel->bss->cpuperf_avg, @@ -131,5 +131,9 @@ int main(int argc, char **argv) bpf_link__destroy(link); UEI_REPORT(skel, uei); scx_qmap__destroy(skel); + /* + * scx_qmap implements ops.cpu_on/offline() and doesn't need to restart + * on CPU hotplug events. + */ return 0; } From 60f51bfebe2b5d48fbc7fcddf5c140733ef4035c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 26 Apr 2024 13:52:20 -1000 Subject: [PATCH 586/606] tools/sched_ext: Drop libbpf_set_strict_mode() calls Not needed anymore. (cherry picked from commit fa00fb946c615424ce8833abe23c46c2a91d1606) --- tools/sched_ext/scx_central.c | 2 -- tools/sched_ext/scx_flatcg.c | 2 -- tools/sched_ext/scx_qmap.c | 2 -- tools/sched_ext/scx_simple.c | 2 -- 4 files changed, 8 deletions(-) diff --git a/tools/sched_ext/scx_central.c b/tools/sched_ext/scx_central.c index 8639b56097a9d..d2d85deff6e72 100644 --- a/tools/sched_ext/scx_central.c +++ b/tools/sched_ext/scx_central.c @@ -43,8 +43,6 @@ int main(int argc, char **argv) signal(SIGINT, sigint_handler); signal(SIGTERM, sigint_handler); - - libbpf_set_strict_mode(LIBBPF_STRICT_ALL); restart: skel = SCX_OPS_OPEN(central_ops, scx_central); diff --git a/tools/sched_ext/scx_flatcg.c b/tools/sched_ext/scx_flatcg.c index 8557f429bb6a2..6eabf1c965b8f 100644 --- a/tools/sched_ext/scx_flatcg.c +++ b/tools/sched_ext/scx_flatcg.c @@ -123,8 +123,6 @@ int main(int argc, char **argv) signal(SIGINT, sigint_handler); signal(SIGTERM, sigint_handler); - - libbpf_set_strict_mode(LIBBPF_STRICT_ALL); restart: skel = SCX_OPS_OPEN(flatcg_ops, scx_flatcg); diff --git a/tools/sched_ext/scx_qmap.c b/tools/sched_ext/scx_qmap.c index f8a3e934e4dad..95dc46a3bf081 100644 --- a/tools/sched_ext/scx_qmap.c +++ b/tools/sched_ext/scx_qmap.c @@ -52,8 +52,6 @@ int main(int argc, char **argv) signal(SIGINT, sigint_handler); signal(SIGTERM, sigint_handler); - libbpf_set_strict_mode(LIBBPF_STRICT_ALL); - skel = SCX_OPS_OPEN(qmap_ops, scx_qmap); while ((opt = getopt(argc, argv, "s:e:t:T:l:b:PE:d:D:ph")) != -1) { diff --git a/tools/sched_ext/scx_simple.c b/tools/sched_ext/scx_simple.c index 1974fa57735e3..242db6620d7e1 100644 --- a/tools/sched_ext/scx_simple.c +++ b/tools/sched_ext/scx_simple.c @@ -58,8 +58,6 @@ int main(int argc, char **argv) signal(SIGINT, sigint_handler); signal(SIGTERM, sigint_handler); - - libbpf_set_strict_mode(LIBBPF_STRICT_ALL); restart: skel = SCX_OPS_OPEN(simple_ops, scx_simple); From 749e54741530cf3f2b9f4b195abd7caa947cb72a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 26 Apr 2024 14:03:07 -1000 Subject: [PATCH 587/606] tools/sched_ext: Add -v option to example schedulers to aid debugging libbpf issues (cherry picked from commit ea9d91c80b92fea56a251dc69fa208f1ffe16f31) --- tools/sched_ext/scx_central.c | 15 ++++++++++++++- tools/sched_ext/scx_flatcg.c | 17 +++++++++++++++-- tools/sched_ext/scx_qmap.c | 17 +++++++++++++++-- tools/sched_ext/scx_simple.c | 17 +++++++++++++++-- 4 files changed, 59 insertions(+), 7 deletions(-) diff --git a/tools/sched_ext/scx_central.c b/tools/sched_ext/scx_central.c index d2d85deff6e72..df692dc0ccb10 100644 --- a/tools/sched_ext/scx_central.c +++ b/tools/sched_ext/scx_central.c @@ -24,10 +24,19 @@ const char help_fmt[] = "\n" " -s SLICE_US Override slice duration\n" " -c CPU Override the central CPU (default: 0)\n" +" -v Print libbpf debug messages\n" " -h Display this help and exit\n"; +static bool verbose; static volatile int exit_req; +static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) +{ + if (level == LIBBPF_DEBUG && !verbose) + return 0; + return vfprintf(stderr, format, args); +} + static void sigint_handler(int dummy) { exit_req = 1; @@ -41,6 +50,7 @@ int main(int argc, char **argv) __s32 opt; cpu_set_t *cpuset; + libbpf_set_print(libbpf_print_fn); signal(SIGINT, sigint_handler); signal(SIGTERM, sigint_handler); restart: @@ -49,7 +59,7 @@ int main(int argc, char **argv) skel->rodata->central_cpu = 0; skel->rodata->nr_cpu_ids = libbpf_num_possible_cpus(); - while ((opt = getopt(argc, argv, "s:c:ph")) != -1) { + while ((opt = getopt(argc, argv, "s:c:pvh")) != -1) { switch (opt) { case 's': skel->rodata->slice_ns = strtoull(optarg, NULL, 0) * 1000; @@ -57,6 +67,9 @@ int main(int argc, char **argv) case 'c': skel->rodata->central_cpu = strtoul(optarg, NULL, 0); break; + case 'v': + verbose = true; + break; default: fprintf(stderr, help_fmt, basename(argv[0])); return opt != 'h'; diff --git a/tools/sched_ext/scx_flatcg.c b/tools/sched_ext/scx_flatcg.c index 6eabf1c965b8f..1143d5eb389af 100644 --- a/tools/sched_ext/scx_flatcg.c +++ b/tools/sched_ext/scx_flatcg.c @@ -26,15 +26,24 @@ const char help_fmt[] = "\n" "See the top-level comment in .bpf.c for more details.\n" "\n" -"Usage: %s [-s SLICE_US] [-i INTERVAL] [-f]\n" +"Usage: %s [-s SLICE_US] [-i INTERVAL] [-f] [-v]\n" "\n" " -s SLICE_US Override slice duration\n" " -i INTERVAL Report interval\n" " -f Use FIFO scheduling instead of weighted vtime scheduling\n" +" -v Print libbpf debug messages\n" " -h Display this help and exit\n"; +static bool verbose; static volatile int exit_req; +static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) +{ + if (level == LIBBPF_DEBUG && !verbose) + return 0; + return vfprintf(stderr, format, args); +} + static void sigint_handler(int dummy) { exit_req = 1; @@ -121,6 +130,7 @@ int main(int argc, char **argv) __s32 opt; __u64 ecode; + libbpf_set_print(libbpf_print_fn); signal(SIGINT, sigint_handler); signal(SIGTERM, sigint_handler); restart: @@ -128,7 +138,7 @@ int main(int argc, char **argv) skel->rodata->nr_cpus = libbpf_num_possible_cpus(); - while ((opt = getopt(argc, argv, "s:i:dfph")) != -1) { + while ((opt = getopt(argc, argv, "s:i:dfvh")) != -1) { double v; switch (opt) { @@ -147,6 +157,9 @@ int main(int argc, char **argv) case 'f': skel->rodata->fifo_sched = true; break; + case 'v': + verbose = true; + break; case 'h': default: fprintf(stderr, help_fmt, basename(argv[0])); diff --git a/tools/sched_ext/scx_qmap.c b/tools/sched_ext/scx_qmap.c index 95dc46a3bf081..eb221f0a05800 100644 --- a/tools/sched_ext/scx_qmap.c +++ b/tools/sched_ext/scx_qmap.c @@ -20,7 +20,7 @@ const char help_fmt[] = "See the top-level comment in .bpf.c for more details.\n" "\n" "Usage: %s [-s SLICE_US] [-e COUNT] [-t COUNT] [-T COUNT] [-l COUNT] [-b COUNT]\n" -" [-P] [-E PREFIX] [-d PID] [-D LEN] [-p]\n" +" [-P] [-E PREFIX] [-d PID] [-D LEN] [-p] [-v]\n" "\n" " -s SLICE_US Override slice duration\n" " -e COUNT Trigger scx_bpf_error() after COUNT enqueues\n" @@ -34,10 +34,19 @@ const char help_fmt[] = " -d PID Disallow a process from switching into SCHED_EXT (-1 for self)\n" " -D LEN Set scx_exit_info.dump buffer length\n" " -p Switch only tasks on SCHED_EXT policy intead of all\n" +" -v Print libbpf debug messages\n" " -h Display this help and exit\n"; +static bool verbose; static volatile int exit_req; +static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) +{ + if (level == LIBBPF_DEBUG && !verbose) + return 0; + return vfprintf(stderr, format, args); +} + static void sigint_handler(int dummy) { exit_req = 1; @@ -49,12 +58,13 @@ int main(int argc, char **argv) struct bpf_link *link; int opt; + libbpf_set_print(libbpf_print_fn); signal(SIGINT, sigint_handler); signal(SIGTERM, sigint_handler); skel = SCX_OPS_OPEN(qmap_ops, scx_qmap); - while ((opt = getopt(argc, argv, "s:e:t:T:l:b:PE:d:D:ph")) != -1) { + while ((opt = getopt(argc, argv, "s:e:t:T:l:b:PE:d:D:pvh")) != -1) { switch (opt) { case 's': skel->rodata->slice_ns = strtoull(optarg, NULL, 0) * 1000; @@ -93,6 +103,9 @@ int main(int argc, char **argv) skel->rodata->switch_partial = true; skel->struct_ops.qmap_ops->flags |= __COMPAT_SCX_OPS_SWITCH_PARTIAL; break; + case 'v': + verbose = true; + break; default: fprintf(stderr, help_fmt, basename(argv[0])); return opt != 'h'; diff --git a/tools/sched_ext/scx_simple.c b/tools/sched_ext/scx_simple.c index 242db6620d7e1..b88c058090b6c 100644 --- a/tools/sched_ext/scx_simple.c +++ b/tools/sched_ext/scx_simple.c @@ -17,13 +17,22 @@ const char help_fmt[] = "\n" "See the top-level comment in .bpf.c for more details.\n" "\n" -"Usage: %s [-f]\n" +"Usage: %s [-f] [-v]\n" "\n" " -f Use FIFO scheduling instead of weighted vtime scheduling\n" +" -v Print libbpf debug messages\n" " -h Display this help and exit\n"; +static bool verbose; static volatile int exit_req; +static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) +{ + if (level == LIBBPF_DEBUG && !verbose) + return 0; + return vfprintf(stderr, format, args); +} + static void sigint_handler(int simple) { exit_req = 1; @@ -56,16 +65,20 @@ int main(int argc, char **argv) __u32 opt; __u64 ecode; + libbpf_set_print(libbpf_print_fn); signal(SIGINT, sigint_handler); signal(SIGTERM, sigint_handler); restart: skel = SCX_OPS_OPEN(simple_ops, scx_simple); - while ((opt = getopt(argc, argv, "fh")) != -1) { + while ((opt = getopt(argc, argv, "fvh")) != -1) { switch (opt) { case 'f': skel->rodata->fifo_sched = true; break; + case 'v': + verbose = true; + break; default: fprintf(stderr, help_fmt, basename(argv[0])); return opt != 'h'; From 24c2e1ff659533b603242c42e403d9fb1f8517bb Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 26 Apr 2024 14:45:24 -1000 Subject: [PATCH 588/606] scx: Check for 0 slice after set_next_task_scx() set_next_task_scx() can call ops.running() which can update p->scx.slice. Check and warn zero slice after set_next_task_scx(), not before. (cherry picked from commit ef638e6b46b978c3ca735d2bd11dad5cf4894c58) --- kernel/sched/ext.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 9c1e0cc14c93e..b7f856c6b5a89 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -2748,6 +2748,8 @@ static struct task_struct *pick_next_task_scx(struct rq *rq) if (!p) return NULL; + set_next_task_scx(rq, p, true); + if (unlikely(!p->scx.slice)) { if (!scx_ops_bypassing() && !scx_warned_zero_slice) { printk_deferred(KERN_WARNING "sched_ext: %s[%d] has zero slice in pick_next_task_scx()\n", @@ -2757,8 +2759,6 @@ static struct task_struct *pick_next_task_scx(struct rq *rq) p->scx.slice = SCX_SLICE_DFL; } - set_next_task_scx(rq, p, true); - return p; } From 9186ec25d6f52e9593fe15234b712bca88ef0d19 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 26 Apr 2024 15:05:58 -1000 Subject: [PATCH 589/606] scx: Build fixes for UP and SCX off (cherry picked from commit 89b7ecccebdae8e7f08c31ea02d35eefd46f1820) --- kernel/sched/core.c | 2 ++ kernel/sched/ext.c | 6 +++++- kernel/sched/ext.h | 1 - 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 68d32504a2dfe..f3d210301e9f0 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4584,7 +4584,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) p->rt.on_rq = 0; p->rt.on_list = 0; +#ifdef CONFIG_SCHED_CLASS_EXT init_scx_entity(&p->scx); +#endif #ifdef CONFIG_PREEMPT_NOTIFIERS INIT_HLIST_HEAD(&p->preempt_notifiers); diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index b7f856c6b5a89..b09f6b154ed6c 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -5535,8 +5535,12 @@ __bpf_kfunc s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, *is_idle = false; return prev_cpu; } - +#ifdef CONFIG_SMP return scx_select_cpu_dfl(p, prev_cpu, wake_flags, is_idle); +#else + *is_idle = false; + return prev_cpu; +#endif } __bpf_kfunc_end_defs(); diff --git a/kernel/sched/ext.h b/kernel/sched/ext.h index 83a48fda6317f..97064e53f2995 100644 --- a/kernel/sched/ext.h +++ b/kernel/sched/ext.h @@ -91,7 +91,6 @@ bool scx_prio_less(const struct task_struct *a, const struct task_struct *b, static inline void scx_next_task_picked(struct rq *rq, struct task_struct *p, const struct sched_class *active) {} static inline void scx_tick(struct rq *rq) {} -static inline void init_scx_entity(struct sched_ext_entity *scx) {} static inline void scx_pre_fork(struct task_struct *p) {} static inline int scx_fork(struct task_struct *p) { return 0; } static inline void scx_post_fork(struct task_struct *p) {} From fa8add5e42d7d224a547f41fd6312bca832b823f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 29 Apr 2024 09:51:44 -1000 Subject: [PATCH 590/606] tools/sched_ext: Add __COMPAT_scx_bpf_exit() (cherry picked from commit 7a3bce8fc3adba0dfa64ab9760f78f3f577f8611) --- tools/sched_ext/include/scx/common.bpf.h | 2 +- tools/sched_ext/include/scx/compat.bpf.h | 16 ++++++++++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/tools/sched_ext/include/scx/common.bpf.h b/tools/sched_ext/include/scx/common.bpf.h index fa163bd3ac03d..20ebb407148ea 100644 --- a/tools/sched_ext/include/scx/common.bpf.h +++ b/tools/sched_ext/include/scx/common.bpf.h @@ -43,7 +43,7 @@ void scx_bpf_destroy_dsq(u64 dsq_id) __ksym; int bpf_iter_scx_dsq_new(struct bpf_iter_scx_dsq *it, u64 dsq_id, bool rev) __ksym __weak; struct task_struct *bpf_iter_scx_dsq_next(struct bpf_iter_scx_dsq *it) __ksym __weak; void bpf_iter_scx_dsq_destroy(struct bpf_iter_scx_dsq *it) __ksym __weak; -void scx_bpf_exit_bstr(s64 exit_code, char *fmt, unsigned long long *data, u32 data__sz) __ksym; +void scx_bpf_exit_bstr(s64 exit_code, char *fmt, unsigned long long *data, u32 data__sz) __ksym __weak; void scx_bpf_error_bstr(char *fmt, unsigned long long *data, u32 data_len) __ksym; u32 scx_bpf_cpuperf_cap(s32 cpu) __ksym __weak; u32 scx_bpf_cpuperf_cur(s32 cpu) __ksym __weak; diff --git a/tools/sched_ext/include/scx/compat.bpf.h b/tools/sched_ext/include/scx/compat.bpf.h index dc5c5f46fa209..8fcba2505ad53 100644 --- a/tools/sched_ext/include/scx/compat.bpf.h +++ b/tools/sched_ext/include/scx/compat.bpf.h @@ -36,10 +36,22 @@ static inline void __COMPAT_scx_bpf_switch_all(void) scx_bpf_switch_all(); } +/* + * scx_bpf_exit() is a new addition. Fall back to scx_bpf_error() if + * unavailable. Users can use scx_bpf_exit() directly in the future. + */ +#define __COMPAT_scx_bpf_exit(code, fmt, args...) \ +({ \ + if (bpf_ksym_exists(scx_bpf_exit_bstr)) \ + scx_bpf_exit((code), fmt, args); \ + else \ + scx_bpf_error(fmt, args); \ +}) + /* * scx_bpf_nr_cpu_ids(), scx_bpf_get_possible/online_cpumask() are new. No good - * way to noop these kfuncs. Provide a test macro. Users will be able to assume - * existence in the future. + * way to noop these kfuncs. Provide a test macro. Users can assume existence in + * the future. */ #define __COMPAT_HAS_CPUMASKS \ bpf_ksym_exists(scx_bpf_nr_cpu_ids) From c8e18a88f362a8016b8bdb9080933726819bac60 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Tue, 30 Apr 2024 10:38:06 -0500 Subject: [PATCH 591/606] scx: Download missing CI packages CI seems to require bc and gcc-multilib. Let's add those to the list of Ubuntu packages we install. Signed-off-by: David Vernet (cherry picked from commit 1524224c62a6dd4b2fcc125413aa4bf813dd4ccd) --- .github/workflows/test-kernel.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-kernel.yml b/.github/workflows/test-kernel.yml index 811f37f087a1f..ba1905c5a6107 100644 --- a/.github/workflows/test-kernel.yml +++ b/.github/workflows/test-kernel.yml @@ -16,7 +16,7 @@ jobs: ### DOWNLOAD AND INSTALL DEPENDENCIES ### # Download dependencies packaged by Ubuntu - - run: sudo apt -y install gcc make git coreutils cmake elfutils libelf-dev libunwind-dev libzstd-dev linux-headers-generic linux-tools-common linux-tools-generic ninja-build python3-pip python3-requests qemu-kvm udev iproute2 busybox-static libvirt-clients kbd kmod file rsync zstd pahole flex bison cpio libcap-dev libelf-dev python3-dev cargo rustc + - run: sudo apt -y install bc gcc make git coreutils cmake elfutils libelf-dev libunwind-dev libzstd-dev linux-headers-generic linux-tools-common linux-tools-generic ninja-build python3-pip python3-requests qemu-kvm udev iproute2 busybox-static libvirt-clients kbd kmod file rsync zstd pahole flex bison cpio libcap-dev libelf-dev python3-dev cargo rustc gcc-multilib # clang 17 # Use a custom llvm.sh script which includes the -y flag for From 5ef9c277347e5dec57af668f5395e36d2b626b0a Mon Sep 17 00:00:00 2001 From: David Vernet Date: Tue, 30 Apr 2024 10:45:43 -0500 Subject: [PATCH 592/606] scx: Refactor binary run logic in scx CI We're currently invoking vng only when iterating over the schedulers in tools/sched_ext. We should also be running our selftests in CI, so let's refactor that logic to a new, separate runtest() bash function. Along the way, remove some extra, unnecessary scx_layered-specific logic. We no longer ship scx_layered in the sched_ext repo, so we can remove it. Signed-off-by: David Vernet (cherry picked from commit 92505bf053b18195a6a245ff60bc770d20fef50f) --- .github/workflows/run-schedulers | 34 +++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/.github/workflows/run-schedulers b/.github/workflows/run-schedulers index 0aa3ffea3bff7..5cb6cc2303b2b 100755 --- a/.github/workflows/run-schedulers +++ b/.github/workflows/run-schedulers @@ -16,6 +16,26 @@ if [ ! -x `which vng` ]; then exit 1 fi +function runtest() { + local bin="${1}" + + if [ -z "${bin}" ]; then + echo "No binary passed to runtest" + exit 1 + fi + + if ! [ -f "${bin}" ]; then + echo "Binary ${bin} was not a regular file" + exit 1 + fi + + rm -f /tmp/output + (timeout --foreground --preserve-status ${GUEST_TIMEOUT} \ + vng --force-9p --disable-microvm --verbose -- \ + "timeout --foreground --preserve-status ${TEST_TIMEOUT} ${bin}" \ + 2>&1 &1 Date: Tue, 30 Apr 2024 11:23:07 -0500 Subject: [PATCH 593/606] scx: Fix selftests We no longer have UEI_KIND or UEI_ECODE. Adjust the hotplug selftest accordingly. We'll update CI to run these tests in a subsequent patch. Signed-off-by: David Vernet (cherry picked from commit 1ea92e28ee3e6d9ff021ae92124679cf646129aa) --- tools/testing/selftests/sched_ext/hotplug.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/sched_ext/hotplug.c b/tools/testing/selftests/sched_ext/hotplug.c index ce23d797a6bb1..87bf220b1bcee 100644 --- a/tools/testing/selftests/sched_ext/hotplug.c +++ b/tools/testing/selftests/sched_ext/hotplug.c @@ -83,8 +83,8 @@ static enum scx_test_status test_hotplug(bool onlining, bool cbs_defined) while (!UEI_EXITED(skel, uei)) sched_yield(); - SCX_EQ(UEI_KIND(skel, uei), kind); - SCX_EQ(UEI_ECODE(skel, uei), code); + SCX_EQ(skel->data->uei.kind, kind); + SCX_EQ(UEI_REPORT(skel, uei), code); if (!onlining) toggle_online_status(1); @@ -126,8 +126,8 @@ static enum scx_test_status test_hotplug_attach(void) kind = SCX_KIND_VAL(SCX_EXIT_UNREG_KERN); code = SCX_ECODE_VAL(SCX_ECODE_ACT_RESTART) | SCX_ECODE_VAL(SCX_ECODE_RSN_HOTPLUG); - SCX_EQ(UEI_KIND(skel, uei), kind); - SCX_EQ(UEI_ECODE(skel, uei), code); + SCX_EQ(skel->data->uei.kind, kind); + SCX_EQ(UEI_REPORT(skel, uei), code); bpf_link__destroy(link); hotplug__destroy(skel); From 432e1e2958873c1573edc1ffaab5899475c345f0 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Tue, 30 Apr 2024 10:53:19 -0500 Subject: [PATCH 594/606] scx: Run selftests in scx CI We have a fairly comprehensive set of selftests in tools/selftests/sched_ext. Let's run them in CI. Signed-off-by: David Vernet (cherry picked from commit 1ef9ac40455d1bad7995adb0a7fc1eb4d8aef6c3) --- .github/workflows/run-schedulers | 11 +++++++++++ .github/workflows/test-kernel.yml | 5 ++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/.github/workflows/run-schedulers b/.github/workflows/run-schedulers index 5cb6cc2303b2b..ef353157d3a04 100755 --- a/.github/workflows/run-schedulers +++ b/.github/workflows/run-schedulers @@ -59,3 +59,14 @@ for sched in $(find tools/sched_ext/build/bin -type f -executable); do echo "OK: ${sched}" fi done + +# Run the selftests suite +runtest "tools/testing/selftests/sched_ext/runner" +sed -n -e '/not ok/q1' /tmp/output +res=$? +if [ ${res} -ne 0 ]; then + echo "FAIL: selftests" + echo "output: $(cat /tmp/output)" +else + echo "OK: selftests" +fi diff --git a/.github/workflows/test-kernel.yml b/.github/workflows/test-kernel.yml index ba1905c5a6107..6e9d059a461a1 100644 --- a/.github/workflows/test-kernel.yml +++ b/.github/workflows/test-kernel.yml @@ -40,8 +40,11 @@ jobs: # Build a minimal kernel (with sched-ext enabled) using virtme-ng - run: vng -v --build --config .github/workflows/sched-ext.config + # Build the selftests suite + - run: make -j $(nproc) -C tools/testing/selftests/sched_ext + # Build the in-kernel schedulers - - run: cd tools/sched_ext && make + - run: make -j $(nproc) -C tools/sched_ext # Test the schedulers inside the recompile kernel - run: .github/workflows/run-schedulers From 471d9b65aa279f91dc2bb5e15ab7cdab03a529fc Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 30 Apr 2024 11:29:31 -1000 Subject: [PATCH 595/606] MAINTAINERS: Separate out sched_ext tree (cherry picked from commit e15b3512759d5f91753bbb86b3ee1c03ecffbadd) --- MAINTAINERS | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 4ce610639fa15..dc42e516683af 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -19614,8 +19614,6 @@ R: Ben Segall (CONFIG_CFS_BANDWIDTH) R: Mel Gorman (CONFIG_NUMA_BALANCING) R: Daniel Bristot de Oliveira (SCHED_DEADLINE) R: Valentin Schneider (TOPOLOGY) -R: Tejun Heo (SCHED_EXT) -R: David Vernet (SCHED_EXT) L: linux-kernel@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git sched/core @@ -19624,7 +19622,18 @@ F: include/linux/sched.h F: include/linux/wait.h F: include/uapi/linux/sched.h F: kernel/sched/ + +SCHEDULER - SCHED_EXT +R: Tejun Heo +R: David Vernet +L: linux-kernel@vger.kernel.org +S: Maintained +T: git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext.git +F: include/linux/sched/ext.h +F: kernel/sched/ext.h +F: kernel/sched/ext.c F: tools/sched_ext/ +F: tools/testing/selftests/sched_ext SCSI LIBSAS SUBSYSTEM R: John Garry From 0a3a02cf8381476f0eb13926d75bbcd16a184914 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 30 Apr 2024 11:39:10 -1000 Subject: [PATCH 596/606] MAINTAINERS: Add link to SCX repo (cherry picked from commit d22cb10ac644ea6cba95ed71a8b12127bbaf0369) --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index dc42e516683af..25ffaf595ec25 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -19628,6 +19628,7 @@ R: Tejun Heo R: David Vernet L: linux-kernel@vger.kernel.org S: Maintained +W: https://github.com/sched-ext/scx T: git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext.git F: include/linux/sched/ext.h F: kernel/sched/ext.h From e3502a7c77ad78870b046e9482a2c6951aa6a9ec Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 1 May 2024 04:50:54 -1000 Subject: [PATCH 597/606] scx_qmap: monitor_cpuperf() also needs __COMPAT_HAS_CPUMASKS test With this and the latest libbpf fix which hasn't landed yet, scx_qmap can run on compat-base too. (cherry picked from commit 0421c75aca7048a13ad9a9e712fa68205046fde5) --- tools/sched_ext/scx_qmap.bpf.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/sched_ext/scx_qmap.bpf.c b/tools/sched_ext/scx_qmap.bpf.c index 832fa87da6403..977c7cff7b343 100644 --- a/tools/sched_ext/scx_qmap.bpf.c +++ b/tools/sched_ext/scx_qmap.bpf.c @@ -535,13 +535,16 @@ struct { */ static void monitor_cpuperf(void) { - u32 zero = 0; - u32 nr_cpu_ids = scx_bpf_nr_cpu_ids(); + u32 zero = 0, nr_cpu_ids; u64 cap_sum = 0, cur_sum = 0, cur_min = SCX_CPUPERF_ONE, cur_max = 0; u64 target_sum = 0, target_min = SCX_CPUPERF_ONE, target_max = 0; const struct cpumask *online; int i, nr_online_cpus = 0; + if (!__COMPAT_HAS_CPUMASKS) + return; + + nr_cpu_ids = scx_bpf_nr_cpu_ids(); online = scx_bpf_get_online_cpumask(); bpf_for(i, 0, nr_cpu_ids) { From faa06855e6b0b04458ce4e5800c05777baaa4dc6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 1 May 2024 15:32:29 -1000 Subject: [PATCH 598/606] bstr - String formatting part in bpf_exit_bstr_common() is factored into bstr_format(). The exit part is moved to the callers. - bstr helpers in common.bpf.h don't necessarily have much to do with exiting. Renamed. (cherry picked from commit dbcf7fb44c5d607d45d5b59e9ddac639adf07214) --- kernel/sched/ext.c | 52 +++++++++++++----------- tools/sched_ext/include/scx/common.bpf.h | 12 +++--- 2 files changed, 34 insertions(+), 30 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index b09f6b154ed6c..01f80030d3949 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -6144,58 +6144,51 @@ __bpf_kfunc void bpf_iter_scx_dsq_destroy(struct bpf_iter_scx_dsq *it) __bpf_kfunc_end_defs(); -struct scx_bpf_error_bstr_bufs { +struct scx_bpf_bstr_bufs { u64 data[MAX_BPRINTF_VARARGS]; char msg[SCX_EXIT_MSG_LEN]; }; -static DEFINE_PER_CPU(struct scx_bpf_error_bstr_bufs, scx_bpf_error_bstr_bufs); +static DEFINE_PER_CPU(struct scx_bpf_bstr_bufs, scx_bpf_bstr_bufs); -static void bpf_exit_bstr_common(enum scx_exit_kind kind, s64 exit_code, - char *fmt, unsigned long long *data, - u32 data__sz) +static char *bstr_format(char *fmt, unsigned long long *data, u32 data__sz) { struct bpf_bprintf_data bprintf_data = { .get_bin_args = true }; - struct scx_bpf_error_bstr_bufs *bufs; - unsigned long flags; + struct scx_bpf_bstr_bufs *bufs = this_cpu_ptr(&scx_bpf_bstr_bufs); int ret; - local_irq_save(flags); - bufs = this_cpu_ptr(&scx_bpf_error_bstr_bufs); + lockdep_assert_irqs_disabled(); if (data__sz % 8 || data__sz > MAX_BPRINTF_VARARGS * 8 || (data__sz && !data)) { scx_ops_error("invalid data=%p and data__sz=%u", (void *)data, data__sz); - goto out_restore; + return ERR_PTR(-EINVAL); } ret = copy_from_kernel_nofault(bufs->data, data, data__sz); - if (ret) { + if (ret < 0) { scx_ops_error("failed to read data fields (%d)", ret); - goto out_restore; + return ERR_PTR(ret); } ret = bpf_bprintf_prepare(fmt, UINT_MAX, bufs->data, data__sz / 8, &bprintf_data); if (ret < 0) { - scx_ops_error("failed to format prepration (%d)", ret); - goto out_restore; + scx_ops_error("format preparation failed (%d)", ret); + return ERR_PTR(ret); } ret = bstr_printf(bufs->msg, sizeof(bufs->msg), fmt, bprintf_data.bin_args); bpf_bprintf_cleanup(&bprintf_data); if (ret < 0) { - scx_ops_error("scx_ops_error(\"%s\", %p, %u) failed to format", + scx_ops_error("(\"%s\", %p, %u) failed to format", fmt, data, data__sz); - goto out_restore; + return ERR_PTR(ret); } - scx_ops_exit_kind(kind, exit_code, "%s", bufs->msg); -out_restore: - local_irq_restore(flags); - + return bufs->msg; } __bpf_kfunc_start_defs(); @@ -6213,8 +6206,14 @@ __bpf_kfunc_start_defs(); __bpf_kfunc void scx_bpf_exit_bstr(s64 exit_code, char *fmt, unsigned long long *data, u32 data__sz) { - bpf_exit_bstr_common(SCX_EXIT_UNREG_BPF, exit_code, fmt, data, - data__sz); + unsigned long flags; + char *msg; + + local_irq_save(flags); + msg = bstr_format(fmt, data, data__sz); + if (!IS_ERR(msg)) + scx_ops_exit_kind(SCX_EXIT_UNREG_BPF, exit_code, fmt, "%s", msg); + local_irq_restore(flags); } /** @@ -6229,9 +6228,14 @@ __bpf_kfunc void scx_bpf_exit_bstr(s64 exit_code, char *fmt, __bpf_kfunc void scx_bpf_error_bstr(char *fmt, unsigned long long *data, u32 data__sz) { + unsigned long flags; + char *msg; - bpf_exit_bstr_common(SCX_EXIT_ERROR_BPF, 0, fmt, data, - data__sz); + local_irq_save(flags); + msg = bstr_format(fmt, data, data__sz); + if (!IS_ERR(msg)) + scx_ops_exit_kind(SCX_EXIT_ERROR_BPF, 0, fmt, "%s", msg); + local_irq_restore(flags); } /** diff --git a/tools/sched_ext/include/scx/common.bpf.h b/tools/sched_ext/include/scx/common.bpf.h index 20ebb407148ea..80287e4896b23 100644 --- a/tools/sched_ext/include/scx/common.bpf.h +++ b/tools/sched_ext/include/scx/common.bpf.h @@ -78,14 +78,14 @@ static inline bool scx_bpf_consume_task(struct bpf_iter_scx_dsq *it, } static inline __attribute__((format(printf, 1, 2))) -void ___scx_bpf_exit_format_checker(const char *fmt, ...) {} +void ___scx_bpf_bstr_format_checker(const char *fmt, ...) {} /* * Helper macro for initializing the fmt and variadic argument inputs to both * bstr exit kfuncs. Callers to this function should use ___fmt and ___param to * refer to the initialized list of inputs to the bstr kfunc. */ -#define scx_bpf_exit_preamble(fmt, args...) \ +#define scx_bpf_bstr_preamble(fmt, args...) \ static char ___fmt[] = fmt; \ /* \ * Note that __param[] must have at least one \ @@ -105,9 +105,9 @@ void ___scx_bpf_exit_format_checker(const char *fmt, ...) {} */ #define scx_bpf_exit(code, fmt, args...) \ ({ \ - scx_bpf_exit_preamble(fmt, args) \ + scx_bpf_bstr_preamble(fmt, args) \ scx_bpf_exit_bstr(code, ___fmt, ___param, sizeof(___param)); \ - ___scx_bpf_exit_format_checker(fmt, ##args); \ + ___scx_bpf_bstr_format_checker(fmt, ##args); \ }) /* @@ -118,9 +118,9 @@ void ___scx_bpf_exit_format_checker(const char *fmt, ...) {} */ #define scx_bpf_error(fmt, args...) \ ({ \ - scx_bpf_exit_preamble(fmt, args) \ + scx_bpf_bstr_preamble(fmt, args) \ scx_bpf_error_bstr(___fmt, ___param, sizeof(___param)); \ - ___scx_bpf_exit_format_checker(fmt, ##args); \ + ___scx_bpf_bstr_format_checker(fmt, ##args); \ }) #define BPF_STRUCT_OPS(name, args...) \ From 7f87b3ffd3da76c238c8eb6effe5fb26f8b5f7ab Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 1 May 2024 15:32:30 -1000 Subject: [PATCH 599/606] implement-ops Implement ops.dump(), dump_cpu() and dump_task() which allows the BPF scheduler to include scheduler specific information in debug dump. The below example output from scx_qmap shows full dump of qmap's FIFO queues and per-CPU and per-task contexts. $ scx_qmap -e 3 stats : enq=9 dsp=0 delta=9 reenq=5 deq=9 core=0 exp=0 cpuperf: cur min/avg/max=0/0/0 target min/avg/max=0/0/0 DEBUG DUMP ================================================================================ scx_qmap[1627] triggered exit kind 1025: scx_bpf_error (test triggering error) Backtrace: scx_bpf_error_bstr+0x60/0x80 bpf_prog_5ab320c9dc9e846d_qmap_enqueue+0xd1/0x283 QMAP FIFO[0]: QMAP FIFO[1]: QMAP FIFO[2]: 1581 1583 1584 1585 1586 1588 1593 1594 1595 QMAP FIFO[3]: QMAP FIFO[4]: CPU states ---------- CPU 0 : nr_run=2 flags=0x0 cpu_rel=1 ops_qseq=4 pnt_seq=12 curr=stress[1590] class=ext_sched_class QMAP: dsp_idx=0 dsp_cnt=0 avg_weight=0 cpuperf_target=0 *R stress[1590] +0ms scx_state/flags=3/0x5 dsq_flags=0x0 ops_state/qseq=0/0 sticky/holding_cpu=-1/-1 dsq_id=(n/a) cpus=ff QMAP: force_local=0 core_sched_seq=0 R stress[1585] +0ms scx_state/flags=3/0x1 dsq_flags=0x0 ops_state/qseq=2/3 sticky/holding_cpu=-1/-1 dsq_id=(n/a) cpus=ff QMAP: force_local=0 core_sched_seq=3 irqentry_exit+0x31/0x80 sysvec_apic_timer_interrupt+0x44/0x80 asm_sysvec_apic_timer_interrupt+0x1b/0x20 ... (cherry picked from commit ec7e3b0463e14d0bbf03967d07c0a679a764a4e2) --- kernel/sched/ext.c | 214 ++++++++++++++++++++--- tools/sched_ext/include/scx/common.bpf.h | 12 ++ tools/sched_ext/include/scx/compat.h | 10 ++ tools/sched_ext/scx_qmap.bpf.c | 55 ++++++ tools/sched_ext/scx_qmap.c | 6 +- 5 files changed, 273 insertions(+), 24 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 01f80030d3949..d3c2a2aa0525c 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -176,7 +176,18 @@ struct scx_cpu_release_args { enum scx_cpu_preempt_reason reason; /* the task that's going to be scheduled on the CPU */ - struct task_struct *task; + struct task_struct *task; +}; + +/* + * Informational context provided to dump operations. + */ +struct scx_dump_ctx { + enum scx_exit_kind kind; + s64 exit_code; + const char *reason; + u64 at_ns; + u64 at_jiffies; }; /** @@ -475,6 +486,36 @@ struct sched_ext_ops { */ void (*disable)(struct task_struct *p); + /** + * dump - Dump BPF scheduler state on error + * @ctx: debug dump context + * + * Use scx_bpf_dump() to generate BPF scheduler specific debug dump. + */ + void (*dump)(struct scx_dump_ctx *ctx); + + /** + * dump_cpu - Dump BPF scheduler state for a CPU on error + * @ctx: debug dump context + * @cpu: CPU to generate debug dump for + * @idle: @cpu is currently idle without any runnable tasks + * + * Use scx_bpf_dump() to generate BPF scheduler specific debug dump for + * @cpu. If @idle is %true and this operation doesn't produce any + * output, @cpu is skipped for dump. + */ + void (*dump_cpu)(struct scx_dump_ctx *ctx, s32 cpu, bool idle); + + /** + * dump_task - Dump BPF scheduler state for a runnable task on error + * @ctx: debug dump context + * @p: runnable task to generate debug dump for + * + * Use scx_bpf_dump() to generate BPF scheduler specific debug dump for + * @p. + */ + void (*dump_task)(struct scx_dump_ctx *ctx, struct task_struct *p); + #ifdef CONFIG_EXT_GROUP_SCHED /** * cgroup_init - Initialize a cgroup @@ -911,6 +952,12 @@ struct scx_dsp_ctx { static DEFINE_PER_CPU(struct scx_dsp_ctx, scx_dsp_ctx); +/* ops debug dump */ +static s32 scx_dump_cpu = -1; +static char scx_dump_last; +static struct seq_buf *scx_dump_seq_buf; +static const char *scx_dump_prefix; + /* /sys/kernel/sched_ext interface */ static struct kset *scx_kset; static struct kobject *scx_root_kobj; @@ -4439,8 +4486,29 @@ static void scx_ops_disable(enum scx_exit_kind kind) schedule_scx_ops_disable_work(); } -static void scx_dump_task(struct seq_buf *s, struct task_struct *p, char marker, - unsigned long now) +static void ops_dump_init(struct seq_buf *s, const char *prefix) +{ + lockdep_assert_irqs_disabled(); + + scx_dump_cpu = smp_processor_id(); /* allow scx_bpf_dump() */ + scx_dump_last = '\0'; /* nothing printed yet */ + scx_dump_seq_buf = s; + scx_dump_prefix = prefix; +} + +static void ops_dump_exit(void) +{ + /* + * If something was printed but the last line wasn't terminated, add a + * new line. + */ + if (scx_dump_last != '\0' && scx_dump_last != '\n') + seq_buf_putc(scx_dump_seq_buf, '\n'); + scx_dump_cpu = -1; +} + +static void scx_dump_task(struct seq_buf *s, struct scx_dump_ctx *dctx, + struct task_struct *p, char marker) { static unsigned long bt[SCX_EXIT_BT_LEN]; char dsq_id_buf[19] = "(n/a)"; @@ -4455,7 +4523,7 @@ static void scx_dump_task(struct seq_buf *s, struct task_struct *p, char marker, seq_buf_printf(s, "\n %c%c %s[%d] %+ldms\n", marker, task_state_to_char(p), p->comm, p->pid, - jiffies_delta_msecs(p->scx.runnable_at, now)); + jiffies_delta_msecs(p->scx.runnable_at, dctx->at_jiffies)); seq_buf_printf(s, " scx_state/flags=%u/0x%x dsq_flags=0x%x ops_state/qseq=%lu/%lu\n", scx_get_task_state(p), p->scx.flags & ~SCX_TASK_STATE_MASK, @@ -4464,19 +4532,33 @@ static void scx_dump_task(struct seq_buf *s, struct task_struct *p, char marker, ops_state >> SCX_OPSS_QSEQ_SHIFT); seq_buf_printf(s, " sticky/holding_cpu=%d/%d dsq_id=%s\n", p->scx.sticky_cpu, p->scx.holding_cpu, dsq_id_buf); - seq_buf_printf(s, " cpus=%*pb\n\n", cpumask_pr_args(p->cpus_ptr)); + seq_buf_printf(s, " cpus=%*pb\n", cpumask_pr_args(p->cpus_ptr)); - bt_len = stack_trace_save_tsk(p, bt, SCX_EXIT_BT_LEN, 1); + if (SCX_HAS_OP(dump_task)) { + ops_dump_init(s, " "); + SCX_CALL_OP(SCX_KF_REST, dump_task, dctx, p); + ops_dump_exit(); + } - avail = seq_buf_get_buf(s, &buf); - used = stack_trace_snprint(buf, avail, bt, bt_len, 3); - seq_buf_commit(s, used < avail ? used : -1); + bt_len = stack_trace_save_tsk(p, bt, SCX_EXIT_BT_LEN, 1); + if (bt_len) { + seq_buf_putc(s, '\n'); + avail = seq_buf_get_buf(s, &buf); + used = stack_trace_snprint(buf, avail, bt, bt_len, 3); + seq_buf_commit(s, used < avail ? used : -1); + } } static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len) { - const char trunc_marker[] = "\n\n~~~~ TRUNCATED ~~~~\n"; - unsigned long now = jiffies; + static const char trunc_marker[] = "\n\n~~~~ TRUNCATED ~~~~\n"; + struct scx_dump_ctx dctx = { + .kind = ei->kind, + .exit_code = ei->exit_code, + .reason = ei->reason, + .at_ns = ktime_get_ns(), + .at_jiffies = jiffies, + }; struct seq_buf s; size_t avail, used; char *buf; @@ -4494,45 +4576,82 @@ static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len) used = stack_trace_snprint(buf, avail, ei->bt, ei->bt_len, 1); seq_buf_commit(&s, used < avail ? used : -1); - seq_buf_printf(&s, "\nRunqueue states\n"); - seq_buf_printf(&s, "---------------\n"); + if (SCX_HAS_OP(dump)) { + ops_dump_init(&s, ""); + SCX_CALL_OP(SCX_KF_UNLOCKED, dump, &dctx); + ops_dump_exit(); + } + + seq_buf_printf(&s, "\nCPU states\n"); + seq_buf_printf(&s, "----------\n"); for_each_possible_cpu(cpu) { struct rq *rq = cpu_rq(cpu); struct rq_flags rf; struct task_struct *p; + struct seq_buf ns; + bool idle; rq_lock(rq, &rf); - if (list_empty(&rq->scx.runnable_list) && - rq->curr->sched_class == &idle_sched_class) + idle = list_empty(&rq->scx.runnable_list) && + rq->curr->sched_class == &idle_sched_class; + + if (idle && !SCX_HAS_OP(dump_cpu)) goto next; - seq_buf_printf(&s, "\nCPU %-4d: nr_run=%u flags=0x%x cpu_rel=%d ops_qseq=%lu pnt_seq=%lu\n", + /* + * We don't yet know whether ops.dump_cpu() will produce output + * and we may want to skip the default CPU dump if it doesn't. + * Use a nested seq_buf to generate the standard dump so that we + * can decide whether to commit later. + */ + avail = seq_buf_get_buf(&s, &buf); + seq_buf_init(&ns, buf, avail); + + seq_buf_printf(&ns, "\nCPU %-4d: nr_run=%u flags=0x%x cpu_rel=%d ops_qseq=%lu pnt_seq=%lu\n", cpu, rq->scx.nr_running, rq->scx.flags, rq->scx.cpu_released, rq->scx.ops_qseq, rq->scx.pnt_seq); - seq_buf_printf(&s, " curr=%s[%d] class=%ps\n", + seq_buf_printf(&ns, " curr=%s[%d] class=%ps\n", rq->curr->comm, rq->curr->pid, rq->curr->sched_class); if (!cpumask_empty(rq->scx.cpus_to_kick)) - seq_buf_printf(&s, " cpus_to_kick : %*pb\n", + seq_buf_printf(&ns, " cpus_to_kick : %*pb\n", cpumask_pr_args(rq->scx.cpus_to_kick)); if (!cpumask_empty(rq->scx.cpus_to_kick_if_idle)) - seq_buf_printf(&s, " idle_to_kick : %*pb\n", + seq_buf_printf(&ns, " idle_to_kick : %*pb\n", cpumask_pr_args(rq->scx.cpus_to_kick_if_idle)); if (!cpumask_empty(rq->scx.cpus_to_preempt)) - seq_buf_printf(&s, " cpus_to_preempt: %*pb\n", + seq_buf_printf(&ns, " cpus_to_preempt: %*pb\n", cpumask_pr_args(rq->scx.cpus_to_preempt)); if (!cpumask_empty(rq->scx.cpus_to_wait)) - seq_buf_printf(&s, " cpus_to_wait : %*pb\n", + seq_buf_printf(&ns, " cpus_to_wait : %*pb\n", cpumask_pr_args(rq->scx.cpus_to_wait)); + used = seq_buf_used(&ns); + if (SCX_HAS_OP(dump_cpu)) { + ops_dump_init(&ns, " "); + SCX_CALL_OP(SCX_KF_REST, dump_cpu, &dctx, cpu, idle); + ops_dump_exit(); + } + + /* + * If idle && nothing generated by ops.dump_cpu(), there's + * nothing interesting. Skip. + */ + if (idle && used == seq_buf_used(&ns)) + goto next; + + seq_buf_commit(&s, seq_buf_used(&ns)); + if (seq_buf_has_overflowed(&ns)) + seq_buf_set_overflow(&s); + if (rq->curr->sched_class == &ext_sched_class) - scx_dump_task(&s, rq->curr, '*', now); + scx_dump_task(&s, &dctx, rq->curr, '*'); list_for_each_entry(p, &rq->scx.runnable_list, scx.runnable_node) - scx_dump_task(&s, p, ' ', now); + scx_dump_task(&s, &dctx, p, ' '); next: rq_unlock(rq, &rf); } @@ -6238,6 +6357,54 @@ __bpf_kfunc void scx_bpf_error_bstr(char *fmt, unsigned long long *data, local_irq_restore(flags); } +/** + * scx_bpf_dump - Generate extra debug dump specific to the BPF scheduler + * @fmt: format string + * @data: format string parameters packaged using ___bpf_fill() macro + * @data__sz: @data len, must end in '__sz' for the verifier + * + * To be called through scx_bpf_dump() helper from ops.dump(), dump_cpu() and + * dump_task() to generate extra debug dump specific to the BPF scheduler. + * + * The extra dump may be multiple lines. A single line may be split over + * multiple calls. The last line is automatically terminated. + */ +__bpf_kfunc void scx_bpf_dump_bstr(char *fmt, unsigned long long *data, + u32 data__sz) +{ + struct seq_buf *s = scx_dump_seq_buf; + char *msg, *p; + + if (raw_smp_processor_id() != scx_dump_cpu) { + scx_ops_error("scx_bpf_dump() must only be called from ops.dump() and friends"); + return; + } + + lockdep_assert_irqs_disabled(); + + msg = bstr_format(fmt, data, data__sz); + if (IS_ERR(msg)) { + seq_buf_printf(s, "%s[!] (\"%s\", %p, %u) failed to format\n", + scx_dump_prefix, fmt, data, data__sz); + return; + } + + if (msg[0] == '\0') + return; + + if (scx_dump_last == '\0') { + seq_buf_putc(s, '\n'); + scx_dump_last = '\n'; + } + + for (p = msg; *p != '\0'; p++) { + if (scx_dump_last == '\n') + seq_buf_printf(s, "%s", scx_dump_prefix); + seq_buf_putc(s, *p); + scx_dump_last = *p; + } +} + /** * scx_bpf_cpuperf_cap - Query the maximum relative capacity of a CPU * @cpu: CPU of interest @@ -6556,6 +6723,7 @@ BTF_ID_FLAGS(func, bpf_iter_scx_dsq_next, KF_ITER_NEXT | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_scx_dsq_destroy, KF_ITER_DESTROY) BTF_ID_FLAGS(func, scx_bpf_exit_bstr, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, scx_bpf_error_bstr, KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, scx_bpf_dump_bstr, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, scx_bpf_cpuperf_cap) BTF_ID_FLAGS(func, scx_bpf_cpuperf_cur) BTF_ID_FLAGS(func, scx_bpf_cpuperf_set) diff --git a/tools/sched_ext/include/scx/common.bpf.h b/tools/sched_ext/include/scx/common.bpf.h index 80287e4896b23..2c2b53a080ac2 100644 --- a/tools/sched_ext/include/scx/common.bpf.h +++ b/tools/sched_ext/include/scx/common.bpf.h @@ -45,6 +45,7 @@ struct task_struct *bpf_iter_scx_dsq_next(struct bpf_iter_scx_dsq *it) __ksym __ void bpf_iter_scx_dsq_destroy(struct bpf_iter_scx_dsq *it) __ksym __weak; void scx_bpf_exit_bstr(s64 exit_code, char *fmt, unsigned long long *data, u32 data__sz) __ksym __weak; void scx_bpf_error_bstr(char *fmt, unsigned long long *data, u32 data_len) __ksym; +void scx_bpf_dump_bstr(char *fmt, unsigned long long *data, u32 data_len) __ksym; u32 scx_bpf_cpuperf_cap(s32 cpu) __ksym __weak; u32 scx_bpf_cpuperf_cur(s32 cpu) __ksym __weak; void scx_bpf_cpuperf_set(s32 cpu, u32 perf) __ksym __weak; @@ -123,6 +124,17 @@ void ___scx_bpf_bstr_format_checker(const char *fmt, ...) {} ___scx_bpf_bstr_format_checker(fmt, ##args); \ }) +/* + * scx_bpf_dump() wraps the scx_bpf_dump_bstr() kfunc with variadic arguments + * instead of an array of u64. To be used from ops.dump() and friends. + */ +#define scx_bpf_dump(fmt, args...) \ +({ \ + scx_bpf_bstr_preamble(fmt, args) \ + scx_bpf_dump_bstr(___fmt, ___param, sizeof(___param)); \ + ___scx_bpf_bstr_format_checker(fmt, ##args); \ +}) + #define BPF_STRUCT_OPS(name, args...) \ SEC("struct_ops/"#name) \ BPF_PROG(name, ##args) diff --git a/tools/sched_ext/include/scx/compat.h b/tools/sched_ext/include/scx/compat.h index 7783c82a8a182..b6f30e012c617 100644 --- a/tools/sched_ext/include/scx/compat.h +++ b/tools/sched_ext/include/scx/compat.h @@ -158,6 +158,7 @@ static inline long scx_hotplug_seq(void) * reasonable. * * - ops.tick(): Ignored on older kernels with a warning. + * - ops.dump*(): Ignored on older kernels with a warning. * - ops.exit_dump_len: Cleared to zero on older kernels with a warning. * - ops.hotplug_seq: Ignored on older kernels. */ @@ -184,6 +185,15 @@ static inline long scx_hotplug_seq(void) fprintf(stderr, "WARNING: kernel doesn't support ops.tick()\n"); \ (__skel)->struct_ops.__ops_name->tick = NULL; \ } \ + if (!__COMPAT_struct_has_field("sched_ext_ops", "dump") && \ + ((__skel)->struct_ops.__ops_name->dump || \ + (__skel)->struct_ops.__ops_name->dump_cpu || \ + (__skel)->struct_ops.__ops_name->dump_task)) { \ + fprintf(stderr, "WARNING: kernel doesn't support ops.dump*()\n"); \ + (__skel)->struct_ops.__ops_name->dump = NULL; \ + (__skel)->struct_ops.__ops_name->dump_cpu = NULL; \ + (__skel)->struct_ops.__ops_name->dump_task = NULL; \ + } \ SCX_BUG_ON(__scx_name##__load((__skel)), "Failed to load skel"); \ }) diff --git a/tools/sched_ext/scx_qmap.bpf.c b/tools/sched_ext/scx_qmap.bpf.c index 977c7cff7b343..2faff64306ba5 100644 --- a/tools/sched_ext/scx_qmap.bpf.c +++ b/tools/sched_ext/scx_qmap.bpf.c @@ -40,6 +40,7 @@ const volatile u32 dsp_batch; const volatile bool print_shared_dsq; const volatile char exp_prefix[17]; const volatile s32 disallow_tgid; +const volatile bool suppress_dump; const volatile bool switch_partial; u32 test_error_cnt; @@ -462,6 +463,57 @@ s32 BPF_STRUCT_OPS(qmap_init_task, struct task_struct *p, return -ENOMEM; } +void BPF_STRUCT_OPS(qmap_dump, struct scx_dump_ctx *dctx) +{ + s32 i, pid; + + if (suppress_dump) + return; + + bpf_for(i, 0, 5) { + void *fifo; + + if (!(fifo = bpf_map_lookup_elem(&queue_arr, &i))) + return; + + scx_bpf_dump("QMAP FIFO[%d]:", i); + bpf_repeat(4096) { + if (bpf_map_pop_elem(fifo, &pid)) + break; + scx_bpf_dump(" %d", pid); + } + scx_bpf_dump("\n"); + } +} + +void BPF_STRUCT_OPS(qmap_dump_cpu, struct scx_dump_ctx *dctx, s32 cpu, bool idle) +{ + u32 zero = 0; + struct cpu_ctx *cpuc; + + if (suppress_dump || idle) + return; + if (!(cpuc = bpf_map_lookup_percpu_elem(&cpu_ctx_stor, &zero, cpu))) + return; + + scx_bpf_dump("QMAP: dsp_idx=%llu dsp_cnt=%llu avg_weight=%u cpuperf_target=%u", + cpuc->dsp_idx, cpuc->dsp_cnt, cpuc->avg_weight, + cpuc->cpuperf_target); +} + +void BPF_STRUCT_OPS(qmap_dump_task, struct scx_dump_ctx *dctx, struct task_struct *p) +{ + struct task_ctx *taskc; + + if (suppress_dump) + return; + if (!(taskc = bpf_task_storage_get(&task_ctx_stor, p, 0, 0))) + return; + + scx_bpf_dump("QMAP: force_local=%d core_sched_seq=%llu", + taskc->force_local, taskc->core_sched_seq); +} + /* * Print out the online and possible CPU map using bpf_printk() as a * demonstration of using the cpumask kfuncs and ops.cpu_on/offline(). @@ -664,6 +716,9 @@ SCX_OPS_DEFINE(qmap_ops, .core_sched_before = (void *)qmap_core_sched_before, .cpu_release = (void *)qmap_cpu_release, .init_task = (void *)qmap_init_task, + .dump = (void *)qmap_dump, + .dump_cpu = (void *)qmap_dump_cpu, + .dump_task = (void *)qmap_dump_task, .cpu_online = (void *)qmap_cpu_online, .cpu_offline = (void *)qmap_cpu_offline, .init = (void *)qmap_init, diff --git a/tools/sched_ext/scx_qmap.c b/tools/sched_ext/scx_qmap.c index eb221f0a05800..91a2c294a0ac8 100644 --- a/tools/sched_ext/scx_qmap.c +++ b/tools/sched_ext/scx_qmap.c @@ -33,6 +33,7 @@ const char help_fmt[] = " (e.g. match shell on a loaded system)\n" " -d PID Disallow a process from switching into SCHED_EXT (-1 for self)\n" " -D LEN Set scx_exit_info.dump buffer length\n" +" -S Suppress qmap-specific debug dump\n" " -p Switch only tasks on SCHED_EXT policy intead of all\n" " -v Print libbpf debug messages\n" " -h Display this help and exit\n"; @@ -64,7 +65,7 @@ int main(int argc, char **argv) skel = SCX_OPS_OPEN(qmap_ops, scx_qmap); - while ((opt = getopt(argc, argv, "s:e:t:T:l:b:PE:d:D:pvh")) != -1) { + while ((opt = getopt(argc, argv, "s:e:t:T:l:b:PE:d:D:Spvh")) != -1) { switch (opt) { case 's': skel->rodata->slice_ns = strtoull(optarg, NULL, 0) * 1000; @@ -99,6 +100,9 @@ int main(int argc, char **argv) case 'D': skel->struct_ops.qmap_ops->exit_dump_len = strtoul(optarg, NULL, 0); break; + case 'S': + skel->rodata->suppress_dump = true; + break; case 'p': skel->rodata->switch_partial = true; skel->struct_ops.qmap_ops->flags |= __COMPAT_SCX_OPS_SWITCH_PARTIAL; From a1df227bd32ea4f4d5855263f5d07068c5da7e7a Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Fri, 3 May 2024 17:09:29 +0200 Subject: [PATCH 600/606] ci: enable kvm support in the github workflow Enable kvm acceleration and qemu microvm to speed up CI tests inside virtme-ng. Also adjust the regex to catch potential errors excluding a false positive triggered by the new configuration. Link: https://github.blog/changelog/2023-02-23-hardware-accelerated-android-virtualization-on-actions-windows-and-linux-larger-hosted-runners/ Link: https://github.blog/2024-01-17-github-hosted-runners-double-the-power-for-open-source/ Signed-off-by: Andrea Righi (cherry picked from commit 0caf6fc3f2143b9e62698e1b3f2ec50c30be8ed6) --- .github/workflows/run-schedulers | 13 +++++++------ .github/workflows/test-kernel.yml | 6 ++++++ 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/.github/workflows/run-schedulers b/.github/workflows/run-schedulers index ef353157d3a04..fc1f92270f597 100755 --- a/.github/workflows/run-schedulers +++ b/.github/workflows/run-schedulers @@ -31,7 +31,7 @@ function runtest() { rm -f /tmp/output (timeout --foreground --preserve-status ${GUEST_TIMEOUT} \ - vng --force-9p --disable-microvm --verbose -- \ + vng --force-9p --verbose -- \ "timeout --foreground --preserve-status ${TEST_TIMEOUT} ${bin}" \ 2>&1 Date: Sat, 4 May 2024 06:05:17 -1000 Subject: [PATCH 601/606] [PATCH] SCX: scx_ops_bypass() can't grab a mutex This fixes a bug introduced by 34c60754d33b ("scx: Fix scx_ops_bypass_depth going out of sync"). It added mutex_lock(scx_ops_enable_mutex) to scx_ops_bypass(), which can obviously lead to problem if scx_ops_bypass() is invoked from the disable path while someone is holding the mutex and scheduled out. [ 246.906433] INFO: task sched_ext_ops_h:1531 blocked for more than 122 seconds. [ 246.907862] Not tainted 6.9.0-rc5-work-02067-g7f2a15fc2eb2-dirty #34 [ 246.909213] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 246.910973] task:sched_ext_ops_h state:D stack:0 pid:1531 tgid:1531 ppid:2 flags:0x00004000 [ 246.913004] Sched_ext: qmap (enabled+all) [ 246.913007] Call Trace: [ 246.914486] [ 246.914956] __schedule+0x750/0xcd0 [ 246.915770] schedule+0x41/0xb0 [ 246.916503] schedule_preempt_disabled+0x15/0x20 [ 246.917542] __mutex_lock+0x511/0xb00 [ 246.918379] ? scx_ops_bypass+0x54/0x1a0 [ 246.919232] mutex_lock_nested+0x1b/0x20 [ 246.920110] scx_ops_bypass+0x54/0x1a0 [ 246.920961] ? irqentry_exit+0x59/0x80 [ 246.921814] ? sysvec_irq_work+0x41/0x80 [ 246.922705] scx_ops_disable_workfn+0xd1/0x920 [ 246.923714] ? lock_acquire+0xcb/0x230 [ 246.924567] ? kthread_worker_fn+0x80/0x2a0 [ 246.925504] ? lock_release+0xd8/0x2d0 [ 246.926355] ? kthread_worker_fn+0xbe/0x2a0 [ 246.927262] kthread_worker_fn+0x10a/0x2a0 [ 246.928182] ? scx_dump_task+0x2f0/0x2f0 [ 246.929080] kthread+0xec/0x110 [ 246.929814] ? __kthread_init_worker+0x100/0x100 [ 246.930855] ? kthread_blkcg+0x40/0x40 [ 246.931718] ret_from_fork+0x36/0x40 [ 246.932545] ? kthread_blkcg+0x40/0x40 [ 246.933411] ret_from_fork_asm+0x11/0x20 [ 246.934311] [ 246.934798] [ 246.934798] Showing all locks held in the system: [ 246.936169] 1 lock held by khungtaskd/60: [ 246.937081] #0: ffffffff8335b128 (rcu_read_lock){....}-{1:2}, at: debug_show_all_locks+0x30/0x190 [ 246.939051] 1 lock held by scx_qmap/1529: [ 246.939963] 1 lock held by sched_ext_ops_h/1531: [ 246.940999] #0: ffffffff83272e28 (scx_ops_enable_mutex){+.+.}-{3:3}, at: scx_ops_bypass+0x54/0x1a0 There's no reason to grab the mutex there. We can just test scx_enabled() without holding the mutex. Drop the broken locking. (cherry picked from commit a95e0bf44a0c986ce7620a9dd0bf336999e385cc) --- kernel/sched/ext.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index d3c2a2aa0525c..1a531a84afcd3 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -4213,9 +4213,13 @@ static void scx_ops_bypass(bool bypass) return; } - mutex_lock(&scx_ops_enable_mutex); + /* + * We need to guarantee that no tasks are on the BPF scheduler while + * bypassing. Either we see enabled or the enable path sees the + * increased bypass_depth before moving tasks to SCX. + */ if (!scx_enabled()) - goto out_unlock; + return; /* * No task property is changing. We just need to make sure all currently @@ -4254,9 +4258,6 @@ static void scx_ops_bypass(bool bypass) /* kick to restore ticks */ resched_cpu(cpu); } - -out_unlock: - mutex_unlock(&scx_ops_enable_mutex); } static void free_exit_info(struct scx_exit_info *ei) From 46e926f1aa342f5b12ecbb90b41ba8bb27f47a3b Mon Sep 17 00:00:00 2001 From: David Vernet Date: Fri, 3 May 2024 17:34:34 -0500 Subject: [PATCH 602/606] scx: Dispatch locally until ops.cpu_online() invoked In sched_ext, there's currently a corner case on the hotplug path wherein a CPU may be onlined and begin scheduling _before_ we've invoked ops.cpu_online() to the scheduler. This can cause confusion by e.g. causing a scheduler to error out if it receives an ops.dispatch() call and accidentally dispatches to an invalid DSQ. To avoid this scenario, this patch adds a per-CPU boolean variable that tracks when a CPU has been onlined from the perspective of sched_ext. Note that this patch does not prevent _any_ callback from being invoked on that CPU. It simply prevents us from invoking ops.dispatch(), and forces us to dispatch directly to the local DSQ. We can still, however, receive e.g. ops.runnable() if a per-CPU task is dispatched to the local DSQ of the onlined CPU before the hotplug event is received. Signed-off-by: David Vernet (cherry picked from commit 57845b39e8bb2b9d4daa82332bab9cfa62da5e09) --- kernel/sched/ext.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 1a531a84afcd3..2699b7328a72e 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -920,6 +920,16 @@ static unsigned long __percpu *scx_kick_cpus_pnt_seqs; */ static DEFINE_PER_CPU(struct task_struct *, direct_dispatch_task); +/* + * Has the current CPU been onlined from the perspective of scx? + * + * A hotplugged CPU may begin scheduling tasks before the core scheduler will + * call into rq_online_scx(). Track whether we've had a chance to invoke + * ops.cpu_online() so we can dispatch locally until the scheduler knows about + * the hotplug event. + */ +static DEFINE_PER_CPU(bool, scx_cpu_online); + /* dispatch queues */ static struct scx_dispatch_q __cacheline_aligned_in_smp scx_dsq_global; @@ -2561,7 +2571,8 @@ static int balance_one(struct rq *rq, struct task_struct *prev, if (consume_dispatch_q(rq, rf, &scx_dsq_global)) goto has_tasks; - if (!SCX_HAS_OP(dispatch) || scx_ops_bypassing()) + if (!SCX_HAS_OP(dispatch) || scx_ops_bypassing() || + unlikely(!*this_cpu_ptr(&scx_cpu_online))) goto out; dspc->rq = rq; @@ -3202,10 +3213,12 @@ static void rq_online_scx(struct rq *rq, enum rq_onoff_reason reason) { if (reason == RQ_ONOFF_HOTPLUG) handle_hotplug(rq, true); + __this_cpu_write(scx_cpu_online, true); } static void rq_offline_scx(struct rq *rq, enum rq_onoff_reason reason) { + __this_cpu_write(scx_cpu_online, false); if (reason == RQ_ONOFF_HOTPLUG) handle_hotplug(rq, false); } From 99903a15e8b5dcd5580c7cb3568b0b0ca5d6941e Mon Sep 17 00:00:00 2001 From: David Vernet Date: Mon, 6 May 2024 10:35:00 -0500 Subject: [PATCH 603/606] scx: Use per-cpu vars to CPU lifecycle In commit 57845b39e8bb ("scx: Dispatch locally until ops.cpu_online() invoked"), we updated ext.c to only issue dispatch callbacks if we know that a CPU has been fully onlined. There were a couple of small issues with this commit, including: 1. The documentation wasn't quite right -- it said that we choose to dispatch locally, but that was a stale comment. 2. We now have two places where we're checking CPU online / offline status, which is unnecessarily confusing. scx_cpu_online matches exactly where the hotplug callbacks are invoked, so we can just use those everywhere. Note that we can still invoke callbacks for e.g. per-CPU tasks that run on onlining CPUs before the CPU has been fully onlined. We'll update our documenation in the next patch to clarify this. Signed-off-by: David Vernet (cherry picked from commit ed5ff6fbaf213efe15a99fa4330e9f81c0c60432) --- kernel/sched/ext.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 2699b7328a72e..ad9c23dadc475 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -925,8 +925,8 @@ static DEFINE_PER_CPU(struct task_struct *, direct_dispatch_task); * * A hotplugged CPU may begin scheduling tasks before the core scheduler will * call into rq_online_scx(). Track whether we've had a chance to invoke - * ops.cpu_online() so we can dispatch locally until the scheduler knows about - * the hotplug event. + * ops.cpu_online() so we can skip invoking ops.enqueue() or ops.dispatch() on + * that CPU until the scheduler knows about the hotplug event. */ static DEFINE_PER_CPU(bool, scx_cpu_online); @@ -1785,11 +1785,7 @@ static void direct_dispatch(struct task_struct *p, u64 enq_flags) static bool test_rq_online(struct rq *rq) { -#ifdef CONFIG_SMP - return rq->online; -#else - return true; -#endif + return per_cpu(scx_cpu_online, cpu_of(rq)); } static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags, @@ -2572,7 +2568,7 @@ static int balance_one(struct rq *rq, struct task_struct *prev, goto has_tasks; if (!SCX_HAS_OP(dispatch) || scx_ops_bypassing() || - unlikely(!*this_cpu_ptr(&scx_cpu_online))) + unlikely(!test_rq_online(rq))) goto out; dspc->rq = rq; @@ -3213,12 +3209,12 @@ static void rq_online_scx(struct rq *rq, enum rq_onoff_reason reason) { if (reason == RQ_ONOFF_HOTPLUG) handle_hotplug(rq, true); - __this_cpu_write(scx_cpu_online, true); + per_cpu(scx_cpu_online, cpu_of(rq)) = true; } static void rq_offline_scx(struct rq *rq, enum rq_onoff_reason reason) { - __this_cpu_write(scx_cpu_online, false); + per_cpu(scx_cpu_online, cpu_of(rq)) = false; if (reason == RQ_ONOFF_HOTPLUG) handle_hotplug(rq, false); } From c964690abbd20316a0720a7ee0acafeae5b70ae9 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Mon, 6 May 2024 13:16:06 -0500 Subject: [PATCH 604/606] scx: Document semantics for ops during hotplug events During hotplug events, ext.c prevents certain callbacks and kfuncs from being invoked on CPUs. For example, we don't invoke ops.enqueue() or ops.dispatch() on an onlining CPU that didn't associate itself with a DSQ on the init path. That said, there are _some_ semantics we do which may confuse schedulers. For example, we will invoke ops.runnable() on a per-CPU kthread that's required for CPU hotplug to run (cpuhp), but we then won't invoke ops.enqueue() on it. While this isn't that weird, it's probably best that we document the semantics. This commit does that. Signed-off-by: David Vernet (cherry picked from commit 1bb91a4ad2e5ac90c2988692d703008f713ee6dc) --- kernel/sched/ext.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index ad9c23dadc475..9e1c3475ed68c 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -303,9 +303,10 @@ struct sched_ext_ops { * * This and ->enqueue() are related but not coupled. This operation * notifies @p's state transition and may not be followed by ->enqueue() - * e.g. when @p is being dispatched to a remote CPU. Likewise, a task - * may be ->enqueue()'d without being preceded by this operation e.g. - * after exhausting its slice. + * e.g. when @p is being dispatched to a remote CPU, or when @p is + * being enqueued on a CPU experiencing a hotplug event. Likewise, a + * task may be ->enqueue()'d without being preceded by this operation + * e.g. after exhausting its slice. */ void (*runnable)(struct task_struct *p, u64 enq_flags); @@ -597,8 +598,8 @@ struct sched_ext_ops { * cpu_online - A CPU became online * @cpu: CPU which just came up * - * @cpu just came online. @cpu doesn't call ops.enqueue() or run tasks - * associated with other CPUs beforehand. + * @cpu just came online. @cpu will not call ops.enqueue() or + * ops.dispatch(), nor run tasks associated with other CPUs beforehand. */ void (*cpu_online)(s32 cpu); @@ -606,8 +607,8 @@ struct sched_ext_ops { * cpu_offline - A CPU is going offline * @cpu: CPU which is going offline * - * @cpu is going offline. @cpu doesn't call ops.enqueue() or run tasks - * associated with other CPUs afterwards. + * @cpu is going offline. @cpu will not call ops.enqueue() or + * ops.dispatch(), nor run tasks associated with other CPUs afterwards. */ void (*cpu_offline)(s32 cpu); From ef81163ecf856b9a3263e8447feea2cc72bf15d7 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Mon, 6 May 2024 16:16:57 -0500 Subject: [PATCH 605/606] scx: Fix exit/error bstr formatting In a recent commit, we refactored bpf_exit_bstr_common() into bstr_format(). This unfortunately regressed printing output passed from BPF, as we incorrectly invoked bpf_exit_bstr_common() in the caller. Fix it. Signed-off-by: David Vernet (cherry picked from commit b46212c77e54e5745d82d0663a6e8015050d8185) --- kernel/sched/ext.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 9e1c3475ed68c..f805b8ee6474b 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -6342,7 +6342,7 @@ __bpf_kfunc void scx_bpf_exit_bstr(s64 exit_code, char *fmt, local_irq_save(flags); msg = bstr_format(fmt, data, data__sz); if (!IS_ERR(msg)) - scx_ops_exit_kind(SCX_EXIT_UNREG_BPF, exit_code, fmt, "%s", msg); + scx_ops_exit_kind(SCX_EXIT_UNREG_BPF, exit_code, "%s", msg); local_irq_restore(flags); } @@ -6364,7 +6364,7 @@ __bpf_kfunc void scx_bpf_error_bstr(char *fmt, unsigned long long *data, local_irq_save(flags); msg = bstr_format(fmt, data, data__sz); if (!IS_ERR(msg)) - scx_ops_exit_kind(SCX_EXIT_ERROR_BPF, 0, fmt, "%s", msg); + scx_ops_exit_kind(SCX_EXIT_ERROR_BPF, 0, "%s", msg); local_irq_restore(flags); } From a76b58e6619fb5ea67c71415cb4efe65d915d8b8 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Wed, 8 May 2024 00:29:36 -0500 Subject: [PATCH 606/606] v6.9-scx1-rc7 Signed-off-by: David Vernet --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 8bd7a6d4941fd..f8278d07aeab4 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 9 SUBLEVEL = 0 -EXTRAVERSION = -rc7 +EXTRAVERSION = -scx1-rc7 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION*